From: Alistair Francis <alistair.francis@wdc.com>

The following changes since commit 9cc1bf1ebca550f8d90f967ccd2b6d2e00e81387:

  Merge tag 'pull-trivial-patches' of https://gitlab.com/mjt0k/qemu into staging (2023-09-08 10:06:25 -0400)

are available in the Git repository at:

  https://github.com/alistair23/qemu.git tags/pull-riscv-to-apply-20230911

for you to fetch changes up to e7a03409f29e2da59297d55afbaec98c96e43e3a:

  target/riscv: don't read CSR in riscv_csrrw_do64 (2023-09-11 11:45:55 +1000)

----------------------------------------------------------------
First RISC-V PR for 8.2

* Remove 'host' CPU from TCG
* riscv_htif Fixup printing on big endian hosts
* Add zmmul isa string
* Add smepmp isa string
* Fix page_check_range use in fault-only-first
* Use existing lookup tables for MixColumns
* Add RISC-V vector cryptographic instruction set support
* Implement WARL behaviour for mcountinhibit/mcounteren
* Add Zihintntl extension ISA string to DTS
* Fix zfa fleq.d and fltq.d
* Fix upper/lower mtime write calculation
* Make rtc variable names consistent
* Use abi type for linux-user target_ucontext
* Add RISC-V KVM AIA Support
* Fix riscv,pmu DT node path in the virt machine
* Update CSR bits name for svadu extension
* Mark zicond non-experimental
* Fix satp_mode_finalize() when satp_mode.supported = 0
* Fix non-KVM --enable-debug build
* Add new extensions to hwprobe
* Use accelerated helper for AES64KS1I
* Allocate itrigger timers only once
* Respect mseccfg.RLB for pmpaddrX changes
* Align the AIA model to v1.0 ratified spec
* Don't read the CSR in riscv_csrrw_do64

----------------------------------------------------------------

Akihiko Odaki (1):
      target/riscv: Allocate itrigger timers only once

Ard Biesheuvel (2):
      target/riscv: Use existing lookup tables for MixColumns
      target/riscv: Use accelerated helper for AES64KS1I

Conor Dooley (1):
      hw/riscv: virt: Fix riscv,pmu DT node path

Daniel Henrique Barboza (6):
      target/riscv/cpu.c: do not run 'host' CPU with TCG
      target/riscv/cpu.c: add zmmul isa string
      target/riscv/cpu.c: add smepmp isa string
      target/riscv: fix satp_mode_finalize() when satp_mode.supported = 0
      hw/riscv/virt.c: fix non-KVM --enable-debug build
      hw/intc/riscv_aplic.c fix non-KVM --enable-debug build

Dickon Hood (2):
      target/riscv: Refactor translation of vector-widening instruction
      target/riscv: Add Zvbb ISA extension support

Jason Chien (3):
      target/riscv: Add Zihintntl extension ISA string to DTS
      hw/intc: Fix upper/lower mtime write calculation
      hw/intc: Make rtc variable names consistent

Kiran Ostrolenk (4):
      target/riscv: Refactor some of the generic vector functionality
      target/riscv: Refactor vector-vector translation macro
      target/riscv: Refactor some of the generic vector functionality
      target/riscv: Add Zvknh ISA extension support

LIU Zhiwei (3):
      target/riscv: Fix page_check_range use in fault-only-first
      target/riscv: Fix zfa fleq.d and fltq.d
      linux-user/riscv: Use abi type for target_ucontext

Lawrence Hunter (2):
      target/riscv: Add Zvbc ISA extension support
      target/riscv: Add Zvksh ISA extension support

Leon Schuermann (1):
      target/riscv/pmp.c: respect mseccfg.RLB for pmpaddrX changes

Max Chou (3):
      crypto: Create sm4_subword
      crypto: Add SM4 constant parameter CK
      target/riscv: Add Zvksed ISA extension support

Nazar Kazakov (4):
      target/riscv: Remove redundant "cpu_vl == 0" checks
      target/riscv: Move vector translation checks
      target/riscv: Add Zvkned ISA extension support
      target/riscv: Add Zvkg ISA extension support

Nikita Shubin (1):
      target/riscv: don't read CSR in riscv_csrrw_do64

Rob Bradford (1):
      target/riscv: Implement WARL behaviour for mcountinhibit/mcounteren

Robbin Ehn (1):
      linux-user/riscv: Add new extensions to hwprobe

Thomas Huth (2):
      hw/char/riscv_htif: Fix printing of console characters on big endian hosts
      hw/char/riscv_htif: Fix the console syscall on big endian hosts

Tommy Wu (1):
      target/riscv: Align the AIA model to v1.0 ratified spec

Vineet Gupta (1):
      riscv: zicond: make non-experimental

Weiwei Li (1):
      target/riscv: Update CSR bits name for svadu extension

Yong-Xuan Wang (5):
      target/riscv: support the AIA device emulation with KVM enabled
      target/riscv: check the in-kernel irqchip support
      target/riscv: Create an KVM AIA irqchip
      target/riscv: update APLIC and IMSIC to support KVM AIA
      target/riscv: select KVM AIA in riscv virt machine

 include/crypto/aes.h                      |   7 +
 include/crypto/sm4.h                      |   9 +
 target/riscv/cpu_bits.h                   |   8 +-
 target/riscv/cpu_cfg.h                    |   9 +
 target/riscv/debug.h                      |   3 +-
 target/riscv/helper.h                     |  98 +++
 target/riscv/kvm_riscv.h                  |   5 +
 target/riscv/vector_internals.h           | 228 +++++++
 target/riscv/insn32.decode                |  58 ++
 crypto/aes.c                              |   4 +-
 crypto/sm4.c                              |  10 +
 hw/char/riscv_htif.c                      |  12 +-
 hw/intc/riscv_aclint.c                    |  11 +-
 hw/intc/riscv_aplic.c                     |  52 +-
 hw/intc/riscv_imsic.c                     |  25 +-
 hw/riscv/virt.c                           | 374 ++++++------
 linux-user/riscv/signal.c                 |   4 +-
 linux-user/syscall.c                      |  14 +-
 target/arm/tcg/crypto_helper.c            |  10 +-
 target/riscv/cpu.c                        |  83 ++-
 target/riscv/cpu_helper.c                 |   6 +-
 target/riscv/crypto_helper.c              |  51 +-
 target/riscv/csr.c                        |  54 +-
 target/riscv/debug.c                      |  15 +-
 target/riscv/kvm.c                        | 201 ++++++-
 target/riscv/pmp.c                        |   4 +
 target/riscv/translate.c                  |   1 +
 target/riscv/vcrypto_helper.c             | 970 ++++++++++++++++++++++++++++++
 target/riscv/vector_helper.c              | 245 +-------
 target/riscv/vector_internals.c           |  81 +++
 target/riscv/insn_trans/trans_rvv.c.inc   | 171 +++---
 target/riscv/insn_trans/trans_rvvk.c.inc  | 606 +++++++++++++++++++
 target/riscv/insn_trans/trans_rvzfa.c.inc |   4 +-
 target/riscv/meson.build                  |   4 +-
 34 files changed, 2785 insertions(+), 652 deletions(-)
 create mode 100644 target/riscv/vector_internals.h
 create mode 100644 target/riscv/vcrypto_helper.c
 create mode 100644 target/riscv/vector_internals.c
 create mode 100644 target/riscv/insn_trans/trans_rvvk.c.inc

From: Daniel Henrique Barboza <dbarboza@ventanamicro.com>

The 'host' CPU is available in a CONFIG_KVM build and it's currently
available for all accels, but is a KVM only CPU. This means that in a
RISC-V KVM capable host we can do things like this:

$ ./build/qemu-system-riscv64 -M virt,accel=tcg -cpu host --nographic
qemu-system-riscv64: H extension requires priv spec 1.12.0

This CPU does not have a priv spec because we don't filter its extensions
via priv spec. We shouldn't be reaching riscv_cpu_realize_tcg() at all
with the 'host' CPU.

We don't have a way to filter the 'host' CPU out of the available CPU
options (-cpu help) if the build includes both KVM and TCG. What we can
do is to error out during riscv_cpu_realize_tcg() if the user chooses
the 'host' CPU with accel=tcg:

$ ./build/qemu-system-riscv64 -M virt,accel=tcg -cpu host --nographic
qemu-system-riscv64: 'host' CPU is not compatible with TCG acceleration

Signed-off-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Message-Id: <20230721133411.474105-1-dbarboza@ventanamicro.com>
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
---
 target/riscv/cpu.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -XXX,XX +XXX,XX @@ static void riscv_cpu_realize_tcg(DeviceState *dev, Error **errp)
     CPURISCVState *env = &cpu->env;
     Error *local_err = NULL;

+    if (object_dynamic_cast(OBJECT(dev), TYPE_RISCV_CPU_HOST)) {
+        error_setg(errp, "'host' CPU is not compatible with TCG acceleration");
+        return;
+    }
+
     riscv_cpu_validate_misa_mxl(cpu, &local_err);
     if (local_err != NULL) {
         error_propagate(errp, local_err);
--
2.41.0
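
The reject-at-realize pattern used above can be tried in isolation. The
following is a minimal, self-contained C sketch of the same idea; the
struct and function names are invented for the demo and are not QEMU's
QOM API:

#include <stdio.h>
#include <string.h>

/* Illustrative stand-in for a CPU object; 'type' mimics the class name. */
typedef struct {
    const char *type;   /* e.g. "host" or "rv64" */
} DemoCpu;

/* Reject the KVM-only 'host' CPU before any TCG-specific validation
 * runs, mirroring the early return added to riscv_cpu_realize_tcg(). */
static int demo_realize_tcg(const DemoCpu *cpu, const char **errp)
{
    if (strcmp(cpu->type, "host") == 0) {
        *errp = "'host' CPU is not compatible with TCG acceleration";
        return -1;
    }
    /* ... priv-spec validation etc. would follow here ... */
    return 0;
}

int main(void)
{
    const char *err = NULL;
    DemoCpu cpu = { .type = "host" };
    if (demo_realize_tcg(&cpu, &err) < 0) {
        fprintf(stderr, "qemu-system-riscv64: %s\n", err);
    }
    return 0;
}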

From: Thomas Huth <thuth@redhat.com>

The character that should be printed is stored in the 64 bit "payload"
variable. The code currently tries to print it by taking the address
of the variable and passing this pointer to qemu_chr_fe_write(). However,
this only works on little endian hosts where the least significant bits
are stored on the lowest address. To do this in a portable way, we have
to store the value in a uint8_t variable instead.

Fixes: 5033606780 ("RISC-V HTIF Console")
Signed-off-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
Reviewed-by: Bin Meng <bmeng@tinylab.org>
Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Message-Id: <20230721094720.902454-2-thuth@redhat.com>
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
---
 hw/char/riscv_htif.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/hw/char/riscv_htif.c b/hw/char/riscv_htif.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/char/riscv_htif.c
+++ b/hw/char/riscv_htif.c
@@ -XXX,XX +XXX,XX @@ static void htif_handle_tohost_write(HTIFState *s, uint64_t val_written)
             s->tohost = 0; /* clear to indicate we read */
             return;
         } else if (cmd == HTIF_CONSOLE_CMD_PUTC) {
-            qemu_chr_fe_write(&s->chr, (uint8_t *)&payload, 1);
+            uint8_t ch = (uint8_t)payload;
+            qemu_chr_fe_write(&s->chr, &ch, 1);
             resp = 0x100 | (uint8_t)payload;
         } else {
             qemu_log("HTIF device %d: unknown command\n", device);
--
2.41.0
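
The endianness pitfall fixed above is easy to reproduce outside QEMU.
A minimal stand-alone C demo (illustrative only, not QEMU code):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint64_t payload = 0x41;           /* 'A' */

    /* Buggy pattern: reinterpret the address of a 64-bit value as a
     * byte pointer. The first byte in memory is 0x41 on little endian
     * but 0x00 on big endian, so the wrong character would be written
     * to the console on a big endian host. */
    uint8_t first_byte = *(uint8_t *)&payload;

    /* Portable pattern from the patch: truncate the value explicitly. */
    uint8_t ch = (uint8_t)payload;

    printf("first byte in memory: 0x%02x, truncated value: 0x%02x\n",
           first_byte, ch);
    return 0;
}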

From: Thomas Huth <thuth@redhat.com>

Values that have been read via cpu_physical_memory_read() from the
guest's memory have to be swapped in case the host endianness differs
from the guest.

Fixes: a6e13e31d5 ("riscv_htif: Support console output via proxy syscall")
Signed-off-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
Reviewed-by: Bin Meng <bmeng@tinylab.org>
Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
Message-Id: <20230721094720.902454-3-thuth@redhat.com>
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
---
 hw/char/riscv_htif.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/hw/char/riscv_htif.c b/hw/char/riscv_htif.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/char/riscv_htif.c
+++ b/hw/char/riscv_htif.c
@@ -XXX,XX +XXX,XX @@
 #include "qemu/timer.h"
 #include "qemu/error-report.h"
 #include "exec/address-spaces.h"
+#include "exec/tswap.h"
 #include "sysemu/dma.h"

 #define RISCV_DEBUG_HTIF 0
@@ -XXX,XX +XXX,XX @@ static void htif_handle_tohost_write(HTIFState *s, uint64_t val_written)
         } else {
             uint64_t syscall[8];
             cpu_physical_memory_read(payload, syscall, sizeof(syscall));
-            if (syscall[0] == PK_SYS_WRITE &&
-                syscall[1] == HTIF_DEV_CONSOLE &&
-                syscall[3] == HTIF_CONSOLE_CMD_PUTC) {
+            if (tswap64(syscall[0]) == PK_SYS_WRITE &&
+                tswap64(syscall[1]) == HTIF_DEV_CONSOLE &&
+                tswap64(syscall[3]) == HTIF_CONSOLE_CMD_PUTC) {
                 uint8_t ch;
-                cpu_physical_memory_read(syscall[2], &ch, 1);
+                cpu_physical_memory_read(tswap64(syscall[2]), &ch, 1);
                 qemu_chr_fe_write(&s->chr, &ch, 1);
                 resp = 0x100 | (uint8_t)payload;
             } else {
--
2.41.0
1
From: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
2
2
3
No functional change intended in this commit.
3
zmmul was promoted from experimental to ratified in commit 6d00ffad4e95.
4
Add a riscv,isa string for it.
4
5
5
Signed-off-by: eop Chen <eop.chen@sifive.com>
6
Fixes: 6d00ffad4e95 ("target/riscv: move zmmul out of the experimental properties")
6
Reviewed-by: Frank Chang <frank.chang@sifive.com>
7
Signed-off-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
7
Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn>
8
Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn>
8
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
9
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
9
Message-Id: <165449614532.19704.7000832880482980398-3@git.sr.ht>
10
Message-Id: <20230720132424.371132-2-dbarboza@ventanamicro.com>
10
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
11
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
11
---
12
---
12
target/riscv/vector_helper.c | 76 ++++++++++++++++++------------------
13
target/riscv/cpu.c | 1 +
13
1 file changed, 38 insertions(+), 38 deletions(-)
14
1 file changed, 1 insertion(+)
14
15
15
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
16
diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
16
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
17
--- a/target/riscv/vector_helper.c
18
--- a/target/riscv/cpu.c
18
+++ b/target/riscv/vector_helper.c
19
+++ b/target/riscv/cpu.c
19
@@ -XXX,XX +XXX,XX @@ static inline int32_t vext_lmul(uint32_t desc)
20
@@ -XXX,XX +XXX,XX @@ static const struct isa_ext_data isa_edata_arr[] = {
20
/*
21
ISA_EXT_DATA_ENTRY(zicsr, PRIV_VERSION_1_10_0, ext_icsr),
21
* Get the maximum number of elements can be operated.
22
ISA_EXT_DATA_ENTRY(zifencei, PRIV_VERSION_1_10_0, ext_ifencei),
22
*
23
ISA_EXT_DATA_ENTRY(zihintpause, PRIV_VERSION_1_10_0, ext_zihintpause),
23
- * esz: log2 of element size in bytes.
24
+ ISA_EXT_DATA_ENTRY(zmmul, PRIV_VERSION_1_12_0, ext_zmmul),
24
+ * log2_esz: log2 of element size in bytes.
25
ISA_EXT_DATA_ENTRY(zawrs, PRIV_VERSION_1_12_0, ext_zawrs),
25
*/
26
ISA_EXT_DATA_ENTRY(zfa, PRIV_VERSION_1_12_0, ext_zfa),
26
-static inline uint32_t vext_max_elems(uint32_t desc, uint32_t esz)
27
ISA_EXT_DATA_ENTRY(zfbfmin, PRIV_VERSION_1_12_0, ext_zfbfmin),
27
+static inline uint32_t vext_max_elems(uint32_t desc, uint32_t log2_esz)
28
{
29
/*
30
* As simd_desc support at most 2048 bytes, the max vlen is 1024 bits.
31
@@ -XXX,XX +XXX,XX @@ static inline uint32_t vext_max_elems(uint32_t desc, uint32_t esz)
32
uint32_t vlenb = simd_maxsz(desc);
33
34
/* Return VLMAX */
35
- int scale = vext_lmul(desc) - esz;
36
+ int scale = vext_lmul(desc) - log2_esz;
37
return scale < 0 ? vlenb >> -scale : vlenb << scale;
38
}
39
40
@@ -XXX,XX +XXX,XX @@ vext_ldst_stride(void *vd, void *v0, target_ulong base,
41
target_ulong stride, CPURISCVState *env,
42
uint32_t desc, uint32_t vm,
43
vext_ldst_elem_fn *ldst_elem,
44
- uint32_t esz, uintptr_t ra)
45
+ uint32_t log2_esz, uintptr_t ra)
46
{
47
uint32_t i, k;
48
uint32_t nf = vext_nf(desc);
49
- uint32_t max_elems = vext_max_elems(desc, esz);
50
+ uint32_t max_elems = vext_max_elems(desc, log2_esz);
51
52
for (i = env->vstart; i < env->vl; i++, env->vstart++) {
53
if (!vm && !vext_elem_mask(v0, i)) {
54
@@ -XXX,XX +XXX,XX @@ vext_ldst_stride(void *vd, void *v0, target_ulong base,
55
56
k = 0;
57
while (k < nf) {
58
- target_ulong addr = base + stride * i + (k << esz);
59
+ target_ulong addr = base + stride * i + (k << log2_esz);
60
ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
61
k++;
62
}
63
@@ -XXX,XX +XXX,XX @@ GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d)
64
/* unmasked unit-stride load and store operation*/
65
static void
66
vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
67
- vext_ldst_elem_fn *ldst_elem, uint32_t esz, uint32_t evl,
68
+ vext_ldst_elem_fn *ldst_elem, uint32_t log2_esz, uint32_t evl,
69
uintptr_t ra)
70
{
71
uint32_t i, k;
72
uint32_t nf = vext_nf(desc);
73
- uint32_t max_elems = vext_max_elems(desc, esz);
74
+ uint32_t max_elems = vext_max_elems(desc, log2_esz);
75
76
/* load bytes from guest memory */
77
for (i = env->vstart; i < evl; i++, env->vstart++) {
78
k = 0;
79
while (k < nf) {
80
- target_ulong addr = base + ((i * nf + k) << esz);
81
+ target_ulong addr = base + ((i * nf + k) << log2_esz);
82
ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
83
k++;
84
}
85
@@ -XXX,XX +XXX,XX @@ vext_ldst_index(void *vd, void *v0, target_ulong base,
86
void *vs2, CPURISCVState *env, uint32_t desc,
87
vext_get_index_addr get_index_addr,
88
vext_ldst_elem_fn *ldst_elem,
89
- uint32_t esz, uintptr_t ra)
90
+ uint32_t log2_esz, uintptr_t ra)
91
{
92
uint32_t i, k;
93
uint32_t nf = vext_nf(desc);
94
uint32_t vm = vext_vm(desc);
95
- uint32_t max_elems = vext_max_elems(desc, esz);
96
+ uint32_t max_elems = vext_max_elems(desc, log2_esz);
97
98
/* load bytes from guest memory */
99
for (i = env->vstart; i < env->vl; i++, env->vstart++) {
100
@@ -XXX,XX +XXX,XX @@ vext_ldst_index(void *vd, void *v0, target_ulong base,
101
102
k = 0;
103
while (k < nf) {
104
- abi_ptr addr = get_index_addr(base, i, vs2) + (k << esz);
105
+ abi_ptr addr = get_index_addr(base, i, vs2) + (k << log2_esz);
106
ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
107
k++;
108
}
109
@@ -XXX,XX +XXX,XX @@ static inline void
110
vext_ldff(void *vd, void *v0, target_ulong base,
111
CPURISCVState *env, uint32_t desc,
112
vext_ldst_elem_fn *ldst_elem,
113
- uint32_t esz, uintptr_t ra)
114
+ uint32_t log2_esz, uintptr_t ra)
115
{
116
void *host;
117
uint32_t i, k, vl = 0;
118
uint32_t nf = vext_nf(desc);
119
uint32_t vm = vext_vm(desc);
120
- uint32_t max_elems = vext_max_elems(desc, esz);
121
+ uint32_t max_elems = vext_max_elems(desc, log2_esz);
122
target_ulong addr, offset, remain;
123
124
/* probe every access*/
125
@@ -XXX,XX +XXX,XX @@ vext_ldff(void *vd, void *v0, target_ulong base,
126
if (!vm && !vext_elem_mask(v0, i)) {
127
continue;
128
}
129
- addr = adjust_addr(env, base + i * (nf << esz));
130
+ addr = adjust_addr(env, base + i * (nf << log2_esz));
131
if (i == 0) {
132
- probe_pages(env, addr, nf << esz, ra, MMU_DATA_LOAD);
133
+ probe_pages(env, addr, nf << log2_esz, ra, MMU_DATA_LOAD);
134
} else {
135
/* if it triggers an exception, no need to check watchpoint */
136
- remain = nf << esz;
137
+ remain = nf << log2_esz;
138
while (remain > 0) {
139
offset = -(addr | TARGET_PAGE_MASK);
140
host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD,
141
@@ -XXX,XX +XXX,XX @@ ProbeSuccess:
142
continue;
143
}
144
while (k < nf) {
145
- target_ulong addr = base + ((i * nf + k) << esz);
146
+ target_ulong addr = base + ((i * nf + k) << log2_esz);
147
ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
148
k++;
149
}
150
@@ -XXX,XX +XXX,XX @@ GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d)
151
*/
152
static void
153
vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
154
- vext_ldst_elem_fn *ldst_elem, uint32_t esz, uintptr_t ra)
155
+ vext_ldst_elem_fn *ldst_elem, uint32_t log2_esz, uintptr_t ra)
156
{
157
uint32_t i, k, off, pos;
158
uint32_t nf = vext_nf(desc);
159
uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3;
160
- uint32_t max_elems = vlenb >> esz;
161
+ uint32_t max_elems = vlenb >> log2_esz;
162
163
k = env->vstart / max_elems;
164
off = env->vstart % max_elems;
165
@@ -XXX,XX +XXX,XX @@ vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
166
if (off) {
167
/* load/store rest of elements of current segment pointed by vstart */
168
for (pos = off; pos < max_elems; pos++, env->vstart++) {
169
- target_ulong addr = base + ((pos + k * max_elems) << esz);
170
+ target_ulong addr = base + ((pos + k * max_elems) << log2_esz);
171
ldst_elem(env, adjust_addr(env, addr), pos + k * max_elems, vd, ra);
172
}
173
k++;
174
@@ -XXX,XX +XXX,XX @@ vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
175
/* load/store elements for rest of segments */
176
for (; k < nf; k++) {
177
for (i = 0; i < max_elems; i++, env->vstart++) {
178
- target_ulong addr = base + ((i + k * max_elems) << esz);
179
+ target_ulong addr = base + ((i + k * max_elems) << log2_esz);
180
ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
181
}
182
}
183
@@ -XXX,XX +XXX,XX @@ GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2)
184
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4)
185
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8)
186
187
-#define GEN_VEXT_VSLIE1UP(ESZ, H) \
188
-static void vslide1up_##ESZ(void *vd, void *v0, target_ulong s1, void *vs2, \
189
- CPURISCVState *env, uint32_t desc) \
190
+#define GEN_VEXT_VSLIE1UP(BITWIDTH, H) \
191
+static void vslide1up_##BITWIDTH(void *vd, void *v0, target_ulong s1, \
192
+ void *vs2, CPURISCVState *env, uint32_t desc) \
193
{ \
194
- typedef uint##ESZ##_t ETYPE; \
195
+ typedef uint##BITWIDTH##_t ETYPE; \
196
uint32_t vm = vext_vm(desc); \
197
uint32_t vl = env->vl; \
198
uint32_t i; \
199
@@ -XXX,XX +XXX,XX @@ GEN_VEXT_VSLIE1UP(16, H2)
200
GEN_VEXT_VSLIE1UP(32, H4)
201
GEN_VEXT_VSLIE1UP(64, H8)
202
203
-#define GEN_VEXT_VSLIDE1UP_VX(NAME, ESZ) \
204
+#define GEN_VEXT_VSLIDE1UP_VX(NAME, BITWIDTH) \
205
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
206
CPURISCVState *env, uint32_t desc) \
207
{ \
208
- vslide1up_##ESZ(vd, v0, s1, vs2, env, desc); \
209
+ vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc); \
210
}
211
212
/* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */
213
@@ -XXX,XX +XXX,XX @@ GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, 16)
214
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, 32)
215
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, 64)
216
217
-#define GEN_VEXT_VSLIDE1DOWN(ESZ, H) \
218
-static void vslide1down_##ESZ(void *vd, void *v0, target_ulong s1, void *vs2, \
219
- CPURISCVState *env, uint32_t desc) \
220
+#define GEN_VEXT_VSLIDE1DOWN(BITWIDTH, H) \
221
+static void vslide1down_##BITWIDTH(void *vd, void *v0, target_ulong s1, \
222
+ void *vs2, CPURISCVState *env, uint32_t desc) \
223
{ \
224
- typedef uint##ESZ##_t ETYPE; \
225
+ typedef uint##BITWIDTH##_t ETYPE; \
226
uint32_t vm = vext_vm(desc); \
227
uint32_t vl = env->vl; \
228
uint32_t i; \
229
@@ -XXX,XX +XXX,XX @@ GEN_VEXT_VSLIDE1DOWN(16, H2)
230
GEN_VEXT_VSLIDE1DOWN(32, H4)
231
GEN_VEXT_VSLIDE1DOWN(64, H8)
232
233
-#define GEN_VEXT_VSLIDE1DOWN_VX(NAME, ESZ) \
234
+#define GEN_VEXT_VSLIDE1DOWN_VX(NAME, BITWIDTH) \
235
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
236
CPURISCVState *env, uint32_t desc) \
237
{ \
238
- vslide1down_##ESZ(vd, v0, s1, vs2, env, desc); \
239
+ vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc); \
240
}
241
242
/* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */
243
@@ -XXX,XX +XXX,XX @@ GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, 32)
244
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, 64)
245
246
/* Vector Floating-Point Slide Instructions */
247
-#define GEN_VEXT_VFSLIDE1UP_VF(NAME, ESZ) \
248
+#define GEN_VEXT_VFSLIDE1UP_VF(NAME, BITWIDTH) \
249
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
250
CPURISCVState *env, uint32_t desc) \
251
{ \
252
- vslide1up_##ESZ(vd, v0, s1, vs2, env, desc); \
253
+ vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc); \
254
}
255
256
/* vfslide1up.vf vd, vs2, rs1, vm # vd[0]=f[rs1], vd[i+1] = vs2[i] */
257
@@ -XXX,XX +XXX,XX @@ GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_h, 16)
258
GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_w, 32)
259
GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_d, 64)
260
261
-#define GEN_VEXT_VFSLIDE1DOWN_VF(NAME, ESZ) \
262
+#define GEN_VEXT_VFSLIDE1DOWN_VF(NAME, BITWIDTH) \
263
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
264
CPURISCVState *env, uint32_t desc) \
265
{ \
266
- vslide1down_##ESZ(vd, v0, s1, vs2, env, desc); \
267
+ vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc); \
268
}
269
270
/* vfslide1down.vf vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=f[rs1] */
271
--
28
--
272
2.36.1
29
2.41.0
diff view generated by jsdifflib
1
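
For readers unfamiliar with how such a table entry reaches the guest:
conceptually, each enabled ratified multi-letter extension is appended
to the riscv,isa device-tree string with an underscore separator. A
rough, hypothetical sketch of that idea (not QEMU's actual
implementation):

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

/* Miniature of the isa_edata_arr idea: name plus an enabled flag. */
struct ext { const char *name; bool enabled; };

int main(void)
{
    struct ext exts[] = {
        { "zicsr", true }, { "zifencei", true }, { "zmmul", true },
    };
    char isa[128] = "rv64imafdc";

    for (size_t i = 0; i < sizeof(exts) / sizeof(exts[0]); i++) {
        if (exts[i].enabled) {
            strcat(isa, "_");
            strcat(isa, exts[i].name);
        }
    }
    /* Prints: riscv,isa = rv64imafdc_zicsr_zifencei_zmmul */
    printf("riscv,isa = %s\n", isa);
    return 0;
}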

From: Daniel Henrique Barboza <dbarboza@ventanamicro.com>

The cpu->cfg.epmp extension is still experimental, but it already has a
'smepmp' riscv,isa string. Add it.

Signed-off-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn>
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
Message-Id: <20230720132424.371132-3-dbarboza@ventanamicro.com>
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
---
 target/riscv/cpu.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -XXX,XX +XXX,XX @@ static const struct isa_ext_data isa_edata_arr[] = {
     ISA_EXT_DATA_ENTRY(zhinx, PRIV_VERSION_1_12_0, ext_zhinx),
     ISA_EXT_DATA_ENTRY(zhinxmin, PRIV_VERSION_1_12_0, ext_zhinxmin),
     ISA_EXT_DATA_ENTRY(smaia, PRIV_VERSION_1_12_0, ext_smaia),
+    ISA_EXT_DATA_ENTRY(smepmp, PRIV_VERSION_1_12_0, epmp),
     ISA_EXT_DATA_ENTRY(smstateen, PRIV_VERSION_1_12_0, ext_smstateen),
     ISA_EXT_DATA_ENTRY(ssaia, PRIV_VERSION_1_12_0, ext_ssaia),
     ISA_EXT_DATA_ENTRY(sscofpmf, PRIV_VERSION_1_12_0, ext_sscofpmf),
--
2.41.0

From: LIU Zhiwei <zhiwei_liu@linux.alibaba.com>

Commit bef6f008b98 ("accel/tcg: Return bool from page_check_range")
converts the integer return value to a bool type. However, it wrongly
converted the use of the API in riscv fault-only-first, where
"page_check_range() <= 0" should be converted to "!page_check_range()".

Signed-off-by: LIU Zhiwei <zhiwei_liu@linux.alibaba.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-ID: <20230729031618.821-1-zhiwei_liu@linux.alibaba.com>
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
---
 target/riscv/vector_helper.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -XXX,XX +XXX,XX @@ vext_ldff(void *vd, void *v0, target_ulong base,
                                  cpu_mmu_index(env, false));
             if (host) {
 #ifdef CONFIG_USER_ONLY
-                if (page_check_range(addr, offset, PAGE_READ)) {
+                if (!page_check_range(addr, offset, PAGE_READ)) {
                     vl = i;
                     goto ProbeSuccess;
                 }
--
2.41.0
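
The sign-flip hazard behind this fix is worth seeing in isolation. A
minimal C sketch with invented helper names:

#include <stdbool.h>
#include <stdio.h>

/* Old-style API: 0 on success, negative on failure. */
static int check_range_old(bool accessible) { return accessible ? 0 : -1; }

/* New-style API: true on success, false on failure. */
static bool check_range_new(bool accessible) { return accessible; }

int main(void)
{
    bool accessible = false;   /* simulate an unreadable page */

    /* With the integer API, any nonzero result meant failure: */
    if (check_range_old(accessible)) {
        puts("old API: fault detected");
    }
    /* After the bool conversion the sense inverts, so a bare
     * 'if (check_range(...))' silently becomes 'if success'. The test
     * must be negated, which is the regression fixed above. */
    if (!check_range_new(accessible)) {
        puts("new API: fault detected");
    }
    return 0;
}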

From: Ard Biesheuvel <ardb@kernel.org>

The AES MixColumns and InvMixColumns operations are relatively
expensive 4x4 matrix multiplications in GF(2^8), which is why C
implementations usually rely on precomputed lookup tables rather than
performing the calculations on demand.

Given that we already carry those tables in QEMU, we can just grab the
right value in the implementation of the RISC-V AES32 instructions. Note
that the tables in question are permuted according to the respective
Sbox, so we can omit the Sbox lookup as well in this case.

Cc: Richard Henderson <richard.henderson@linaro.org>
Cc: Philippe Mathieu-Daudé <philmd@linaro.org>
Cc: Zewen Ye <lustrew@foxmail.com>
Cc: Weiwei Li <liweiwei@iscas.ac.cn>
Cc: Junqiang Wang <wangjunqiang@iscas.ac.cn>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-ID: <20230731084043.1791984-1-ardb@kernel.org>
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
---
 include/crypto/aes.h         |  7 +++++++
 crypto/aes.c                 |  4 ++--
 target/riscv/crypto_helper.c | 34 ++++------------------------------
 3 files changed, 13 insertions(+), 32 deletions(-)

diff --git a/include/crypto/aes.h b/include/crypto/aes.h
index XXXXXXX..XXXXXXX 100644
--- a/include/crypto/aes.h
+++ b/include/crypto/aes.h
@@ -XXX,XX +XXX,XX @@ void AES_decrypt(const unsigned char *in, unsigned char *out,
 extern const uint8_t AES_sbox[256];
 extern const uint8_t AES_isbox[256];

+/*
+AES_Te0[x] = S [x].[02, 01, 01, 03];
+AES_Td0[x] = Si[x].[0e, 09, 0d, 0b];
+*/
+
+extern const uint32_t AES_Te0[256], AES_Td0[256];
+
 #endif
diff --git a/crypto/aes.c b/crypto/aes.c
index XXXXXXX..XXXXXXX 100644
--- a/crypto/aes.c
+++ b/crypto/aes.c
@@ -XXX,XX +XXX,XX @@ AES_Td3[x] = Si[x].[09, 0d, 0b, 0e];
 AES_Td4[x] = Si[x].[01, 01, 01, 01];
 */

-static const uint32_t AES_Te0[256] = {
+const uint32_t AES_Te0[256] = {
     0xc66363a5U, 0xf87c7c84U, 0xee777799U, 0xf67b7b8dU,
     0xfff2f20dU, 0xd66b6bbdU, 0xde6f6fb1U, 0x91c5c554U,
     0x60303050U, 0x02010103U, 0xce6767a9U, 0x562b2b7dU,
@@ -XXX,XX +XXX,XX @@ static const uint32_t AES_Te4[256] = {
     0xb0b0b0b0U, 0x54545454U, 0xbbbbbbbbU, 0x16161616U,
 };

-static const uint32_t AES_Td0[256] = {
+const uint32_t AES_Td0[256] = {
     0x51f4a750U, 0x7e416553U, 0x1a17a4c3U, 0x3a275e96U,
     0x3bab6bcbU, 0x1f9d45f1U, 0xacfa58abU, 0x4be30393U,
     0x2030fa55U, 0xad766df6U, 0x88cc7691U, 0xf5024c25U,
diff --git a/target/riscv/crypto_helper.c b/target/riscv/crypto_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/crypto_helper.c
+++ b/target/riscv/crypto_helper.c
@@ -XXX,XX +XXX,XX @@
 #include "crypto/aes-round.h"
 #include "crypto/sm4.h"

-#define AES_XTIME(a) \
-    ((a << 1) ^ ((a & 0x80) ? 0x1b : 0))
-
-#define AES_GFMUL(a, b) (( \
-    (((b) & 0x1) ? (a) : 0) ^ \
-    (((b) & 0x2) ? AES_XTIME(a) : 0) ^ \
-    (((b) & 0x4) ? AES_XTIME(AES_XTIME(a)) : 0) ^ \
-    (((b) & 0x8) ? AES_XTIME(AES_XTIME(AES_XTIME(a))) : 0)) & 0xFF)
-
-static inline uint32_t aes_mixcolumn_byte(uint8_t x, bool fwd)
-{
-    uint32_t u;
-
-    if (fwd) {
-        u = (AES_GFMUL(x, 3) << 24) | (x << 16) | (x << 8) |
-            (AES_GFMUL(x, 2) << 0);
-    } else {
-        u = (AES_GFMUL(x, 0xb) << 24) | (AES_GFMUL(x, 0xd) << 16) |
-            (AES_GFMUL(x, 0x9) << 8) | (AES_GFMUL(x, 0xe) << 0);
-    }
-    return u;
-}
-
 #define sext32_xlen(x) (target_ulong)(int32_t)(x)

 static inline target_ulong aes32_operation(target_ulong shamt,
@@ -XXX,XX +XXX,XX @@ static inline target_ulong aes32_operation(target_ulong shamt,
                                            bool enc, bool mix)
 {
     uint8_t si = rs2 >> shamt;
-    uint8_t so;
     uint32_t mixed;
     target_ulong res;

     if (enc) {
-        so = AES_sbox[si];
         if (mix) {
-            mixed = aes_mixcolumn_byte(so, true);
+            mixed = be32_to_cpu(AES_Te0[si]);
         } else {
-            mixed = so;
+            mixed = AES_sbox[si];
         }
     } else {
-        so = AES_isbox[si];
         if (mix) {
-            mixed = aes_mixcolumn_byte(so, false);
+            mixed = be32_to_cpu(AES_Td0[si]);
         } else {
-            mixed = so;
+            mixed = AES_isbox[si];
         }
     }
     mixed = rol32(mixed, shamt);
--
2.41.0
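
To see why one table lookup can replace the removed AES_GFMUL()
arithmetic: an AES_Te0-style entry simply packs the MixColumns products
[02 01 01 03]·s into a single 32-bit word. A small self-contained demo
(the functions are re-derived here for illustration; they are not the
QEMU implementations):

#include <stdint.h>
#include <stdio.h>

/* xtime: multiply by 2 in GF(2^8) modulo the AES polynomial 0x11b. */
static uint8_t xtime(uint8_t a)
{
    return (uint8_t)((a << 1) ^ ((a & 0x80) ? 0x1b : 0));
}

/* Multiply two field elements; only small constants are needed here. */
static uint8_t gfmul(uint8_t a, uint8_t b)
{
    uint8_t r = 0;
    if (b & 1) r ^= a;
    if (b & 2) r ^= xtime(a);
    if (b & 4) r ^= xtime(xtime(a));
    if (b & 8) r ^= xtime(xtime(xtime(a)));
    return r;
}

int main(void)
{
    /* For an S-box output s, compute the word a Te0-style table stores
     * (read big-endian, hence the be32_to_cpu() in the patch). With
     * s = 0x63 this yields 0xc66363a5, i.e. AES_Te0[0]. */
    uint8_t s = 0x63;
    uint32_t packed = ((uint32_t)gfmul(s, 2) << 24) |
                      ((uint32_t)s           << 16) |
                      ((uint32_t)s           <<  8) |
                       (uint32_t)gfmul(s, 3);
    printf("on-demand MixColumns word: 0x%08x\n", packed);
    return 0;
}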

From: Kiran Ostrolenk <kiran.ostrolenk@codethink.co.uk>

Take some functions/macros out of `vector_helper` and put them in a new
module called `vector_internals`. This ensures they can be used by both
vector and vector-crypto helpers (the latter are implemented in
subsequent commits).

Signed-off-by: Kiran Ostrolenk <kiran.ostrolenk@codethink.co.uk>
Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn>
Signed-off-by: Max Chou <max.chou@sifive.com>
Acked-by: Alistair Francis <alistair.francis@wdc.com>
Message-ID: <20230711165917.2629866-2-max.chou@sifive.com>
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
---
 target/riscv/vector_internals.h | 182 +++++++++++++++++++++++++++++
 target/riscv/vector_helper.c    | 201 +-------------------------------
 target/riscv/vector_internals.c |  81 +++++++++++++
 target/riscv/meson.build        |   1 +
 4 files changed, 265 insertions(+), 200 deletions(-)
 create mode 100644 target/riscv/vector_internals.h
 create mode 100644 target/riscv/vector_internals.c

diff --git a/target/riscv/vector_internals.h b/target/riscv/vector_internals.h
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/target/riscv/vector_internals.h
@@ -XXX,XX +XXX,XX @@
+/*
+ * RISC-V Vector Extension Internals
+ *
+ * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef TARGET_RISCV_VECTOR_INTERNALS_H
+#define TARGET_RISCV_VECTOR_INTERNALS_H
+
+#include "qemu/osdep.h"
+#include "qemu/bitops.h"
+#include "cpu.h"
+#include "tcg/tcg-gvec-desc.h"
+#include "internals.h"
+
+static inline uint32_t vext_nf(uint32_t desc)
+{
+    return FIELD_EX32(simd_data(desc), VDATA, NF);
+}
+
+/*
+ * Note that vector data is stored in host-endian 64-bit chunks,
+ * so addressing units smaller than that needs a host-endian fixup.
+ */
+#if HOST_BIG_ENDIAN
+#define H1(x)   ((x) ^ 7)
+#define H1_2(x) ((x) ^ 6)
+#define H1_4(x) ((x) ^ 4)
+#define H2(x)   ((x) ^ 3)
+#define H4(x)   ((x) ^ 1)
+#define H8(x)   ((x))
+#else
+#define H1(x)   (x)
+#define H1_2(x) (x)
+#define H1_4(x) (x)
+#define H2(x)   (x)
+#define H4(x)   (x)
+#define H8(x)   (x)
+#endif
+
+/*
+ * Encode LMUL to lmul as following:
+ *     LMUL    vlmul    lmul
+ *      1       000       0
+ *      2       001       1
+ *      4       010       2
+ *      8       011       3
+ *      -       100       -
+ *     1/8      101      -3
+ *     1/4      110      -2
+ *     1/2      111      -1
+ */
+static inline int32_t vext_lmul(uint32_t desc)
+{
+    return sextract32(FIELD_EX32(simd_data(desc), VDATA, LMUL), 0, 3);
+}
+
+static inline uint32_t vext_vm(uint32_t desc)
+{
+    return FIELD_EX32(simd_data(desc), VDATA, VM);
+}
+
+static inline uint32_t vext_vma(uint32_t desc)
+{
+    return FIELD_EX32(simd_data(desc), VDATA, VMA);
+}
+
+static inline uint32_t vext_vta(uint32_t desc)
+{
+    return FIELD_EX32(simd_data(desc), VDATA, VTA);
+}
+
+static inline uint32_t vext_vta_all_1s(uint32_t desc)
+{
+    return FIELD_EX32(simd_data(desc), VDATA, VTA_ALL_1S);
+}
+
+/*
+ * Earlier designs (pre-0.9) had a varying number of bits
+ * per mask value (MLEN). In the 0.9 design, MLEN=1.
+ * (Section 4.5)
+ */
+static inline int vext_elem_mask(void *v0, int index)
+{
+    int idx = index / 64;
+    int pos = index % 64;
+    return (((uint64_t *)v0)[idx] >> pos) & 1;
+}
+
+/*
+ * Get number of total elements, including prestart, body and tail elements.
+ * Note that when LMUL < 1, the tail includes the elements past VLMAX that
+ * are held in the same vector register.
+ */
+static inline uint32_t vext_get_total_elems(CPURISCVState *env, uint32_t desc,
+                                            uint32_t esz)
+{
+    uint32_t vlenb = simd_maxsz(desc);
+    uint32_t sew = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW);
+    int8_t emul = ctzl(esz) - ctzl(sew) + vext_lmul(desc) < 0 ? 0 :
+                  ctzl(esz) - ctzl(sew) + vext_lmul(desc);
+    return (vlenb << emul) / esz;
+}
+
+/* set agnostic elements to 1s */
+void vext_set_elems_1s(void *base, uint32_t is_agnostic, uint32_t cnt,
+                       uint32_t tot);
+
+/* expand macro args before macro */
+#define RVVCALL(macro, ...)  macro(__VA_ARGS__)
+
+/* (TD, T1, T2, TX1, TX2) */
+#define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t
+#define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t
+#define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t
+#define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t
+
+/* operation of two vector elements */
+typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i);
+
+#define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)    \
+static void do_##NAME(void *vd, void *vs1, void *vs2, int i)    \
+{                                                               \
+    TX1 s1 = *((T1 *)vs1 + HS1(i));                             \
+    TX2 s2 = *((T2 *)vs2 + HS2(i));                             \
+    *((TD *)vd + HD(i)) = OP(s2, s1);                           \
+}
+
+void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2,
+                CPURISCVState *env, uint32_t desc,
+                opivv2_fn *fn, uint32_t esz);
+
+/* generate the helpers for OPIVV */
+#define GEN_VEXT_VV(NAME, ESZ)                            \
+void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
+                  void *vs2, CPURISCVState *env,          \
+                  uint32_t desc)                          \
+{                                                         \
+    do_vext_vv(vd, v0, vs1, vs2, env, desc,               \
+               do_##NAME, ESZ);                           \
+}
+
+typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i);
+
+/*
+ * (T1)s1 gives the real operator type.
+ * (TX1)(T1)s1 expands the operator type of widen or narrow operations.
+ */
+#define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)             \
+static void do_##NAME(void *vd, target_long s1, void *vs2, int i)   \
+{                                                                   \
+    TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
+    *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1);                      \
+}
+
+void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2,
+                CPURISCVState *env, uint32_t desc,
+                opivx2_fn fn, uint32_t esz);
+
+/* generate the helpers for OPIVX */
+#define GEN_VEXT_VX(NAME, ESZ)                            \
+void HELPER(NAME)(void *vd, void *v0, target_ulong s1,    \
+                  void *vs2, CPURISCVState *env,          \
+                  uint32_t desc)                          \
+{                                                         \
+    do_vext_vx(vd, v0, s1, vs2, env, desc,                \
+               do_##NAME, ESZ);                           \
+}
+
+#endif /* TARGET_RISCV_VECTOR_INTERNALS_H */
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -XXX,XX +XXX,XX @@
 #include "fpu/softfloat.h"
 #include "tcg/tcg-gvec-desc.h"
 #include "internals.h"
+#include "vector_internals.h"
 #include <math.h>

 target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
@@ -XXX,XX +XXX,XX @@ target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
     return vl;
 }

-/*
- * Note that vector data is stored in host-endian 64-bit chunks,
- * so addressing units smaller than that needs a host-endian fixup.
- */
-#if HOST_BIG_ENDIAN
-#define H1(x)   ((x) ^ 7)
-#define H1_2(x) ((x) ^ 6)
-#define H1_4(x) ((x) ^ 4)
-#define H2(x)   ((x) ^ 3)
-#define H4(x)   ((x) ^ 1)
-#define H8(x)   ((x))
-#else
-#define H1(x)   (x)
-#define H1_2(x) (x)
-#define H1_4(x) (x)
-#define H2(x)   (x)
-#define H4(x)   (x)
-#define H8(x)   (x)
-#endif
-
-static inline uint32_t vext_nf(uint32_t desc)
-{
-    return FIELD_EX32(simd_data(desc), VDATA, NF);
-}
-
-static inline uint32_t vext_vm(uint32_t desc)
-{
-    return FIELD_EX32(simd_data(desc), VDATA, VM);
-}
-
-/*
- * Encode LMUL to lmul as following:
- *     LMUL    vlmul    lmul
- *      1       000       0
- *      2       001       1
- *      4       010       2
- *      8       011       3
- *      -       100       -
- *     1/8      101      -3
- *     1/4      110      -2
- *     1/2      111      -1
- */
-static inline int32_t vext_lmul(uint32_t desc)
-{
-    return sextract32(FIELD_EX32(simd_data(desc), VDATA, LMUL), 0, 3);
-}
-
-static inline uint32_t vext_vta(uint32_t desc)
-{
-    return FIELD_EX32(simd_data(desc), VDATA, VTA);
-}
-
-static inline uint32_t vext_vma(uint32_t desc)
-{
-    return FIELD_EX32(simd_data(desc), VDATA, VMA);
-}
-
-static inline uint32_t vext_vta_all_1s(uint32_t desc)
-{
-    return FIELD_EX32(simd_data(desc), VDATA, VTA_ALL_1S);
-}
-
 /*
  * Get the maximum number of elements can be operated.
  *
@@ -XXX,XX +XXX,XX @@ static inline uint32_t vext_max_elems(uint32_t desc, uint32_t log2_esz)
     return scale < 0 ? vlenb >> -scale : vlenb << scale;
 }

-/*
- * Get number of total elements, including prestart, body and tail elements.
- * Note that when LMUL < 1, the tail includes the elements past VLMAX that
- * are held in the same vector register.
- */
-static inline uint32_t vext_get_total_elems(CPURISCVState *env, uint32_t desc,
-                                            uint32_t esz)
-{
-    uint32_t vlenb = simd_maxsz(desc);
-    uint32_t sew = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW);
-    int8_t emul = ctzl(esz) - ctzl(sew) + vext_lmul(desc) < 0 ? 0 :
-                  ctzl(esz) - ctzl(sew) + vext_lmul(desc);
-    return (vlenb << emul) / esz;
-}
-
 static inline target_ulong adjust_addr(CPURISCVState *env, target_ulong addr)
 {
     return (addr & ~env->cur_pmmask) | env->cur_pmbase;
@@ -XXX,XX +XXX,XX @@ static void probe_pages(CPURISCVState *env, target_ulong addr,
     }
 }

-/* set agnostic elements to 1s */
-static void vext_set_elems_1s(void *base, uint32_t is_agnostic, uint32_t cnt,
-                              uint32_t tot)
-{
-    if (is_agnostic == 0) {
-        /* policy undisturbed */
-        return;
-    }
-    if (tot - cnt == 0) {
-        return;
-    }
-    memset(base + cnt, -1, tot - cnt);
-}
-
 static inline void vext_set_elem_mask(void *v0, int index,
                                       uint8_t value)
 {
@@ -XXX,XX +XXX,XX @@ static inline void vext_set_elem_mask(void *v0, int index,
     ((uint64_t *)v0)[idx] = deposit64(old, pos, 1, value);
 }

-/*
- * Earlier designs (pre-0.9) had a varying number of bits
- * per mask value (MLEN). In the 0.9 design, MLEN=1.
- * (Section 4.5)
- */
-static inline int vext_elem_mask(void *v0, int index)
-{
-    int idx = index / 64;
-    int pos = index % 64;
-    return (((uint64_t *)v0)[idx] >> pos) & 1;
-}
-
 /* elements operations for load and store */
 typedef void vext_ldst_elem_fn(CPURISCVState *env, abi_ptr addr,
                                uint32_t idx, void *vd, uintptr_t retaddr);
@@ -XXX,XX +XXX,XX @@ GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b)
  * Vector Integer Arithmetic Instructions
  */

-/* expand macro args before macro */
-#define RVVCALL(macro, ...)  macro(__VA_ARGS__)
-
 /* (TD, T1, T2, TX1, TX2) */
 #define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t
 #define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t
 #define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t
 #define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t
-#define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t
-#define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t
-#define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t
-#define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t
 #define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t
 #define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t
 #define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t
@@ -XXX,XX +XXX,XX @@ GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b)
 #define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t
 #define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t

-/* operation of two vector elements */
-typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i);
-
-#define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)    \
-static void do_##NAME(void *vd, void *vs1, void *vs2, int i)    \
-{                                                               \
-    TX1 s1 = *((T1 *)vs1 + HS1(i));                             \
-    TX2 s2 = *((T2 *)vs2 + HS2(i));                             \
-    *((TD *)vd + HD(i)) = OP(s2, s1);                           \
-}
 #define DO_SUB(N, M) (N - M)
 #define DO_RSUB(N, M) (M - N)

@@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB)
 RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB)
 RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB)

-static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2,
+ * 1/2 111 -1
92
+ */
93
+static inline int32_t vext_lmul(uint32_t desc)
94
+{
95
+ return sextract32(FIELD_EX32(simd_data(desc), VDATA, LMUL), 0, 3);
96
+}
97
+
98
+static inline uint32_t vext_vm(uint32_t desc)
99
+{
100
+ return FIELD_EX32(simd_data(desc), VDATA, VM);
101
+}
102
+
103
+static inline uint32_t vext_vma(uint32_t desc)
104
+{
105
+ return FIELD_EX32(simd_data(desc), VDATA, VMA);
106
+}
107
+
108
+static inline uint32_t vext_vta(uint32_t desc)
109
+{
110
+ return FIELD_EX32(simd_data(desc), VDATA, VTA);
111
+}
112
+
113
+static inline uint32_t vext_vta_all_1s(uint32_t desc)
114
+{
115
+ return FIELD_EX32(simd_data(desc), VDATA, VTA_ALL_1S);
116
+}
117
+
118
+/*
119
+ * Earlier designs (pre-0.9) had a varying number of bits
120
+ * per mask value (MLEN). In the 0.9 design, MLEN=1.
121
+ * (Section 4.5)
122
+ */
123
+static inline int vext_elem_mask(void *v0, int index)
124
+{
125
+ int idx = index / 64;
126
+ int pos = index % 64;
127
+ return (((uint64_t *)v0)[idx] >> pos) & 1;
128
+}
129
+
130
+/*
131
+ * Get number of total elements, including prestart, body and tail elements.
132
+ * Note that when LMUL < 1, the tail includes the elements past VLMAX that
133
+ * are held in the same vector register.
134
+ */
135
+static inline uint32_t vext_get_total_elems(CPURISCVState *env, uint32_t desc,
136
+ uint32_t esz)
137
+{
138
+ uint32_t vlenb = simd_maxsz(desc);
139
+ uint32_t sew = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW);
140
+ int8_t emul = ctzl(esz) - ctzl(sew) + vext_lmul(desc) < 0 ? 0 :
141
+ ctzl(esz) - ctzl(sew) + vext_lmul(desc);
142
+ return (vlenb << emul) / esz;
143
+}
144
+
145
+/* set agnostic elements to 1s */
146
+void vext_set_elems_1s(void *base, uint32_t is_agnostic, uint32_t cnt,
147
+ uint32_t tot);
148
+
149
+/* expand macro args before macro */
150
+#define RVVCALL(macro, ...) macro(__VA_ARGS__)
151
+
152
+/* (TD, T1, T2, TX1, TX2) */
153
+#define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t
154
+#define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t
155
+#define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t
156
+#define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t
157
+
158
+/* operation of two vector elements */
159
+typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i);
160
+
161
+#define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
162
+static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \
163
+{ \
164
+ TX1 s1 = *((T1 *)vs1 + HS1(i)); \
165
+ TX2 s2 = *((T2 *)vs2 + HS2(i)); \
166
+ *((TD *)vd + HD(i)) = OP(s2, s1); \
167
+}
168
+
169
+void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2,
170
+ CPURISCVState *env, uint32_t desc,
171
+ opivv2_fn *fn, uint32_t esz);
172
+
173
+/* generate the helpers for OPIVV */
174
+#define GEN_VEXT_VV(NAME, ESZ) \
175
+void HELPER(NAME)(void *vd, void *v0, void *vs1, \
176
+ void *vs2, CPURISCVState *env, \
177
+ uint32_t desc) \
178
+{ \
179
+ do_vext_vv(vd, v0, vs1, vs2, env, desc, \
180
+ do_##NAME, ESZ); \
181
+}
182
+
183
+typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i);
184
+
185
+/*
186
+ * (T1)s1 gives the real operator type.
187
+ * (TX1)(T1)s1 expands the operator type of widen or narrow operations.
188
+ */
189
+#define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
190
+static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \
191
+{ \
192
+ TX2 s2 = *((T2 *)vs2 + HS2(i)); \
193
+ *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1); \
194
+}
195
+
196
+void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2,
197
+ CPURISCVState *env, uint32_t desc,
198
+ opivx2_fn fn, uint32_t esz);
199
+
200
+/* generate the helpers for OPIVX */
201
+#define GEN_VEXT_VX(NAME, ESZ) \
202
+void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
203
+ void *vs2, CPURISCVState *env, \
204
+ uint32_t desc) \
205
+{ \
206
+ do_vext_vx(vd, v0, s1, vs2, env, desc, \
207
+ do_##NAME, ESZ); \
208
+}
209
+
210
+#endif /* TARGET_RISCV_VECTOR_INTERNALS_H */
101
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
211
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
102
index XXXXXXX..XXXXXXX 100644
212
index XXXXXXX..XXXXXXX 100644
103
--- a/target/riscv/vector_helper.c
213
--- a/target/riscv/vector_helper.c
104
+++ b/target/riscv/vector_helper.c
214
+++ b/target/riscv/vector_helper.c
105
@@ -XXX,XX +XXX,XX @@ static inline int32_t vext_lmul(uint32_t desc)
215
@@ -XXX,XX +XXX,XX @@
106
return sextract32(FIELD_EX32(simd_data(desc), VDATA, LMUL), 0, 3);
216
#include "fpu/softfloat.h"
217
#include "tcg/tcg-gvec-desc.h"
218
#include "internals.h"
219
     return vl;
 }
 
+static inline uint32_t vext_vta(uint32_t desc)
+{
+    return FIELD_EX32(simd_data(desc), VDATA, VTA);
+}
+
 /*
  * Get the maximum number of elements can be operated.
  *
@@ -XXX,XX +XXX,XX @@ static inline uint32_t vext_max_elems(uint32_t desc, uint32_t log2_esz)
     return scale < 0 ? vlenb >> -scale : vlenb << scale;
 }
 
+/*
+ * Get number of total elements, including prestart, body and tail elements.
+ * Note that when LMUL < 1, the tail includes the elements past VLMAX that
+ * are held in the same vector register.
+ */
+static inline uint32_t vext_get_total_elems(CPURISCVState *env, uint32_t desc,
+                                            uint32_t esz)
+{
+    uint32_t vlenb = simd_maxsz(desc);
+    uint32_t sew = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW);
+    int8_t emul = ctzl(esz) - ctzl(sew) + vext_lmul(desc) < 0 ? 0 :
+                  ctzl(esz) - ctzl(sew) + vext_lmul(desc);
+    return (vlenb << emul) / esz;
+}
+
 static inline target_ulong adjust_addr(CPURISCVState *env, target_ulong addr)
 {
     return (addr & env->cur_pmmask) | env->cur_pmbase;
@@ -XXX,XX +XXX,XX @@ static void probe_pages(CPURISCVState *env, target_ulong addr,
     }
 }
 
+/* set agnostic elements to 1s */
+static void vext_set_elems_1s(void *base, uint32_t is_agnostic, uint32_t cnt,
+                              uint32_t tot)
+{
+    if (is_agnostic == 0) {
+        /* policy undisturbed */
+        return;
+    }
+    if (tot - cnt == 0) {
+        return;
+    }
+    memset(base + cnt, -1, tot - cnt);
+}
+
 static inline void vext_set_elem_mask(void *v0, int index,
                                       uint8_t value)
 {
@@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB)
 
 static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2,
                        CPURISCVState *env, uint32_t desc,
-                       opivv2_fn *fn)
+                       opivv2_fn *fn, uint32_t esz)
 {
     uint32_t vm = vext_vm(desc);
     uint32_t vl = env->vl;
+    uint32_t total_elems = vext_get_total_elems(env, desc, esz);
+    uint32_t vta = vext_vta(desc);
     uint32_t i;
 
     for (i = env->vstart; i < vl; i++) {
@@ -XXX,XX +XXX,XX @@ static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2,
         fn(vd, vs1, vs2, i);
     }
     env->vstart = 0;
+    /* set tail elements to 1s */
+    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
 }
 
 /* generate the helpers for OPIVV */
-#define GEN_VEXT_VV(NAME)                                 \
+#define GEN_VEXT_VV(NAME, ESZ)                            \
 void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
                   void *vs2, CPURISCVState *env,          \
                   uint32_t desc)                          \
 {                                                         \
     do_vext_vv(vd, v0, vs1, vs2, env, desc,               \
-               do_##NAME);                                \
+               do_##NAME, ESZ);                           \
 }
 
-GEN_VEXT_VV(vadd_vv_b)
-GEN_VEXT_VV(vadd_vv_h)
-GEN_VEXT_VV(vadd_vv_w)
-GEN_VEXT_VV(vadd_vv_d)
-GEN_VEXT_VV(vsub_vv_b)
-GEN_VEXT_VV(vsub_vv_h)
-GEN_VEXT_VV(vsub_vv_w)
-GEN_VEXT_VV(vsub_vv_d)
+GEN_VEXT_VV(vadd_vv_b, 1)
+GEN_VEXT_VV(vadd_vv_h, 2)
+GEN_VEXT_VV(vadd_vv_w, 4)
+GEN_VEXT_VV(vadd_vv_d, 8)
+GEN_VEXT_VV(vsub_vv_b, 1)
+GEN_VEXT_VV(vsub_vv_h, 2)
+GEN_VEXT_VV(vsub_vv_w, 4)
+GEN_VEXT_VV(vsub_vv_d, 8)
 
 typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i);
 
@@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD)
 RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB)
 RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB)
 RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB)
-GEN_VEXT_VV(vwaddu_vv_b)
-GEN_VEXT_VV(vwaddu_vv_h)
-GEN_VEXT_VV(vwaddu_vv_w)
-GEN_VEXT_VV(vwsubu_vv_b)
-GEN_VEXT_VV(vwsubu_vv_h)
-GEN_VEXT_VV(vwsubu_vv_w)
-GEN_VEXT_VV(vwadd_vv_b)
-GEN_VEXT_VV(vwadd_vv_h)
-GEN_VEXT_VV(vwadd_vv_w)
-GEN_VEXT_VV(vwsub_vv_b)
-GEN_VEXT_VV(vwsub_vv_h)
-GEN_VEXT_VV(vwsub_vv_w)
-GEN_VEXT_VV(vwaddu_wv_b)
-GEN_VEXT_VV(vwaddu_wv_h)
-GEN_VEXT_VV(vwaddu_wv_w)
-GEN_VEXT_VV(vwsubu_wv_b)
-GEN_VEXT_VV(vwsubu_wv_h)
-GEN_VEXT_VV(vwsubu_wv_w)
-GEN_VEXT_VV(vwadd_wv_b)
-GEN_VEXT_VV(vwadd_wv_h)
-GEN_VEXT_VV(vwadd_wv_w)
-GEN_VEXT_VV(vwsub_wv_b)
-GEN_VEXT_VV(vwsub_wv_h)
-GEN_VEXT_VV(vwsub_wv_w)
+GEN_VEXT_VV(vwaddu_vv_b, 2)
+GEN_VEXT_VV(vwaddu_vv_h, 4)
+GEN_VEXT_VV(vwaddu_vv_w, 8)
+GEN_VEXT_VV(vwsubu_vv_b, 2)
+GEN_VEXT_VV(vwsubu_vv_h, 4)
+GEN_VEXT_VV(vwsubu_vv_w, 8)
+GEN_VEXT_VV(vwadd_vv_b, 2)
+GEN_VEXT_VV(vwadd_vv_h, 4)
+GEN_VEXT_VV(vwadd_vv_w, 8)
+GEN_VEXT_VV(vwsub_vv_b, 2)
+GEN_VEXT_VV(vwsub_vv_h, 4)
+GEN_VEXT_VV(vwsub_vv_w, 8)
+GEN_VEXT_VV(vwaddu_wv_b, 2)
+GEN_VEXT_VV(vwaddu_wv_h, 4)
+GEN_VEXT_VV(vwaddu_wv_w, 8)
+GEN_VEXT_VV(vwsubu_wv_b, 2)
+GEN_VEXT_VV(vwsubu_wv_h, 4)
+GEN_VEXT_VV(vwsubu_wv_w, 8)
+GEN_VEXT_VV(vwadd_wv_b, 2)
+GEN_VEXT_VV(vwadd_wv_h, 4)
+GEN_VEXT_VV(vwadd_wv_w, 8)
+GEN_VEXT_VV(vwsub_wv_b, 2)
+GEN_VEXT_VV(vwsub_wv_h, 4)
+GEN_VEXT_VV(vwsub_wv_w, 8)
 
 RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD)
 RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD)
@@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR)
 RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR)
 RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR)
 RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR)
-GEN_VEXT_VV(vand_vv_b)
-GEN_VEXT_VV(vand_vv_h)
-GEN_VEXT_VV(vand_vv_w)
-GEN_VEXT_VV(vand_vv_d)
-GEN_VEXT_VV(vor_vv_b)
-GEN_VEXT_VV(vor_vv_h)
-GEN_VEXT_VV(vor_vv_w)
-GEN_VEXT_VV(vor_vv_d)
-GEN_VEXT_VV(vxor_vv_b)
-GEN_VEXT_VV(vxor_vv_h)
-GEN_VEXT_VV(vxor_vv_w)
-GEN_VEXT_VV(vxor_vv_d)
+GEN_VEXT_VV(vand_vv_b, 1)
+GEN_VEXT_VV(vand_vv_h, 2)
+GEN_VEXT_VV(vand_vv_w, 4)
+GEN_VEXT_VV(vand_vv_d, 8)
+GEN_VEXT_VV(vor_vv_b, 1)
+GEN_VEXT_VV(vor_vv_h, 2)
+GEN_VEXT_VV(vor_vv_w, 4)
+GEN_VEXT_VV(vor_vv_d, 8)
+GEN_VEXT_VV(vxor_vv_b, 1)
+GEN_VEXT_VV(vxor_vv_h, 2)
+GEN_VEXT_VV(vxor_vv_w, 4)
+GEN_VEXT_VV(vxor_vv_d, 8)
 
 RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND)
 RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND)
@@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX)
 RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX)
 RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX)
 RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX)
-GEN_VEXT_VV(vminu_vv_b)
-GEN_VEXT_VV(vminu_vv_h)
-GEN_VEXT_VV(vminu_vv_w)
-GEN_VEXT_VV(vminu_vv_d)
-GEN_VEXT_VV(vmin_vv_b)
-GEN_VEXT_VV(vmin_vv_h)
-GEN_VEXT_VV(vmin_vv_w)
-GEN_VEXT_VV(vmin_vv_d)
-GEN_VEXT_VV(vmaxu_vv_b)
-GEN_VEXT_VV(vmaxu_vv_h)
-GEN_VEXT_VV(vmaxu_vv_w)
-GEN_VEXT_VV(vmaxu_vv_d)
-GEN_VEXT_VV(vmax_vv_b)
-GEN_VEXT_VV(vmax_vv_h)
-GEN_VEXT_VV(vmax_vv_w)
-GEN_VEXT_VV(vmax_vv_d)
+GEN_VEXT_VV(vminu_vv_b, 1)
+GEN_VEXT_VV(vminu_vv_h, 2)
+GEN_VEXT_VV(vminu_vv_w, 4)
+GEN_VEXT_VV(vminu_vv_d, 8)
+GEN_VEXT_VV(vmin_vv_b, 1)
+GEN_VEXT_VV(vmin_vv_h, 2)
+GEN_VEXT_VV(vmin_vv_w, 4)
+GEN_VEXT_VV(vmin_vv_d, 8)
+GEN_VEXT_VV(vmaxu_vv_b, 1)
+GEN_VEXT_VV(vmaxu_vv_h, 2)
+GEN_VEXT_VV(vmaxu_vv_w, 4)
+GEN_VEXT_VV(vmaxu_vv_d, 8)
+GEN_VEXT_VV(vmax_vv_b, 1)
+GEN_VEXT_VV(vmax_vv_h, 2)
+GEN_VEXT_VV(vmax_vv_w, 4)
+GEN_VEXT_VV(vmax_vv_d, 8)
 
 RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN)
 RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN)
@@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL)
 RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL)
 RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL)
 RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL)
-GEN_VEXT_VV(vmul_vv_b)
-GEN_VEXT_VV(vmul_vv_h)
-GEN_VEXT_VV(vmul_vv_w)
-GEN_VEXT_VV(vmul_vv_d)
+GEN_VEXT_VV(vmul_vv_b, 1)
+GEN_VEXT_VV(vmul_vv_h, 2)
+GEN_VEXT_VV(vmul_vv_w, 4)
+GEN_VEXT_VV(vmul_vv_d, 8)
 
 static int8_t do_mulh_b(int8_t s2, int8_t s1)
 {
@@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b)
 RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h)
 RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w)
 RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d)
-GEN_VEXT_VV(vmulh_vv_b)
-GEN_VEXT_VV(vmulh_vv_h)
-GEN_VEXT_VV(vmulh_vv_w)
-GEN_VEXT_VV(vmulh_vv_d)
-GEN_VEXT_VV(vmulhu_vv_b)
-GEN_VEXT_VV(vmulhu_vv_h)
-GEN_VEXT_VV(vmulhu_vv_w)
-GEN_VEXT_VV(vmulhu_vv_d)
-GEN_VEXT_VV(vmulhsu_vv_b)
-GEN_VEXT_VV(vmulhsu_vv_h)
-GEN_VEXT_VV(vmulhsu_vv_w)
-GEN_VEXT_VV(vmulhsu_vv_d)
+GEN_VEXT_VV(vmulh_vv_b, 1)
+GEN_VEXT_VV(vmulh_vv_h, 2)
+GEN_VEXT_VV(vmulh_vv_w, 4)
+GEN_VEXT_VV(vmulh_vv_d, 8)
+GEN_VEXT_VV(vmulhu_vv_b, 1)
+GEN_VEXT_VV(vmulhu_vv_h, 2)
+GEN_VEXT_VV(vmulhu_vv_w, 4)
+GEN_VEXT_VV(vmulhu_vv_d, 8)
+GEN_VEXT_VV(vmulhsu_vv_b, 1)
+GEN_VEXT_VV(vmulhsu_vv_h, 2)
+GEN_VEXT_VV(vmulhsu_vv_w, 4)
+GEN_VEXT_VV(vmulhsu_vv_d, 8)
 
 RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL)
 RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL)
@@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM)
 RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM)
 RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM)
 RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM)
-GEN_VEXT_VV(vdivu_vv_b)
-GEN_VEXT_VV(vdivu_vv_h)
-GEN_VEXT_VV(vdivu_vv_w)
-GEN_VEXT_VV(vdivu_vv_d)
-GEN_VEXT_VV(vdiv_vv_b)
-GEN_VEXT_VV(vdiv_vv_h)
-GEN_VEXT_VV(vdiv_vv_w)
-GEN_VEXT_VV(vdiv_vv_d)
-GEN_VEXT_VV(vremu_vv_b)
-GEN_VEXT_VV(vremu_vv_h)
-GEN_VEXT_VV(vremu_vv_w)
-GEN_VEXT_VV(vremu_vv_d)
-GEN_VEXT_VV(vrem_vv_b)
-GEN_VEXT_VV(vrem_vv_h)
-GEN_VEXT_VV(vrem_vv_w)
-GEN_VEXT_VV(vrem_vv_d)
+GEN_VEXT_VV(vdivu_vv_b, 1)
+GEN_VEXT_VV(vdivu_vv_h, 2)
+GEN_VEXT_VV(vdivu_vv_w, 4)
+GEN_VEXT_VV(vdivu_vv_d, 8)
+GEN_VEXT_VV(vdiv_vv_b, 1)
+GEN_VEXT_VV(vdiv_vv_h, 2)
+GEN_VEXT_VV(vdiv_vv_w, 4)
+GEN_VEXT_VV(vdiv_vv_d, 8)
+GEN_VEXT_VV(vremu_vv_b, 1)
+GEN_VEXT_VV(vremu_vv_h, 2)
+GEN_VEXT_VV(vremu_vv_w, 4)
+GEN_VEXT_VV(vremu_vv_d, 8)
+GEN_VEXT_VV(vrem_vv_b, 1)
+GEN_VEXT_VV(vrem_vv_h, 2)
+GEN_VEXT_VV(vrem_vv_w, 4)
+GEN_VEXT_VV(vrem_vv_d, 8)
 
 RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU)
 RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU)
@@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL)
 RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL)
 RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL)
 RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL)
-GEN_VEXT_VV(vwmul_vv_b)
-GEN_VEXT_VV(vwmul_vv_h)
-GEN_VEXT_VV(vwmul_vv_w)
-GEN_VEXT_VV(vwmulu_vv_b)
-GEN_VEXT_VV(vwmulu_vv_h)
-GEN_VEXT_VV(vwmulu_vv_w)
-GEN_VEXT_VV(vwmulsu_vv_b)
-GEN_VEXT_VV(vwmulsu_vv_h)
-GEN_VEXT_VV(vwmulsu_vv_w)
+GEN_VEXT_VV(vwmul_vv_b, 2)
+GEN_VEXT_VV(vwmul_vv_h, 4)
+GEN_VEXT_VV(vwmul_vv_w, 8)
+GEN_VEXT_VV(vwmulu_vv_b, 2)
+GEN_VEXT_VV(vwmulu_vv_h, 4)
+GEN_VEXT_VV(vwmulu_vv_w, 8)
+GEN_VEXT_VV(vwmulsu_vv_b, 2)
+GEN_VEXT_VV(vwmulsu_vv_h, 4)
+GEN_VEXT_VV(vwmulsu_vv_w, 8)
 
 RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL)
 RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL)
@@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB)
 RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB)
 RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB)
 RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB)
-GEN_VEXT_VV(vmacc_vv_b)
-GEN_VEXT_VV(vmacc_vv_h)
-GEN_VEXT_VV(vmacc_vv_w)
-GEN_VEXT_VV(vmacc_vv_d)
-GEN_VEXT_VV(vnmsac_vv_b)
-GEN_VEXT_VV(vnmsac_vv_h)
-GEN_VEXT_VV(vnmsac_vv_w)
-GEN_VEXT_VV(vnmsac_vv_d)
-GEN_VEXT_VV(vmadd_vv_b)
-GEN_VEXT_VV(vmadd_vv_h)
-GEN_VEXT_VV(vmadd_vv_w)
-GEN_VEXT_VV(vmadd_vv_d)
-GEN_VEXT_VV(vnmsub_vv_b)
-GEN_VEXT_VV(vnmsub_vv_h)
-GEN_VEXT_VV(vnmsub_vv_w)
-GEN_VEXT_VV(vnmsub_vv_d)
+GEN_VEXT_VV(vmacc_vv_b, 1)
+GEN_VEXT_VV(vmacc_vv_h, 2)
+GEN_VEXT_VV(vmacc_vv_w, 4)
+GEN_VEXT_VV(vmacc_vv_d, 8)
+GEN_VEXT_VV(vnmsac_vv_b, 1)
+GEN_VEXT_VV(vnmsac_vv_h, 2)
+GEN_VEXT_VV(vnmsac_vv_w, 4)
+GEN_VEXT_VV(vnmsac_vv_d, 8)
+GEN_VEXT_VV(vmadd_vv_b, 1)
+GEN_VEXT_VV(vmadd_vv_h, 2)
+GEN_VEXT_VV(vmadd_vv_w, 4)
+GEN_VEXT_VV(vmadd_vv_d, 8)
+GEN_VEXT_VV(vnmsub_vv_b, 1)
+GEN_VEXT_VV(vnmsub_vv_h, 2)
+GEN_VEXT_VV(vnmsub_vv_w, 4)
+GEN_VEXT_VV(vnmsub_vv_d, 8)
 
 #define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)             \
 static void do_##NAME(void *vd, target_long s1, void *vs2, int i)   \
@@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC)
 RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC)
 RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC)
 RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC)
-GEN_VEXT_VV(vwmaccu_vv_b)
-GEN_VEXT_VV(vwmaccu_vv_h)
-GEN_VEXT_VV(vwmaccu_vv_w)
-GEN_VEXT_VV(vwmacc_vv_b)
-GEN_VEXT_VV(vwmacc_vv_h)
-GEN_VEXT_VV(vwmacc_vv_w)
-GEN_VEXT_VV(vwmaccsu_vv_b)
-GEN_VEXT_VV(vwmaccsu_vv_h)
-GEN_VEXT_VV(vwmaccsu_vv_w)
+GEN_VEXT_VV(vwmaccu_vv_b, 2)
+GEN_VEXT_VV(vwmaccu_vv_h, 4)
+GEN_VEXT_VV(vwmaccu_vv_w, 8)
+GEN_VEXT_VV(vwmacc_vv_b, 2)
+GEN_VEXT_VV(vwmacc_vv_h, 4)
+GEN_VEXT_VV(vwmacc_vv_w, 8)
+GEN_VEXT_VV(vwmaccsu_vv_b, 2)
+GEN_VEXT_VV(vwmaccsu_vv_h, 4)
+GEN_VEXT_VV(vwmaccsu_vv_w, 8)
 
 RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC)
 RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC)
diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -XXX,XX +XXX,XX @@ do_opivv_gvec(DisasContext *s, arg_rmrr *a, GVecGen3Fn *gvec_fn,
     tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
     tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
 
-    if (a->vm && s->vl_eq_vlmax) {
+    if (a->vm && s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) {
         gvec_fn(s->sew, vreg_ofs(s, a->rd),
                 vreg_ofs(s, a->rs2), vreg_ofs(s, a->rs1),
                 MAXSZ(s), MAXSZ(s));
@@ -XXX,XX +XXX,XX @@ do_opivv_gvec(DisasContext *s, arg_rmrr *a, GVecGen3Fn *gvec_fn,
 
     data = FIELD_DP32(data, VDATA, VM, a->vm);
     data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
+    data = FIELD_DP32(data, VDATA, VTA, s->vta);
     tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),
                        vreg_ofs(s, a->rs1), vreg_ofs(s, a->rs2),
                        cpu_env, s->cfg_ptr->vlen / 8,
-- 
2.36.1
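The arithmetic in vext_get_total_elems() above is easy to sanity-check by hand. Here is a standalone C sketch with made-up parameters (none of these numbers or names come from the patch; it only mirrors the computation quoted in the diff):

#include <stdio.h>

int main(void)
{
    unsigned vlenb = 16;        /* assumed VLEN = 128 bits */
    unsigned esz = 4, sew = 4;  /* 32-bit elements, widths in bytes */
    int lmul = -1;              /* LMUL = 1/2 */

    /* ctzl(4) == 2 for both esz and sew; emul is clamped at 0, so even a
     * fractional LMUL counts a whole register's worth of elements. */
    int raw = 2 - 2 + lmul;
    int emul = raw < 0 ? 0 : raw;
    unsigned total = (vlenb << emul) / esz;

    /* VLMAX = vlenb * LMUL / sew = 2, but total is 4: the two elements
     * past VLMAX in the same register are treated as tail elements. */
    printf("total elements = %u\n", total);
    return 0;
}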
+#include "vector_internals.h"
 #include <math.h>
 
 target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
@@ -XXX,XX +XXX,XX @@ target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
     return vl;
 }
 
-/*
- * Note that vector data is stored in host-endian 64-bit chunks,
- * so addressing units smaller than that needs a host-endian fixup.
- */
-#if HOST_BIG_ENDIAN
-#define H1(x)   ((x) ^ 7)
-#define H1_2(x) ((x) ^ 6)
-#define H1_4(x) ((x) ^ 4)
-#define H2(x)   ((x) ^ 3)
-#define H4(x)   ((x) ^ 1)
-#define H8(x)   ((x))
-#else
-#define H1(x)   (x)
-#define H1_2(x) (x)
-#define H1_4(x) (x)
-#define H2(x)   (x)
-#define H4(x)   (x)
-#define H8(x)   (x)
-#endif
-
-static inline uint32_t vext_nf(uint32_t desc)
-{
-    return FIELD_EX32(simd_data(desc), VDATA, NF);
-}
-
-static inline uint32_t vext_vm(uint32_t desc)
-{
-    return FIELD_EX32(simd_data(desc), VDATA, VM);
-}
-
-/*
- * Encode LMUL to lmul as following:
- *     LMUL    vlmul    lmul
- *      1       000       0
- *      2       001       1
- *      4       010       2
- *      8       011       3
- *      -       100       -
- *     1/8      101      -3
- *     1/4      110      -2
- *     1/2      111      -1
- */
-static inline int32_t vext_lmul(uint32_t desc)
-{
-    return sextract32(FIELD_EX32(simd_data(desc), VDATA, LMUL), 0, 3);
-}
-
-static inline uint32_t vext_vta(uint32_t desc)
-{
-    return FIELD_EX32(simd_data(desc), VDATA, VTA);
-}
-
-static inline uint32_t vext_vma(uint32_t desc)
-{
-    return FIELD_EX32(simd_data(desc), VDATA, VMA);
-}
-
-static inline uint32_t vext_vta_all_1s(uint32_t desc)
-{
-    return FIELD_EX32(simd_data(desc), VDATA, VTA_ALL_1S);
-}
-
 /*
  * Get the maximum number of elements can be operated.
  *
@@ -XXX,XX +XXX,XX @@ static inline uint32_t vext_max_elems(uint32_t desc, uint32_t log2_esz)
     return scale < 0 ? vlenb >> -scale : vlenb << scale;
 }
 
-/*
- * Get number of total elements, including prestart, body and tail elements.
- * Note that when LMUL < 1, the tail includes the elements past VLMAX that
- * are held in the same vector register.
- */
-static inline uint32_t vext_get_total_elems(CPURISCVState *env, uint32_t desc,
-                                            uint32_t esz)
-{
-    uint32_t vlenb = simd_maxsz(desc);
-    uint32_t sew = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW);
-    int8_t emul = ctzl(esz) - ctzl(sew) + vext_lmul(desc) < 0 ? 0 :
-                  ctzl(esz) - ctzl(sew) + vext_lmul(desc);
-    return (vlenb << emul) / esz;
-}
-
 static inline target_ulong adjust_addr(CPURISCVState *env, target_ulong addr)
 {
     return (addr & ~env->cur_pmmask) | env->cur_pmbase;
@@ -XXX,XX +XXX,XX @@ static void probe_pages(CPURISCVState *env, target_ulong addr,
     }
 }
 
-/* set agnostic elements to 1s */
-static void vext_set_elems_1s(void *base, uint32_t is_agnostic, uint32_t cnt,
-                              uint32_t tot)
-{
-    if (is_agnostic == 0) {
-        /* policy undisturbed */
-        return;
-    }
-    if (tot - cnt == 0) {
-        return;
-    }
-    memset(base + cnt, -1, tot - cnt);
-}
-
 static inline void vext_set_elem_mask(void *v0, int index,
                                       uint8_t value)
 {
@@ -XXX,XX +XXX,XX @@ static inline void vext_set_elem_mask(void *v0, int index,
     ((uint64_t *)v0)[idx] = deposit64(old, pos, 1, value);
 }
 
-/*
- * Earlier designs (pre-0.9) had a varying number of bits
- * per mask value (MLEN). In the 0.9 design, MLEN=1.
- * (Section 4.5)
- */
-static inline int vext_elem_mask(void *v0, int index)
-{
-    int idx = index / 64;
-    int pos = index % 64;
-    return (((uint64_t *)v0)[idx] >> pos) & 1;
-}
-
 /* elements operations for load and store */
 typedef void vext_ldst_elem_fn(CPURISCVState *env, abi_ptr addr,
                                uint32_t idx, void *vd, uintptr_t retaddr);
@@ -XXX,XX +XXX,XX @@ GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b)
  * Vector Integer Arithmetic Instructions
  */
 
-/* expand macro args before macro */
-#define RVVCALL(macro, ...)  macro(__VA_ARGS__)
-
 /* (TD, T1, T2, TX1, TX2) */
 #define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t
 #define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t
 #define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t
 #define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t
-#define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t
-#define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t
-#define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t
-#define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t
 #define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t
 #define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t
 #define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t
@@ -XXX,XX +XXX,XX @@ GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b)
 #define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t
 #define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t
 
-/* operation of two vector elements */
-typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i);
-
-#define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)    \
-static void do_##NAME(void *vd, void *vs1, void *vs2, int i)    \
-{                                                               \
-    TX1 s1 = *((T1 *)vs1 + HS1(i));                             \
-    TX2 s2 = *((T2 *)vs2 + HS2(i));                             \
-    *((TD *)vd + HD(i)) = OP(s2, s1);                           \
-}
 #define DO_SUB(N, M) (N - M)
 #define DO_RSUB(N, M) (M - N)
 
@@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB)
 RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB)
 RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB)
 
-static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2,
-                       CPURISCVState *env, uint32_t desc,
-                       opivv2_fn *fn, uint32_t esz)
-{
-    uint32_t vm = vext_vm(desc);
-    uint32_t vl = env->vl;
-    uint32_t total_elems = vext_get_total_elems(env, desc, esz);
-    uint32_t vta = vext_vta(desc);
-    uint32_t vma = vext_vma(desc);
-    uint32_t i;
-
-    for (i = env->vstart; i < vl; i++) {
-        if (!vm && !vext_elem_mask(v0, i)) {
-            /* set masked-off elements to 1s */
-            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);
-            continue;
-        }
-        fn(vd, vs1, vs2, i);
-    }
-    env->vstart = 0;
-    /* set tail elements to 1s */
-    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
-}
-
-/* generate the helpers for OPIVV */
-#define GEN_VEXT_VV(NAME, ESZ)                    \
-void HELPER(NAME)(void *vd, void *v0, void *vs1,  \
-                  void *vs2, CPURISCVState *env,  \
-                  uint32_t desc)                  \
-{                                                 \
-    do_vext_vv(vd, v0, vs1, vs2, env, desc,       \
-               do_##NAME, ESZ);                   \
-}
-
 GEN_VEXT_VV(vadd_vv_b, 1)
 GEN_VEXT_VV(vadd_vv_h, 2)
 GEN_VEXT_VV(vadd_vv_w, 4)
@@ -XXX,XX +XXX,XX @@ GEN_VEXT_VV(vsub_vv_h, 2)
 GEN_VEXT_VV(vsub_vv_w, 4)
 GEN_VEXT_VV(vsub_vv_d, 8)
 
-typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i);
-
-/*
- * (T1)s1 gives the real operator type.
- * (TX1)(T1)s1 expands the operator type of widen or narrow operations.
- */
-#define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)              \
-static void do_##NAME(void *vd, target_long s1, void *vs2, int i)    \
-{                                                                    \
-    TX2 s2 = *((T2 *)vs2 + HS2(i));                                  \
-    *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1);                       \
-}
 
 RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD)
 RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD)
@@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB)
 RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB)
 RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB)
 
-static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2,
-                       CPURISCVState *env, uint32_t desc,
-                       opivx2_fn fn, uint32_t esz)
-{
-    uint32_t vm = vext_vm(desc);
-    uint32_t vl = env->vl;
-    uint32_t total_elems = vext_get_total_elems(env, desc, esz);
-    uint32_t vta = vext_vta(desc);
-    uint32_t vma = vext_vma(desc);
-    uint32_t i;
-
-    for (i = env->vstart; i < vl; i++) {
-        if (!vm && !vext_elem_mask(v0, i)) {
-            /* set masked-off elements to 1s */
-            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);
-            continue;
-        }
-        fn(vd, s1, vs2, i);
-    }
-    env->vstart = 0;
-    /* set tail elements to 1s */
-    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
-}
-
-/* generate the helpers for OPIVX */
-#define GEN_VEXT_VX(NAME, ESZ)                          \
-void HELPER(NAME)(void *vd, void *v0, target_ulong s1,  \
-                  void *vs2, CPURISCVState *env,        \
-                  uint32_t desc)                        \
-{                                                       \
-    do_vext_vx(vd, v0, s1, vs2, env, desc,              \
-               do_##NAME, ESZ);                         \
-}
-
 GEN_VEXT_VX(vadd_vx_b, 1)
 GEN_VEXT_VX(vadd_vx_h, 2)
 GEN_VEXT_VX(vadd_vx_w, 4)
diff --git a/target/riscv/vector_internals.c b/target/riscv/vector_internals.c
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/target/riscv/vector_internals.c
@@ -XXX,XX +XXX,XX @@
+/*
+ * RISC-V Vector Extension Internals
+ *
+ * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "vector_internals.h"
+
+/* set agnostic elements to 1s */
+void vext_set_elems_1s(void *base, uint32_t is_agnostic, uint32_t cnt,
+                       uint32_t tot)
+{
+    if (is_agnostic == 0) {
+        /* policy undisturbed */
+        return;
+    }
+    if (tot - cnt == 0) {
+        return;
+    }
+    memset(base + cnt, -1, tot - cnt);
+}
+
+void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2,
+                CPURISCVState *env, uint32_t desc,
+                opivv2_fn *fn, uint32_t esz)
+{
+    uint32_t vm = vext_vm(desc);
+    uint32_t vl = env->vl;
+    uint32_t total_elems = vext_get_total_elems(env, desc, esz);
+    uint32_t vta = vext_vta(desc);
+    uint32_t vma = vext_vma(desc);
+    uint32_t i;
+
+    for (i = env->vstart; i < vl; i++) {
+        if (!vm && !vext_elem_mask(v0, i)) {
+            /* set masked-off elements to 1s */
+            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);
+            continue;
+        }
+        fn(vd, vs1, vs2, i);
+    }
+    env->vstart = 0;
+    /* set tail elements to 1s */
+    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
+}
+
+void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2,
+                CPURISCVState *env, uint32_t desc,
+                opivx2_fn fn, uint32_t esz)
+{
+    uint32_t vm = vext_vm(desc);
+    uint32_t vl = env->vl;
+    uint32_t total_elems = vext_get_total_elems(env, desc, esz);
+    uint32_t vta = vext_vta(desc);
+    uint32_t vma = vext_vma(desc);
+    uint32_t i;
+
+    for (i = env->vstart; i < vl; i++) {
+        if (!vm && !vext_elem_mask(v0, i)) {
+            /* set masked-off elements to 1s */
+            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);
+            continue;
+        }
+        fn(vd, s1, vs2, i);
+    }
+    env->vstart = 0;
+    /* set tail elements to 1s */
+    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
+}
diff --git a/target/riscv/meson.build b/target/riscv/meson.build
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/meson.build
+++ b/target/riscv/meson.build
@@ -XXX,XX +XXX,XX @@ riscv_ss.add(files(
   'gdbstub.c',
   'op_helper.c',
   'vector_helper.c',
+  'vector_internals.c',
   'bitmanip_helper.c',
   'translate.c',
   'm128_helper.c',
-- 
2.41.0
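To make the byte arithmetic in do_vext_vv()/do_vext_vx() above concrete, here is a minimal standalone sketch of how the (cnt, tot) byte ranges handed to vext_set_elems_1s() are formed. The element size, vl and total element count are assumed values for illustration only; they are not taken from the series:

#include <stdint.h>
#include <string.h>

static void set_elems_1s(void *base, uint32_t is_agnostic,
                         uint32_t cnt, uint32_t tot)
{
    if (is_agnostic == 0 || tot - cnt == 0) {
        return; /* undisturbed policy, or nothing to fill */
    }
    memset((char *)base + cnt, -1, tot - cnt);
}

int main(void)
{
    uint32_t vd[8] = { 0 };
    uint32_t esz = 4, vl = 3, total_elems = 8; /* hypothetical vadd.vv */

    /* tail: bytes [vl * esz, total_elems * esz) = [12, 32) become all-1s */
    set_elems_1s(vd, 1, vl * esz, total_elems * esz);
    return 0;
}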
From: Alistair Francis <alistair.francis@wdc.com>

When running a 32-bit guest, with an e64 vmv.v.x and vl_eq_vlmax set to
true, the `tcg_debug_assert(vece <= MO_32)` will be triggered inside
tcg_gen_gvec_dup_i32().

This patch checks for that condition and uses tcg_gen_gvec_dup_i64()
instead where required.

Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1028
Suggested-by: Robert Bu <robert.bu@gmail.com>
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20220608234701.369536-1-alistair.francis@opensource.wdc.com>
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
---
 target/riscv/insn_trans/trans_rvv.c.inc | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -XXX,XX +XXX,XX @@ static bool trans_vmv_v_x(DisasContext *s, arg_vmv_v_x *a)
     s1 = get_gpr(s, a->rs1, EXT_SIGN);
 
     if (s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) {
-        tcg_gen_gvec_dup_tl(s->sew, vreg_ofs(s, a->rd),
-                            MAXSZ(s), MAXSZ(s), s1);
+        if (get_xl(s) == MXL_RV32 && s->sew == MO_64) {
+            TCGv_i64 s1_i64 = tcg_temp_new_i64();
+            tcg_gen_ext_tl_i64(s1_i64, s1);
+            tcg_gen_gvec_dup_i64(s->sew, vreg_ofs(s, a->rd),
+                                 MAXSZ(s), MAXSZ(s), s1_i64);
+            tcg_temp_free_i64(s1_i64);
+        } else {
+            tcg_gen_gvec_dup_tl(s->sew, vreg_ofs(s, a->rd),
+                                MAXSZ(s), MAXSZ(s), s1);
+        }
     } else {
         TCGv_i32 desc;
         TCGv_i64 s1_i64 = tcg_temp_new_i64();
-- 
2.36.1
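The root cause is a width mismatch: on an RV32 guest, target_long (and hence a TCGv) is 32 bits wide, so tcg_gen_gvec_dup_tl() ends up on the 32-bit gvec path, which asserts for 64-bit elements. A plain-C illustration of the widening step the fix performs before splatting (this is an analogy, not TCG API):

#include <stdint.h>

/* What tcg_gen_ext_tl_i64() achieves in the fix: a 32-bit GPR value must
 * be sign-extended to 64 bits before it can seed a 64-bit per-lane splat. */
uint64_t splat_seed_rv32(int32_t gpr_val /* 32-bit target_long on RV32 */)
{
    return (uint64_t)(int64_t)gpr_val;
}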
From: Kiran Ostrolenk <kiran.ostrolenk@codethink.co.uk>

Refactor the non-SEW-specific parts of `GEN_OPIVV_TRANS` into a new
function `opivv_trans` (similar to `opivi_trans`). `opivv_trans` will
be used in subsequent vector-crypto commits.

Signed-off-by: Kiran Ostrolenk <kiran.ostrolenk@codethink.co.uk>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn>
Signed-off-by: Max Chou <max.chou@sifive.com>
Message-ID: <20230711165917.2629866-3-max.chou@sifive.com>
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
---
 target/riscv/insn_trans/trans_rvv.c.inc | 62 +++++++++++++------------
 1 file changed, 32 insertions(+), 30 deletions(-)

diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -XXX,XX +XXX,XX @@ GEN_OPIWX_WIDEN_TRANS(vwadd_wx)
 GEN_OPIWX_WIDEN_TRANS(vwsubu_wx)
 GEN_OPIWX_WIDEN_TRANS(vwsub_wx)
 
+static bool opivv_trans(uint32_t vd, uint32_t vs1, uint32_t vs2, uint32_t vm,
+                        gen_helper_gvec_4_ptr *fn, DisasContext *s)
+{
+    uint32_t data = 0;
+    TCGLabel *over = gen_new_label();
+    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
+    tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
+
+    data = FIELD_DP32(data, VDATA, VM, vm);
+    data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
+    data = FIELD_DP32(data, VDATA, VTA, s->vta);
+    data = FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s);
+    data = FIELD_DP32(data, VDATA, VMA, s->vma);
+    tcg_gen_gvec_4_ptr(vreg_ofs(s, vd), vreg_ofs(s, 0), vreg_ofs(s, vs1),
+                       vreg_ofs(s, vs2), cpu_env, s->cfg_ptr->vlen / 8,
+                       s->cfg_ptr->vlen / 8, data, fn);
+    mark_vs_dirty(s);
+    gen_set_label(over);
+    return true;
+}
+
 /* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */
 /* OPIVV without GVEC IR */
-#define GEN_OPIVV_TRANS(NAME, CHECK)                               \
-static bool trans_##NAME(DisasContext *s, arg_rmrr *a)             \
-{                                                                  \
-    if (CHECK(s, a)) {                                             \
-        uint32_t data = 0;                                         \
-        static gen_helper_gvec_4_ptr * const fns[4] = {            \
-            gen_helper_##NAME##_b, gen_helper_##NAME##_h,          \
-            gen_helper_##NAME##_w, gen_helper_##NAME##_d,          \
-        };                                                         \
-        TCGLabel *over = gen_new_label();                          \
-        tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);          \
-        tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \
-                                                                   \
-        data = FIELD_DP32(data, VDATA, VM, a->vm);                 \
-        data = FIELD_DP32(data, VDATA, LMUL, s->lmul);             \
-        data = FIELD_DP32(data, VDATA, VTA, s->vta);               \
-        data =                                                     \
-            FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s);\
-        data = FIELD_DP32(data, VDATA, VMA, s->vma);               \
-        tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),     \
-                           vreg_ofs(s, a->rs1),                    \
-                           vreg_ofs(s, a->rs2), cpu_env,           \
-                           s->cfg_ptr->vlen / 8,                   \
-                           s->cfg_ptr->vlen / 8, data,             \
-                           fns[s->sew]);                           \
-        mark_vs_dirty(s);                                          \
-        gen_set_label(over);                                       \
-        return true;                                               \
-    }                                                              \
-    return false;                                                  \
+#define GEN_OPIVV_TRANS(NAME, CHECK)                                 \
+static bool trans_##NAME(DisasContext *s, arg_rmrr *a)               \
+{                                                                    \
+    if (CHECK(s, a)) {                                               \
+        static gen_helper_gvec_4_ptr * const fns[4] = {              \
+            gen_helper_##NAME##_b, gen_helper_##NAME##_h,            \
+            gen_helper_##NAME##_w, gen_helper_##NAME##_d,            \
+        };                                                           \
+        return opivv_trans(a->rd, a->rs1, a->rs2, a->vm, fns[s->sew], s);\
+    }                                                                \
+    return false;                                                    \
 }
 
 /*
-- 
2.41.0
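As a rough standalone analogy of the dispatch shape this refactor leaves behind, each macro instance now only indexes a per-SEW table of helpers and delegates to one shared function (illustrative C only; none of these names are QEMU's):

#include <stdio.h>

typedef void op_fn(void);
static void op_b(void) { puts("8-bit"); }
static void op_h(void) { puts("16-bit"); }
static void op_w(void) { puts("32-bit"); }
static void op_d(void) { puts("64-bit"); }

static int shared_trans(op_fn *fn) { fn(); return 1; }

int main(void)
{
    static op_fn * const fns[4] = { op_b, op_h, op_w, op_d };
    int sew = 2; /* MO_32-style table index */
    return shared_trans(fns[sew]) ? 0 : 1;
}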
From: eopXD <yueh.ting.chen@gmail.com>

According to v-spec (section 5.4):
When vstart ≥ vl, there are no body elements, and no elements are
updated in any destination vector register group, including that
no tail elements are updated with agnostic values.

The vmsbf.m, vmsif.m, vmsof.m, viota.m and vcompress instructions
themselves require vstart to be zero, so they don't need the early exit.

Signed-off-by: eop Chen <eop.chen@sifive.com>
Reviewed-by: Frank Chang <frank.chang@sifive.com>
Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn>
Acked-by: Alistair Francis <alistair.francis@wdc.com>
Message-Id: <165449614532.19704.7000832880482980398-4@git.sr.ht>
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
---
 target/riscv/insn_trans/trans_rvv.c.inc | 27 +++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -XXX,XX +XXX,XX @@ static bool ldst_us_trans(uint32_t vd, uint32_t rs1, uint32_t data,
     TCGLabel *over = gen_new_label();
     tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
+    tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
 
     dest = tcg_temp_new_ptr();
     mask = tcg_temp_new_ptr();
@@ -XXX,XX +XXX,XX @@ static bool ldst_stride_trans(uint32_t vd, uint32_t rs1, uint32_t rs2,
     TCGLabel *over = gen_new_label();
     tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
+    tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
 
     dest = tcg_temp_new_ptr();
     mask = tcg_temp_new_ptr();
@@ -XXX,XX +XXX,XX @@ static bool ldst_index_trans(uint32_t vd, uint32_t rs1, uint32_t vs2,
     TCGLabel *over = gen_new_label();
     tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
+    tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
 
     dest = tcg_temp_new_ptr();
     mask = tcg_temp_new_ptr();
@@ -XXX,XX +XXX,XX @@ static bool ldff_trans(uint32_t vd, uint32_t rs1, uint32_t data,
     TCGLabel *over = gen_new_label();
     tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
+    tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
 
     dest = tcg_temp_new_ptr();
     mask = tcg_temp_new_ptr();
@@ -XXX,XX +XXX,XX @@ do_opivv_gvec(DisasContext *s, arg_rmrr *a, GVecGen3Fn *gvec_fn,
     }
 
     tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
+    tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
 
     if (a->vm && s->vl_eq_vlmax) {
         gvec_fn(s->sew, vreg_ofs(s, a->rd),
@@ -XXX,XX +XXX,XX @@ static bool opivx_trans(uint32_t vd, uint32_t rs1, uint32_t vs2, uint32_t vm,
     TCGLabel *over = gen_new_label();
     tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
+    tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
 
     dest = tcg_temp_new_ptr();
     mask = tcg_temp_new_ptr();
@@ -XXX,XX +XXX,XX @@ static bool opivi_trans(uint32_t vd, uint32_t imm, uint32_t vs2, uint32_t vm,
     TCGLabel *over = gen_new_label();
     tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
+    tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
 
     dest = tcg_temp_new_ptr();
     mask = tcg_temp_new_ptr();
@@ -XXX,XX +XXX,XX @@ static bool do_opivv_widen(DisasContext *s, arg_rmrr *a,
     uint32_t data = 0;
     TCGLabel *over = gen_new_label();
     tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
+    tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
 
     data = FIELD_DP32(data, VDATA, VM, a->vm);
     data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
@@ -XXX,XX +XXX,XX @@ static bool do_opiwv_widen(DisasContext *s, arg_rmrr *a,
     uint32_t data = 0;
     TCGLabel *over = gen_new_label();
     tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
+    tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
 
     data = FIELD_DP32(data, VDATA, VM, a->vm);
     data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
@@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a)             \
     };                                                             \
     TCGLabel *over = gen_new_label();                              \
     tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);              \
+    tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);     \
                                                                    \
     data = FIELD_DP32(data, VDATA, VM, a->vm);                     \
     data = FIELD_DP32(data, VDATA, LMUL, s->lmul);                 \
@@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a)             \
     };                                                             \
     TCGLabel *over = gen_new_label();                              \
     tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);              \
+    tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);     \
                                                                    \
     data = FIELD_DP32(data, VDATA, VM, a->vm);                     \
     data = FIELD_DP32(data, VDATA, LMUL, s->lmul);                 \
@@ -XXX,XX +XXX,XX @@ static bool trans_vmv_v_v(DisasContext *s, arg_vmv_v_v *a)
         };
         TCGLabel *over = gen_new_label();
         tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
+        tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
 
         tcg_gen_gvec_2_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, a->rs1),
                            cpu_env, s->cfg_ptr->vlen / 8,
@@ -XXX,XX +XXX,XX @@ static bool trans_vmv_v_x(DisasContext *s, arg_vmv_v_x *a)
         TCGv s1;
         TCGLabel *over = gen_new_label();
         tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
+        tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
 
         s1 = get_gpr(s, a->rs1, EXT_SIGN);
 
@@ -XXX,XX +XXX,XX @@ static bool trans_vmv_v_i(DisasContext *s, arg_vmv_v_i *a)
         };
         TCGLabel *over = gen_new_label();
         tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
+        tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
 
         s1 = tcg_constant_i64(simm);
         dest = tcg_temp_new_ptr();
@@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a)         \
     TCGLabel *over = gen_new_label();                          \
     gen_set_rm(s, RISCV_FRM_DYN);                              \
     tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);          \
+    tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \
                                                                \
     data = FIELD_DP32(data, VDATA, VM, a->vm);                 \
     data = FIELD_DP32(data, VDATA, LMUL, s->lmul);             \
@@ -XXX,XX +XXX,XX @@ static bool opfvf_trans(uint32_t vd, uint32_t rs1, uint32_t vs2,
     TCGLabel *over = gen_new_label();
     tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
+    tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
 
     dest = tcg_temp_new_ptr();
     mask = tcg_temp_new_ptr();
@@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a)         \
     TCGLabel *over = gen_new_label();                          \
     gen_set_rm(s, RISCV_FRM_DYN);                              \
     tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);          \
+    tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);\
                                                                \
     data = FIELD_DP32(data, VDATA, VM, a->vm);                 \
     data = FIELD_DP32(data, VDATA, LMUL, s->lmul);             \
@@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a)         \
     TCGLabel *over = gen_new_label();                          \
     gen_set_rm(s, RISCV_FRM_DYN);                              \
     tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);          \
+    tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \
                                                                \
     data = FIELD_DP32(data, VDATA, VM, a->vm);                 \
     data = FIELD_DP32(data, VDATA, LMUL, s->lmul);             \
@@ -XXX,XX +XXX,XX @@ static bool do_opfv(DisasContext *s, arg_rmr *a,
     TCGLabel *over = gen_new_label();
     gen_set_rm(s, rm);
     tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
+    tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
 
     data = FIELD_DP32(data, VDATA, VM, a->vm);
     data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
@@ -XXX,XX +XXX,XX @@ static bool trans_vfmv_v_f(DisasContext *s, arg_vfmv_v_f *a)
         };
         TCGLabel *over = gen_new_label();
         tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
+        tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
 
         t1 = tcg_temp_new_i64();
         /* NaN-box f[rs1] */
@@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a)           \
     TCGLabel *over = gen_new_label();                           \
     gen_set_rm(s, FRM);                                         \
     tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);           \
+    tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);  \
                                                                 \
     data = FIELD_DP32(data, VDATA, VM, a->vm);                  \
@@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a)           \
     TCGLabel *over = gen_new_label();                           \
     gen_set_rm(s, RISCV_FRM_DYN);                               \
     tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);           \
+    tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);  \
                                                                 \
     data = FIELD_DP32(data, VDATA, VM, a->vm);                  \
     tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),      \
@@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a)           \
     TCGLabel *over = gen_new_label();                           \
     gen_set_rm(s, FRM);                                         \
     tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);           \
+    tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);  \
                                                                 \
     data = FIELD_DP32(data, VDATA, VM, a->vm);                  \
     data = FIELD_DP32(data, VDATA, LMUL, s->lmul);              \
@@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a)           \
     TCGLabel *over = gen_new_label();                           \
     gen_set_rm(s, FRM);                                         \
     tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);           \
+    tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);  \
                                                                 \
     data = FIELD_DP32(data, VDATA, VM, a->vm);                  \
     tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),      \
@@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_r *a)             \
     gen_helper_gvec_4_ptr *fn = gen_helper_##NAME;              \
     TCGLabel *over = gen_new_label();                           \
     tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);           \
+    tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);  \
                                                                 \
     data = FIELD_DP32(data, VDATA, LMUL, s->lmul);              \
     tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),      \
@@ -XXX,XX +XXX,XX @@ static bool trans_vid_v(DisasContext *s, arg_vid_v *a)
     uint32_t data = 0;
     TCGLabel *over = gen_new_label();
     tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
+    tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
 
     data = FIELD_DP32(data, VDATA, VM, a->vm);
     data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
@@ -XXX,XX +XXX,XX @@ static bool int_ext_op(DisasContext *s, arg_rmr *a, uint8_t seq)
     uint32_t data = 0;
     gen_helper_gvec_3_ptr *fn;
     TCGLabel *over = gen_new_label();
     tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
+    tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
 
     static gen_helper_gvec_3_ptr * const fns[6][4] = {
-- 
2.36.1
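The guard added above mirrors the helpers' element loop, which runs from vstart to vl; when vstart >= vl the loop body executes zero times, so the whole vector write-back can be skipped. A minimal standalone sketch of the condition (illustrative names only, not QEMU API):

#include <stdint.h>
#include <stdbool.h>

bool has_body_elements(uint32_t vstart, uint32_t vl)
{
    /* for (i = vstart; i < vl; i++) runs zero times when vstart >= vl,
     * so neither body nor tail elements may be updated. */
    return vstart < vl;
}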
From: Nazar Kazakov <nazar.kazakov@codethink.co.uk>

Remove the redundant "vl == 0" check: when vl == 0, the unsigned
vstart >= vl comparison is always true, so that case is already
covered by the existing early exit.

Signed-off-by: Nazar Kazakov <nazar.kazakov@codethink.co.uk>
Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn>
Signed-off-by: Max Chou <max.chou@sifive.com>
Acked-by: Alistair Francis <alistair.francis@wdc.com>
Message-ID: <20230711165917.2629866-4-max.chou@sifive.com>
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
---
 target/riscv/insn_trans/trans_rvv.c.inc | 31 +------------------------
 1 file changed, 1 insertion(+), 30 deletions(-)

diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -XXX,XX +XXX,XX @@ static bool ldst_us_trans(uint32_t vd, uint32_t rs1, uint32_t data,
     TCGv_i32 desc;
 
     TCGLabel *over = gen_new_label();
-    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
     tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
 
     dest = tcg_temp_new_ptr();
@@ -XXX,XX +XXX,XX @@ static bool ldst_stride_trans(uint32_t vd, uint32_t rs1, uint32_t rs2,
     TCGv_i32 desc;
 
     TCGLabel *over = gen_new_label();
-    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
     tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
 
     dest = tcg_temp_new_ptr();
@@ -XXX,XX +XXX,XX @@ static bool ldst_index_trans(uint32_t vd, uint32_t rs1, uint32_t vs2,
     TCGv_i32 desc;
 
     TCGLabel *over = gen_new_label();
-    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
     tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
 
     dest = tcg_temp_new_ptr();
@@ -XXX,XX +XXX,XX @@ static bool ldff_trans(uint32_t vd, uint32_t rs1, uint32_t data,
     TCGv_i32 desc;
 
     TCGLabel *over = gen_new_label();
-    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
     tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
 
     dest = tcg_temp_new_ptr();
@@ -XXX,XX +XXX,XX @@ do_opivv_gvec(DisasContext *s, arg_rmrr *a, GVecGen3Fn *gvec_fn,
         return false;
     }
 
-    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
     tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
 
     if (a->vm && s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) {
@@ -XXX,XX +XXX,XX @@ static bool opivx_trans(uint32_t vd, uint32_t rs1, uint32_t vs2, uint32_t vm,
     uint32_t data = 0;
 
     TCGLabel *over = gen_new_label();
-    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
     tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
 
     dest = tcg_temp_new_ptr();
@@ -XXX,XX +XXX,XX @@ static bool opivi_trans(uint32_t vd, uint32_t imm, uint32_t vs2, uint32_t vm,
     uint32_t data = 0;
 
     TCGLabel *over = gen_new_label();
-    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
     tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
 
     dest = tcg_temp_new_ptr();
@@ -XXX,XX +XXX,XX @@ static bool do_opivv_widen(DisasContext *s, arg_rmrr *a,
     if (checkfn(s, a)) {
         uint32_t data = 0;
         TCGLabel *over = gen_new_label();
-        tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
         tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
 
         data = FIELD_DP32(data, VDATA, VM, a->vm);
@@ -XXX,XX +XXX,XX @@ static bool do_opiwv_widen(DisasContext *s, arg_rmrr *a,
     if (opiwv_widen_check(s, a)) {
         uint32_t data = 0;
         TCGLabel *over = gen_new_label();
-        tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
         tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
 
         data = FIELD_DP32(data, VDATA, VM, a->vm);
@@ -XXX,XX +XXX,XX @@ static bool opivv_trans(uint32_t vd, uint32_t vs1, uint32_t vs2, uint32_t vm,
 {
     uint32_t data = 0;
     TCGLabel *over = gen_new_label();
-    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
     tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
 
     data = FIELD_DP32(data, VDATA, VM, vm);
@@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a)           \
         gen_helper_##NAME##_w,                                   \
     };                                                           \
     TCGLabel *over = gen_new_label();                            \
-    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);            \
     tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);   \
                                                                  \
     data = FIELD_DP32(data, VDATA, VM, a->vm);                   \
@@ -XXX,XX +XXX,XX @@ static bool trans_vmv_v_v(DisasContext *s, arg_vmv_v_v *a)
         gen_helper_vmv_v_v_w, gen_helper_vmv_v_v_d,
         };
         TCGLabel *over = gen_new_label();
-        tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
         tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
 
         tcg_gen_gvec_2_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, a->rs1),
@@ -XXX,XX +XXX,XX @@ static bool trans_vmv_v_x(DisasContext *s, arg_vmv_v_x *a)
         vext_check_ss(s, a->rd, 0, 1)) {
         TCGv s1;
         TCGLabel *over = gen_new_label();
-        tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
         tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
 
         s1 = get_gpr(s, a->rs1, EXT_SIGN);
@@ -XXX,XX +XXX,XX @@ static bool trans_vmv_v_i(DisasContext *s, arg_vmv_v_i *a)
         gen_helper_vmv_v_x_w, gen_helper_vmv_v_x_d,
         };
         TCGLabel *over = gen_new_label();
-        tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
         tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
 
         s1 = tcg_constant_i64(simm);
@@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a)           \
     };                                                           \
     TCGLabel *over = gen_new_label();                            \
     gen_set_rm(s, RISCV_FRM_DYN);                                \
-    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);            \
     tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);   \
                                                                  \
     data = FIELD_DP32(data, VDATA, VM, a->vm);                   \
@@ -XXX,XX +XXX,XX @@ static bool opfvf_trans(uint32_t vd, uint32_t rs1, uint32_t vs2,
     TCGv_i64 t1;
 
     TCGLabel *over = gen_new_label();
-    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
     tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
 
     dest = tcg_temp_new_ptr();
@@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a)           \
     };                                                           \
     TCGLabel *over = gen_new_label();                            \
     gen_set_rm(s, RISCV_FRM_DYN);                                \
-    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);            \
     tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);\
                                                                  \
     data = FIELD_DP32(data, VDATA, VM, a->vm);                   \
@@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a)           \
     };                                                           \
     TCGLabel *over = gen_new_label();                            \
     gen_set_rm(s, RISCV_FRM_DYN);                                \
-    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);            \
     tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);   \
                                                                  \
     data = FIELD_DP32(data, VDATA, VM, a->vm);                   \
@@ -XXX,XX +XXX,XX @@ static bool do_opfv(DisasContext *s, arg_rmr *a,
     uint32_t data = 0;
     TCGLabel *over = gen_new_label();
     gen_set_rm_chkfrm(s, rm);
-    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
     tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
 
     data = FIELD_DP32(data, VDATA, VM, a->vm);
@@ -XXX,XX +XXX,XX @@ static bool trans_vfmv_v_f(DisasContext *s, arg_vfmv_v_f *a)
         gen_helper_vmv_v_x_d,
         };
         TCGLabel *over = gen_new_label();
-        tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
         tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
 
         t1 = tcg_temp_new_i64();
@@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a)            \
     };                                                           \
     TCGLabel *over = gen_new_label();                            \
     gen_set_rm_chkfrm(s, FRM);                                   \
-    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);            \
     tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);   \
                                                                  \
     data = FIELD_DP32(data, VDATA, VM, a->vm);                   \
@@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a)            \
     };                                                           \
     TCGLabel *over = gen_new_label();                            \
     gen_set_rm(s, RISCV_FRM_DYN);                                \
-    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);            \
     tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);   \
                                                                  \
     data = FIELD_DP32(data, VDATA, VM, a->vm);                   \
@@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a)            \
     };                                                           \
     TCGLabel *over = gen_new_label();                            \
     gen_set_rm_chkfrm(s, FRM);                                   \
-    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);            \
     tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);   \
                                                                  \
     data = FIELD_DP32(data, VDATA, VM, a->vm);                   \
@@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a)            \
     };                                                           \
     TCGLabel *over = gen_new_label();                            \
     gen_set_rm_chkfrm(s, FRM);                                   \
-    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);            \
     tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);   \
                                                                  \
     data = FIELD_DP32(data, VDATA, VM, a->vm);                   \
@@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_r *a)              \
     uint32_t data = 0;                                           \
     gen_helper_gvec_4_ptr *fn = gen_helper_##NAME;               \
     TCGLabel *over = gen_new_label();                            \
-    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);            \
     tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);   \
                                                                  \
     data = FIELD_DP32(data, VDATA, LMUL, s->lmul);               \
@@ -XXX,XX +XXX,XX @@ static bool trans_vid_v(DisasContext *s, arg_vid_v *a)
         require_vm(a->vm, a->rd)) {
         uint32_t data = 0;
         TCGLabel *over = gen_new_label();
-        tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
         tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
 
         data = FIELD_DP32(data, VDATA, VM, a->vm);
@@ -XXX,XX +XXX,XX @@ static bool trans_vmv_s_x(DisasContext *s, arg_vmv_s_x *a)
         TCGv s1;
         TCGLabel *over = gen_new_label();
 
-        tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
         tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
 
         t1 = tcg_temp_new_i64();
@@ -XXX,XX +XXX,XX @@ static bool trans_vfmv_s_f(DisasContext *s, arg_vfmv_s_f *a)
         TCGv_i64 t1;
         TCGLabel *over = gen_new_label();
 
-        /* if vl == 0 or vstart >= vl, skip vector register write back */
-        tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
+        /* if vstart >= vl, skip vector register write back */
         tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
 
         /* NaN-box f[rs1] */
@@ -XXX,XX +XXX,XX @@ static bool int_ext_op(DisasContext *s, arg_rmr *a, uint8_t seq)
     gen_helper_gvec_3_ptr *fn;
     TCGLabel *over = gen_new_label();
-    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
     tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
 
     static gen_helper_gvec_3_ptr * const fns[6][4] = {
     {
-- 
2.41.0
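The redundancy follows directly from unsigned comparison. A tiny standalone check (illustrative names, not QEMU API):

#include <stdint.h>
#include <stdbool.h>

bool skip_write_back(uint32_t vstart, uint32_t vl)
{
    /* When vl == 0, vstart >= vl holds for any unsigned vstart, so the
     * dropped "vl == 0" branch could never fire on its own. */
    return vstart >= vl;
}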
From: eopXD <yueh.ting.chen@gmail.com>

The destination register of unit-stride mask load and store instructions
is always written with a tail-agnostic policy.

A vector segment load / store instruction may contain a fractional lmul
with nf * lmul > 1. The rest of the elements in the last register should
be treated as tail elements.

Signed-off-by: eop Chen <eop.chen@sifive.com>
Reviewed-by: Frank Chang <frank.chang@sifive.com>
Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn>
Acked-by: Alistair Francis <alistair.francis@wdc.com>
Message-Id: <165449614532.19704.7000832880482980398-6@git.sr.ht>
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
---
 target/riscv/translate.c                |  2 +
 target/riscv/vector_helper.c            | 60 +++++++++++++++++++++++++
 target/riscv/insn_trans/trans_rvv.c.inc |  6 +++
 3 files changed, 68 insertions(+)

diff --git a/target/riscv/translate.c b/target/riscv/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/translate.c
+++ b/target/riscv/translate.c
@@ -XXX,XX +XXX,XX @@ typedef struct DisasContext {
     int8_t lmul;
     uint8_t sew;
     uint8_t vta;
+    bool cfg_vta_all_1s;
     target_ulong vstart;
     bool vl_eq_vlmax;
     uint8_t ntemp;
@@ -XXX,XX +XXX,XX @@ static void riscv_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
     ctx->sew = FIELD_EX32(tb_flags, TB_FLAGS, SEW);
     ctx->lmul = sextract32(FIELD_EX32(tb_flags, TB_FLAGS, LMUL), 0, 3);
     ctx->vta = FIELD_EX32(tb_flags, TB_FLAGS, VTA) && cpu->cfg.rvv_ta_all_1s;
+    ctx->cfg_vta_all_1s = cpu->cfg.rvv_ta_all_1s;
     ctx->vstart = env->vstart;
     ctx->vl_eq_vlmax = FIELD_EX32(tb_flags, TB_FLAGS, VL_EQ_VLMAX);
     ctx->misa_mxl_max = env->misa_mxl_max;
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -XXX,XX +XXX,XX @@ vext_ldst_stride(void *vd, void *v0, target_ulong base,
     uint32_t i, k;
     uint32_t nf = vext_nf(desc);
     uint32_t max_elems = vext_max_elems(desc, log2_esz);
+    uint32_t esz = 1 << log2_esz;
+    uint32_t total_elems = vext_get_total_elems(env, desc, esz);
+    uint32_t vta = vext_vta(desc);
 
     for (i = env->vstart; i < env->vl; i++, env->vstart++) {
         if (!vm && !vext_elem_mask(v0, i)) {
@@ -XXX,XX +XXX,XX @@ vext_ldst_stride(void *vd, void *v0, target_ulong base,
         }
     }
     env->vstart = 0;
+    /* set tail elements to 1s */
+    for (k = 0; k < nf; ++k) {
+        vext_set_elems_1s(vd, vta, (k * max_elems + env->vl) * esz,
+                          (k * max_elems + max_elems) * esz);
+    }
+    if (nf * max_elems % total_elems != 0) {
+        uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3;
+        uint32_t registers_used =
+            ((nf * max_elems) * esz + (vlenb - 1)) / vlenb;
+        vext_set_elems_1s(vd, vta, (nf * max_elems) * esz,
+                          registers_used * vlenb);
+    }
 }
 
 #define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN)                        \
@@ -XXX,XX +XXX,XX @@ vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
     uint32_t i, k;
     uint32_t nf = vext_nf(desc);
     uint32_t max_elems = vext_max_elems(desc, log2_esz);
+    uint32_t esz = 1 << log2_esz;
+    uint32_t total_elems = vext_get_total_elems(env, desc, esz);
+    uint32_t vta = vext_vta(desc);
83
/* load bytes from guest memory */
169
+ for (int j = 63; j >= 1; j--) {
84
for (i = env->vstart; i < evl; i++, env->vstart++) {
170
+ if ((y >> j) & 1) {
85
@@ -XXX,XX +XXX,XX @@ vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
171
+ result ^= (x >> (64 - j));
86
}
172
+ }
87
}
173
+ }
88
env->vstart = 0;
174
+ return result;
89
+ /* set tail elements to 1s */
175
+}
90
+ for (k = 0; k < nf; ++k) {
176
+
91
+ vext_set_elems_1s(vd, vta, (k * max_elems + evl) * esz,
177
+RVVCALL(OPIVV2, vclmul_vv, OP_UUU_D, H8, H8, H8, clmul64)
92
+ (k * max_elems + max_elems) * esz);
178
+GEN_VEXT_VV(vclmul_vv, 8)
93
+ }
179
+RVVCALL(OPIVX2, vclmul_vx, OP_UUU_D, H8, H8, clmul64)
94
+ if (nf * max_elems % total_elems != 0) {
180
+GEN_VEXT_VX(vclmul_vx, 8)
95
+ uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3;
181
+RVVCALL(OPIVV2, vclmulh_vv, OP_UUU_D, H8, H8, H8, clmulh64)
96
+ uint32_t registers_used =
182
+GEN_VEXT_VV(vclmulh_vv, 8)
97
+ ((nf * max_elems) * esz + (vlenb - 1)) / vlenb;
183
+RVVCALL(OPIVX2, vclmulh_vx, OP_UUU_D, H8, H8, clmulh64)
98
+ vext_set_elems_1s(vd, vta, (nf * max_elems) * esz,
184
+GEN_VEXT_VX(vclmulh_vx, 8)
99
+ registers_used * vlenb);
185
diff --git a/target/riscv/insn_trans/trans_rvvk.c.inc b/target/riscv/insn_trans/trans_rvvk.c.inc
100
+ }
186
new file mode 100644
101
}
187
index XXXXXXX..XXXXXXX
102
188
--- /dev/null
103
/*
189
+++ b/target/riscv/insn_trans/trans_rvvk.c.inc
104
@@ -XXX,XX +XXX,XX @@ vext_ldst_index(void *vd, void *v0, target_ulong base,
190
@@ -XXX,XX +XXX,XX @@
105
uint32_t nf = vext_nf(desc);
191
+/*
106
uint32_t vm = vext_vm(desc);
192
+ * RISC-V translation routines for the vector crypto extension.
107
uint32_t max_elems = vext_max_elems(desc, log2_esz);
193
+ *
108
+ uint32_t esz = 1 << log2_esz;
194
+ * Copyright (C) 2023 SiFive, Inc.
109
+ uint32_t total_elems = vext_get_total_elems(env, desc, esz);
195
+ * Written by Codethink Ltd and SiFive.
110
+ uint32_t vta = vext_vta(desc);
196
+ *
111
197
+ * This program is free software; you can redistribute it and/or modify it
112
/* load bytes from guest memory */
198
+ * under the terms and conditions of the GNU General Public License,
113
for (i = env->vstart; i < env->vl; i++, env->vstart++) {
199
+ * version 2 or later, as published by the Free Software Foundation.
114
@@ -XXX,XX +XXX,XX @@ vext_ldst_index(void *vd, void *v0, target_ulong base,
200
+ *
115
}
201
+ * This program is distributed in the hope it will be useful, but WITHOUT
116
}
202
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
117
env->vstart = 0;
203
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
118
+ /* set tail elements to 1s */
204
+ * more details.
119
+ for (k = 0; k < nf; ++k) {
205
+ *
120
+ vext_set_elems_1s(vd, vta, (k * max_elems + env->vl) * esz,
206
+ * You should have received a copy of the GNU General Public License along with
121
+ (k * max_elems + max_elems) * esz);
207
+ * this program. If not, see <http://www.gnu.org/licenses/>.
122
+ }
208
+ */
123
+ if (nf * max_elems % total_elems != 0) {
209
+
124
+ uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3;
210
+/*
125
+ uint32_t registers_used =
211
+ * Zvbc
126
+ ((nf * max_elems) * esz + (vlenb - 1)) / vlenb;
212
+ */
127
+ vext_set_elems_1s(vd, vta, (nf * max_elems) * esz,
213
+
128
+ registers_used * vlenb);
214
+#define GEN_VV_MASKED_TRANS(NAME, CHECK) \
129
+ }
215
+ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
130
}
216
+ { \
131
217
+ if (CHECK(s, a)) { \
132
#define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN) \
218
+ return opivv_trans(a->rd, a->rs1, a->rs2, a->vm, \
133
@@ -XXX,XX +XXX,XX @@ vext_ldff(void *vd, void *v0, target_ulong base,
219
+ gen_helper_##NAME, s); \
134
uint32_t nf = vext_nf(desc);
220
+ } \
135
uint32_t vm = vext_vm(desc);
221
+ return false; \
136
uint32_t max_elems = vext_max_elems(desc, log2_esz);
222
+ }
137
+ uint32_t esz = 1 << log2_esz;
223
+
138
+ uint32_t total_elems = vext_get_total_elems(env, desc, esz);
224
+static bool vclmul_vv_check(DisasContext *s, arg_rmrr *a)
139
+ uint32_t vta = vext_vta(desc);
225
+{
140
target_ulong addr, offset, remain;
226
+ return opivv_check(s, a) &&
141
227
+ s->cfg_ptr->ext_zvbc == true &&
142
/* probe every access*/
228
+ s->sew == MO_64;
143
@@ -XXX,XX +XXX,XX @@ ProbeSuccess:
229
+}
144
}
230
+
145
}
231
+GEN_VV_MASKED_TRANS(vclmul_vv, vclmul_vv_check)
146
env->vstart = 0;
232
+GEN_VV_MASKED_TRANS(vclmulh_vv, vclmul_vv_check)
147
+ /* set tail elements to 1s */
233
+
148
+ for (k = 0; k < nf; ++k) {
234
+#define GEN_VX_MASKED_TRANS(NAME, CHECK) \
149
+ vext_set_elems_1s(vd, vta, (k * max_elems + env->vl) * esz,
235
+ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
150
+ (k * max_elems + max_elems) * esz);
236
+ { \
151
+ }
237
+ if (CHECK(s, a)) { \
152
+ if (nf * max_elems % total_elems != 0) {
238
+ return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, \
153
+ uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3;
239
+ gen_helper_##NAME, s); \
154
+ uint32_t registers_used =
240
+ } \
155
+ ((nf * max_elems) * esz + (vlenb - 1)) / vlenb;
241
+ return false; \
156
+ vext_set_elems_1s(vd, vta, (nf * max_elems) * esz,
242
+ }
157
+ registers_used * vlenb);
243
+
158
+ }
244
+static bool vclmul_vx_check(DisasContext *s, arg_rmrr *a)
159
}
245
+{
160
246
+ return opivx_check(s, a) &&
161
#define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN) \
247
+ s->cfg_ptr->ext_zvbc == true &&
162
diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc
248
+ s->sew == MO_64;
163
index XXXXXXX..XXXXXXX 100644
249
+}
164
--- a/target/riscv/insn_trans/trans_rvv.c.inc
250
+
165
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
251
+GEN_VX_MASKED_TRANS(vclmul_vx, vclmul_vx_check)
166
@@ -XXX,XX +XXX,XX @@ static bool ld_us_op(DisasContext *s, arg_r2nfvm *a, uint8_t eew)
252
+GEN_VX_MASKED_TRANS(vclmulh_vx, vclmul_vx_check)
167
data = FIELD_DP32(data, VDATA, VM, a->vm);
253
diff --git a/target/riscv/meson.build b/target/riscv/meson.build
168
data = FIELD_DP32(data, VDATA, LMUL, emul);
254
index XXXXXXX..XXXXXXX 100644
169
data = FIELD_DP32(data, VDATA, NF, a->nf);
255
--- a/target/riscv/meson.build
170
+ data = FIELD_DP32(data, VDATA, VTA, s->vta);
256
+++ b/target/riscv/meson.build
171
return ldst_us_trans(a->rd, a->rs1, data, fn, s, false);
257
@@ -XXX,XX +XXX,XX @@ riscv_ss.add(files(
172
}
258
'translate.c',
173
259
'm128_helper.c',
174
@@ -XXX,XX +XXX,XX @@ static bool ld_us_mask_op(DisasContext *s, arg_vlm_v *a, uint8_t eew)
260
'crypto_helper.c',
175
/* EMUL = 1, NFIELDS = 1 */
261
- 'zce_helper.c'
176
data = FIELD_DP32(data, VDATA, LMUL, 0);
262
+ 'zce_helper.c',
177
data = FIELD_DP32(data, VDATA, NF, 1);
263
+ 'vcrypto_helper.c'
178
+ /* Mask destination register are always tail-agnostic */
264
))
179
+ data = FIELD_DP32(data, VDATA, VTA, s->cfg_vta_all_1s);
265
riscv_ss.add(when: 'CONFIG_KVM', if_true: files('kvm.c'), if_false: files('kvm-stub.c'))
180
return ldst_us_trans(a->rd, a->rs1, data, fn, s, false);
181
}
182
183
@@ -XXX,XX +XXX,XX @@ static bool ld_stride_op(DisasContext *s, arg_rnfvm *a, uint8_t eew)
184
data = FIELD_DP32(data, VDATA, VM, a->vm);
185
data = FIELD_DP32(data, VDATA, LMUL, emul);
186
data = FIELD_DP32(data, VDATA, NF, a->nf);
187
+ data = FIELD_DP32(data, VDATA, VTA, s->vta);
188
return ldst_stride_trans(a->rd, a->rs1, a->rs2, data, fn, s, false);
189
}
190
191
@@ -XXX,XX +XXX,XX @@ static bool ld_index_op(DisasContext *s, arg_rnfvm *a, uint8_t eew)
192
data = FIELD_DP32(data, VDATA, VM, a->vm);
193
data = FIELD_DP32(data, VDATA, LMUL, emul);
194
data = FIELD_DP32(data, VDATA, NF, a->nf);
195
+ data = FIELD_DP32(data, VDATA, VTA, s->vta);
196
return ldst_index_trans(a->rd, a->rs1, a->rs2, data, fn, s, false);
197
}
198
199
@@ -XXX,XX +XXX,XX @@ static bool ldff_op(DisasContext *s, arg_r2nfvm *a, uint8_t eew)
200
data = FIELD_DP32(data, VDATA, VM, a->vm);
201
data = FIELD_DP32(data, VDATA, LMUL, emul);
202
data = FIELD_DP32(data, VDATA, NF, a->nf);
203
+ data = FIELD_DP32(data, VDATA, VTA, s->vta);
204
return ldff_trans(a->rd, a->rs1, data, fn, s);
205
}
206
266
207
--
267
--
208
2.36.1
268
2.41.0
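
The helpers in the patch above are easy to misread, so a brief aside that is
not part of the patch itself: vclmul and vclmulh return the low and high 64
bits of the 128-bit carry-less product, in which partial products are
combined with XOR rather than addition. This is the primitive behind GHASH
and CRC-style computations. A minimal standalone sketch using the same
bitwise definition as the clmul64() helper above; the test values are
illustrative, not taken from the patch:

    #include <assert.h>
    #include <stdint.h>

    /* Low 64 bits of the carry-less product, as in the patch's clmul64(). */
    static uint64_t clmul64(uint64_t y, uint64_t x)
    {
        uint64_t result = 0;
        for (int j = 63; j >= 0; j--) {
            if ((y >> j) & 1) {
                result ^= (x << j);
            }
        }
        return result;
    }

    int main(void)
    {
        /* (x^2 + 1)(x + 1) = x^3 + x^2 + x + 1 over GF(2) */
        assert(clmul64(3, 5) == 15);
        /* No carries: (x + 1)^2 = x^2 + 1, i.e. 5, not 9 */
        assert(clmul64(3, 3) == 5);
        return 0;
    }
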
From: Nazar Kazakov <nazar.kazakov@codethink.co.uk>

Move the checks out of `do_opiv{v,x,i}_gvec{,_shift}` functions
and into the corresponding macros. This enables the functions to be
reused in subsequent commits without check duplication.

Signed-off-by: Nazar Kazakov <nazar.kazakov@codethink.co.uk>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn>
Signed-off-by: Max Chou <max.chou@sifive.com>
Message-ID: <20230711165917.2629866-6-max.chou@sifive.com>
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
---
 target/riscv/insn_trans/trans_rvv.c.inc | 28 +++++++++++--------------
 1 file changed, 12 insertions(+), 16 deletions(-)

diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -XXX,XX +XXX,XX @@ do_opivv_gvec(DisasContext *s, arg_rmrr *a, GVecGen3Fn *gvec_fn,
               gen_helper_gvec_4_ptr *fn)
 {
     TCGLabel *over = gen_new_label();
-    if (!opivv_check(s, a)) {
-        return false;
-    }

     tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);

@@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a)       \
         gen_helper_##NAME##_b, gen_helper_##NAME##_h,                \
         gen_helper_##NAME##_w, gen_helper_##NAME##_d,                \
     };                                                               \
+    if (!opivv_check(s, a)) {                                        \
+        return false;                                                \
+    }                                                                \
     return do_opivv_gvec(s, a, tcg_gen_gvec_##SUF, fns[s->sew]);     \
 }

@@ -XXX,XX +XXX,XX @@ static inline bool
 do_opivx_gvec(DisasContext *s, arg_rmrr *a, GVecGen2sFn *gvec_fn,
               gen_helper_opivx *fn)
 {
-    if (!opivx_check(s, a)) {
-        return false;
-    }
-
     if (a->vm && s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) {
         TCGv_i64 src1 = tcg_temp_new_i64();

@@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a)       \
         gen_helper_##NAME##_b, gen_helper_##NAME##_h,                \
         gen_helper_##NAME##_w, gen_helper_##NAME##_d,                \
     };                                                               \
+    if (!opivx_check(s, a)) {                                        \
+        return false;                                                \
+    }                                                                \
     return do_opivx_gvec(s, a, tcg_gen_gvec_##SUF, fns[s->sew]);     \
 }

@@ -XXX,XX +XXX,XX @@ static inline bool
 do_opivi_gvec(DisasContext *s, arg_rmrr *a, GVecGen2iFn *gvec_fn,
               gen_helper_opivx *fn, imm_mode_t imm_mode)
 {
-    if (!opivx_check(s, a)) {
-        return false;
-    }
-
     if (a->vm && s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) {
         gvec_fn(s->sew, vreg_ofs(s, a->rd), vreg_ofs(s, a->rs2),
                 extract_imm(s, a->rs1, imm_mode), MAXSZ(s), MAXSZ(s));
@@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a)       \
         gen_helper_##OPIVX##_b, gen_helper_##OPIVX##_h,              \
         gen_helper_##OPIVX##_w, gen_helper_##OPIVX##_d,              \
     };                                                               \
+    if (!opivx_check(s, a)) {                                        \
+        return false;                                                \
+    }                                                                \
     return do_opivi_gvec(s, a, tcg_gen_gvec_##SUF,                   \
                          fns[s->sew], IMM_MODE);                     \
 }
@@ -XXX,XX +XXX,XX @@ static inline bool
 do_opivx_gvec_shift(DisasContext *s, arg_rmrr *a, GVecGen2sFn32 *gvec_fn,
                     gen_helper_opivx *fn)
 {
-    if (!opivx_check(s, a)) {
-        return false;
-    }
-
     if (a->vm && s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) {
         TCGv_i32 src1 = tcg_temp_new_i32();

         tcg_gen_trunc_tl_i32(src1, get_gpr(s, a->rs1, EXT_NONE));
@@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a)       \
         gen_helper_##NAME##_b, gen_helper_##NAME##_h,                \
         gen_helper_##NAME##_w, gen_helper_##NAME##_d,                \
     };                                                               \
-                                                                     \
+    if (!opivx_check(s, a)) {                                        \
+        return false;                                                \
+    }                                                                \
     return do_opivx_gvec_shift(s, a, tcg_gen_gvec_##SUF, fns[s->sew]); \
 }

--
2.41.0
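
The refactoring in the patch above, and the check-function parameterization
in the patch that follows, reduce to the same idea: keep the shared emitter
policy-free and supply the legality check at the point where each trans_*()
function is generated. A reduced sketch of that shape, with stub types
standing in for the real QEMU DisasContext/arg_rmrr and a trivial check;
illustrative only, not the actual QEMU signatures:

    #include <stdbool.h>
    #include <stdio.h>

    typedef struct { int sew; } DisasContext;
    typedef struct { int vm; } arg_rmrr;

    /* Stub standing in for opivv_check(), the standard RVV legality check. */
    static bool opivv_check(DisasContext *s, arg_rmrr *a)
    {
        return true;
    }

    /* Shared emitter: performs no check of its own, so a later
     * vector-crypto instruction can reuse it under a stricter check
     * (e.g. Zvbc enabled and SEW == 64). */
    static bool do_opivv_gvec(DisasContext *s, arg_rmrr *a)
    {
        printf("emit vector op, sew=%d\n", s->sew);
        return true;
    }

    /* The check is chosen where the trans function is generated. */
    #define GEN_OPIVV_GVEC_TRANS(NAME, CHECK)              \
    static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
    {                                                      \
        if (!CHECK(s, a)) {                                \
            return false;                                  \
        }                                                  \
        return do_opivv_gvec(s, a);                        \
    }

    GEN_OPIVV_GVEC_TRANS(vadd_vv, opivv_check)

    int main(void)
    {
        DisasContext s = { .sew = 2 };
        arg_rmrr a = { .vm = 1 };
        return trans_vadd_vv(&s, &a) ? 0 : 1;
    }
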
From: Dickon Hood <dickon.hood@codethink.co.uk>

Zvbb (implemented in a later commit) has a widening instruction, which
requires an extra check on the enabled extensions. Refactor
GEN_OPIVX_WIDEN_TRANS() to take a check function so the check does not
have to be reimplemented.

Signed-off-by: Dickon Hood <dickon.hood@codethink.co.uk>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn>
Signed-off-by: Max Chou <max.chou@sifive.com>
Message-ID: <20230711165917.2629866-7-max.chou@sifive.com>
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
---
 target/riscv/insn_trans/trans_rvv.c.inc | 52 +++++++++++--------------
 1 file changed, 23 insertions(+), 29 deletions(-)

diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -XXX,XX +XXX,XX @@ static bool opivx_widen_check(DisasContext *s, arg_rmrr *a)
            vext_check_ds(s, a->rd, a->rs2, a->vm);
 }

-static bool do_opivx_widen(DisasContext *s, arg_rmrr *a,
-                           gen_helper_opivx *fn)
-{
-    if (opivx_widen_check(s, a)) {
-        return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fn, s);
-    }
-    return false;
-}
-
-#define GEN_OPIVX_WIDEN_TRANS(NAME)                                  \
-static bool trans_##NAME(DisasContext *s, arg_rmrr *a)               \
-{                                                                    \
-    static gen_helper_opivx * const fns[3] = {                       \
-        gen_helper_##NAME##_b,                                       \
-        gen_helper_##NAME##_h,                                       \
-        gen_helper_##NAME##_w                                        \
-    };                                                               \
-    return do_opivx_widen(s, a, fns[s->sew]);                        \
+#define GEN_OPIVX_WIDEN_TRANS(NAME, CHECK)                           \
+static bool trans_##NAME(DisasContext *s, arg_rmrr *a)               \
+{                                                                    \
+    if (CHECK(s, a)) {                                               \
+        static gen_helper_opivx * const fns[3] = {                   \
+            gen_helper_##NAME##_b,                                   \
+            gen_helper_##NAME##_h,                                   \
+            gen_helper_##NAME##_w                                    \
+        };                                                           \
+        return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fns[s->sew], s); \
+    }                                                                \
+    return false;                                                    \
 }

-GEN_OPIVX_WIDEN_TRANS(vwaddu_vx)
-GEN_OPIVX_WIDEN_TRANS(vwadd_vx)
-GEN_OPIVX_WIDEN_TRANS(vwsubu_vx)
-GEN_OPIVX_WIDEN_TRANS(vwsub_vx)
+GEN_OPIVX_WIDEN_TRANS(vwaddu_vx, opivx_widen_check)
+GEN_OPIVX_WIDEN_TRANS(vwadd_vx, opivx_widen_check)
+GEN_OPIVX_WIDEN_TRANS(vwsubu_vx, opivx_widen_check)
+GEN_OPIVX_WIDEN_TRANS(vwsub_vx, opivx_widen_check)

 /* WIDEN OPIVV with WIDEN */
 static bool opiwv_widen_check(DisasContext *s, arg_rmrr *a)
@@ -XXX,XX +XXX,XX @@ GEN_OPIVX_TRANS(vrem_vx, opivx_check)
 GEN_OPIVV_WIDEN_TRANS(vwmul_vv, opivv_widen_check)
 GEN_OPIVV_WIDEN_TRANS(vwmulu_vv, opivv_widen_check)
 GEN_OPIVV_WIDEN_TRANS(vwmulsu_vv, opivv_widen_check)
-GEN_OPIVX_WIDEN_TRANS(vwmul_vx)
-GEN_OPIVX_WIDEN_TRANS(vwmulu_vx)
-GEN_OPIVX_WIDEN_TRANS(vwmulsu_vx)
+GEN_OPIVX_WIDEN_TRANS(vwmul_vx, opivx_widen_check)
+GEN_OPIVX_WIDEN_TRANS(vwmulu_vx, opivx_widen_check)
+GEN_OPIVX_WIDEN_TRANS(vwmulsu_vx, opivx_widen_check)

 /* Vector Single-Width Integer Multiply-Add Instructions */
 GEN_OPIVV_TRANS(vmacc_vv, opivv_check)
@@ -XXX,XX +XXX,XX @@ GEN_OPIVX_TRANS(vnmsub_vx, opivx_check)
 GEN_OPIVV_WIDEN_TRANS(vwmaccu_vv, opivv_widen_check)
 GEN_OPIVV_WIDEN_TRANS(vwmacc_vv, opivv_widen_check)
 GEN_OPIVV_WIDEN_TRANS(vwmaccsu_vv, opivv_widen_check)
-GEN_OPIVX_WIDEN_TRANS(vwmaccu_vx)
-GEN_OPIVX_WIDEN_TRANS(vwmacc_vx)
-GEN_OPIVX_WIDEN_TRANS(vwmaccsu_vx)
-GEN_OPIVX_WIDEN_TRANS(vwmaccus_vx)
+GEN_OPIVX_WIDEN_TRANS(vwmaccu_vx, opivx_widen_check)
+GEN_OPIVX_WIDEN_TRANS(vwmacc_vx, opivx_widen_check)
+GEN_OPIVX_WIDEN_TRANS(vwmaccsu_vx, opivx_widen_check)
+GEN_OPIVX_WIDEN_TRANS(vwmaccus_vx, opivx_widen_check)

 /* Vector Integer Merge and Move Instructions */
 static bool trans_vmv_v_v(DisasContext *s, arg_vmv_v_v *a)
1005
1006
/* Vector Single-Width Scaling Shift Instructions */
1007
static inline uint8_t
1008
@@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8)
1009
RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16)
1010
RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32)
1011
RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64)
1012
-GEN_VEXT_VV_RM(vssrl_vv_b, 1, 1)
1013
-GEN_VEXT_VV_RM(vssrl_vv_h, 2, 2)
1014
-GEN_VEXT_VV_RM(vssrl_vv_w, 4, 4)
1015
-GEN_VEXT_VV_RM(vssrl_vv_d, 8, 8)
1016
+GEN_VEXT_VV_RM(vssrl_vv_b)
1017
+GEN_VEXT_VV_RM(vssrl_vv_h)
1018
+GEN_VEXT_VV_RM(vssrl_vv_w)
1019
+GEN_VEXT_VV_RM(vssrl_vv_d)
1020
1021
RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8)
1022
RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16)
1023
RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32)
1024
RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64)
1025
-GEN_VEXT_VX_RM(vssrl_vx_b, 1, 1)
1026
-GEN_VEXT_VX_RM(vssrl_vx_h, 2, 2)
1027
-GEN_VEXT_VX_RM(vssrl_vx_w, 4, 4)
1028
-GEN_VEXT_VX_RM(vssrl_vx_d, 8, 8)
1029
+GEN_VEXT_VX_RM(vssrl_vx_b)
1030
+GEN_VEXT_VX_RM(vssrl_vx_h)
1031
+GEN_VEXT_VX_RM(vssrl_vx_w)
1032
+GEN_VEXT_VX_RM(vssrl_vx_d)
1033
1034
static inline int8_t
1035
vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
1036
@@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8)
1037
RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16)
1038
RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32)
1039
RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64)
1040
-GEN_VEXT_VV_RM(vssra_vv_b, 1, 1)
1041
-GEN_VEXT_VV_RM(vssra_vv_h, 2, 2)
1042
-GEN_VEXT_VV_RM(vssra_vv_w, 4, 4)
1043
-GEN_VEXT_VV_RM(vssra_vv_d, 8, 8)
1044
+GEN_VEXT_VV_RM(vssra_vv_b)
1045
+GEN_VEXT_VV_RM(vssra_vv_h)
1046
+GEN_VEXT_VV_RM(vssra_vv_w)
1047
+GEN_VEXT_VV_RM(vssra_vv_d)
1048
1049
RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8)
1050
RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16)
1051
RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32)
1052
RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64)
1053
-GEN_VEXT_VX_RM(vssra_vx_b, 1, 1)
1054
-GEN_VEXT_VX_RM(vssra_vx_h, 2, 2)
1055
-GEN_VEXT_VX_RM(vssra_vx_w, 4, 4)
1056
-GEN_VEXT_VX_RM(vssra_vx_d, 8, 8)
1057
+GEN_VEXT_VX_RM(vssra_vx_b)
1058
+GEN_VEXT_VX_RM(vssra_vx_h)
1059
+GEN_VEXT_VX_RM(vssra_vx_w)
1060
+GEN_VEXT_VX_RM(vssra_vx_d)
1061
1062
/* Vector Narrowing Fixed-Point Clip Instructions */
1063
static inline int8_t
1064
@@ -XXX,XX +XXX,XX @@ vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b)
1065
RVVCALL(OPIVV2_RM, vnclip_wv_b, NOP_SSS_B, H1, H2, H1, vnclip8)
1066
RVVCALL(OPIVV2_RM, vnclip_wv_h, NOP_SSS_H, H2, H4, H2, vnclip16)
1067
RVVCALL(OPIVV2_RM, vnclip_wv_w, NOP_SSS_W, H4, H8, H4, vnclip32)
1068
-GEN_VEXT_VV_RM(vnclip_wv_b, 1, 1)
1069
-GEN_VEXT_VV_RM(vnclip_wv_h, 2, 2)
1070
-GEN_VEXT_VV_RM(vnclip_wv_w, 4, 4)
1071
+GEN_VEXT_VV_RM(vnclip_wv_b)
1072
+GEN_VEXT_VV_RM(vnclip_wv_h)
1073
+GEN_VEXT_VV_RM(vnclip_wv_w)
1074
1075
RVVCALL(OPIVX2_RM, vnclip_wx_b, NOP_SSS_B, H1, H2, vnclip8)
1076
RVVCALL(OPIVX2_RM, vnclip_wx_h, NOP_SSS_H, H2, H4, vnclip16)
1077
RVVCALL(OPIVX2_RM, vnclip_wx_w, NOP_SSS_W, H4, H8, vnclip32)
1078
-GEN_VEXT_VX_RM(vnclip_wx_b, 1, 1)
1079
-GEN_VEXT_VX_RM(vnclip_wx_h, 2, 2)
1080
-GEN_VEXT_VX_RM(vnclip_wx_w, 4, 4)
1081
+GEN_VEXT_VX_RM(vnclip_wx_b)
1082
+GEN_VEXT_VX_RM(vnclip_wx_h)
1083
+GEN_VEXT_VX_RM(vnclip_wx_w)
1084
1085
static inline uint8_t
1086
vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b)
1087
@@ -XXX,XX +XXX,XX @@ vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b)
1088
RVVCALL(OPIVV2_RM, vnclipu_wv_b, NOP_UUU_B, H1, H2, H1, vnclipu8)
1089
RVVCALL(OPIVV2_RM, vnclipu_wv_h, NOP_UUU_H, H2, H4, H2, vnclipu16)
1090
RVVCALL(OPIVV2_RM, vnclipu_wv_w, NOP_UUU_W, H4, H8, H4, vnclipu32)
1091
-GEN_VEXT_VV_RM(vnclipu_wv_b, 1, 1)
1092
-GEN_VEXT_VV_RM(vnclipu_wv_h, 2, 2)
1093
-GEN_VEXT_VV_RM(vnclipu_wv_w, 4, 4)
1094
+GEN_VEXT_VV_RM(vnclipu_wv_b)
1095
+GEN_VEXT_VV_RM(vnclipu_wv_h)
1096
+GEN_VEXT_VV_RM(vnclipu_wv_w)
1097
1098
RVVCALL(OPIVX2_RM, vnclipu_wx_b, NOP_UUU_B, H1, H2, vnclipu8)
1099
RVVCALL(OPIVX2_RM, vnclipu_wx_h, NOP_UUU_H, H2, H4, vnclipu16)
1100
RVVCALL(OPIVX2_RM, vnclipu_wx_w, NOP_UUU_W, H4, H8, vnclipu32)
1101
-GEN_VEXT_VX_RM(vnclipu_wx_b, 1, 1)
1102
-GEN_VEXT_VX_RM(vnclipu_wx_h, 2, 2)
1103
-GEN_VEXT_VX_RM(vnclipu_wx_w, 4, 4)
1104
+GEN_VEXT_VX_RM(vnclipu_wx_b)
1105
+GEN_VEXT_VX_RM(vnclipu_wx_h)
1106
+GEN_VEXT_VX_RM(vnclipu_wx_w)
1107
1108
/*
1109
*** Vector Float Point Arithmetic Instructions
1110
@@ -XXX,XX +XXX,XX @@ static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \
1111
*((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status); \
1112
}
1113
1114
-#define GEN_VEXT_VV_ENV(NAME, ESZ, DSZ) \
1115
+#define GEN_VEXT_VV_ENV(NAME) \
1116
void HELPER(NAME)(void *vd, void *v0, void *vs1, \
1117
void *vs2, CPURISCVState *env, \
1118
uint32_t desc) \
1119
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, \
1120
RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add)
1121
RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add)
1122
RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add)
1123
-GEN_VEXT_VV_ENV(vfadd_vv_h, 2, 2)
1124
-GEN_VEXT_VV_ENV(vfadd_vv_w, 4, 4)
1125
-GEN_VEXT_VV_ENV(vfadd_vv_d, 8, 8)
1126
+GEN_VEXT_VV_ENV(vfadd_vv_h)
1127
+GEN_VEXT_VV_ENV(vfadd_vv_w)
1128
+GEN_VEXT_VV_ENV(vfadd_vv_d)
1129
1130
#define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
1131
static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
1132
@@ -XXX,XX +XXX,XX @@ static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
1133
*((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, &env->fp_status);\
1134
}
1135
1136
-#define GEN_VEXT_VF(NAME, ESZ, DSZ) \
1137
+#define GEN_VEXT_VF(NAME) \
1138
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \
1139
void *vs2, CPURISCVState *env, \
1140
uint32_t desc) \
1141
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \
1142
RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add)
1143
RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add)
1144
RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add)
1145
-GEN_VEXT_VF(vfadd_vf_h, 2, 2)
1146
-GEN_VEXT_VF(vfadd_vf_w, 4, 4)
1147
-GEN_VEXT_VF(vfadd_vf_d, 8, 8)
1148
+GEN_VEXT_VF(vfadd_vf_h)
1149
+GEN_VEXT_VF(vfadd_vf_w)
1150
+GEN_VEXT_VF(vfadd_vf_d)
1151
1152
RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub)
1153
RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub)
1154
RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub)
1155
-GEN_VEXT_VV_ENV(vfsub_vv_h, 2, 2)
1156
-GEN_VEXT_VV_ENV(vfsub_vv_w, 4, 4)
1157
-GEN_VEXT_VV_ENV(vfsub_vv_d, 8, 8)
1158
+GEN_VEXT_VV_ENV(vfsub_vv_h)
1159
+GEN_VEXT_VV_ENV(vfsub_vv_w)
1160
+GEN_VEXT_VV_ENV(vfsub_vv_d)
1161
RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub)
1162
RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub)
1163
RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub)
1164
-GEN_VEXT_VF(vfsub_vf_h, 2, 2)
1165
-GEN_VEXT_VF(vfsub_vf_w, 4, 4)
1166
-GEN_VEXT_VF(vfsub_vf_d, 8, 8)
1167
+GEN_VEXT_VF(vfsub_vf_h)
1168
+GEN_VEXT_VF(vfsub_vf_w)
1169
+GEN_VEXT_VF(vfsub_vf_d)
1170
1171
static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s)
1172
{
1173
@@ -XXX,XX +XXX,XX @@ static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s)
1174
RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub)
1175
RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub)
1176
RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub)
1177
-GEN_VEXT_VF(vfrsub_vf_h, 2, 2)
1178
-GEN_VEXT_VF(vfrsub_vf_w, 4, 4)
1179
-GEN_VEXT_VF(vfrsub_vf_d, 8, 8)
1180
+GEN_VEXT_VF(vfrsub_vf_h)
1181
+GEN_VEXT_VF(vfrsub_vf_w)
1182
+GEN_VEXT_VF(vfrsub_vf_d)
1183
1184
/* Vector Widening Floating-Point Add/Subtract Instructions */
1185
static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s)
1186
@@ -XXX,XX +XXX,XX @@ static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s)
1187
1188
RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16)
1189
RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32)
1190
-GEN_VEXT_VV_ENV(vfwadd_vv_h, 2, 4)
1191
-GEN_VEXT_VV_ENV(vfwadd_vv_w, 4, 8)
1192
+GEN_VEXT_VV_ENV(vfwadd_vv_h)
1193
+GEN_VEXT_VV_ENV(vfwadd_vv_w)
1194
RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16)
1195
RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32)
1196
-GEN_VEXT_VF(vfwadd_vf_h, 2, 4)
1197
-GEN_VEXT_VF(vfwadd_vf_w, 4, 8)
1198
+GEN_VEXT_VF(vfwadd_vf_h)
1199
+GEN_VEXT_VF(vfwadd_vf_w)
1200
1201
static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s)
1202
{
1203
@@ -XXX,XX +XXX,XX @@ static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s)
1204
1205
RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16)
1206
RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32)
1207
-GEN_VEXT_VV_ENV(vfwsub_vv_h, 2, 4)
1208
-GEN_VEXT_VV_ENV(vfwsub_vv_w, 4, 8)
1209
+GEN_VEXT_VV_ENV(vfwsub_vv_h)
1210
+GEN_VEXT_VV_ENV(vfwsub_vv_w)
1211
RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16)
1212
RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32)
1213
-GEN_VEXT_VF(vfwsub_vf_h, 2, 4)
1214
-GEN_VEXT_VF(vfwsub_vf_w, 4, 8)
1215
+GEN_VEXT_VF(vfwsub_vf_h)
1216
+GEN_VEXT_VF(vfwsub_vf_w)
1217
1218
static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s)
1219
{
1220
@@ -XXX,XX +XXX,XX @@ static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s)
1221
1222
RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16)
1223
RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32)
1224
-GEN_VEXT_VV_ENV(vfwadd_wv_h, 2, 4)
1225
-GEN_VEXT_VV_ENV(vfwadd_wv_w, 4, 8)
1226
+GEN_VEXT_VV_ENV(vfwadd_wv_h)
1227
+GEN_VEXT_VV_ENV(vfwadd_wv_w)
1228
RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16)
1229
RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32)
1230
-GEN_VEXT_VF(vfwadd_wf_h, 2, 4)
1231
-GEN_VEXT_VF(vfwadd_wf_w, 4, 8)
1232
+GEN_VEXT_VF(vfwadd_wf_h)
1233
+GEN_VEXT_VF(vfwadd_wf_w)
1234
1235
static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s)
1236
{
1237
@@ -XXX,XX +XXX,XX @@ static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s)
1238
1239
RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16)
1240
RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32)
1241
-GEN_VEXT_VV_ENV(vfwsub_wv_h, 2, 4)
1242
-GEN_VEXT_VV_ENV(vfwsub_wv_w, 4, 8)
1243
+GEN_VEXT_VV_ENV(vfwsub_wv_h)
1244
+GEN_VEXT_VV_ENV(vfwsub_wv_w)
1245
RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16)
1246
RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32)
1247
-GEN_VEXT_VF(vfwsub_wf_h, 2, 4)
1248
-GEN_VEXT_VF(vfwsub_wf_w, 4, 8)
1249
+GEN_VEXT_VF(vfwsub_wf_h)
1250
+GEN_VEXT_VF(vfwsub_wf_w)
1251
1252
/* Vector Single-Width Floating-Point Multiply/Divide Instructions */
1253
RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul)
1254
RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul)
1255
RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul)
1256
-GEN_VEXT_VV_ENV(vfmul_vv_h, 2, 2)
1257
-GEN_VEXT_VV_ENV(vfmul_vv_w, 4, 4)
1258
-GEN_VEXT_VV_ENV(vfmul_vv_d, 8, 8)
1259
+GEN_VEXT_VV_ENV(vfmul_vv_h)
1260
+GEN_VEXT_VV_ENV(vfmul_vv_w)
1261
+GEN_VEXT_VV_ENV(vfmul_vv_d)
1262
RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul)
1263
RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul)
1264
RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul)
1265
-GEN_VEXT_VF(vfmul_vf_h, 2, 2)
1266
-GEN_VEXT_VF(vfmul_vf_w, 4, 4)
1267
-GEN_VEXT_VF(vfmul_vf_d, 8, 8)
1268
+GEN_VEXT_VF(vfmul_vf_h)
1269
+GEN_VEXT_VF(vfmul_vf_w)
1270
+GEN_VEXT_VF(vfmul_vf_d)
1271
1272
RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div)
1273
RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div)
1274
RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div)
1275
-GEN_VEXT_VV_ENV(vfdiv_vv_h, 2, 2)
1276
-GEN_VEXT_VV_ENV(vfdiv_vv_w, 4, 4)
1277
-GEN_VEXT_VV_ENV(vfdiv_vv_d, 8, 8)
1278
+GEN_VEXT_VV_ENV(vfdiv_vv_h)
1279
+GEN_VEXT_VV_ENV(vfdiv_vv_w)
1280
+GEN_VEXT_VV_ENV(vfdiv_vv_d)
1281
RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div)
1282
RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div)
1283
RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div)
1284
-GEN_VEXT_VF(vfdiv_vf_h, 2, 2)
1285
-GEN_VEXT_VF(vfdiv_vf_w, 4, 4)
1286
-GEN_VEXT_VF(vfdiv_vf_d, 8, 8)
1287
+GEN_VEXT_VF(vfdiv_vf_h)
1288
+GEN_VEXT_VF(vfdiv_vf_w)
1289
+GEN_VEXT_VF(vfdiv_vf_d)
1290
1291
static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s)
1292
{
1293
@@ -XXX,XX +XXX,XX @@ static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s)
1294
RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv)
1295
RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv)
1296
RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv)
1297
-GEN_VEXT_VF(vfrdiv_vf_h, 2, 2)
1298
-GEN_VEXT_VF(vfrdiv_vf_w, 4, 4)
1299
-GEN_VEXT_VF(vfrdiv_vf_d, 8, 8)
1300
+GEN_VEXT_VF(vfrdiv_vf_h)
1301
+GEN_VEXT_VF(vfrdiv_vf_w)
1302
+GEN_VEXT_VF(vfrdiv_vf_d)
1303
1304
/* Vector Widening Floating-Point Multiply */
1305
static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s)
1306
@@ -XXX,XX +XXX,XX @@ static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s)
1307
}
1308
RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16)
1309
RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32)
1310
-GEN_VEXT_VV_ENV(vfwmul_vv_h, 2, 4)
1311
-GEN_VEXT_VV_ENV(vfwmul_vv_w, 4, 8)
1312
+GEN_VEXT_VV_ENV(vfwmul_vv_h)
1313
+GEN_VEXT_VV_ENV(vfwmul_vv_w)
1314
RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16)
1315
RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32)
1316
-GEN_VEXT_VF(vfwmul_vf_h, 2, 4)
1317
-GEN_VEXT_VF(vfwmul_vf_w, 4, 8)
1318
+GEN_VEXT_VF(vfwmul_vf_h)
1319
+GEN_VEXT_VF(vfwmul_vf_w)
1320
1321
/* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */
1322
#define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
1323
@@ -XXX,XX +XXX,XX @@ static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
1324
RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16)
1325
RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32)
1326
RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64)
1327
-GEN_VEXT_VV_ENV(vfmacc_vv_h, 2, 2)
1328
-GEN_VEXT_VV_ENV(vfmacc_vv_w, 4, 4)
1329
-GEN_VEXT_VV_ENV(vfmacc_vv_d, 8, 8)
1330
+GEN_VEXT_VV_ENV(vfmacc_vv_h)
1331
+GEN_VEXT_VV_ENV(vfmacc_vv_w)
1332
+GEN_VEXT_VV_ENV(vfmacc_vv_d)
1333
1334
#define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
1335
static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
1336
@@ -XXX,XX +XXX,XX @@ static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
1337
RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16)
1338
RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32)
1339
RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64)
1340
-GEN_VEXT_VF(vfmacc_vf_h, 2, 2)
1341
-GEN_VEXT_VF(vfmacc_vf_w, 4, 4)
1342
-GEN_VEXT_VF(vfmacc_vf_d, 8, 8)
1343
+GEN_VEXT_VF(vfmacc_vf_h)
1344
+GEN_VEXT_VF(vfmacc_vf_w)
1345
+GEN_VEXT_VF(vfmacc_vf_d)
1346
1347
static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
1348
{
1349
@@ -XXX,XX +XXX,XX @@ static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
1350
RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16)
1351
RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32)
1352
RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64)
1353
-GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2, 2)
1354
-GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4, 4)
1355
-GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8, 8)
1356
+GEN_VEXT_VV_ENV(vfnmacc_vv_h)
1357
+GEN_VEXT_VV_ENV(vfnmacc_vv_w)
1358
+GEN_VEXT_VV_ENV(vfnmacc_vv_d)
1359
RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16)
1360
RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32)
1361
RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64)
1362
-GEN_VEXT_VF(vfnmacc_vf_h, 2, 2)
1363
-GEN_VEXT_VF(vfnmacc_vf_w, 4, 4)
1364
-GEN_VEXT_VF(vfnmacc_vf_d, 8, 8)
1365
+GEN_VEXT_VF(vfnmacc_vf_h)
1366
+GEN_VEXT_VF(vfnmacc_vf_w)
1367
+GEN_VEXT_VF(vfnmacc_vf_d)
1368
1369
static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
1370
{
1371
@@ -XXX,XX +XXX,XX @@ static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
1372
RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16)
1373
RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32)
1374
RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64)
1375
-GEN_VEXT_VV_ENV(vfmsac_vv_h, 2, 2)
1376
-GEN_VEXT_VV_ENV(vfmsac_vv_w, 4, 4)
1377
-GEN_VEXT_VV_ENV(vfmsac_vv_d, 8, 8)
1378
+GEN_VEXT_VV_ENV(vfmsac_vv_h)
1379
+GEN_VEXT_VV_ENV(vfmsac_vv_w)
1380
+GEN_VEXT_VV_ENV(vfmsac_vv_d)
1381
RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16)
1382
RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32)
1383
RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64)
1384
-GEN_VEXT_VF(vfmsac_vf_h, 2, 2)
1385
-GEN_VEXT_VF(vfmsac_vf_w, 4, 4)
1386
-GEN_VEXT_VF(vfmsac_vf_d, 8, 8)
1387
+GEN_VEXT_VF(vfmsac_vf_h)
1388
+GEN_VEXT_VF(vfmsac_vf_w)
1389
+GEN_VEXT_VF(vfmsac_vf_d)
1390
1391
static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
1392
{
1393
@@ -XXX,XX +XXX,XX @@ static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
1394
RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16)
1395
RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32)
1396
RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64)
1397
-GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2, 2)
1398
-GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4, 4)
1399
-GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8, 8)
1400
+GEN_VEXT_VV_ENV(vfnmsac_vv_h)
1401
+GEN_VEXT_VV_ENV(vfnmsac_vv_w)
1402
+GEN_VEXT_VV_ENV(vfnmsac_vv_d)
1403
RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16)
1404
RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32)
1405
RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64)
1406
-GEN_VEXT_VF(vfnmsac_vf_h, 2, 2)
1407
-GEN_VEXT_VF(vfnmsac_vf_w, 4, 4)
1408
-GEN_VEXT_VF(vfnmsac_vf_d, 8, 8)
1409
+GEN_VEXT_VF(vfnmsac_vf_h)
1410
+GEN_VEXT_VF(vfnmsac_vf_w)
1411
+GEN_VEXT_VF(vfnmsac_vf_d)
1412
1413
static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
1414
{
1415
@@ -XXX,XX +XXX,XX @@ static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
1416
RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16)
1417
RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32)
1418
RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64)
1419
-GEN_VEXT_VV_ENV(vfmadd_vv_h, 2, 2)
1420
-GEN_VEXT_VV_ENV(vfmadd_vv_w, 4, 4)
1421
-GEN_VEXT_VV_ENV(vfmadd_vv_d, 8, 8)
1422
+GEN_VEXT_VV_ENV(vfmadd_vv_h)
1423
+GEN_VEXT_VV_ENV(vfmadd_vv_w)
1424
+GEN_VEXT_VV_ENV(vfmadd_vv_d)
1425
RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16)
1426
RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32)
1427
RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64)
1428
-GEN_VEXT_VF(vfmadd_vf_h, 2, 2)
1429
-GEN_VEXT_VF(vfmadd_vf_w, 4, 4)
1430
-GEN_VEXT_VF(vfmadd_vf_d, 8, 8)
1431
+GEN_VEXT_VF(vfmadd_vf_h)
1432
+GEN_VEXT_VF(vfmadd_vf_w)
1433
+GEN_VEXT_VF(vfmadd_vf_d)
1434
1435
static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
1436
{
1437
@@ -XXX,XX +XXX,XX @@ static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
1438
RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16)
1439
RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32)
1440
RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64)
1441
-GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2, 2)
1442
-GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4, 4)
1443
-GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8, 8)
1444
+GEN_VEXT_VV_ENV(vfnmadd_vv_h)
1445
+GEN_VEXT_VV_ENV(vfnmadd_vv_w)
1446
+GEN_VEXT_VV_ENV(vfnmadd_vv_d)
1447
RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16)
1448
RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32)
1449
RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64)
1450
-GEN_VEXT_VF(vfnmadd_vf_h, 2, 2)
1451
-GEN_VEXT_VF(vfnmadd_vf_w, 4, 4)
1452
-GEN_VEXT_VF(vfnmadd_vf_d, 8, 8)
1453
+GEN_VEXT_VF(vfnmadd_vf_h)
1454
+GEN_VEXT_VF(vfnmadd_vf_w)
1455
+GEN_VEXT_VF(vfnmadd_vf_d)
1456
1457
static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
1458
{
1459
@@ -XXX,XX +XXX,XX @@ static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
1460
RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16)
1461
RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32)
1462
RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64)
1463
-GEN_VEXT_VV_ENV(vfmsub_vv_h, 2, 2)
1464
-GEN_VEXT_VV_ENV(vfmsub_vv_w, 4, 4)
1465
-GEN_VEXT_VV_ENV(vfmsub_vv_d, 8, 8)
1466
+GEN_VEXT_VV_ENV(vfmsub_vv_h)
1467
+GEN_VEXT_VV_ENV(vfmsub_vv_w)
1468
+GEN_VEXT_VV_ENV(vfmsub_vv_d)
1469
RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16)
1470
RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32)
1471
RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64)
1472
-GEN_VEXT_VF(vfmsub_vf_h, 2, 2)
1473
-GEN_VEXT_VF(vfmsub_vf_w, 4, 4)
1474
-GEN_VEXT_VF(vfmsub_vf_d, 8, 8)
1475
+GEN_VEXT_VF(vfmsub_vf_h)
1476
+GEN_VEXT_VF(vfmsub_vf_w)
1477
+GEN_VEXT_VF(vfmsub_vf_d)
1478
1479
static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
1480
{
1481
@@ -XXX,XX +XXX,XX @@ static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
1482
RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16)
1483
RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32)
1484
RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64)
1485
-GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2, 2)
1486
-GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4, 4)
1487
-GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8, 8)
1488
+GEN_VEXT_VV_ENV(vfnmsub_vv_h)
1489
+GEN_VEXT_VV_ENV(vfnmsub_vv_w)
1490
+GEN_VEXT_VV_ENV(vfnmsub_vv_d)
1491
RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16)
1492
RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32)
1493
RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64)
1494
-GEN_VEXT_VF(vfnmsub_vf_h, 2, 2)
1495
-GEN_VEXT_VF(vfnmsub_vf_w, 4, 4)
1496
-GEN_VEXT_VF(vfnmsub_vf_d, 8, 8)
1497
+GEN_VEXT_VF(vfnmsub_vf_h)
1498
+GEN_VEXT_VF(vfnmsub_vf_w)
1499
+GEN_VEXT_VF(vfnmsub_vf_d)
1500
1501
/* Vector Widening Floating-Point Fused Multiply-Add Instructions */
1502
static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
1503
@@ -XXX,XX +XXX,XX @@ static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
1504
1505
RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16)
1506
RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32)
1507
-GEN_VEXT_VV_ENV(vfwmacc_vv_h, 2, 4)
1508
-GEN_VEXT_VV_ENV(vfwmacc_vv_w, 4, 8)
1509
+GEN_VEXT_VV_ENV(vfwmacc_vv_h)
1510
+GEN_VEXT_VV_ENV(vfwmacc_vv_w)
1511
RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16)
1512
RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32)
1513
-GEN_VEXT_VF(vfwmacc_vf_h, 2, 4)
1514
-GEN_VEXT_VF(vfwmacc_vf_w, 4, 8)
1515
+GEN_VEXT_VF(vfwmacc_vf_h)
1516
+GEN_VEXT_VF(vfwmacc_vf_w)
1517
1518
static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
1519
{
1520
@@ -XXX,XX +XXX,XX @@ static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
1521
1522
RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16)
1523
RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32)
1524
-GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 2, 4)
1525
-GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 4, 8)
1526
+GEN_VEXT_VV_ENV(vfwnmacc_vv_h)
1527
+GEN_VEXT_VV_ENV(vfwnmacc_vv_w)
1528
RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16)
1529
RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32)
1530
-GEN_VEXT_VF(vfwnmacc_vf_h, 2, 4)
1531
-GEN_VEXT_VF(vfwnmacc_vf_w, 4, 8)
1532
+GEN_VEXT_VF(vfwnmacc_vf_h)
1533
+GEN_VEXT_VF(vfwnmacc_vf_w)
1534
1535
static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
1536
{
1537
@@ -XXX,XX +XXX,XX @@ static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
1538
1539
RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16)
1540
RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32)
1541
-GEN_VEXT_VV_ENV(vfwmsac_vv_h, 2, 4)
1542
-GEN_VEXT_VV_ENV(vfwmsac_vv_w, 4, 8)
1543
+GEN_VEXT_VV_ENV(vfwmsac_vv_h)
1544
+GEN_VEXT_VV_ENV(vfwmsac_vv_w)
1545
RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16)
1546
RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32)
1547
-GEN_VEXT_VF(vfwmsac_vf_h, 2, 4)
1548
-GEN_VEXT_VF(vfwmsac_vf_w, 4, 8)
1549
+GEN_VEXT_VF(vfwmsac_vf_h)
1550
+GEN_VEXT_VF(vfwmsac_vf_w)
1551
1552
static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
1553
{
1554
@@ -XXX,XX +XXX,XX @@ static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
1555
1556
RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16)
1557
RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32)
1558
-GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 2, 4)
1559
-GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 4, 8)
1560
+GEN_VEXT_VV_ENV(vfwnmsac_vv_h)
1561
+GEN_VEXT_VV_ENV(vfwnmsac_vv_w)
1562
RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16)
1563
RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32)
1564
-GEN_VEXT_VF(vfwnmsac_vf_h, 2, 4)
1565
-GEN_VEXT_VF(vfwnmsac_vf_w, 4, 8)
1566
+GEN_VEXT_VF(vfwnmsac_vf_h)
1567
+GEN_VEXT_VF(vfwnmsac_vf_w)
1568
1569
/* Vector Floating-Point Square-Root Instruction */
1570
/* (TD, T2, TX2) */
1571
@@ -XXX,XX +XXX,XX @@ static void do_##NAME(void *vd, void *vs2, int i, \
1572
*((TD *)vd + HD(i)) = OP(s2, &env->fp_status); \
1573
}
1574
1575
-#define GEN_VEXT_V_ENV(NAME, ESZ, DSZ) \
1576
+#define GEN_VEXT_V_ENV(NAME) \
1577
void HELPER(NAME)(void *vd, void *v0, void *vs2, \
1578
CPURISCVState *env, uint32_t desc) \
1579
{ \
1580
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs2, \
1581
RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt)
1582
RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt)
1583
RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt)
1584
-GEN_VEXT_V_ENV(vfsqrt_v_h, 2, 2)
1585
-GEN_VEXT_V_ENV(vfsqrt_v_w, 4, 4)
1586
-GEN_VEXT_V_ENV(vfsqrt_v_d, 8, 8)
1587
+GEN_VEXT_V_ENV(vfsqrt_v_h)
1588
+GEN_VEXT_V_ENV(vfsqrt_v_w)
1589
+GEN_VEXT_V_ENV(vfsqrt_v_d)
1590
1591
/*
1592
* Vector Floating-Point Reciprocal Square-Root Estimate Instruction
1593
@@ -XXX,XX +XXX,XX @@ static float64 frsqrt7_d(float64 f, float_status *s)
1594
RVVCALL(OPFVV1, vfrsqrt7_v_h, OP_UU_H, H2, H2, frsqrt7_h)
1595
RVVCALL(OPFVV1, vfrsqrt7_v_w, OP_UU_W, H4, H4, frsqrt7_s)
1596
RVVCALL(OPFVV1, vfrsqrt7_v_d, OP_UU_D, H8, H8, frsqrt7_d)
1597
-GEN_VEXT_V_ENV(vfrsqrt7_v_h, 2, 2)
1598
-GEN_VEXT_V_ENV(vfrsqrt7_v_w, 4, 4)
1599
-GEN_VEXT_V_ENV(vfrsqrt7_v_d, 8, 8)
1600
+GEN_VEXT_V_ENV(vfrsqrt7_v_h)
1601
+GEN_VEXT_V_ENV(vfrsqrt7_v_w)
1602
+GEN_VEXT_V_ENV(vfrsqrt7_v_d)
1603
1604
/*
1605
* Vector Floating-Point Reciprocal Estimate Instruction
1606
@@ -XXX,XX +XXX,XX @@ static float64 frec7_d(float64 f, float_status *s)
1607
RVVCALL(OPFVV1, vfrec7_v_h, OP_UU_H, H2, H2, frec7_h)
1608
RVVCALL(OPFVV1, vfrec7_v_w, OP_UU_W, H4, H4, frec7_s)
1609
RVVCALL(OPFVV1, vfrec7_v_d, OP_UU_D, H8, H8, frec7_d)
1610
-GEN_VEXT_V_ENV(vfrec7_v_h, 2, 2)
1611
-GEN_VEXT_V_ENV(vfrec7_v_w, 4, 4)
1612
-GEN_VEXT_V_ENV(vfrec7_v_d, 8, 8)
1613
+GEN_VEXT_V_ENV(vfrec7_v_h)
1614
+GEN_VEXT_V_ENV(vfrec7_v_w)
1615
+GEN_VEXT_V_ENV(vfrec7_v_d)
1616
1617
/* Vector Floating-Point MIN/MAX Instructions */
1618
RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minimum_number)
1619
RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, float32_minimum_number)
1620
RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minimum_number)
1621
-GEN_VEXT_VV_ENV(vfmin_vv_h, 2, 2)
1622
-GEN_VEXT_VV_ENV(vfmin_vv_w, 4, 4)
1623
-GEN_VEXT_VV_ENV(vfmin_vv_d, 8, 8)
1624
+GEN_VEXT_VV_ENV(vfmin_vv_h)
1625
+GEN_VEXT_VV_ENV(vfmin_vv_w)
1626
+GEN_VEXT_VV_ENV(vfmin_vv_d)
1627
RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minimum_number)
1628
RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minimum_number)
1629
RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minimum_number)
1630
-GEN_VEXT_VF(vfmin_vf_h, 2, 2)
1631
-GEN_VEXT_VF(vfmin_vf_w, 4, 4)
1632
-GEN_VEXT_VF(vfmin_vf_d, 8, 8)
1633
+GEN_VEXT_VF(vfmin_vf_h)
1634
+GEN_VEXT_VF(vfmin_vf_w)
1635
+GEN_VEXT_VF(vfmin_vf_d)
1636
1637
RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maximum_number)
1638
RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maximum_number)
1639
RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maximum_number)
1640
-GEN_VEXT_VV_ENV(vfmax_vv_h, 2, 2)
1641
-GEN_VEXT_VV_ENV(vfmax_vv_w, 4, 4)
1642
-GEN_VEXT_VV_ENV(vfmax_vv_d, 8, 8)
1643
+GEN_VEXT_VV_ENV(vfmax_vv_h)
1644
+GEN_VEXT_VV_ENV(vfmax_vv_w)
1645
+GEN_VEXT_VV_ENV(vfmax_vv_d)
1646
RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maximum_number)
1647
RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maximum_number)
1648
RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maximum_number)
1649
-GEN_VEXT_VF(vfmax_vf_h, 2, 2)
1650
-GEN_VEXT_VF(vfmax_vf_w, 4, 4)
1651
-GEN_VEXT_VF(vfmax_vf_d, 8, 8)
1652
+GEN_VEXT_VF(vfmax_vf_h)
1653
+GEN_VEXT_VF(vfmax_vf_w)
1654
+GEN_VEXT_VF(vfmax_vf_d)
1655
1656
/* Vector Floating-Point Sign-Injection Instructions */
1657
static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s)
1658
@@ -XXX,XX +XXX,XX @@ static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s)
1659
RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16)
1660
RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32)
1661
RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64)
1662
-GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2, 2)
1663
-GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4, 4)
1664
-GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8, 8)
1665
+GEN_VEXT_VV_ENV(vfsgnj_vv_h)
1666
+GEN_VEXT_VV_ENV(vfsgnj_vv_w)
1667
+GEN_VEXT_VV_ENV(vfsgnj_vv_d)
1668
RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16)
1669
RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32)
1670
RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64)
1671
-GEN_VEXT_VF(vfsgnj_vf_h, 2, 2)
1672
-GEN_VEXT_VF(vfsgnj_vf_w, 4, 4)
1673
-GEN_VEXT_VF(vfsgnj_vf_d, 8, 8)
1674
+GEN_VEXT_VF(vfsgnj_vf_h)
1675
+GEN_VEXT_VF(vfsgnj_vf_w)
1676
+GEN_VEXT_VF(vfsgnj_vf_d)
1677
1678
static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s)
1679
{
1680
@@ -XXX,XX +XXX,XX @@ static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s)
1681
RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16)
1682
RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32)
1683
RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64)
1684
-GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2, 2)
1685
-GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4, 4)
1686
-GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8, 8)
1687
+GEN_VEXT_VV_ENV(vfsgnjn_vv_h)
1688
+GEN_VEXT_VV_ENV(vfsgnjn_vv_w)
1689
+GEN_VEXT_VV_ENV(vfsgnjn_vv_d)
1690
RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16)
1691
RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32)
1692
RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64)
1693
-GEN_VEXT_VF(vfsgnjn_vf_h, 2, 2)
1694
-GEN_VEXT_VF(vfsgnjn_vf_w, 4, 4)
1695
-GEN_VEXT_VF(vfsgnjn_vf_d, 8, 8)
1696
+GEN_VEXT_VF(vfsgnjn_vf_h)
1697
+GEN_VEXT_VF(vfsgnjn_vf_w)
1698
+GEN_VEXT_VF(vfsgnjn_vf_d)
1699
1700
static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s)
1701
{
1702
@@ -XXX,XX +XXX,XX @@ static uint64_t fsgnjx64(uint64_t a, uint64_t b, float_status *s)
1703
RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16)
1704
RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32)
1705
RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64)
1706
-GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2, 2)
1707
-GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4, 4)
1708
-GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8, 8)
1709
+GEN_VEXT_VV_ENV(vfsgnjx_vv_h)
1710
+GEN_VEXT_VV_ENV(vfsgnjx_vv_w)
1711
+GEN_VEXT_VV_ENV(vfsgnjx_vv_d)
1712
RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16)
1713
RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32)
1714
RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64)
1715
-GEN_VEXT_VF(vfsgnjx_vf_h, 2, 2)
1716
-GEN_VEXT_VF(vfsgnjx_vf_w, 4, 4)
1717
-GEN_VEXT_VF(vfsgnjx_vf_d, 8, 8)
1718
+GEN_VEXT_VF(vfsgnjx_vf_h)
1719
+GEN_VEXT_VF(vfsgnjx_vf_w)
1720
+GEN_VEXT_VF(vfsgnjx_vf_d)
1721
1722
/* Vector Floating-Point Compare Instructions */
1723
#define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP) \
1724
@@ -XXX,XX +XXX,XX @@ static void do_##NAME(void *vd, void *vs2, int i) \
1725
*((TD *)vd + HD(i)) = OP(s2); \
1726
}
1727
1728
-#define GEN_VEXT_V(NAME, ESZ, DSZ) \
1729
+#define GEN_VEXT_V(NAME) \
1730
void HELPER(NAME)(void *vd, void *v0, void *vs2, \
1731
CPURISCVState *env, uint32_t desc) \
1732
{ \
1733
@@ -XXX,XX +XXX,XX @@ target_ulong fclass_d(uint64_t frs1)
1734
RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h)
1735
RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s)
1736
RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d)
1737
-GEN_VEXT_V(vfclass_v_h, 2, 2)
1738
-GEN_VEXT_V(vfclass_v_w, 4, 4)
1739
-GEN_VEXT_V(vfclass_v_d, 8, 8)
1740
+GEN_VEXT_V(vfclass_v_h)
1741
+GEN_VEXT_V(vfclass_v_w)
1742
+GEN_VEXT_V(vfclass_v_d)
1743
1744
/* Vector Floating-Point Merge Instruction */
1745
#define GEN_VFMERGE_VF(NAME, ETYPE, H) \
1746
@@ -XXX,XX +XXX,XX @@ GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8)
1747
RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16)
1748
RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32)
1749
RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64)
1750
-GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2, 2)
1751
-GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4, 4)
1752
-GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8, 8)
1753
+GEN_VEXT_V_ENV(vfcvt_xu_f_v_h)
1754
+GEN_VEXT_V_ENV(vfcvt_xu_f_v_w)
1755
+GEN_VEXT_V_ENV(vfcvt_xu_f_v_d)
1756
1757
/* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */
1758
RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16)
1759
RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32)
1760
RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64)
1761
-GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2, 2)
1762
-GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4, 4)
1763
-GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8, 8)
1764
+GEN_VEXT_V_ENV(vfcvt_x_f_v_h)
1765
+GEN_VEXT_V_ENV(vfcvt_x_f_v_w)
1766
+GEN_VEXT_V_ENV(vfcvt_x_f_v_d)
1767
1768
/* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */
1769
RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16)
1770
RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32)
1771
RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64)
1772
-GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2, 2)
1773
-GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4, 4)
1774
-GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8, 8)
1775
+GEN_VEXT_V_ENV(vfcvt_f_xu_v_h)
1776
+GEN_VEXT_V_ENV(vfcvt_f_xu_v_w)
1777
+GEN_VEXT_V_ENV(vfcvt_f_xu_v_d)
1778
1779
/* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */
1780
RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16)
1781
RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32)
1782
RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64)
1783
-GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2, 2)
1784
-GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4, 4)
1785
-GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8, 8)
1786
+GEN_VEXT_V_ENV(vfcvt_f_x_v_h)
1787
+GEN_VEXT_V_ENV(vfcvt_f_x_v_w)
1788
+GEN_VEXT_V_ENV(vfcvt_f_x_v_d)
1789
1790
/* Widening Floating-Point/Integer Type-Convert Instructions */
1791
/* (TD, T2, TX2) */
1792
@@ -XXX,XX +XXX,XX @@ GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8, 8)
1793
/* vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer.*/
1794
RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32)
1795
RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64)
1796
-GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 2, 4)
1797
-GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 4, 8)
1798
+GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h)
1799
+GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w)
1800
1801
/* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */
1802
RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32)
1803
RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64)
1804
-GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 2, 4)
1805
-GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 4, 8)
1806
+GEN_VEXT_V_ENV(vfwcvt_x_f_v_h)
1807
+GEN_VEXT_V_ENV(vfwcvt_x_f_v_w)
1808
1809
/* vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float */
1810
RVVCALL(OPFVV1, vfwcvt_f_xu_v_b, WOP_UU_B, H2, H1, uint8_to_float16)
1811
RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32)
1812
RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64)
1813
-GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b, 1, 2)
1814
-GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 2, 4)
1815
-GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 4, 8)
1816
+GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b)
1817
+GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h)
1818
+GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w)
1819
1820
/* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */
1821
RVVCALL(OPFVV1, vfwcvt_f_x_v_b, WOP_UU_B, H2, H1, int8_to_float16)
1822
RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32)
1823
RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64)
1824
-GEN_VEXT_V_ENV(vfwcvt_f_x_v_b, 1, 2)
1825
-GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 2, 4)
1826
-GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 4, 8)
1827
+GEN_VEXT_V_ENV(vfwcvt_f_x_v_b)
1828
+GEN_VEXT_V_ENV(vfwcvt_f_x_v_h)
1829
+GEN_VEXT_V_ENV(vfwcvt_f_x_v_w)
1830
1831
/*
1832
* vfwcvt.f.f.v vd, vs2, vm
1833
@@ -XXX,XX +XXX,XX @@ static uint32_t vfwcvtffv16(uint16_t a, float_status *s)
1834
1835
RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16)
1836
RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64)
1837
-GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 2, 4)
1838
-GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 4, 8)
1839
+GEN_VEXT_V_ENV(vfwcvt_f_f_v_h)
1840
+GEN_VEXT_V_ENV(vfwcvt_f_f_v_w)
1841
1842
/* Narrowing Floating-Point/Integer Type-Convert Instructions */
1843
/* (TD, T2, TX2) */
1844
@@ -XXX,XX +XXX,XX @@ GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 4, 8)
1845
RVVCALL(OPFVV1, vfncvt_xu_f_w_b, NOP_UU_B, H1, H2, float16_to_uint8)
1846
RVVCALL(OPFVV1, vfncvt_xu_f_w_h, NOP_UU_H, H2, H4, float32_to_uint16)
1847
RVVCALL(OPFVV1, vfncvt_xu_f_w_w, NOP_UU_W, H4, H8, float64_to_uint32)
1848
-GEN_VEXT_V_ENV(vfncvt_xu_f_w_b, 1, 1)
1849
-GEN_VEXT_V_ENV(vfncvt_xu_f_w_h, 2, 2)
1850
-GEN_VEXT_V_ENV(vfncvt_xu_f_w_w, 4, 4)
1851
+GEN_VEXT_V_ENV(vfncvt_xu_f_w_b)
1852
+GEN_VEXT_V_ENV(vfncvt_xu_f_w_h)
1853
+GEN_VEXT_V_ENV(vfncvt_xu_f_w_w)
1854
1855
/* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */
1856
RVVCALL(OPFVV1, vfncvt_x_f_w_b, NOP_UU_B, H1, H2, float16_to_int8)
1857
RVVCALL(OPFVV1, vfncvt_x_f_w_h, NOP_UU_H, H2, H4, float32_to_int16)
1858
RVVCALL(OPFVV1, vfncvt_x_f_w_w, NOP_UU_W, H4, H8, float64_to_int32)
1859
-GEN_VEXT_V_ENV(vfncvt_x_f_w_b, 1, 1)
1860
-GEN_VEXT_V_ENV(vfncvt_x_f_w_h, 2, 2)
1861
-GEN_VEXT_V_ENV(vfncvt_x_f_w_w, 4, 4)
1862
+GEN_VEXT_V_ENV(vfncvt_x_f_w_b)
1863
+GEN_VEXT_V_ENV(vfncvt_x_f_w_h)
1864
+GEN_VEXT_V_ENV(vfncvt_x_f_w_w)
1865
1866
/* vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float */
1867
RVVCALL(OPFVV1, vfncvt_f_xu_w_h, NOP_UU_H, H2, H4, uint32_to_float16)
1868
RVVCALL(OPFVV1, vfncvt_f_xu_w_w, NOP_UU_W, H4, H8, uint64_to_float32)
1869
-GEN_VEXT_V_ENV(vfncvt_f_xu_w_h, 2, 2)
1870
-GEN_VEXT_V_ENV(vfncvt_f_xu_w_w, 4, 4)
1871
+GEN_VEXT_V_ENV(vfncvt_f_xu_w_h)
1872
+GEN_VEXT_V_ENV(vfncvt_f_xu_w_w)
1873
1874
/* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */
1875
RVVCALL(OPFVV1, vfncvt_f_x_w_h, NOP_UU_H, H2, H4, int32_to_float16)
1876
RVVCALL(OPFVV1, vfncvt_f_x_w_w, NOP_UU_W, H4, H8, int64_to_float32)
1877
-GEN_VEXT_V_ENV(vfncvt_f_x_w_h, 2, 2)
1878
-GEN_VEXT_V_ENV(vfncvt_f_x_w_w, 4, 4)
1879
+GEN_VEXT_V_ENV(vfncvt_f_x_w_h)
1880
+GEN_VEXT_V_ENV(vfncvt_f_x_w_w)
1881
1882
/* vfncvt.f.f.v vd, vs2, vm # Convert double float to single-width float. */
1883
static uint16_t vfncvtffv16(uint32_t a, float_status *s)
1884
@@ -XXX,XX +XXX,XX @@ static uint16_t vfncvtffv16(uint32_t a, float_status *s)
1885
1886
RVVCALL(OPFVV1, vfncvt_f_f_w_h, NOP_UU_H, H2, H4, vfncvtffv16)
1887
RVVCALL(OPFVV1, vfncvt_f_f_w_w, NOP_UU_W, H4, H8, float64_to_float32)
1888
-GEN_VEXT_V_ENV(vfncvt_f_f_w_h, 2, 2)
1889
-GEN_VEXT_V_ENV(vfncvt_f_f_w_w, 4, 4)
1890
+GEN_VEXT_V_ENV(vfncvt_f_f_w_h)
1891
+GEN_VEXT_V_ENV(vfncvt_f_f_w_w)
1892
1893
/*
1894
*** Vector Reduction Operations
1895
--
97
--
1896
2.36.1
98
2.41.0
diff view generated by jsdifflib
1
From: eopXD <yueh.ting.chen@gmail.com>
1
From: Kiran Ostrolenk <kiran.ostrolenk@codethink.co.uk>
2
2
3
Signed-off-by: eop Chen <eop.chen@sifive.com>
3
Move some macros out of `vector_helper` and into `vector_internals`.
4
Reviewed-by: Frank Chang <frank.chang@sifive.com>
4
This ensures they can be used by both vector and vector-crypto helpers
5
(latter implemented in proceeding commits).
6
7
Signed-off-by: Kiran Ostrolenk <kiran.ostrolenk@codethink.co.uk>
5
Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn>
8
Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn>
6
Acked-by: Alistair Francis <alistair.francis@wdc.com>
9
Signed-off-by: Max Chou <max.chou@sifive.com>
7
Message-Id: <165449614532.19704.7000832880482980398-13@git.sr.ht>
10
Message-ID: <20230711165917.2629866-8-max.chou@sifive.com>
8
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
11
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
9
---
12
---
10
target/riscv/vector_helper.c | 20 ++++++++++++++++++++
13
target/riscv/vector_internals.h | 46 +++++++++++++++++++++++++++++++++
11
1 file changed, 20 insertions(+)
14
target/riscv/vector_helper.c | 42 ------------------------------
15
2 files changed, 46 insertions(+), 42 deletions(-)
12
16
17
diff --git a/target/riscv/vector_internals.h b/target/riscv/vector_internals.h
18
index XXXXXXX..XXXXXXX 100644
19
--- a/target/riscv/vector_internals.h
20
+++ b/target/riscv/vector_internals.h
21
@@ -XXX,XX +XXX,XX @@ void vext_set_elems_1s(void *base, uint32_t is_agnostic, uint32_t cnt,
22
/* expand macro args before macro */
23
#define RVVCALL(macro, ...) macro(__VA_ARGS__)
24
25
+/* (TD, T2, TX2) */
26
+#define OP_UU_B uint8_t, uint8_t, uint8_t
27
+#define OP_UU_H uint16_t, uint16_t, uint16_t
28
+#define OP_UU_W uint32_t, uint32_t, uint32_t
29
+#define OP_UU_D uint64_t, uint64_t, uint64_t
30
+
31
/* (TD, T1, T2, TX1, TX2) */
32
#define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t
33
#define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t
34
#define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t
35
#define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t
36
37
+#define OPIVV1(NAME, TD, T2, TX2, HD, HS2, OP) \
38
+static void do_##NAME(void *vd, void *vs2, int i) \
39
+{ \
40
+ TX2 s2 = *((T2 *)vs2 + HS2(i)); \
41
+ *((TD *)vd + HD(i)) = OP(s2); \
42
+}
43
+
44
+#define GEN_VEXT_V(NAME, ESZ) \
45
+void HELPER(NAME)(void *vd, void *v0, void *vs2, \
46
+ CPURISCVState *env, uint32_t desc) \
47
+{ \
48
+ uint32_t vm = vext_vm(desc); \
49
+ uint32_t vl = env->vl; \
50
+ uint32_t total_elems = \
51
+ vext_get_total_elems(env, desc, ESZ); \
52
+ uint32_t vta = vext_vta(desc); \
53
+ uint32_t vma = vext_vma(desc); \
54
+ uint32_t i; \
55
+ \
56
+ for (i = env->vstart; i < vl; i++) { \
57
+ if (!vm && !vext_elem_mask(v0, i)) { \
58
+ /* set masked-off elements to 1s */ \
59
+ vext_set_elems_1s(vd, vma, i * ESZ, \
60
+ (i + 1) * ESZ); \
61
+ continue; \
62
+ } \
63
+ do_##NAME(vd, vs2, i); \
64
+ } \
65
+ env->vstart = 0; \
66
+ /* set tail elements to 1s */ \
67
+ vext_set_elems_1s(vd, vta, vl * ESZ, \
68
+ total_elems * ESZ); \
69
+}
70
+
71
/* operation of two vector elements */
72
typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i);
73
74
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
75
do_##NAME, ESZ); \
76
}
77
78
+/* Three of the widening shortening macros: */
79
+/* (TD, T1, T2, TX1, TX2) */
80
+#define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
81
+#define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
82
+#define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
83
+
84
#endif /* TARGET_RISCV_VECTOR_INTERNALS_H */
13
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
85
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
14
index XXXXXXX..XXXXXXX 100644
86
index XXXXXXX..XXXXXXX 100644
15
--- a/target/riscv/vector_helper.c
87
--- a/target/riscv/vector_helper.c
16
+++ b/target/riscv/vector_helper.c
88
+++ b/target/riscv/vector_helper.c
17
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, \
89
@@ -XXX,XX +XXX,XX @@ GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b)
18
{ \
90
#define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t
19
uint32_t vm = vext_vm(desc); \
91
#define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t
20
uint32_t vl = env->vl; \
92
#define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t
21
+ uint32_t esz = sizeof(TD); \
93
-#define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
22
+ uint32_t vlenb = simd_maxsz(desc); \
94
-#define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
23
+ uint32_t vta = vext_vta(desc); \
95
-#define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
24
uint32_t i; \
96
#define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
25
TD s1 = *((TD *)vs1 + HD(0)); \
97
#define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
26
\
98
#define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
27
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, \
99
@@ -XXX,XX +XXX,XX @@ GEN_VEXT_VF(vfwnmsac_vf_h, 4)
28
} \
100
GEN_VEXT_VF(vfwnmsac_vf_w, 8)
29
*((TD *)vd + HD(0)) = s1; \
101
30
env->vstart = 0; \
102
/* Vector Floating-Point Square-Root Instruction */
31
+ /* set tail elements to 1s */ \
103
-/* (TD, T2, TX2) */
32
+ vext_set_elems_1s(vd, vta, esz, vlenb); \
104
-#define OP_UU_H uint16_t, uint16_t, uint16_t
33
}
105
-#define OP_UU_W uint32_t, uint32_t, uint32_t
34
106
-#define OP_UU_D uint64_t, uint64_t, uint64_t
35
/* vd[0] = sum(vs1[0], vs2[*]) */
107
-
36
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, \
108
#define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP) \
37
{ \
109
static void do_##NAME(void *vd, void *vs2, int i, \
38
uint32_t vm = vext_vm(desc); \
110
CPURISCVState *env) \
39
uint32_t vl = env->vl; \
111
@@ -XXX,XX +XXX,XX @@ GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32)
40
+ uint32_t esz = sizeof(TD); \
112
GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64)
41
+ uint32_t vlenb = simd_maxsz(desc); \
113
42
+ uint32_t vta = vext_vta(desc); \
114
/* Vector Floating-Point Classify Instruction */
43
uint32_t i; \
115
-#define OPIVV1(NAME, TD, T2, TX2, HD, HS2, OP) \
44
TD s1 = *((TD *)vs1 + HD(0)); \
116
-static void do_##NAME(void *vd, void *vs2, int i) \
45
\
117
-{ \
46
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, \
118
- TX2 s2 = *((T2 *)vs2 + HS2(i)); \
47
} \
119
- *((TD *)vd + HD(i)) = OP(s2); \
48
*((TD *)vd + HD(0)) = s1; \
120
-}
49
env->vstart = 0; \
121
-
50
+ /* set tail elements to 1s */ \
122
-#define GEN_VEXT_V(NAME, ESZ) \
51
+ vext_set_elems_1s(vd, vta, esz, vlenb); \
123
-void HELPER(NAME)(void *vd, void *v0, void *vs2, \
52
}
124
- CPURISCVState *env, uint32_t desc) \
53
125
-{ \
54
/* Unordered sum */
126
- uint32_t vm = vext_vm(desc); \
55
@@ -XXX,XX +XXX,XX @@ void HELPER(vfwredsum_vs_h)(void *vd, void *v0, void *vs1,
127
- uint32_t vl = env->vl; \
128
- uint32_t total_elems = \
129
- vext_get_total_elems(env, desc, ESZ); \
130
- uint32_t vta = vext_vta(desc); \
131
- uint32_t vma = vext_vma(desc); \
132
- uint32_t i; \
133
- \
134
- for (i = env->vstart; i < vl; i++) { \
135
- if (!vm && !vext_elem_mask(v0, i)) { \
136
- /* set masked-off elements to 1s */ \
137
- vext_set_elems_1s(vd, vma, i * ESZ, \
138
- (i + 1) * ESZ); \
139
- continue; \
140
- } \
141
- do_##NAME(vd, vs2, i); \
142
- } \
143
- env->vstart = 0; \
144
- /* set tail elements to 1s */ \
145
- vext_set_elems_1s(vd, vta, vl * ESZ, \
146
- total_elems * ESZ); \
147
-}
148
-
149
target_ulong fclass_h(uint64_t frs1)
56
{
150
{
57
uint32_t vm = vext_vm(desc);
151
float16 f = frs1;
58
uint32_t vl = env->vl;
59
+ uint32_t esz = sizeof(uint32_t);
60
+ uint32_t vlenb = simd_maxsz(desc);
61
+ uint32_t vta = vext_vta(desc);
62
uint32_t i;
63
uint32_t s1 = *((uint32_t *)vs1 + H4(0));
64
65
@@ -XXX,XX +XXX,XX @@ void HELPER(vfwredsum_vs_h)(void *vd, void *v0, void *vs1,
66
}
67
*((uint32_t *)vd + H4(0)) = s1;
68
env->vstart = 0;
69
+ /* set tail elements to 1s */
70
+ vext_set_elems_1s(vd, vta, esz, vlenb);
71
}
72
73
void HELPER(vfwredsum_vs_w)(void *vd, void *v0, void *vs1,
74
@@ -XXX,XX +XXX,XX @@ void HELPER(vfwredsum_vs_w)(void *vd, void *v0, void *vs1,
75
{
76
uint32_t vm = vext_vm(desc);
77
uint32_t vl = env->vl;
78
+ uint32_t esz = sizeof(uint64_t);
79
+ uint32_t vlenb = simd_maxsz(desc);
80
+ uint32_t vta = vext_vta(desc);
81
uint32_t i;
82
uint64_t s1 = *((uint64_t *)vs1);
83
84
@@ -XXX,XX +XXX,XX @@ void HELPER(vfwredsum_vs_w)(void *vd, void *v0, void *vs1,
85
}
86
*((uint64_t *)vd) = s1;
87
env->vstart = 0;
88
+ /* set tail elements to 1s */
89
+ vext_set_elems_1s(vd, vta, esz, vlenb);
90
}
91
92
/*
93
--
152
--
94
2.36.1
153
2.41.0
diff view generated by jsdifflib
New patch
1
From: Dickon Hood <dickon.hood@codethink.co.uk>
1
2
3
This commit adds support for the Zvbb vector-crypto extension, which
4
consists of the following instructions:
5
6
* vrol.[vv,vx]
7
* vror.[vv,vx,vi]
8
* vbrev8.v
* vrev8.v
* vandn.[vv,vx]
* vbrev.v
* vclz.v
* vctz.v
* vcpop.v
* vwsll.[vv,vx,vi]

Translation functions are defined in
`target/riscv/insn_trans/trans_rvvk.c.inc` and helpers are defined in
`target/riscv/vcrypto_helper.c`.

Co-authored-by: Nazar Kazakov <nazar.kazakov@codethink.co.uk>
Co-authored-by: William Salmon <will.salmon@codethink.co.uk>
Co-authored-by: Kiran Ostrolenk <kiran.ostrolenk@codethink.co.uk>
[max.chou@sifive.com: Fix imm mode of vror.vi]
Signed-off-by: Nazar Kazakov <nazar.kazakov@codethink.co.uk>
Signed-off-by: William Salmon <will.salmon@codethink.co.uk>
Signed-off-by: Kiran Ostrolenk <kiran.ostrolenk@codethink.co.uk>
Signed-off-by: Dickon Hood <dickon.hood@codethink.co.uk>
Signed-off-by: Max Chou <max.chou@sifive.com>
Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
[max.chou@sifive.com: Exposed x-zvbb property]
Message-ID: <20230711165917.2629866-9-max.chou@sifive.com>
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
---
 target/riscv/cpu_cfg.h | 1 +
 target/riscv/helper.h | 62 +++++++++
 target/riscv/insn32.decode | 20 +++
 target/riscv/cpu.c | 12 ++
 target/riscv/vcrypto_helper.c | 138 +++++++++++++++++++
 target/riscv/insn_trans/trans_rvvk.c.inc | 164 +++++++++++++++++++++++
 6 files changed, 397 insertions(+)

diff --git a/target/riscv/cpu_cfg.h b/target/riscv/cpu_cfg.h
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/cpu_cfg.h
+++ b/target/riscv/cpu_cfg.h
@@ -XXX,XX +XXX,XX @@ struct RISCVCPUConfig {
 bool ext_zve32f;
 bool ext_zve64f;
 bool ext_zve64d;
+ bool ext_zvbb;
 bool ext_zvbc;
 bool ext_zmmul;
 bool ext_zvfbfmin;
diff --git a/target/riscv/helper.h b/target/riscv/helper.h
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/helper.h
+++ b/target/riscv/helper.h
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vclmul_vv, void, ptr, ptr, ptr, ptr, env, i32)
 DEF_HELPER_6(vclmul_vx, void, ptr, ptr, tl, ptr, env, i32)
 DEF_HELPER_6(vclmulh_vv, void, ptr, ptr, ptr, ptr, env, i32)
 DEF_HELPER_6(vclmulh_vx, void, ptr, ptr, tl, ptr, env, i32)
+
+DEF_HELPER_6(vror_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vror_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vror_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vror_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
+
+DEF_HELPER_6(vror_vx_b, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vror_vx_h, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vror_vx_w, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vror_vx_d, void, ptr, ptr, tl, ptr, env, i32)
+
+DEF_HELPER_6(vrol_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vrol_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vrol_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vrol_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
+
+DEF_HELPER_6(vrol_vx_b, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vrol_vx_h, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vrol_vx_w, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vrol_vx_d, void, ptr, ptr, tl, ptr, env, i32)
+
+DEF_HELPER_5(vrev8_v_b, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vrev8_v_h, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vrev8_v_w, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vrev8_v_d, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vbrev8_v_b, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vbrev8_v_h, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vbrev8_v_w, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vbrev8_v_d, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vbrev_v_b, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vbrev_v_h, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vbrev_v_w, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vbrev_v_d, void, ptr, ptr, ptr, env, i32)
+
+DEF_HELPER_5(vclz_v_b, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vclz_v_h, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vclz_v_w, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vclz_v_d, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vctz_v_b, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vctz_v_h, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vctz_v_w, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vctz_v_d, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vcpop_v_b, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vcpop_v_h, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vcpop_v_w, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vcpop_v_d, void, ptr, ptr, ptr, env, i32)
+
+DEF_HELPER_6(vwsll_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vwsll_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vwsll_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vwsll_vx_b, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vwsll_vx_h, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vwsll_vx_w, void, ptr, ptr, tl, ptr, env, i32)
+
+DEF_HELPER_6(vandn_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vandn_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vandn_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vandn_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vandn_vx_b, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vandn_vx_h, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vandn_vx_w, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vandn_vx_d, void, ptr, ptr, tl, ptr, env, i32)
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/insn32.decode
+++ b/target/riscv/insn32.decode
@@ -XXX,XX +XXX,XX @@
 %imm_u 12:s20 !function=ex_shift_12
 %imm_bs 30:2 !function=ex_shift_3
 %imm_rnum 20:4
+%imm_z6 26:1 15:5

 # Argument sets:
 &empty
@@ -XXX,XX +XXX,XX @@
 @r_vm ...... vm:1 ..... ..... ... ..... ....... &rmrr %rs2 %rs1 %rd
 @r_vm_1 ...... . ..... ..... ... ..... ....... &rmrr vm=1 %rs2 %rs1 %rd
 @r_vm_0 ...... . ..... ..... ... ..... ....... &rmrr vm=0 %rs2 %rs1 %rd
+@r2_zimm6 ..... . vm:1 ..... ..... ... ..... ....... &rmrr %rs2 rs1=%imm_z6 %rd
 @r2_zimm11 . zimm:11 ..... ... ..... ....... %rs1 %rd
 @r2_zimm10 .. zimm:10 ..... ... ..... ....... %rs1 %rd
 @r2_s ....... ..... ..... ... ..... ....... %rs2 %rs1
@@ -XXX,XX +XXX,XX @@ vclmul_vv 001100 . ..... ..... 010 ..... 1010111 @r_vm
 vclmul_vx 001100 . ..... ..... 110 ..... 1010111 @r_vm
 vclmulh_vv 001101 . ..... ..... 010 ..... 1010111 @r_vm
 vclmulh_vx 001101 . ..... ..... 110 ..... 1010111 @r_vm
+
+# *** Zvbb vector crypto extension ***
+vrol_vv 010101 . ..... ..... 000 ..... 1010111 @r_vm
+vrol_vx 010101 . ..... ..... 100 ..... 1010111 @r_vm
+vror_vv 010100 . ..... ..... 000 ..... 1010111 @r_vm
+vror_vx 010100 . ..... ..... 100 ..... 1010111 @r_vm
+vror_vi 01010. . ..... ..... 011 ..... 1010111 @r2_zimm6
+vbrev8_v 010010 . ..... 01000 010 ..... 1010111 @r2_vm
+vrev8_v 010010 . ..... 01001 010 ..... 1010111 @r2_vm
+vandn_vv 000001 . ..... ..... 000 ..... 1010111 @r_vm
+vandn_vx 000001 . ..... ..... 100 ..... 1010111 @r_vm
+vbrev_v 010010 . ..... 01010 010 ..... 1010111 @r2_vm
+vclz_v 010010 . ..... 01100 010 ..... 1010111 @r2_vm
+vctz_v 010010 . ..... 01101 010 ..... 1010111 @r2_vm
+vcpop_v 010010 . ..... 01110 010 ..... 1010111 @r2_vm
+vwsll_vv 110101 . ..... ..... 000 ..... 1010111 @r_vm
+vwsll_vx 110101 . ..... ..... 100 ..... 1010111 @r_vm
+vwsll_vi 110101 . ..... ..... 011 ..... 1010111 @r_vm
diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -XXX,XX +XXX,XX @@ static const struct isa_ext_data isa_edata_arr[] = {
 ISA_EXT_DATA_ENTRY(zksed, PRIV_VERSION_1_12_0, ext_zksed),
 ISA_EXT_DATA_ENTRY(zksh, PRIV_VERSION_1_12_0, ext_zksh),
 ISA_EXT_DATA_ENTRY(zkt, PRIV_VERSION_1_12_0, ext_zkt),
+ ISA_EXT_DATA_ENTRY(zvbb, PRIV_VERSION_1_12_0, ext_zvbb),
 ISA_EXT_DATA_ENTRY(zvbc, PRIV_VERSION_1_12_0, ext_zvbc),
 ISA_EXT_DATA_ENTRY(zve32f, PRIV_VERSION_1_10_0, ext_zve32f),
 ISA_EXT_DATA_ENTRY(zve64f, PRIV_VERSION_1_10_0, ext_zve64f),
@@ -XXX,XX +XXX,XX @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp)
 return;
 }

+ /*
+ * In principle Zve*x would also suffice here, were they supported
+ * in qemu
+ */
+ if (cpu->cfg.ext_zvbb && !cpu->cfg.ext_zve32f) {
+ error_setg(errp,
+ "Vector crypto extensions require V or Zve* extensions");
+ return;
+ }
+
 if (cpu->cfg.ext_zvbc && !cpu->cfg.ext_zve64f) {
 error_setg(errp, "Zvbc extension requires V or Zve64{f,d} extensions");
 return;
@@ -XXX,XX +XXX,XX @@ static Property riscv_cpu_extensions[] = {
 DEFINE_PROP_BOOL("x-zvfbfwma", RISCVCPU, cfg.ext_zvfbfwma, false),

 /* Vector cryptography extensions */
+ DEFINE_PROP_BOOL("x-zvbb", RISCVCPU, cfg.ext_zvbb, false),
 DEFINE_PROP_BOOL("x-zvbc", RISCVCPU, cfg.ext_zvbc, false),

 DEFINE_PROP_END_OF_LIST(),
diff --git a/target/riscv/vcrypto_helper.c b/target/riscv/vcrypto_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/vcrypto_helper.c
+++ b/target/riscv/vcrypto_helper.c
@@ -XXX,XX +XXX,XX @@
 #include "qemu/osdep.h"
 #include "qemu/host-utils.h"
 #include "qemu/bitops.h"
+#include "qemu/bswap.h"
 #include "cpu.h"
 #include "exec/memop.h"
 #include "exec/exec-all.h"
@@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2, vclmulh_vv, OP_UUU_D, H8, H8, H8, clmulh64)
 GEN_VEXT_VV(vclmulh_vv, 8)
 RVVCALL(OPIVX2, vclmulh_vx, OP_UUU_D, H8, H8, clmulh64)
 GEN_VEXT_VX(vclmulh_vx, 8)
+
+RVVCALL(OPIVV2, vror_vv_b, OP_UUU_B, H1, H1, H1, ror8)
+RVVCALL(OPIVV2, vror_vv_h, OP_UUU_H, H2, H2, H2, ror16)
+RVVCALL(OPIVV2, vror_vv_w, OP_UUU_W, H4, H4, H4, ror32)
+RVVCALL(OPIVV2, vror_vv_d, OP_UUU_D, H8, H8, H8, ror64)
+GEN_VEXT_VV(vror_vv_b, 1)
+GEN_VEXT_VV(vror_vv_h, 2)
+GEN_VEXT_VV(vror_vv_w, 4)
+GEN_VEXT_VV(vror_vv_d, 8)
+
+RVVCALL(OPIVX2, vror_vx_b, OP_UUU_B, H1, H1, ror8)
+RVVCALL(OPIVX2, vror_vx_h, OP_UUU_H, H2, H2, ror16)
+RVVCALL(OPIVX2, vror_vx_w, OP_UUU_W, H4, H4, ror32)
+RVVCALL(OPIVX2, vror_vx_d, OP_UUU_D, H8, H8, ror64)
+GEN_VEXT_VX(vror_vx_b, 1)
+GEN_VEXT_VX(vror_vx_h, 2)
+GEN_VEXT_VX(vror_vx_w, 4)
+GEN_VEXT_VX(vror_vx_d, 8)
+
+RVVCALL(OPIVV2, vrol_vv_b, OP_UUU_B, H1, H1, H1, rol8)
+RVVCALL(OPIVV2, vrol_vv_h, OP_UUU_H, H2, H2, H2, rol16)
+RVVCALL(OPIVV2, vrol_vv_w, OP_UUU_W, H4, H4, H4, rol32)
+RVVCALL(OPIVV2, vrol_vv_d, OP_UUU_D, H8, H8, H8, rol64)
+GEN_VEXT_VV(vrol_vv_b, 1)
+GEN_VEXT_VV(vrol_vv_h, 2)
+GEN_VEXT_VV(vrol_vv_w, 4)
+GEN_VEXT_VV(vrol_vv_d, 8)
+
+RVVCALL(OPIVX2, vrol_vx_b, OP_UUU_B, H1, H1, rol8)
+RVVCALL(OPIVX2, vrol_vx_h, OP_UUU_H, H2, H2, rol16)
+RVVCALL(OPIVX2, vrol_vx_w, OP_UUU_W, H4, H4, rol32)
+RVVCALL(OPIVX2, vrol_vx_d, OP_UUU_D, H8, H8, rol64)
+GEN_VEXT_VX(vrol_vx_b, 1)
+GEN_VEXT_VX(vrol_vx_h, 2)
+GEN_VEXT_VX(vrol_vx_w, 4)
+GEN_VEXT_VX(vrol_vx_d, 8)
+
+static uint64_t brev8(uint64_t val)
+{
+ val = ((val & 0x5555555555555555ull) << 1) |
+ ((val & 0xAAAAAAAAAAAAAAAAull) >> 1);
+ val = ((val & 0x3333333333333333ull) << 2) |
+ ((val & 0xCCCCCCCCCCCCCCCCull) >> 2);
+ val = ((val & 0x0F0F0F0F0F0F0F0Full) << 4) |
+ ((val & 0xF0F0F0F0F0F0F0F0ull) >> 4);
+
+ return val;
+}
+
+RVVCALL(OPIVV1, vbrev8_v_b, OP_UU_B, H1, H1, brev8)
+RVVCALL(OPIVV1, vbrev8_v_h, OP_UU_H, H2, H2, brev8)
+RVVCALL(OPIVV1, vbrev8_v_w, OP_UU_W, H4, H4, brev8)
+RVVCALL(OPIVV1, vbrev8_v_d, OP_UU_D, H8, H8, brev8)
+GEN_VEXT_V(vbrev8_v_b, 1)
+GEN_VEXT_V(vbrev8_v_h, 2)
+GEN_VEXT_V(vbrev8_v_w, 4)
+GEN_VEXT_V(vbrev8_v_d, 8)
+
+#define DO_IDENTITY(a) (a)
+RVVCALL(OPIVV1, vrev8_v_b, OP_UU_B, H1, H1, DO_IDENTITY)
+RVVCALL(OPIVV1, vrev8_v_h, OP_UU_H, H2, H2, bswap16)
+RVVCALL(OPIVV1, vrev8_v_w, OP_UU_W, H4, H4, bswap32)
+RVVCALL(OPIVV1, vrev8_v_d, OP_UU_D, H8, H8, bswap64)
+GEN_VEXT_V(vrev8_v_b, 1)
+GEN_VEXT_V(vrev8_v_h, 2)
+GEN_VEXT_V(vrev8_v_w, 4)
+GEN_VEXT_V(vrev8_v_d, 8)
+
+#define DO_ANDN(a, b) ((a) & ~(b))
+RVVCALL(OPIVV2, vandn_vv_b, OP_UUU_B, H1, H1, H1, DO_ANDN)
+RVVCALL(OPIVV2, vandn_vv_h, OP_UUU_H, H2, H2, H2, DO_ANDN)
+RVVCALL(OPIVV2, vandn_vv_w, OP_UUU_W, H4, H4, H4, DO_ANDN)
+RVVCALL(OPIVV2, vandn_vv_d, OP_UUU_D, H8, H8, H8, DO_ANDN)
+GEN_VEXT_VV(vandn_vv_b, 1)
+GEN_VEXT_VV(vandn_vv_h, 2)
+GEN_VEXT_VV(vandn_vv_w, 4)
+GEN_VEXT_VV(vandn_vv_d, 8)
+
+RVVCALL(OPIVX2, vandn_vx_b, OP_UUU_B, H1, H1, DO_ANDN)
+RVVCALL(OPIVX2, vandn_vx_h, OP_UUU_H, H2, H2, DO_ANDN)
+RVVCALL(OPIVX2, vandn_vx_w, OP_UUU_W, H4, H4, DO_ANDN)
+RVVCALL(OPIVX2, vandn_vx_d, OP_UUU_D, H8, H8, DO_ANDN)
+GEN_VEXT_VX(vandn_vx_b, 1)
+GEN_VEXT_VX(vandn_vx_h, 2)
+GEN_VEXT_VX(vandn_vx_w, 4)
+GEN_VEXT_VX(vandn_vx_d, 8)
+
+RVVCALL(OPIVV1, vbrev_v_b, OP_UU_B, H1, H1, revbit8)
+RVVCALL(OPIVV1, vbrev_v_h, OP_UU_H, H2, H2, revbit16)
+RVVCALL(OPIVV1, vbrev_v_w, OP_UU_W, H4, H4, revbit32)
+RVVCALL(OPIVV1, vbrev_v_d, OP_UU_D, H8, H8, revbit64)
+GEN_VEXT_V(vbrev_v_b, 1)
+GEN_VEXT_V(vbrev_v_h, 2)
+GEN_VEXT_V(vbrev_v_w, 4)
+GEN_VEXT_V(vbrev_v_d, 8)
+
+RVVCALL(OPIVV1, vclz_v_b, OP_UU_B, H1, H1, clz8)
+RVVCALL(OPIVV1, vclz_v_h, OP_UU_H, H2, H2, clz16)
+RVVCALL(OPIVV1, vclz_v_w, OP_UU_W, H4, H4, clz32)
+RVVCALL(OPIVV1, vclz_v_d, OP_UU_D, H8, H8, clz64)
+GEN_VEXT_V(vclz_v_b, 1)
+GEN_VEXT_V(vclz_v_h, 2)
+GEN_VEXT_V(vclz_v_w, 4)
+GEN_VEXT_V(vclz_v_d, 8)
+
+RVVCALL(OPIVV1, vctz_v_b, OP_UU_B, H1, H1, ctz8)
+RVVCALL(OPIVV1, vctz_v_h, OP_UU_H, H2, H2, ctz16)
+RVVCALL(OPIVV1, vctz_v_w, OP_UU_W, H4, H4, ctz32)
+RVVCALL(OPIVV1, vctz_v_d, OP_UU_D, H8, H8, ctz64)
+GEN_VEXT_V(vctz_v_b, 1)
+GEN_VEXT_V(vctz_v_h, 2)
+GEN_VEXT_V(vctz_v_w, 4)
+GEN_VEXT_V(vctz_v_d, 8)
+
+RVVCALL(OPIVV1, vcpop_v_b, OP_UU_B, H1, H1, ctpop8)
+RVVCALL(OPIVV1, vcpop_v_h, OP_UU_H, H2, H2, ctpop16)
+RVVCALL(OPIVV1, vcpop_v_w, OP_UU_W, H4, H4, ctpop32)
+RVVCALL(OPIVV1, vcpop_v_d, OP_UU_D, H8, H8, ctpop64)
+GEN_VEXT_V(vcpop_v_b, 1)
+GEN_VEXT_V(vcpop_v_h, 2)
+GEN_VEXT_V(vcpop_v_w, 4)
+GEN_VEXT_V(vcpop_v_d, 8)
+
+#define DO_SLL(N, M) (N << (M & (sizeof(N) * 8 - 1)))
+RVVCALL(OPIVV2, vwsll_vv_b, WOP_UUU_B, H2, H1, H1, DO_SLL)
+RVVCALL(OPIVV2, vwsll_vv_h, WOP_UUU_H, H4, H2, H2, DO_SLL)
+RVVCALL(OPIVV2, vwsll_vv_w, WOP_UUU_W, H8, H4, H4, DO_SLL)
+GEN_VEXT_VV(vwsll_vv_b, 2)
+GEN_VEXT_VV(vwsll_vv_h, 4)
+GEN_VEXT_VV(vwsll_vv_w, 8)
+
+RVVCALL(OPIVX2, vwsll_vx_b, WOP_UUU_B, H2, H1, DO_SLL)
+RVVCALL(OPIVX2, vwsll_vx_h, WOP_UUU_H, H4, H2, DO_SLL)
+RVVCALL(OPIVX2, vwsll_vx_w, WOP_UUU_W, H8, H4, DO_SLL)
+GEN_VEXT_VX(vwsll_vx_b, 2)
+GEN_VEXT_VX(vwsll_vx_h, 4)
+GEN_VEXT_VX(vwsll_vx_w, 8)
diff --git a/target/riscv/insn_trans/trans_rvvk.c.inc b/target/riscv/insn_trans/trans_rvvk.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/insn_trans/trans_rvvk.c.inc
+++ b/target/riscv/insn_trans/trans_rvvk.c.inc
@@ -XXX,XX +XXX,XX @@ static bool vclmul_vx_check(DisasContext *s, arg_rmrr *a)

 GEN_VX_MASKED_TRANS(vclmul_vx, vclmul_vx_check)
 GEN_VX_MASKED_TRANS(vclmulh_vx, vclmul_vx_check)
+
+/*
+ * Zvbb
+ */
+
+#define GEN_OPIVI_GVEC_TRANS_CHECK(NAME, IMM_MODE, OPIVX, SUF, CHECK) \
+ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
+ { \
+ if (CHECK(s, a)) { \
+ static gen_helper_opivx *const fns[4] = { \
+ gen_helper_##OPIVX##_b, \
+ gen_helper_##OPIVX##_h, \
+ gen_helper_##OPIVX##_w, \
+ gen_helper_##OPIVX##_d, \
+ }; \
+ return do_opivi_gvec(s, a, tcg_gen_gvec_##SUF, fns[s->sew], \
+ IMM_MODE); \
+ } \
+ return false; \
+ }
+
+#define GEN_OPIVV_GVEC_TRANS_CHECK(NAME, SUF, CHECK) \
+ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
+ { \
+ if (CHECK(s, a)) { \
+ static gen_helper_gvec_4_ptr *const fns[4] = { \
+ gen_helper_##NAME##_b, \
+ gen_helper_##NAME##_h, \
+ gen_helper_##NAME##_w, \
+ gen_helper_##NAME##_d, \
+ }; \
+ return do_opivv_gvec(s, a, tcg_gen_gvec_##SUF, fns[s->sew]); \
+ } \
+ return false; \
+ }
+
+#define GEN_OPIVX_GVEC_SHIFT_TRANS_CHECK(NAME, SUF, CHECK) \
+ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
+ { \
+ if (CHECK(s, a)) { \
+ static gen_helper_opivx *const fns[4] = { \
+ gen_helper_##NAME##_b, \
+ gen_helper_##NAME##_h, \
+ gen_helper_##NAME##_w, \
+ gen_helper_##NAME##_d, \
+ }; \
+ return do_opivx_gvec_shift(s, a, tcg_gen_gvec_##SUF, \
+ fns[s->sew]); \
+ } \
+ return false; \
+ }
+
+static bool zvbb_vv_check(DisasContext *s, arg_rmrr *a)
+{
+ return opivv_check(s, a) && s->cfg_ptr->ext_zvbb == true;
+}
+
+static bool zvbb_vx_check(DisasContext *s, arg_rmrr *a)
+{
+ return opivx_check(s, a) && s->cfg_ptr->ext_zvbb == true;
+}
+
+/* vrol.v[vx] */
+GEN_OPIVV_GVEC_TRANS_CHECK(vrol_vv, rotlv, zvbb_vv_check)
+GEN_OPIVX_GVEC_SHIFT_TRANS_CHECK(vrol_vx, rotls, zvbb_vx_check)
+
+/* vror.v[vxi] */
+GEN_OPIVV_GVEC_TRANS_CHECK(vror_vv, rotrv, zvbb_vv_check)
+GEN_OPIVX_GVEC_SHIFT_TRANS_CHECK(vror_vx, rotrs, zvbb_vx_check)
+GEN_OPIVI_GVEC_TRANS_CHECK(vror_vi, IMM_TRUNC_SEW, vror_vx, rotri, zvbb_vx_check)
+
+#define GEN_OPIVX_GVEC_TRANS_CHECK(NAME, SUF, CHECK) \
+ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
+ { \
+ if (CHECK(s, a)) { \
+ static gen_helper_opivx *const fns[4] = { \
+ gen_helper_##NAME##_b, \
+ gen_helper_##NAME##_h, \
+ gen_helper_##NAME##_w, \
+ gen_helper_##NAME##_d, \
+ }; \
+ return do_opivx_gvec(s, a, tcg_gen_gvec_##SUF, fns[s->sew]); \
+ } \
+ return false; \
+ }
+
+/* vandn.v[vx] */
+GEN_OPIVV_GVEC_TRANS_CHECK(vandn_vv, andc, zvbb_vv_check)
+GEN_OPIVX_GVEC_TRANS_CHECK(vandn_vx, andcs, zvbb_vx_check)
+
+#define GEN_OPIV_TRANS(NAME, CHECK) \
+ static bool trans_##NAME(DisasContext *s, arg_rmr *a) \
+ { \
+ if (CHECK(s, a)) { \
+ uint32_t data = 0; \
+ static gen_helper_gvec_3_ptr *const fns[4] = { \
+ gen_helper_##NAME##_b, \
+ gen_helper_##NAME##_h, \
+ gen_helper_##NAME##_w, \
+ gen_helper_##NAME##_d, \
+ }; \
+ TCGLabel *over = gen_new_label(); \
+ tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \
+ \
+ data = FIELD_DP32(data, VDATA, VM, a->vm); \
+ data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \
+ data = FIELD_DP32(data, VDATA, VTA, s->vta); \
+ data = FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s); \
+ data = FIELD_DP32(data, VDATA, VMA, s->vma); \
+ tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \
+ vreg_ofs(s, a->rs2), cpu_env, \
+ s->cfg_ptr->vlen / 8, s->cfg_ptr->vlen / 8, \
+ data, fns[s->sew]); \
+ mark_vs_dirty(s); \
+ gen_set_label(over); \
+ return true; \
+ } \
+ return false; \
+ }
+
+static bool zvbb_opiv_check(DisasContext *s, arg_rmr *a)
+{
+ return s->cfg_ptr->ext_zvbb == true &&
+ require_rvv(s) &&
+ vext_check_isa_ill(s) &&
+ vext_check_ss(s, a->rd, a->rs2, a->vm);
+}
+
+GEN_OPIV_TRANS(vbrev8_v, zvbb_opiv_check)
+GEN_OPIV_TRANS(vrev8_v, zvbb_opiv_check)
+GEN_OPIV_TRANS(vbrev_v, zvbb_opiv_check)
+GEN_OPIV_TRANS(vclz_v, zvbb_opiv_check)
+GEN_OPIV_TRANS(vctz_v, zvbb_opiv_check)
+GEN_OPIV_TRANS(vcpop_v, zvbb_opiv_check)
+
+static bool vwsll_vv_check(DisasContext *s, arg_rmrr *a)
+{
+ return s->cfg_ptr->ext_zvbb && opivv_widen_check(s, a);
+}
+
+static bool vwsll_vx_check(DisasContext *s, arg_rmrr *a)
+{
+ return s->cfg_ptr->ext_zvbb && opivx_widen_check(s, a);
+}
+
+/* OPIVI without GVEC IR */
+#define GEN_OPIVI_WIDEN_TRANS(NAME, IMM_MODE, OPIVX, CHECK) \
+ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
+ { \
+ if (CHECK(s, a)) { \
+ static gen_helper_opivx *const fns[3] = { \
+ gen_helper_##OPIVX##_b, \
+ gen_helper_##OPIVX##_h, \
+ gen_helper_##OPIVX##_w, \
+ }; \
+ return opivi_trans(a->rd, a->rs1, a->rs2, a->vm, fns[s->sew], s, \
+ IMM_MODE); \
+ } \
+ return false; \
+ }
+
+GEN_OPIVV_WIDEN_TRANS(vwsll_vv, vwsll_vv_check)
+GEN_OPIVX_WIDEN_TRANS(vwsll_vx, vwsll_vx_check)
+GEN_OPIVI_WIDEN_TRANS(vwsll_vi, IMM_ZX, vwsll_vx, vwsll_vx_check)
--
2.41.0
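As a quick reference for what the new Zvbb helpers compute per element, here
is a minimal scalar sketch at SEW=32. It is illustrative only and not part of
the patch; the ref_* names are invented, and the vandn operand order follows
the DO_ANDN(a, b) expansion above, where a is the vs2 element:

    #include <stdint.h>

    /* vror: rotate right by the shift amount modulo SEW (here 32) */
    static uint32_t ref_ror32(uint32_t x, uint32_t n)
    {
        n &= 31;
        return n ? (x >> n) | (x << (32 - n)) : x;
    }

    /* vandn: vs2 & ~vs1, matching DO_ANDN(a, b) = a & ~b */
    static uint32_t ref_andn32(uint32_t vs2, uint32_t vs1)
    {
        return vs2 & ~vs1;
    }

    /* vbrev8: reverse the bit order inside each byte, using the same
     * mask ladder as the brev8() helper in the patch, narrowed to 32
     * bits */
    static uint32_t ref_brev8_32(uint32_t v)
    {
        v = ((v & 0x55555555u) << 1) | ((v & 0xAAAAAAAAu) >> 1);
        v = ((v & 0x33333333u) << 2) | ((v & 0xCCCCCCCCu) >> 2);
        v = ((v & 0x0F0F0F0Fu) << 4) | ((v & 0xF0F0F0F0u) >> 4);
        return v;
    }

Since the extension is still experimental it is gated behind an x- property,
so something like -cpu rv64,v=on,x-zvbb=on should enable it for testing
(exact command-line syntax may vary with the QEMU build).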
From: Nazar Kazakov <nazar.kazakov@codethink.co.uk>

This commit adds support for the Zvkned vector-crypto extension, which
consists of the following instructions:

* vaesef.[vv,vs]
* vaesdf.[vv,vs]
* vaesdm.[vv,vs]
* vaesz.vs
* vaesem.[vv,vs]
* vaeskf1.vi
* vaeskf2.vi

Translation functions are defined in
`target/riscv/insn_trans/trans_rvvk.c.inc` and helpers are defined in
`target/riscv/vcrypto_helper.c`.

Co-authored-by: Lawrence Hunter <lawrence.hunter@codethink.co.uk>
Co-authored-by: William Salmon <will.salmon@codethink.co.uk>
[max.chou@sifive.com: Replaced vstart checking by TCG op]
Signed-off-by: Lawrence Hunter <lawrence.hunter@codethink.co.uk>
Signed-off-by: William Salmon <will.salmon@codethink.co.uk>
Signed-off-by: Nazar Kazakov <nazar.kazakov@codethink.co.uk>
Signed-off-by: Max Chou <max.chou@sifive.com>
Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
[max.chou@sifive.com: Imported aes-round.h and exposed x-zvkned
property]
[max.chou@sifive.com: Fixed endian issues and replaced the vstart & vl
egs checking by helper function]
[max.chou@sifive.com: Replaced bswap32 calls in aes key expanding]
Message-ID: <20230711165917.2629866-10-max.chou@sifive.com>
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
---
 target/riscv/cpu_cfg.h | 1 +
 target/riscv/helper.h | 14 ++
 target/riscv/insn32.decode | 14 ++
 target/riscv/cpu.c | 4 +-
 target/riscv/vcrypto_helper.c | 202 +++++++++++++++++++++++
 target/riscv/insn_trans/trans_rvvk.c.inc | 147 +++++++++++++++++
 6 files changed, 381 insertions(+), 1 deletion(-)

diff --git a/target/riscv/cpu_cfg.h b/target/riscv/cpu_cfg.h
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/cpu_cfg.h
+++ b/target/riscv/cpu_cfg.h
@@ -XXX,XX +XXX,XX @@ struct RISCVCPUConfig {
 bool ext_zve64d;
 bool ext_zvbb;
 bool ext_zvbc;
+ bool ext_zvkned;
 bool ext_zmmul;
 bool ext_zvfbfmin;
 bool ext_zvfbfwma;
diff --git a/target/riscv/helper.h b/target/riscv/helper.h
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/helper.h
+++ b/target/riscv/helper.h
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vandn_vx_b, void, ptr, ptr, tl, ptr, env, i32)
 DEF_HELPER_6(vandn_vx_h, void, ptr, ptr, tl, ptr, env, i32)
 DEF_HELPER_6(vandn_vx_w, void, ptr, ptr, tl, ptr, env, i32)
 DEF_HELPER_6(vandn_vx_d, void, ptr, ptr, tl, ptr, env, i32)
+
+DEF_HELPER_2(egs_check, void, i32, env)
+
+DEF_HELPER_4(vaesef_vv, void, ptr, ptr, env, i32)
+DEF_HELPER_4(vaesef_vs, void, ptr, ptr, env, i32)
+DEF_HELPER_4(vaesdf_vv, void, ptr, ptr, env, i32)
+DEF_HELPER_4(vaesdf_vs, void, ptr, ptr, env, i32)
+DEF_HELPER_4(vaesem_vv, void, ptr, ptr, env, i32)
+DEF_HELPER_4(vaesem_vs, void, ptr, ptr, env, i32)
+DEF_HELPER_4(vaesdm_vv, void, ptr, ptr, env, i32)
+DEF_HELPER_4(vaesdm_vs, void, ptr, ptr, env, i32)
+DEF_HELPER_4(vaesz_vs, void, ptr, ptr, env, i32)
+DEF_HELPER_5(vaeskf1_vi, void, ptr, ptr, i32, env, i32)
+DEF_HELPER_5(vaeskf2_vi, void, ptr, ptr, i32, env, i32)
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/insn32.decode
+++ b/target/riscv/insn32.decode
@@ -XXX,XX +XXX,XX @@
 @r_rm ....... ..... ..... ... ..... ....... %rs2 %rs1 %rm %rd
 @r2_rm ....... ..... ..... ... ..... ....... %rs1 %rm %rd
 @r2 ....... ..... ..... ... ..... ....... &r2 %rs1 %rd
+@r2_vm_1 ...... . ..... ..... ... ..... ....... &rmr vm=1 %rs2 %rd
 @r2_nfvm ... ... vm:1 ..... ..... ... ..... ....... &r2nfvm %nf %rs1 %rd
 @r2_vm ...... vm:1 ..... ..... ... ..... ....... &rmr %rs2 %rd
 @r1_vm ...... vm:1 ..... ..... ... ..... ....... %rd
@@ -XXX,XX +XXX,XX @@ vcpop_v 010010 . ..... 01110 010 ..... 1010111 @r2_vm
 vwsll_vv 110101 . ..... ..... 000 ..... 1010111 @r_vm
 vwsll_vx 110101 . ..... ..... 100 ..... 1010111 @r_vm
 vwsll_vi 110101 . ..... ..... 011 ..... 1010111 @r_vm
+
+# *** Zvkned vector crypto extension ***
+vaesef_vv 101000 1 ..... 00011 010 ..... 1110111 @r2_vm_1
+vaesef_vs 101001 1 ..... 00011 010 ..... 1110111 @r2_vm_1
+vaesdf_vv 101000 1 ..... 00001 010 ..... 1110111 @r2_vm_1
+vaesdf_vs 101001 1 ..... 00001 010 ..... 1110111 @r2_vm_1
+vaesem_vv 101000 1 ..... 00010 010 ..... 1110111 @r2_vm_1
+vaesem_vs 101001 1 ..... 00010 010 ..... 1110111 @r2_vm_1
+vaesdm_vv 101000 1 ..... 00000 010 ..... 1110111 @r2_vm_1
+vaesdm_vs 101001 1 ..... 00000 010 ..... 1110111 @r2_vm_1
+vaesz_vs 101001 1 ..... 00111 010 ..... 1110111 @r2_vm_1
+vaeskf1_vi 100010 1 ..... ..... 010 ..... 1110111 @r_vm_1
+vaeskf2_vi 101010 1 ..... ..... 010 ..... 1110111 @r_vm_1
diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -XXX,XX +XXX,XX @@ static const struct isa_ext_data isa_edata_arr[] = {
 ISA_EXT_DATA_ENTRY(zvfbfwma, PRIV_VERSION_1_12_0, ext_zvfbfwma),
 ISA_EXT_DATA_ENTRY(zvfh, PRIV_VERSION_1_12_0, ext_zvfh),
 ISA_EXT_DATA_ENTRY(zvfhmin, PRIV_VERSION_1_12_0, ext_zvfhmin),
+ ISA_EXT_DATA_ENTRY(zvkned, PRIV_VERSION_1_12_0, ext_zvkned),
 ISA_EXT_DATA_ENTRY(zhinx, PRIV_VERSION_1_12_0, ext_zhinx),
 ISA_EXT_DATA_ENTRY(zhinxmin, PRIV_VERSION_1_12_0, ext_zhinxmin),
 ISA_EXT_DATA_ENTRY(smaia, PRIV_VERSION_1_12_0, ext_smaia),
@@ -XXX,XX +XXX,XX @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp)
 * In principle Zve*x would also suffice here, were they supported
 * in qemu
 */
- if (cpu->cfg.ext_zvbb && !cpu->cfg.ext_zve32f) {
+ if ((cpu->cfg.ext_zvbb || cpu->cfg.ext_zvkned) && !cpu->cfg.ext_zve32f) {
 error_setg(errp,
 "Vector crypto extensions require V or Zve* extensions");
 return;
@@ -XXX,XX +XXX,XX @@ static Property riscv_cpu_extensions[] = {
 /* Vector cryptography extensions */
 DEFINE_PROP_BOOL("x-zvbb", RISCVCPU, cfg.ext_zvbb, false),
 DEFINE_PROP_BOOL("x-zvbc", RISCVCPU, cfg.ext_zvbc, false),
+ DEFINE_PROP_BOOL("x-zvkned", RISCVCPU, cfg.ext_zvkned, false),

 DEFINE_PROP_END_OF_LIST(),
 };
diff --git a/target/riscv/vcrypto_helper.c b/target/riscv/vcrypto_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/vcrypto_helper.c
+++ b/target/riscv/vcrypto_helper.c
@@ -XXX,XX +XXX,XX @@
 #include "qemu/bitops.h"
 #include "qemu/bswap.h"
 #include "cpu.h"
+#include "crypto/aes.h"
+#include "crypto/aes-round.h"
 #include "exec/memop.h"
 #include "exec/exec-all.h"
 #include "exec/helper-proto.h"
@@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVX2, vwsll_vx_w, WOP_UUU_W, H8, H4, DO_SLL)
 GEN_VEXT_VX(vwsll_vx_b, 2)
 GEN_VEXT_VX(vwsll_vx_h, 4)
 GEN_VEXT_VX(vwsll_vx_w, 8)
+
+void HELPER(egs_check)(uint32_t egs, CPURISCVState *env)
+{
+ uint32_t vl = env->vl;
+ uint32_t vstart = env->vstart;
+
+ if (vl % egs != 0 || vstart % egs != 0) {
+ riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+ }
+}
+
+static inline void xor_round_key(AESState *round_state, AESState *round_key)
+{
+ round_state->v = round_state->v ^ round_key->v;
+}
+
+#define GEN_ZVKNED_HELPER_VV(NAME, ...) \
+ void HELPER(NAME)(void *vd, void *vs2, CPURISCVState *env, \
+ uint32_t desc) \
+ { \
+ uint32_t vl = env->vl; \
+ uint32_t total_elems = vext_get_total_elems(env, desc, 4); \
+ uint32_t vta = vext_vta(desc); \
+ \
+ for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) { \
+ AESState round_key; \
+ round_key.d[0] = *((uint64_t *)vs2 + H8(i * 2 + 0)); \
+ round_key.d[1] = *((uint64_t *)vs2 + H8(i * 2 + 1)); \
+ AESState round_state; \
+ round_state.d[0] = *((uint64_t *)vd + H8(i * 2 + 0)); \
+ round_state.d[1] = *((uint64_t *)vd + H8(i * 2 + 1)); \
+ __VA_ARGS__; \
+ *((uint64_t *)vd + H8(i * 2 + 0)) = round_state.d[0]; \
+ *((uint64_t *)vd + H8(i * 2 + 1)) = round_state.d[1]; \
+ } \
+ env->vstart = 0; \
+ /* set tail elements to 1s */ \
+ vext_set_elems_1s(vd, vta, vl * 4, total_elems * 4); \
+ }
+
+#define GEN_ZVKNED_HELPER_VS(NAME, ...) \
+ void HELPER(NAME)(void *vd, void *vs2, CPURISCVState *env, \
+ uint32_t desc) \
+ { \
+ uint32_t vl = env->vl; \
+ uint32_t total_elems = vext_get_total_elems(env, desc, 4); \
+ uint32_t vta = vext_vta(desc); \
+ \
+ for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) { \
+ AESState round_key; \
+ round_key.d[0] = *((uint64_t *)vs2 + H8(0)); \
+ round_key.d[1] = *((uint64_t *)vs2 + H8(1)); \
+ AESState round_state; \
+ round_state.d[0] = *((uint64_t *)vd + H8(i * 2 + 0)); \
+ round_state.d[1] = *((uint64_t *)vd + H8(i * 2 + 1)); \
+ __VA_ARGS__; \
+ *((uint64_t *)vd + H8(i * 2 + 0)) = round_state.d[0]; \
+ *((uint64_t *)vd + H8(i * 2 + 1)) = round_state.d[1]; \
+ } \
+ env->vstart = 0; \
+ /* set tail elements to 1s */ \
+ vext_set_elems_1s(vd, vta, vl * 4, total_elems * 4); \
+ }
+
+GEN_ZVKNED_HELPER_VV(vaesef_vv, aesenc_SB_SR_AK(&round_state,
+ &round_state,
+ &round_key,
+ false);)
+GEN_ZVKNED_HELPER_VS(vaesef_vs, aesenc_SB_SR_AK(&round_state,
+ &round_state,
+ &round_key,
+ false);)
+GEN_ZVKNED_HELPER_VV(vaesdf_vv, aesdec_ISB_ISR_AK(&round_state,
+ &round_state,
+ &round_key,
+ false);)
+GEN_ZVKNED_HELPER_VS(vaesdf_vs, aesdec_ISB_ISR_AK(&round_state,
+ &round_state,
+ &round_key,
+ false);)
+GEN_ZVKNED_HELPER_VV(vaesem_vv, aesenc_SB_SR_MC_AK(&round_state,
+ &round_state,
+ &round_key,
+ false);)
+GEN_ZVKNED_HELPER_VS(vaesem_vs, aesenc_SB_SR_MC_AK(&round_state,
+ &round_state,
+ &round_key,
+ false);)
+GEN_ZVKNED_HELPER_VV(vaesdm_vv, aesdec_ISB_ISR_AK_IMC(&round_state,
+ &round_state,
+ &round_key,
+ false);)
+GEN_ZVKNED_HELPER_VS(vaesdm_vs, aesdec_ISB_ISR_AK_IMC(&round_state,
+ &round_state,
+ &round_key,
+ false);)
+GEN_ZVKNED_HELPER_VS(vaesz_vs, xor_round_key(&round_state, &round_key);)
+
+void HELPER(vaeskf1_vi)(void *vd_vptr, void *vs2_vptr, uint32_t uimm,
+ CPURISCVState *env, uint32_t desc)
+{
+ uint32_t *vd = vd_vptr;
+ uint32_t *vs2 = vs2_vptr;
+ uint32_t vl = env->vl;
+ uint32_t total_elems = vext_get_total_elems(env, desc, 4);
+ uint32_t vta = vext_vta(desc);
+
+ uimm &= 0b1111;
+ if (uimm > 10 || uimm == 0) {
+ uimm ^= 0b1000;
+ }
+
+ for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) {
+ uint32_t rk[8], tmp;
+ static const uint32_t rcon[] = {
+ 0x00000001, 0x00000002, 0x00000004, 0x00000008, 0x00000010,
+ 0x00000020, 0x00000040, 0x00000080, 0x0000001B, 0x00000036,
+ };
+
+ rk[0] = vs2[i * 4 + H4(0)];
+ rk[1] = vs2[i * 4 + H4(1)];
+ rk[2] = vs2[i * 4 + H4(2)];
+ rk[3] = vs2[i * 4 + H4(3)];
+ tmp = ror32(rk[3], 8);
+
+ rk[4] = rk[0] ^ (((uint32_t)AES_sbox[(tmp >> 24) & 0xff] << 24) |
+ ((uint32_t)AES_sbox[(tmp >> 16) & 0xff] << 16) |
+ ((uint32_t)AES_sbox[(tmp >> 8) & 0xff] << 8) |
+ ((uint32_t)AES_sbox[(tmp >> 0) & 0xff] << 0))
+ ^ rcon[uimm - 1];
+ rk[5] = rk[1] ^ rk[4];
+ rk[6] = rk[2] ^ rk[5];
+ rk[7] = rk[3] ^ rk[6];
+
+ vd[i * 4 + H4(0)] = rk[4];
+ vd[i * 4 + H4(1)] = rk[5];
+ vd[i * 4 + H4(2)] = rk[6];
+ vd[i * 4 + H4(3)] = rk[7];
+ }
+ env->vstart = 0;
+ /* set tail elements to 1s */
+ vext_set_elems_1s(vd, vta, vl * 4, total_elems * 4);
+}
+
+void HELPER(vaeskf2_vi)(void *vd_vptr, void *vs2_vptr, uint32_t uimm,
+ CPURISCVState *env, uint32_t desc)
+{
+ uint32_t *vd = vd_vptr;
+ uint32_t *vs2 = vs2_vptr;
+ uint32_t vl = env->vl;
+ uint32_t total_elems = vext_get_total_elems(env, desc, 4);
+ uint32_t vta = vext_vta(desc);
+
+ uimm &= 0b1111;
+ if (uimm > 14 || uimm < 2) {
+ uimm ^= 0b1000;
+ }
+
+ for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) {
+ uint32_t rk[12], tmp;
+ static const uint32_t rcon[] = {
+ 0x00000001, 0x00000002, 0x00000004, 0x00000008, 0x00000010,
+ 0x00000020, 0x00000040, 0x00000080, 0x0000001B, 0x00000036,
+ };
+
+ rk[0] = vd[i * 4 + H4(0)];
+ rk[1] = vd[i * 4 + H4(1)];
+ rk[2] = vd[i * 4 + H4(2)];
+ rk[3] = vd[i * 4 + H4(3)];
+ rk[4] = vs2[i * 4 + H4(0)];
+ rk[5] = vs2[i * 4 + H4(1)];
+ rk[6] = vs2[i * 4 + H4(2)];
+ rk[7] = vs2[i * 4 + H4(3)];
+
+ if (uimm % 2 == 0) {
+ tmp = ror32(rk[7], 8);
+ rk[8] = rk[0] ^ (((uint32_t)AES_sbox[(tmp >> 24) & 0xff] << 24) |
+ ((uint32_t)AES_sbox[(tmp >> 16) & 0xff] << 16) |
+ ((uint32_t)AES_sbox[(tmp >> 8) & 0xff] << 8) |
+ ((uint32_t)AES_sbox[(tmp >> 0) & 0xff] << 0))
+ ^ rcon[(uimm - 1) / 2];
+ } else {
+ rk[8] = rk[0] ^ (((uint32_t)AES_sbox[(rk[7] >> 24) & 0xff] << 24) |
+ ((uint32_t)AES_sbox[(rk[7] >> 16) & 0xff] << 16) |
+ ((uint32_t)AES_sbox[(rk[7] >> 8) & 0xff] << 8) |
+ ((uint32_t)AES_sbox[(rk[7] >> 0) & 0xff] << 0));
+ }
+ rk[9] = rk[1] ^ rk[8];
+ rk[10] = rk[2] ^ rk[9];
+ rk[11] = rk[3] ^ rk[10];
+
+ vd[i * 4 + H4(0)] = rk[8];
+ vd[i * 4 + H4(1)] = rk[9];
+ vd[i * 4 + H4(2)] = rk[10];
+ vd[i * 4 + H4(3)] = rk[11];
+ }
+ env->vstart = 0;
+ /* set tail elements to 1s */
+ vext_set_elems_1s(vd, vta, vl * 4, total_elems * 4);
+}
diff --git a/target/riscv/insn_trans/trans_rvvk.c.inc b/target/riscv/insn_trans/trans_rvvk.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/insn_trans/trans_rvvk.c.inc
+++ b/target/riscv/insn_trans/trans_rvvk.c.inc
@@ -XXX,XX +XXX,XX @@ static bool vwsll_vx_check(DisasContext *s, arg_rmrr *a)
 GEN_OPIVV_WIDEN_TRANS(vwsll_vv, vwsll_vv_check)
 GEN_OPIVX_WIDEN_TRANS(vwsll_vx, vwsll_vx_check)
 GEN_OPIVI_WIDEN_TRANS(vwsll_vi, IMM_ZX, vwsll_vx, vwsll_vx_check)
+
+/*
+ * Zvkned
+ */
+
+#define ZVKNED_EGS 4
+
+#define GEN_V_UNMASKED_TRANS(NAME, CHECK, EGS) \
+ static bool trans_##NAME(DisasContext *s, arg_##NAME *a) \
+ { \
+ if (CHECK(s, a)) { \
+ TCGv_ptr rd_v, rs2_v; \
+ TCGv_i32 desc, egs; \
+ uint32_t data = 0; \
+ TCGLabel *over = gen_new_label(); \
+ \
+ if (!s->vstart_eq_zero || !s->vl_eq_vlmax) { \
+ /* save opcode for unwinding in case we throw an exception */ \
+ decode_save_opc(s); \
+ egs = tcg_constant_i32(EGS); \
+ gen_helper_egs_check(egs, cpu_env); \
+ tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \
+ } \
+ \
+ data = FIELD_DP32(data, VDATA, VM, a->vm); \
+ data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \
+ data = FIELD_DP32(data, VDATA, VTA, s->vta); \
+ data = FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s); \
+ data = FIELD_DP32(data, VDATA, VMA, s->vma); \
+ rd_v = tcg_temp_new_ptr(); \
+ rs2_v = tcg_temp_new_ptr(); \
+ desc = tcg_constant_i32( \
+ simd_desc(s->cfg_ptr->vlen / 8, s->cfg_ptr->vlen / 8, data)); \
+ tcg_gen_addi_ptr(rd_v, cpu_env, vreg_ofs(s, a->rd)); \
+ tcg_gen_addi_ptr(rs2_v, cpu_env, vreg_ofs(s, a->rs2)); \
+ gen_helper_##NAME(rd_v, rs2_v, cpu_env, desc); \
+ mark_vs_dirty(s); \
+ gen_set_label(over); \
+ return true; \
+ } \
+ return false; \
+ }
+
+static bool vaes_check_vv(DisasContext *s, arg_rmr *a)
+{
+ int egw_bytes = ZVKNED_EGS << s->sew;
+ return s->cfg_ptr->ext_zvkned == true &&
+ require_rvv(s) &&
+ vext_check_isa_ill(s) &&
+ MAXSZ(s) >= egw_bytes &&
+ require_align(a->rd, s->lmul) &&
+ require_align(a->rs2, s->lmul) &&
+ s->sew == MO_32;
+}
+
+static bool vaes_check_overlap(DisasContext *s, int vd, int vs2)
+{
+ int8_t op_size = s->lmul <= 0 ? 1 : 1 << s->lmul;
+ return !is_overlapped(vd, op_size, vs2, 1);
+}
+
+static bool vaes_check_vs(DisasContext *s, arg_rmr *a)
+{
+ int egw_bytes = ZVKNED_EGS << s->sew;
+ return vaes_check_overlap(s, a->rd, a->rs2) &&
+ MAXSZ(s) >= egw_bytes &&
+ s->cfg_ptr->ext_zvkned == true &&
+ require_rvv(s) &&
+ vext_check_isa_ill(s) &&
+ require_align(a->rd, s->lmul) &&
+ s->sew == MO_32;
+}
+
+GEN_V_UNMASKED_TRANS(vaesef_vv, vaes_check_vv, ZVKNED_EGS)
+GEN_V_UNMASKED_TRANS(vaesef_vs, vaes_check_vs, ZVKNED_EGS)
+GEN_V_UNMASKED_TRANS(vaesdf_vv, vaes_check_vv, ZVKNED_EGS)
+GEN_V_UNMASKED_TRANS(vaesdf_vs, vaes_check_vs, ZVKNED_EGS)
+GEN_V_UNMASKED_TRANS(vaesdm_vv, vaes_check_vv, ZVKNED_EGS)
+GEN_V_UNMASKED_TRANS(vaesdm_vs, vaes_check_vs, ZVKNED_EGS)
+GEN_V_UNMASKED_TRANS(vaesz_vs, vaes_check_vs, ZVKNED_EGS)
+GEN_V_UNMASKED_TRANS(vaesem_vv, vaes_check_vv, ZVKNED_EGS)
+GEN_V_UNMASKED_TRANS(vaesem_vs, vaes_check_vs, ZVKNED_EGS)
+
+#define GEN_VI_UNMASKED_TRANS(NAME, CHECK, EGS) \
+ static bool trans_##NAME(DisasContext *s, arg_##NAME *a) \
+ { \
+ if (CHECK(s, a)) { \
+ TCGv_ptr rd_v, rs2_v; \
+ TCGv_i32 uimm_v, desc, egs; \
+ uint32_t data = 0; \
+ TCGLabel *over = gen_new_label(); \
+ \
+ if (!s->vstart_eq_zero || !s->vl_eq_vlmax) { \
+ /* save opcode for unwinding in case we throw an exception */ \
+ decode_save_opc(s); \
+ egs = tcg_constant_i32(EGS); \
+ gen_helper_egs_check(egs, cpu_env); \
+ tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \
+ } \
+ \
+ data = FIELD_DP32(data, VDATA, VM, a->vm); \
+ data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \
+ data = FIELD_DP32(data, VDATA, VTA, s->vta); \
+ data = FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s); \
+ data = FIELD_DP32(data, VDATA, VMA, s->vma); \
+ \
+ rd_v = tcg_temp_new_ptr(); \
+ rs2_v = tcg_temp_new_ptr(); \
+ uimm_v = tcg_constant_i32(a->rs1); \
+ desc = tcg_constant_i32( \
+ simd_desc(s->cfg_ptr->vlen / 8, s->cfg_ptr->vlen / 8, data)); \
+ tcg_gen_addi_ptr(rd_v, cpu_env, vreg_ofs(s, a->rd)); \
+ tcg_gen_addi_ptr(rs2_v, cpu_env, vreg_ofs(s, a->rs2)); \
+ gen_helper_##NAME(rd_v, rs2_v, uimm_v, cpu_env, desc); \
+ mark_vs_dirty(s); \
+ gen_set_label(over); \
+ return true; \
+ } \
+ return false; \
+ }
+
+static bool vaeskf1_check(DisasContext *s, arg_vaeskf1_vi *a)
+{
+ int egw_bytes = ZVKNED_EGS << s->sew;
+ return s->cfg_ptr->ext_zvkned == true &&
+ require_rvv(s) &&
+ vext_check_isa_ill(s) &&
+ MAXSZ(s) >= egw_bytes &&
+ s->sew == MO_32 &&
+ require_align(a->rd, s->lmul) &&
+ require_align(a->rs2, s->lmul);
+}
+
+static bool vaeskf2_check(DisasContext *s, arg_vaeskf2_vi *a)
+{
+ int egw_bytes = ZVKNED_EGS << s->sew;
+ return s->cfg_ptr->ext_zvkned == true &&
+ require_rvv(s) &&
+ vext_check_isa_ill(s) &&
+ MAXSZ(s) >= egw_bytes &&
+ s->sew == MO_32 &&
+ require_align(a->rd, s->lmul) &&
+ require_align(a->rs2, s->lmul);
+}
+
+GEN_VI_UNMASKED_TRANS(vaeskf1_vi, vaeskf1_check, ZVKNED_EGS)
+GEN_VI_UNMASKED_TRANS(vaeskf2_vi, vaeskf2_check, ZVKNED_EGS)
--
2.41.0
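One detail worth calling out in the patch above: the vaes* instructions
operate on 128-bit element groups (EGS = 4 elements at SEW=32), which is why
the translation macros emit a call to the new egs_check helper before the
main helper runs. A standalone restatement of that constraint and of the
vaesz.vs data flow, with invented ref_* names, purely for illustration:

    #include <stdbool.h>
    #include <stdint.h>

    #define EGS 4 /* elements per 128-bit group at SEW=32 */

    /* Mirrors the condition tested by egs_check: vl and vstart must
     * both be multiples of the element-group size, otherwise the
     * instruction raises an illegal-instruction exception. */
    static bool ref_egs_ok(uint32_t vl, uint32_t vstart)
    {
        return (vl % EGS == 0) && (vstart % EGS == 0);
    }

    /* vaesz.vs reduced to its essence: XOR one 128-bit element group
     * of vd with the round key held in the first element group of vs2
     * (compare xor_round_key() in the patch). */
    static void ref_vaesz_group(uint64_t vd[2], const uint64_t key[2])
    {
        vd[0] ^= key[0];
        vd[1] ^= key[1];
    }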
From: Kiran Ostrolenk <kiran.ostrolenk@codethink.co.uk>

This commit adds support for the Zvknh vector-crypto extension, which
consists of the following instructions:

* vsha2ms.vv
* vsha2c[hl].vv

Translation functions are defined in
`target/riscv/insn_trans/trans_rvvk.c.inc` and helpers are defined in
`target/riscv/vcrypto_helper.c`.

Co-authored-by: Nazar Kazakov <nazar.kazakov@codethink.co.uk>
Co-authored-by: Lawrence Hunter <lawrence.hunter@codethink.co.uk>
[max.chou@sifive.com: Replaced vstart checking by TCG op]
Signed-off-by: Nazar Kazakov <nazar.kazakov@codethink.co.uk>
Signed-off-by: Lawrence Hunter <lawrence.hunter@codethink.co.uk>
Signed-off-by: Kiran Ostrolenk <kiran.ostrolenk@codethink.co.uk>
Signed-off-by: Max Chou <max.chou@sifive.com>
Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
[max.chou@sifive.com: Exposed x-zvknha & x-zvknhb properties]
[max.chou@sifive.com: Replaced SEW selection to happened during
translation]
Message-ID: <20230711165917.2629866-11-max.chou@sifive.com>
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
---
 target/riscv/cpu_cfg.h | 2 +
 target/riscv/helper.h | 6 +
 target/riscv/insn32.decode | 5 +
 target/riscv/cpu.c | 13 +-
 target/riscv/vcrypto_helper.c | 238 +++++++++++++++++++++++
 target/riscv/insn_trans/trans_rvvk.c.inc | 129 ++++++++++++
 6 files changed, 390 insertions(+), 3 deletions(-)

diff --git a/target/riscv/cpu_cfg.h b/target/riscv/cpu_cfg.h
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/cpu_cfg.h
+++ b/target/riscv/cpu_cfg.h
@@ -XXX,XX +XXX,XX @@ struct RISCVCPUConfig {
 bool ext_zvbb;
 bool ext_zvbc;
 bool ext_zvkned;
+ bool ext_zvknha;
+ bool ext_zvknhb;
 bool ext_zmmul;
 bool ext_zvfbfmin;
 bool ext_zvfbfwma;
diff --git a/target/riscv/helper.h b/target/riscv/helper.h
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/helper.h
+++ b/target/riscv/helper.h
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_4(vaesdm_vs, void, ptr, ptr, env, i32)
 DEF_HELPER_4(vaesz_vs, void, ptr, ptr, env, i32)
 DEF_HELPER_5(vaeskf1_vi, void, ptr, ptr, i32, env, i32)
 DEF_HELPER_5(vaeskf2_vi, void, ptr, ptr, i32, env, i32)
+
+DEF_HELPER_5(vsha2ms_vv, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vsha2ch32_vv, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vsha2ch64_vv, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vsha2cl32_vv, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vsha2cl64_vv, void, ptr, ptr, ptr, env, i32)
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/insn32.decode
+++ b/target/riscv/insn32.decode
@@ -XXX,XX +XXX,XX @@ vaesdm_vs 101001 1 ..... 00000 010 ..... 1110111 @r2_vm_1
 vaesz_vs 101001 1 ..... 00111 010 ..... 1110111 @r2_vm_1
 vaeskf1_vi 100010 1 ..... ..... 010 ..... 1110111 @r_vm_1
 vaeskf2_vi 101010 1 ..... ..... 010 ..... 1110111 @r_vm_1
+
+# *** Zvknh vector crypto extension ***
+vsha2ms_vv 101101 1 ..... ..... 010 ..... 1110111 @r_vm_1
+vsha2ch_vv 101110 1 ..... ..... 010 ..... 1110111 @r_vm_1
+vsha2cl_vv 101111 1 ..... ..... 010 ..... 1110111 @r_vm_1
diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -XXX,XX +XXX,XX @@ static const struct isa_ext_data isa_edata_arr[] = {
 ISA_EXT_DATA_ENTRY(zvkned, PRIV_VERSION_1_12_0, ext_zvkned),
+ ISA_EXT_DATA_ENTRY(zvknha, PRIV_VERSION_1_12_0, ext_zvknha),
+ ISA_EXT_DATA_ENTRY(zvknhb, PRIV_VERSION_1_12_0, ext_zvknhb),
 ISA_EXT_DATA_ENTRY(zhinx, PRIV_VERSION_1_12_0, ext_zhinx),
 ISA_EXT_DATA_ENTRY(zhinxmin, PRIV_VERSION_1_12_0, ext_zhinxmin),
 ISA_EXT_DATA_ENTRY(smaia, PRIV_VERSION_1_12_0, ext_smaia),
@@ -XXX,XX +XXX,XX @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp)
 * In principle Zve*x would also suffice here, were they supported
 * in qemu
 */
- if ((cpu->cfg.ext_zvbb || cpu->cfg.ext_zvkned) && !cpu->cfg.ext_zve32f) {
+ if ((cpu->cfg.ext_zvbb || cpu->cfg.ext_zvkned || cpu->cfg.ext_zvknha) &&
+ !cpu->cfg.ext_zve32f) {
 error_setg(errp,
 "Vector crypto extensions require V or Zve* extensions");
 return;
 }

- if (cpu->cfg.ext_zvbc && !cpu->cfg.ext_zve64f) {
- error_setg(errp, "Zvbc extension requires V or Zve64{f,d} extensions");
+ if ((cpu->cfg.ext_zvbc || cpu->cfg.ext_zvknhb) && !cpu->cfg.ext_zve64f) {
+ error_setg(
+ errp,
+ "Zvbc and Zvknhb extensions require V or Zve64{f,d} extensions");
 return;
 }

@@ -XXX,XX +XXX,XX @@ static Property riscv_cpu_extensions[] = {
 DEFINE_PROP_BOOL("x-zvbb", RISCVCPU, cfg.ext_zvbb, false),
 DEFINE_PROP_BOOL("x-zvbc", RISCVCPU, cfg.ext_zvbc, false),
 DEFINE_PROP_BOOL("x-zvkned", RISCVCPU, cfg.ext_zvkned, false),
+ DEFINE_PROP_BOOL("x-zvknha", RISCVCPU, cfg.ext_zvknha, false),
+ DEFINE_PROP_BOOL("x-zvknhb", RISCVCPU, cfg.ext_zvknhb, false),

 DEFINE_PROP_END_OF_LIST(),
 };
diff --git a/target/riscv/vcrypto_helper.c b/target/riscv/vcrypto_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/vcrypto_helper.c
+++ b/target/riscv/vcrypto_helper.c
@@ -XXX,XX +XXX,XX @@ void HELPER(vaeskf2_vi)(void *vd_vptr, void *vs2_vptr, uint32_t uimm,
 /* set tail elements to 1s */
 vext_set_elems_1s(vd, vta, vl * 4, total_elems * 4);
 }
+
+static inline uint32_t sig0_sha256(uint32_t x)
+{
+ return ror32(x, 7) ^ ror32(x, 18) ^ (x >> 3);
+}
+
+static inline uint32_t sig1_sha256(uint32_t x)
+{
+ return ror32(x, 17) ^ ror32(x, 19) ^ (x >> 10);
+}
+
+static inline uint64_t sig0_sha512(uint64_t x)
+{
+ return ror64(x, 1) ^ ror64(x, 8) ^ (x >> 7);
+}
+
+static inline uint64_t sig1_sha512(uint64_t x)
+{
+ return ror64(x, 19) ^ ror64(x, 61) ^ (x >> 6);
+}
+
+static inline void vsha2ms_e32(uint32_t *vd, uint32_t *vs1, uint32_t *vs2)
+{
+ uint32_t res[4];
+ res[0] = sig1_sha256(vs1[H4(2)]) + vs2[H4(1)] + sig0_sha256(vd[H4(1)]) +
+ vd[H4(0)];
+ res[1] = sig1_sha256(vs1[H4(3)]) + vs2[H4(2)] + sig0_sha256(vd[H4(2)]) +
+ vd[H4(1)];
+ res[2] =
+ sig1_sha256(res[0]) + vs2[H4(3)] + sig0_sha256(vd[H4(3)]) + vd[H4(2)];
+ res[3] =
+ sig1_sha256(res[1]) + vs1[H4(0)] + sig0_sha256(vs2[H4(0)]) + vd[H4(3)];
+ vd[H4(3)] = res[3];
+ vd[H4(2)] = res[2];
+ vd[H4(1)] = res[1];
+ vd[H4(0)] = res[0];
+}
+
+static inline void vsha2ms_e64(uint64_t *vd, uint64_t *vs1, uint64_t *vs2)
+{
+ uint64_t res[4];
+ res[0] = sig1_sha512(vs1[2]) + vs2[1] + sig0_sha512(vd[1]) + vd[0];
+ res[1] = sig1_sha512(vs1[3]) + vs2[2] + sig0_sha512(vd[2]) + vd[1];
+ res[2] = sig1_sha512(res[0]) + vs2[3] + sig0_sha512(vd[3]) + vd[2];
+ res[3] = sig1_sha512(res[1]) + vs1[0] + sig0_sha512(vs2[0]) + vd[3];
+ vd[3] = res[3];
+ vd[2] = res[2];
+ vd[1] = res[1];
+ vd[0] = res[0];
+}
+
+void HELPER(vsha2ms_vv)(void *vd, void *vs1, void *vs2, CPURISCVState *env,
+ uint32_t desc)
+{
+ uint32_t sew = FIELD_EX64(env->vtype, VTYPE, VSEW);
+ uint32_t esz = sew == MO_32 ? 4 : 8;
+ uint32_t total_elems;
+ uint32_t vta = vext_vta(desc);
+
+ for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) {
+ if (sew == MO_32) {
+ vsha2ms_e32(((uint32_t *)vd) + i * 4, ((uint32_t *)vs1) + i * 4,
+ ((uint32_t *)vs2) + i * 4);
+ } else {
+ /* If not 32 then SEW should be 64 */
+ vsha2ms_e64(((uint64_t *)vd) + i * 4, ((uint64_t *)vs1) + i * 4,
+ ((uint64_t *)vs2) + i * 4);
+ }
+ }
+ /* set tail elements to 1s */
+ total_elems = vext_get_total_elems(env, desc, esz);
+ vext_set_elems_1s(vd, vta, env->vl * esz, total_elems * esz);
+ env->vstart = 0;
+}
+
+static inline uint64_t sum0_64(uint64_t x)
+{
+ return ror64(x, 28) ^ ror64(x, 34) ^ ror64(x, 39);
+}
+
+static inline uint32_t sum0_32(uint32_t x)
+{
+ return ror32(x, 2) ^ ror32(x, 13) ^ ror32(x, 22);
+}
+
+static inline uint64_t sum1_64(uint64_t x)
+{
+ return ror64(x, 14) ^ ror64(x, 18) ^ ror64(x, 41);
+}
+
+static inline uint32_t sum1_32(uint32_t x)
+{
+ return ror32(x, 6) ^ ror32(x, 11) ^ ror32(x, 25);
+}
+
+#define ch(x, y, z) ((x & y) ^ ((~x) & z))
+
+#define maj(x, y, z) ((x & y) ^ (x & z) ^ (y & z))
+
+static void vsha2c_64(uint64_t *vs2, uint64_t *vd, uint64_t *vs1)
+{
+ uint64_t a = vs2[3], b = vs2[2], e = vs2[1], f = vs2[0];
+ uint64_t c = vd[3], d = vd[2], g = vd[1], h = vd[0];
+ uint64_t W0 = vs1[0], W1 = vs1[1];
+ uint64_t T1 = h + sum1_64(e) + ch(e, f, g) + W0;
+ uint64_t T2 = sum0_64(a) + maj(a, b, c);
+
+ h = g;
+ g = f;
+ f = e;
+ e = d + T1;
+ d = c;
+ c = b;
+ b = a;
+ a = T1 + T2;
+
+ T1 = h + sum1_64(e) + ch(e, f, g) + W1;
+ T2 = sum0_64(a) + maj(a, b, c);
+ h = g;
+ g = f;
+ f = e;
+ e = d + T1;
+ d = c;
+ c = b;
+ b = a;
+ a = T1 + T2;
+
+ vd[0] = f;
+ vd[1] = e;
+ vd[2] = b;
+ vd[3] = a;
+}
+
+static void vsha2c_32(uint32_t *vs2, uint32_t *vd, uint32_t *vs1)
+{
+ uint32_t a = vs2[H4(3)], b = vs2[H4(2)], e = vs2[H4(1)], f = vs2[H4(0)];
+ uint32_t c = vd[H4(3)], d = vd[H4(2)], g = vd[H4(1)], h = vd[H4(0)];
+ uint32_t W0 = vs1[H4(0)], W1 = vs1[H4(1)];
+ uint32_t T1 = h + sum1_32(e) + ch(e, f, g) + W0;
+ uint32_t T2 = sum0_32(a) + maj(a, b, c);
+
+ h = g;
+ g = f;
+ f = e;
+ e = d + T1;
+ d = c;
+ c = b;
+ b = a;
+ a = T1 + T2;
+
+ T1 = h + sum1_32(e) + ch(e, f, g) + W1;
+ T2 = sum0_32(a) + maj(a, b, c);
+ h = g;
+ g = f;
+ f = e;
+ e = d + T1;
+ d = c;
+ c = b;
+ b = a;
+ a = T1 + T2;
+
+ vd[H4(0)] = f;
+ vd[H4(1)] = e;
+ vd[H4(2)] = b;
+ vd[H4(3)] = a;
+}
+
+void HELPER(vsha2ch32_vv)(void *vd, void *vs1, void *vs2, CPURISCVState *env,
+ uint32_t desc)
+{
+ const uint32_t esz = 4;
+ uint32_t total_elems;
+ uint32_t vta = vext_vta(desc);
+
+ for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) {
+ vsha2c_32(((uint32_t *)vs2) + 4 * i, ((uint32_t *)vd) + 4 * i,
+ ((uint32_t *)vs1) + 4 * i + 2);
+ }
+
+ /* set tail elements to 1s */
+ total_elems = vext_get_total_elems(env, desc, esz);
+ vext_set_elems_1s(vd, vta, env->vl * esz, total_elems * esz);
+ env->vstart = 0;
+}
+
+void HELPER(vsha2ch64_vv)(void *vd, void *vs1, void *vs2, CPURISCVState *env,
+ uint32_t desc)
+{
+ const uint32_t esz = 8;
+ uint32_t total_elems;
+ uint32_t vta = vext_vta(desc);
+
+ for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) {
+ vsha2c_64(((uint64_t *)vs2) + 4 * i, ((uint64_t *)vd) + 4 * i,
+ ((uint64_t *)vs1) + 4 * i + 2);
+ }
+
+ /* set tail elements to 1s */
+ total_elems = vext_get_total_elems(env, desc, esz);
+ vext_set_elems_1s(vd, vta, env->vl * esz, total_elems * esz);
+ env->vstart = 0;
+}
+
+void HELPER(vsha2cl32_vv)(void *vd, void *vs1, void *vs2, CPURISCVState *env,
+ uint32_t desc)
+{
+ const uint32_t esz = 4;
+ uint32_t total_elems;
+ uint32_t vta = vext_vta(desc);
+
+ for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) {
+ vsha2c_32(((uint32_t *)vs2) + 4 * i, ((uint32_t *)vd) + 4 * i,
+ (((uint32_t *)vs1) + 4 * i));
+ }
+
+ /* set tail elements to 1s */
+ total_elems = vext_get_total_elems(env, desc, esz);
+ vext_set_elems_1s(vd, vta, env->vl * esz, total_elems * esz);
+ env->vstart = 0;
+}
+
+void HELPER(vsha2cl64_vv)(void *vd, void *vs1, void *vs2, CPURISCVState *env,
+ uint32_t desc)
+{
+ uint32_t esz = 8;
+ uint32_t total_elems;
+ uint32_t vta = vext_vta(desc);
+
+ for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) {
+ vsha2c_64(((uint64_t *)vs2) + 4 * i, ((uint64_t *)vd) + 4 * i,
+ (((uint64_t *)vs1) + 4 * i));
+ }
+
+ /* set tail elements to 1s */
+ total_elems = vext_get_total_elems(env, desc, esz);
+ vext_set_elems_1s(vd, vta, env->vl * esz, total_elems * esz);
+ env->vstart = 0;
+}
diff --git a/target/riscv/insn_trans/trans_rvvk.c.inc b/target/riscv/insn_trans/trans_rvvk.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/insn_trans/trans_rvvk.c.inc
+++ b/target/riscv/insn_trans/trans_rvvk.c.inc
@@ -XXX,XX +XXX,XX @@ static bool vaeskf2_check(DisasContext *s, arg_vaeskf2_vi *a)

 GEN_VI_UNMASKED_TRANS(vaeskf1_vi, vaeskf1_check, ZVKNED_EGS)
 GEN_VI_UNMASKED_TRANS(vaeskf2_vi, vaeskf2_check, ZVKNED_EGS)
+
+/*
+ * Zvknh
+ */
+
+#define ZVKNH_EGS 4
+
+#define GEN_VV_UNMASKED_TRANS(NAME, CHECK, EGS) \
+ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
+ { \
+ if (CHECK(s, a)) { \
+ uint32_t data = 0; \
+ TCGLabel *over = gen_new_label(); \
+ TCGv_i32 egs; \
+ \
+ if (!s->vstart_eq_zero || !s->vl_eq_vlmax) { \
+ /* save opcode for unwinding in case we throw an exception */ \
+ decode_save_opc(s); \
+ egs = tcg_constant_i32(EGS); \
+ gen_helper_egs_check(egs, cpu_env); \
+ tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \
+ } \
+ \
+ data = FIELD_DP32(data, VDATA, VM, a->vm); \
+ data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \
+ data = FIELD_DP32(data, VDATA, VTA, s->vta); \
+ data = FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s); \
+ data = FIELD_DP32(data, VDATA, VMA, s->vma); \
+ \
+ tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, a->rs1), \
+ vreg_ofs(s, a->rs2), cpu_env, \
+ s->cfg_ptr->vlen / 8, s->cfg_ptr->vlen / 8, \
+ data, gen_helper_##NAME); \
+ \
+ mark_vs_dirty(s); \
+ gen_set_label(over); \
+ return true; \
+ } \
+ return false; \
+ }
+
+static bool vsha_check_sew(DisasContext *s)
+{
+ return (s->cfg_ptr->ext_zvknha == true && s->sew == MO_32) ||
+ (s->cfg_ptr->ext_zvknhb == true &&
+ (s->sew == MO_32 || s->sew == MO_64));
+}
+
+static bool vsha_check(DisasContext *s, arg_rmrr *a)
+{
+ int egw_bytes = ZVKNH_EGS << s->sew;
+ int mult = 1 << MAX(s->lmul, 0);
+ return opivv_check(s, a) &&
+ vsha_check_sew(s) &&
+ MAXSZ(s) >= egw_bytes &&
+ !is_overlapped(a->rd, mult, a->rs1, mult) &&
+ !is_overlapped(a->rd, mult, a->rs2, mult) &&
+ s->lmul >= 0;
+}
+
+GEN_VV_UNMASKED_TRANS(vsha2ms_vv, vsha_check, ZVKNH_EGS)
+
+static bool trans_vsha2cl_vv(DisasContext *s, arg_rmrr *a)
+{
+ if (vsha_check(s, a)) {
+ uint32_t data = 0;
+ TCGLabel *over = gen_new_label();
+ TCGv_i32 egs;
+
+ if (!s->vstart_eq_zero || !s->vl_eq_vlmax) {
+ /* save opcode for unwinding in case we throw an exception */
+ decode_save_opc(s);
+ egs = tcg_constant_i32(ZVKNH_EGS);
+ gen_helper_egs_check(egs, cpu_env);
+ tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
+ }
+
+ data = FIELD_DP32(data, VDATA, VM, a->vm);
+ data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
+ data = FIELD_DP32(data, VDATA, VTA, s->vta);
+ data = FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s);
+ data = FIELD_DP32(data, VDATA, VMA, s->vma);
+
+ tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, a->rs1),
+ vreg_ofs(s, a->rs2), cpu_env, s->cfg_ptr->vlen / 8,
+ s->cfg_ptr->vlen / 8, data,
+ s->sew == MO_32 ?
+ gen_helper_vsha2cl32_vv : gen_helper_vsha2cl64_vv);
+
+ mark_vs_dirty(s);
+ gen_set_label(over);
+ return true;
+ }
+ return false;
+}
+
+static bool trans_vsha2ch_vv(DisasContext *s, arg_rmrr *a)
+{
+ if (vsha_check(s, a)) {
+ uint32_t data = 0;
+ TCGLabel *over = gen_new_label();
+ TCGv_i32 egs;
+
+ if (!s->vstart_eq_zero || !s->vl_eq_vlmax) {
+ /* save opcode for unwinding in case we throw an exception */
+ decode_save_opc(s);
+ egs = tcg_constant_i32(ZVKNH_EGS);
+ gen_helper_egs_check(egs, cpu_env);
+ tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
+ }
+
+ data = FIELD_DP32(data, VDATA, VM, a->vm);
+ data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
+ data = FIELD_DP32(data, VDATA, VTA, s->vta);
+ data = FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s);
+ data = FIELD_DP32(data, VDATA, VMA, s->vma);
+
+ tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, a->rs1),
+ vreg_ofs(s, a->rs2), cpu_env, s->cfg_ptr->vlen / 8,
+ s->cfg_ptr->vlen / 8, data,
+ s->sew == MO_32 ?
+ gen_helper_vsha2ch32_vv : gen_helper_vsha2ch64_vv);
+
+ mark_vs_dirty(s);
+ gen_set_label(over);
+ return true;
+ }
+ return false;
+}
--
2.41.0
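For readers cross-checking the SHA-2 helpers above against FIPS 180-4:
sig0/sig1 are the small sigma functions, and each iteration of vsha2ms_vv
produces four message-schedule words. A scalar sketch of one such word
follows; it is illustrative only, with invented ref_* names:

    #include <stdint.h>

    static uint32_t ref_ror32(uint32_t x, unsigned n)
    {
        return (x >> n) | (x << (32 - n)); /* n is 1..31 for the uses below */
    }

    /* Small sigma functions, as in sig0_sha256()/sig1_sha256() above */
    static uint32_t ref_sig0(uint32_t x)
    {
        return ref_ror32(x, 7) ^ ref_ror32(x, 18) ^ (x >> 3);
    }

    static uint32_t ref_sig1(uint32_t x)
    {
        return ref_ror32(x, 17) ^ ref_ror32(x, 19) ^ (x >> 10);
    }

    /* One SHA-256 message-schedule word:
     * W[i] = sig1(W[i-2]) + W[i-7] + sig0(W[i-15]) + W[i-16]
     * vsha2ms.vv evaluates four of these per 128-bit element group. */
    static uint32_t ref_msg_word(uint32_t w2, uint32_t w7,
                                 uint32_t w15, uint32_t w16)
    {
        return ref_sig1(w2) + w7 + ref_sig0(w15) + w16;
    }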
From: Lawrence Hunter <lawrence.hunter@codethink.co.uk>

This commit adds support for the Zvksh vector-crypto extension, which
consists of the following instructions:

 * vsm3me.vv
 * vsm3c.vi

Translation functions are defined in
`target/riscv/insn_trans/trans_rvvk.c.inc` and helpers are defined in
`target/riscv/vcrypto_helper.c`.

Co-authored-by: Kiran Ostrolenk <kiran.ostrolenk@codethink.co.uk>
[max.chou@sifive.com: Replaced vstart checking by TCG op]
Signed-off-by: Kiran Ostrolenk <kiran.ostrolenk@codethink.co.uk>
Signed-off-by: Lawrence Hunter <lawrence.hunter@codethink.co.uk>
Signed-off-by: Max Chou <max.chou@sifive.com>
Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
[max.chou@sifive.com: Exposed x-zvksh property]
Message-ID: <20230711165917.2629866-12-max.chou@sifive.com>
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
---
target/riscv/cpu_cfg.h | 1 +
target/riscv/helper.h | 3 +
target/riscv/insn32.decode | 4 +
target/riscv/cpu.c | 6 +-
target/riscv/vcrypto_helper.c | 134 +++++++++++++++++++++++
target/riscv/insn_trans/trans_rvvk.c.inc | 31 ++++++
6 files changed, 177 insertions(+), 2 deletions(-)

diff --git a/target/riscv/cpu_cfg.h b/target/riscv/cpu_cfg.h
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/cpu_cfg.h
+++ b/target/riscv/cpu_cfg.h
@@ -XXX,XX +XXX,XX @@ struct RISCVCPUConfig {
bool ext_zvkned;
bool ext_zvknha;
bool ext_zvknhb;
+ bool ext_zvksh;
bool ext_zmmul;
bool ext_zvfbfmin;
bool ext_zvfbfwma;
diff --git a/target/riscv/helper.h b/target/riscv/helper.h
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/helper.h
+++ b/target/riscv/helper.h
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_5(vsha2ch32_vv, void, ptr, ptr, ptr, env, i32)
DEF_HELPER_5(vsha2ch64_vv, void, ptr, ptr, ptr, env, i32)
DEF_HELPER_5(vsha2cl32_vv, void, ptr, ptr, ptr, env, i32)
DEF_HELPER_5(vsha2cl64_vv, void, ptr, ptr, ptr, env, i32)
+
+DEF_HELPER_5(vsm3me_vv, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vsm3c_vi, void, ptr, ptr, i32, env, i32)
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/insn32.decode
+++ b/target/riscv/insn32.decode
@@ -XXX,XX +XXX,XX @@ vaeskf2_vi 101010 1 ..... ..... 010 ..... 1110111 @r_vm_1
vsha2ms_vv 101101 1 ..... ..... 010 ..... 1110111 @r_vm_1
vsha2ch_vv 101110 1 ..... ..... 010 ..... 1110111 @r_vm_1
vsha2cl_vv 101111 1 ..... ..... 010 ..... 1110111 @r_vm_1
+
+# *** Zvksh vector crypto extension ***
+vsm3me_vv 100000 1 ..... ..... 010 ..... 1110111 @r_vm_1
+vsm3c_vi 101011 1 ..... ..... 010 ..... 1110111 @r_vm_1
diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -XXX,XX +XXX,XX @@ static const struct isa_ext_data isa_edata_arr[] = {
ISA_EXT_DATA_ENTRY(zvkned, PRIV_VERSION_1_12_0, ext_zvkned),
ISA_EXT_DATA_ENTRY(zvknha, PRIV_VERSION_1_12_0, ext_zvknha),
ISA_EXT_DATA_ENTRY(zvknhb, PRIV_VERSION_1_12_0, ext_zvknhb),
+ ISA_EXT_DATA_ENTRY(zvksh, PRIV_VERSION_1_12_0, ext_zvksh),
ISA_EXT_DATA_ENTRY(zhinx, PRIV_VERSION_1_12_0, ext_zhinx),
ISA_EXT_DATA_ENTRY(zhinxmin, PRIV_VERSION_1_12_0, ext_zhinxmin),
ISA_EXT_DATA_ENTRY(smaia, PRIV_VERSION_1_12_0, ext_smaia),
@@ -XXX,XX +XXX,XX @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp)
* In principle Zve*x would also suffice here, were they supported
* in qemu
*/
- if ((cpu->cfg.ext_zvbb || cpu->cfg.ext_zvkned || cpu->cfg.ext_zvknha) &&
- !cpu->cfg.ext_zve32f) {
+ if ((cpu->cfg.ext_zvbb || cpu->cfg.ext_zvkned || cpu->cfg.ext_zvknha ||
+ cpu->cfg.ext_zvksh) && !cpu->cfg.ext_zve32f) {
error_setg(errp,
"Vector crypto extensions require V or Zve* extensions");
return;
@@ -XXX,XX +XXX,XX @@ static Property riscv_cpu_extensions[] = {
DEFINE_PROP_BOOL("x-zvkned", RISCVCPU, cfg.ext_zvkned, false),
DEFINE_PROP_BOOL("x-zvknha", RISCVCPU, cfg.ext_zvknha, false),
DEFINE_PROP_BOOL("x-zvknhb", RISCVCPU, cfg.ext_zvknhb, false),
+ DEFINE_PROP_BOOL("x-zvksh", RISCVCPU, cfg.ext_zvksh, false),

DEFINE_PROP_END_OF_LIST(),
};
diff --git a/target/riscv/vcrypto_helper.c b/target/riscv/vcrypto_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/vcrypto_helper.c
+++ b/target/riscv/vcrypto_helper.c
@@ -XXX,XX +XXX,XX @@ void HELPER(vsha2cl64_vv)(void *vd, void *vs1, void *vs2, CPURISCVState *env,
vext_set_elems_1s(vd, vta, env->vl * esz, total_elems * esz);
env->vstart = 0;
}
+
+static inline uint32_t p1(uint32_t x)
+{
+ return x ^ rol32(x, 15) ^ rol32(x, 23);
+}
+
+static inline uint32_t zvksh_w(uint32_t m16, uint32_t m9, uint32_t m3,
+ uint32_t m13, uint32_t m6)
+{
+ return p1(m16 ^ m9 ^ rol32(m3, 15)) ^ rol32(m13, 7) ^ m6;
+}
+
+void HELPER(vsm3me_vv)(void *vd_vptr, void *vs1_vptr, void *vs2_vptr,
+ CPURISCVState *env, uint32_t desc)
+{
+ uint32_t esz = memop_size(FIELD_EX64(env->vtype, VTYPE, VSEW));
+ uint32_t total_elems = vext_get_total_elems(env, desc, esz);
+ uint32_t vta = vext_vta(desc);
+ uint32_t *vd = vd_vptr;
+ uint32_t *vs1 = vs1_vptr;
+ uint32_t *vs2 = vs2_vptr;
+
+ for (int i = env->vstart / 8; i < env->vl / 8; i++) {
+ uint32_t w[24];
+ for (int j = 0; j < 8; j++) {
+ w[j] = bswap32(vs1[H4((i * 8) + j)]);
+ w[j + 8] = bswap32(vs2[H4((i * 8) + j)]);
+ }
+ for (int j = 0; j < 8; j++) {
+ w[j + 16] =
+ zvksh_w(w[j], w[j + 7], w[j + 13], w[j + 3], w[j + 10]);
+ }
+ for (int j = 0; j < 8; j++) {
+ vd[(i * 8) + j] = bswap32(w[H4(j + 16)]);
+ }
+ }
+ vext_set_elems_1s(vd_vptr, vta, env->vl * esz, total_elems * esz);
+ env->vstart = 0;
+}
+
+static inline uint32_t ff1(uint32_t x, uint32_t y, uint32_t z)
+{
+ return x ^ y ^ z;
+}
+
+static inline uint32_t ff2(uint32_t x, uint32_t y, uint32_t z)
+{
+ return (x & y) | (x & z) | (y & z);
+}
+
+static inline uint32_t ff_j(uint32_t x, uint32_t y, uint32_t z, uint32_t j)
+{
+ return (j <= 15) ? ff1(x, y, z) : ff2(x, y, z);
+}
+
+static inline uint32_t gg1(uint32_t x, uint32_t y, uint32_t z)
+{
+ return x ^ y ^ z;
+}
+
+static inline uint32_t gg2(uint32_t x, uint32_t y, uint32_t z)
+{
+ return (x & y) | (~x & z);
+}
+
+static inline uint32_t gg_j(uint32_t x, uint32_t y, uint32_t z, uint32_t j)
+{
+ return (j <= 15) ? gg1(x, y, z) : gg2(x, y, z);
+}
+
+static inline uint32_t t_j(uint32_t j)
+{
+ return (j <= 15) ? 0x79cc4519 : 0x7a879d8a;
+}
+
+static inline uint32_t p_0(uint32_t x)
+{
+ return x ^ rol32(x, 9) ^ rol32(x, 17);
+}
+
+static void sm3c(uint32_t *vd, uint32_t *vs1, uint32_t *vs2, uint32_t uimm)
+{
+ uint32_t x0, x1;
+ uint32_t j;
+ uint32_t ss1, ss2, tt1, tt2;
+ x0 = vs2[0] ^ vs2[4];
+ x1 = vs2[1] ^ vs2[5];
+ j = 2 * uimm;
+ ss1 = rol32(rol32(vs1[0], 12) + vs1[4] + rol32(t_j(j), j % 32), 7);
+ ss2 = ss1 ^ rol32(vs1[0], 12);
+ tt1 = ff_j(vs1[0], vs1[1], vs1[2], j) + vs1[3] + ss2 + x0;
+ tt2 = gg_j(vs1[4], vs1[5], vs1[6], j) + vs1[7] + ss1 + vs2[0];
+ vs1[3] = vs1[2];
+ vd[3] = rol32(vs1[1], 9);
+ vs1[1] = vs1[0];
+ vd[1] = tt1;
+ vs1[7] = vs1[6];
+ vd[7] = rol32(vs1[5], 19);
+ vs1[5] = vs1[4];
+ vd[5] = p_0(tt2);
+ j = 2 * uimm + 1;
+ ss1 = rol32(rol32(vd[1], 12) + vd[5] + rol32(t_j(j), j % 32), 7);
+ ss2 = ss1 ^ rol32(vd[1], 12);
+ tt1 = ff_j(vd[1], vs1[1], vd[3], j) + vs1[3] + ss2 + x1;
+ tt2 = gg_j(vd[5], vs1[5], vd[7], j) + vs1[7] + ss1 + vs2[1];
+ vd[2] = rol32(vs1[1], 9);
+ vd[0] = tt1;
+ vd[6] = rol32(vs1[5], 19);
+ vd[4] = p_0(tt2);
+}
+
+void HELPER(vsm3c_vi)(void *vd_vptr, void *vs2_vptr, uint32_t uimm,
+ CPURISCVState *env, uint32_t desc)
+{
+ uint32_t esz = memop_size(FIELD_EX64(env->vtype, VTYPE, VSEW));
+ uint32_t total_elems = vext_get_total_elems(env, desc, esz);
+ uint32_t vta = vext_vta(desc);
+ uint32_t *vd = vd_vptr;
+ uint32_t *vs2 = vs2_vptr;
+ uint32_t v1[8], v2[8], v3[8];
+
+ for (int i = env->vstart / 8; i < env->vl / 8; i++) {
+ for (int k = 0; k < 8; k++) {
+ v2[k] = bswap32(vd[H4(i * 8 + k)]);
+ v3[k] = bswap32(vs2[H4(i * 8 + k)]);
+ }
+ sm3c(v1, v2, v3, uimm);
+ for (int k = 0; k < 8; k++) {
+ vd[i * 8 + k] = bswap32(v1[H4(k)]);
+ }
+ }
+ vext_set_elems_1s(vd_vptr, vta, env->vl * esz, total_elems * esz);
+ env->vstart = 0;
+}
diff --git a/target/riscv/insn_trans/trans_rvvk.c.inc b/target/riscv/insn_trans/trans_rvvk.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/insn_trans/trans_rvvk.c.inc
+++ b/target/riscv/insn_trans/trans_rvvk.c.inc
@@ -XXX,XX +XXX,XX @@ static bool trans_vsha2ch_vv(DisasContext *s, arg_rmrr *a)
}
return false;
}
+
+/*
+ * Zvksh
+ */
+
+#define ZVKSH_EGS 8
+
+static inline bool vsm3_check(DisasContext *s, arg_rmrr *a)
+{
+ int egw_bytes = ZVKSH_EGS << s->sew;
+ int mult = 1 << MAX(s->lmul, 0);
+ return s->cfg_ptr->ext_zvksh == true &&
+ require_rvv(s) &&
+ vext_check_isa_ill(s) &&
+ !is_overlapped(a->rd, mult, a->rs2, mult) &&
+ MAXSZ(s) >= egw_bytes &&
+ s->sew == MO_32;
+}
+
+static inline bool vsm3me_check(DisasContext *s, arg_rmrr *a)
+{
+ return vsm3_check(s, a) && vext_check_sss(s, a->rd, a->rs1, a->rs2, a->vm);
+}
+
+static inline bool vsm3c_check(DisasContext *s, arg_rmrr *a)
+{
+ return vsm3_check(s, a) && vext_check_ss(s, a->rd, a->rs2, a->vm);
+}
+
+GEN_VV_UNMASKED_TRANS(vsm3me_vv, vsm3me_check, ZVKSH_EGS)
+GEN_VI_UNMASKED_TRANS(vsm3c_vi, vsm3c_check, ZVKSH_EGS)
--
2.41.0
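For context, the vsm3me.vv helper above implements the standard SM3 message-expansion recurrence, eight new message words per element group. The scalar sketch below restates that recurrence outside of QEMU and is illustrative only, not part of the series: rol32() is written out locally (the helper takes it from qemu/bitops.h), and main() feeds it the padded one-block message "abc".

#include <stdint.h>
#include <stdio.h>

/* Rotate left; qemu/bitops.h provides this in the real helper. */
static inline uint32_t rol32(uint32_t x, unsigned k)
{
    return (x << k) | (x >> (32 - k));
}

/* P1 permutation from the SM3 specification. */
static inline uint32_t p1(uint32_t x)
{
    return x ^ rol32(x, 15) ^ rol32(x, 23);
}

/* One expanded word, matching zvksh_w() above:
 * W[j] = P1(W[j-16] ^ W[j-9] ^ (W[j-3] <<< 15)) ^ (W[j-13] <<< 7) ^ W[j-6]
 */
static uint32_t sm3_expand(const uint32_t *w, int j)
{
    return p1(w[j - 16] ^ w[j - 9] ^ rol32(w[j - 3], 15)) ^
           rol32(w[j - 13], 7) ^ w[j - 6];
}

int main(void)
{
    /* "abc" padded to one 512-bit block: 0x61626380, zeros, bit length 24 */
    uint32_t w[24] = { 0x61626380, 0, 0, 0, 0, 0, 0, 0,
                       0, 0, 0, 0, 0, 0, 0, 24 };

    for (int j = 16; j < 24; j++) {
        w[j] = sm3_expand(w, j);
    }
    printf("W[16] = %08x\n", w[16]);
    return 0;
}

The helper computes the same eight words per group, with bswap32()/H4() handling the big-endian word order the extension specifies.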
From: Nazar Kazakov <nazar.kazakov@codethink.co.uk>

This commit adds support for the Zvkg vector-crypto extension, which
consists of the following instructions:

 * vgmul.vv
 * vghsh.vv

Translation functions are defined in
`target/riscv/insn_trans/trans_rvvk.c.inc` and helpers are defined in
`target/riscv/vcrypto_helper.c`.

Co-authored-by: Lawrence Hunter <lawrence.hunter@codethink.co.uk>
[max.chou@sifive.com: Replaced vstart checking by TCG op]
Signed-off-by: Lawrence Hunter <lawrence.hunter@codethink.co.uk>
Signed-off-by: Nazar Kazakov <nazar.kazakov@codethink.co.uk>
Signed-off-by: Max Chou <max.chou@sifive.com>
Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
[max.chou@sifive.com: Exposed x-zvkg property]
[max.chou@sifive.com: Replaced uint by int for cross win32 build]
Message-ID: <20230711165917.2629866-13-max.chou@sifive.com>
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
---
target/riscv/cpu_cfg.h | 1 +
target/riscv/helper.h | 3 +
target/riscv/insn32.decode | 4 ++
target/riscv/cpu.c | 6 +-
target/riscv/vcrypto_helper.c | 72 ++++++++++++++++++++++++
target/riscv/insn_trans/trans_rvvk.c.inc | 30 ++++++++++
6 files changed, 114 insertions(+), 2 deletions(-)

diff --git a/target/riscv/cpu_cfg.h b/target/riscv/cpu_cfg.h
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/cpu_cfg.h
+++ b/target/riscv/cpu_cfg.h
@@ -XXX,XX +XXX,XX @@ struct RISCVCPUConfig {
bool ext_zve64d;
bool ext_zvbb;
bool ext_zvbc;
+ bool ext_zvkg;
bool ext_zvkned;
bool ext_zvknha;
bool ext_zvknhb;
diff --git a/target/riscv/helper.h b/target/riscv/helper.h
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/helper.h
+++ b/target/riscv/helper.h
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_5(vsha2cl64_vv, void, ptr, ptr, ptr, env, i32)

DEF_HELPER_5(vsm3me_vv, void, ptr, ptr, ptr, env, i32)
DEF_HELPER_5(vsm3c_vi, void, ptr, ptr, i32, env, i32)
+
+DEF_HELPER_5(vghsh_vv, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_4(vgmul_vv, void, ptr, ptr, env, i32)
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/insn32.decode
+++ b/target/riscv/insn32.decode
@@ -XXX,XX +XXX,XX @@ vsha2cl_vv 101111 1 ..... ..... 010 ..... 1110111 @r_vm_1
# *** Zvksh vector crypto extension ***
vsm3me_vv 100000 1 ..... ..... 010 ..... 1110111 @r_vm_1
vsm3c_vi 101011 1 ..... ..... 010 ..... 1110111 @r_vm_1
+
+# *** Zvkg vector crypto extension ***
+vghsh_vv 101100 1 ..... ..... 010 ..... 1110111 @r_vm_1
+vgmul_vv 101000 1 ..... 10001 010 ..... 1110111 @r2_vm_1
diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -XXX,XX +XXX,XX @@ static const struct isa_ext_data isa_edata_arr[] = {
ISA_EXT_DATA_ENTRY(zvfbfwma, PRIV_VERSION_1_12_0, ext_zvfbfwma),
ISA_EXT_DATA_ENTRY(zvfh, PRIV_VERSION_1_12_0, ext_zvfh),
ISA_EXT_DATA_ENTRY(zvfhmin, PRIV_VERSION_1_12_0, ext_zvfhmin),
+ ISA_EXT_DATA_ENTRY(zvkg, PRIV_VERSION_1_12_0, ext_zvkg),
ISA_EXT_DATA_ENTRY(zvkned, PRIV_VERSION_1_12_0, ext_zvkned),
ISA_EXT_DATA_ENTRY(zvknha, PRIV_VERSION_1_12_0, ext_zvknha),
ISA_EXT_DATA_ENTRY(zvknhb, PRIV_VERSION_1_12_0, ext_zvknhb),
@@ -XXX,XX +XXX,XX @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp)
* In principle Zve*x would also suffice here, were they supported
* in qemu
*/
- if ((cpu->cfg.ext_zvbb || cpu->cfg.ext_zvkned || cpu->cfg.ext_zvknha ||
- cpu->cfg.ext_zvksh) && !cpu->cfg.ext_zve32f) {
+ if ((cpu->cfg.ext_zvbb || cpu->cfg.ext_zvkg || cpu->cfg.ext_zvkned ||
+ cpu->cfg.ext_zvknha || cpu->cfg.ext_zvksh) && !cpu->cfg.ext_zve32f) {
error_setg(errp,
"Vector crypto extensions require V or Zve* extensions");
return;
@@ -XXX,XX +XXX,XX @@ static Property riscv_cpu_extensions[] = {
/* Vector cryptography extensions */
DEFINE_PROP_BOOL("x-zvbb", RISCVCPU, cfg.ext_zvbb, false),
DEFINE_PROP_BOOL("x-zvbc", RISCVCPU, cfg.ext_zvbc, false),
+ DEFINE_PROP_BOOL("x-zvkg", RISCVCPU, cfg.ext_zvkg, false),
DEFINE_PROP_BOOL("x-zvkned", RISCVCPU, cfg.ext_zvkned, false),
DEFINE_PROP_BOOL("x-zvknha", RISCVCPU, cfg.ext_zvknha, false),
DEFINE_PROP_BOOL("x-zvknhb", RISCVCPU, cfg.ext_zvknhb, false),
diff --git a/target/riscv/vcrypto_helper.c b/target/riscv/vcrypto_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/vcrypto_helper.c
+++ b/target/riscv/vcrypto_helper.c
@@ -XXX,XX +XXX,XX @@ void HELPER(vsm3c_vi)(void *vd_vptr, void *vs2_vptr, uint32_t uimm,
vext_set_elems_1s(vd_vptr, vta, env->vl * esz, total_elems * esz);
env->vstart = 0;
}
+
+void HELPER(vghsh_vv)(void *vd_vptr, void *vs1_vptr, void *vs2_vptr,
+ CPURISCVState *env, uint32_t desc)
+{
+ uint64_t *vd = vd_vptr;
+ uint64_t *vs1 = vs1_vptr;
+ uint64_t *vs2 = vs2_vptr;
+ uint32_t vta = vext_vta(desc);
+ uint32_t total_elems = vext_get_total_elems(env, desc, 4);
+
+ for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) {
+ uint64_t Y[2] = {vd[i * 2 + 0], vd[i * 2 + 1]};
+ uint64_t H[2] = {brev8(vs2[i * 2 + 0]), brev8(vs2[i * 2 + 1])};
+ uint64_t X[2] = {vs1[i * 2 + 0], vs1[i * 2 + 1]};
+ uint64_t Z[2] = {0, 0};
+
+ uint64_t S[2] = {brev8(Y[0] ^ X[0]), brev8(Y[1] ^ X[1])};
+
+ for (int j = 0; j < 128; j++) {
+ if ((S[j / 64] >> (j % 64)) & 1) {
+ Z[0] ^= H[0];
+ Z[1] ^= H[1];
+ }
+ bool reduce = ((H[1] >> 63) & 1);
+ H[1] = H[1] << 1 | H[0] >> 63;
+ H[0] = H[0] << 1;
+ if (reduce) {
+ H[0] ^= 0x87;
+ }
+ }
+
+ vd[i * 2 + 0] = brev8(Z[0]);
+ vd[i * 2 + 1] = brev8(Z[1]);
+ }
+ /* set tail elements to 1s */
+ vext_set_elems_1s(vd, vta, env->vl * 4, total_elems * 4);
+ env->vstart = 0;
+}
+
+void HELPER(vgmul_vv)(void *vd_vptr, void *vs2_vptr, CPURISCVState *env,
+ uint32_t desc)
+{
+ uint64_t *vd = vd_vptr;
+ uint64_t *vs2 = vs2_vptr;
+ uint32_t vta = vext_vta(desc);
+ uint32_t total_elems = vext_get_total_elems(env, desc, 4);
+
+ for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) {
+ uint64_t Y[2] = {brev8(vd[i * 2 + 0]), brev8(vd[i * 2 + 1])};
+ uint64_t H[2] = {brev8(vs2[i * 2 + 0]), brev8(vs2[i * 2 + 1])};
+ uint64_t Z[2] = {0, 0};
+
+ for (int j = 0; j < 128; j++) {
+ if ((Y[j / 64] >> (j % 64)) & 1) {
+ Z[0] ^= H[0];
+ Z[1] ^= H[1];
+ }
+ bool reduce = ((H[1] >> 63) & 1);
+ H[1] = H[1] << 1 | H[0] >> 63;
+ H[0] = H[0] << 1;
+ if (reduce) {
+ H[0] ^= 0x87;
+ }
+ }
+
+ vd[i * 2 + 0] = brev8(Z[0]);
+ vd[i * 2 + 1] = brev8(Z[1]);
+ }
+ /* set tail elements to 1s */
+ vext_set_elems_1s(vd, vta, env->vl * 4, total_elems * 4);
+ env->vstart = 0;
+}
diff --git a/target/riscv/insn_trans/trans_rvvk.c.inc b/target/riscv/insn_trans/trans_rvvk.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/insn_trans/trans_rvvk.c.inc
+++ b/target/riscv/insn_trans/trans_rvvk.c.inc
@@ -XXX,XX +XXX,XX @@ static inline bool vsm3c_check(DisasContext *s, arg_rmrr *a)

GEN_VV_UNMASKED_TRANS(vsm3me_vv, vsm3me_check, ZVKSH_EGS)
GEN_VI_UNMASKED_TRANS(vsm3c_vi, vsm3c_check, ZVKSH_EGS)
+
+/*
+ * Zvkg
+ */
+
+#define ZVKG_EGS 4
+
+static bool vgmul_check(DisasContext *s, arg_rmr *a)
+{
+ int egw_bytes = ZVKG_EGS << s->sew;
+ return s->cfg_ptr->ext_zvkg == true &&
+ vext_check_isa_ill(s) &&
+ require_rvv(s) &&
+ MAXSZ(s) >= egw_bytes &&
+ vext_check_ss(s, a->rd, a->rs2, a->vm) &&
+ s->sew == MO_32;
+}
+
+GEN_V_UNMASKED_TRANS(vgmul_vv, vgmul_check, ZVKG_EGS)
+
+static bool vghsh_check(DisasContext *s, arg_rmrr *a)
+{
+ int egw_bytes = ZVKG_EGS << s->sew;
+ return s->cfg_ptr->ext_zvkg == true &&
+ opivv_check(s, a) &&
+ MAXSZ(s) >= egw_bytes &&
+ s->sew == MO_32;
+}
+
+GEN_VV_UNMASKED_TRANS(vghsh_vv, vghsh_check, ZVKG_EGS)
--
2.41.0
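Both helpers above share the same inner loop: a bit-serial carry-less multiplication in GF(2^128), applied to brev8 bit-reversed operands, with the GCM polynomial folded back via the 0x87 constant. A standalone restatement of that loop is sketched below for reference; it is not part of the patch, and the brev8 reflection the helpers perform on entry and exit is deliberately left out.

#include <stdint.h>
#include <stdbool.h>

/*
 * Bit-serial GF(2^128) multiply, z = y * h, mirroring the loop in
 * vghsh_vv/vgmul_vv. In this bit ordering the reduction constant 0x87
 * stands for the GCM polynomial x^128 + x^7 + x^2 + x + 1.
 */
static void gf128_mul(const uint64_t y[2], const uint64_t h_in[2],
                      uint64_t z[2])
{
    uint64_t h[2] = { h_in[0], h_in[1] };

    z[0] = 0;
    z[1] = 0;
    for (int j = 0; j < 128; j++) {
        if ((y[j / 64] >> (j % 64)) & 1) {
            z[0] ^= h[0];               /* accumulate H * x^j */
            z[1] ^= h[1];
        }
        bool reduce = (h[1] >> 63) & 1; /* bit about to leave 128 bits */
        h[1] = h[1] << 1 | h[0] >> 63;  /* shift H left across halves */
        h[0] = h[0] << 1;
        if (reduce) {
            h[0] ^= 0x87;               /* reduce modulo the polynomial */
        }
    }
}

vghsh.vv runs this once per element group with y = brev8(Y ^ X), giving one GHASH block step; vgmul.vv is the same multiply without the XOR of new input.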
From: Max Chou <max.chou@sifive.com>

Allows sharing of sm4_subword between different targets.

Signed-off-by: Max Chou <max.chou@sifive.com>
Reviewed-by: Frank Chang <frank.chang@sifive.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Max Chou <max.chou@sifive.com>
Message-ID: <20230711165917.2629866-14-max.chou@sifive.com>
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
---
include/crypto/sm4.h | 8 ++++++++
target/arm/tcg/crypto_helper.c | 10 ++--------
2 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/include/crypto/sm4.h b/include/crypto/sm4.h
index XXXXXXX..XXXXXXX 100644
--- a/include/crypto/sm4.h
+++ b/include/crypto/sm4.h
@@ -XXX,XX +XXX,XX @@

extern const uint8_t sm4_sbox[256];

+static inline uint32_t sm4_subword(uint32_t word)
+{
+ return sm4_sbox[word & 0xff] |
+ sm4_sbox[(word >> 8) & 0xff] << 8 |
+ sm4_sbox[(word >> 16) & 0xff] << 16 |
+ sm4_sbox[(word >> 24) & 0xff] << 24;
+}
+
#endif
diff --git a/target/arm/tcg/crypto_helper.c b/target/arm/tcg/crypto_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/tcg/crypto_helper.c
+++ b/target/arm/tcg/crypto_helper.c
@@ -XXX,XX +XXX,XX @@ static void do_crypto_sm4e(uint64_t *rd, uint64_t *rn, uint64_t *rm)
CR_ST_WORD(d, (i + 3) % 4) ^
CR_ST_WORD(n, i);

- t = sm4_sbox[t & 0xff] |
- sm4_sbox[(t >> 8) & 0xff] << 8 |
- sm4_sbox[(t >> 16) & 0xff] << 16 |
- sm4_sbox[(t >> 24) & 0xff] << 24;
+ t = sm4_subword(t);

CR_ST_WORD(d, i) ^= t ^ rol32(t, 2) ^ rol32(t, 10) ^ rol32(t, 18) ^
rol32(t, 24);
@@ -XXX,XX +XXX,XX @@ static void do_crypto_sm4ekey(uint64_t *rd, uint64_t *rn, uint64_t *rm)
CR_ST_WORD(d, (i + 3) % 4) ^
CR_ST_WORD(m, i);

- t = sm4_sbox[t & 0xff] |
- sm4_sbox[(t >> 8) & 0xff] << 8 |
- sm4_sbox[(t >> 16) & 0xff] << 16 |
- sm4_sbox[(t >> 24) & 0xff] << 24;
+ t = sm4_subword(t);

CR_ST_WORD(d, i) ^= t ^ rol32(t, 13) ^ rol32(t, 23);
}
--
2.41.0
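As a usage note, both SM4 call sites compose sm4_subword() with a rotate-XOR linear layer: the round function uses rotations {2, 10, 18, 24} (visible in do_crypto_sm4e above) and the key schedule uses {13, 23}. The sketch below spells out the two composite transforms; it assumes it is built inside the QEMU tree so that crypto/sm4.c provides sm4_sbox, and rol32() is written out locally (in-tree code would take it from qemu/bitops.h instead).

#include <stdint.h>
#include "crypto/sm4.h"   /* sm4_sbox / sm4_subword, QEMU tree assumed */

static inline uint32_t rol32(uint32_t x, unsigned k)
{
    return (x << k) | (x >> (32 - k));
}

/* SM4 round transform T: S-box substitution followed by linear layer L. */
static uint32_t sm4_t(uint32_t x)
{
    uint32_t t = sm4_subword(x);
    return t ^ rol32(t, 2) ^ rol32(t, 10) ^ rol32(t, 18) ^ rol32(t, 24);
}

/* SM4 key-schedule transform T': same S-box, lighter linear layer L'. */
static uint32_t sm4_t_prime(uint32_t x)
{
    uint32_t t = sm4_subword(x);
    return t ^ rol32(t, 13) ^ rol32(t, 23);
}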
From: Max Chou <max.chou@sifive.com>

Adds sm4_ck constant for use in sm4 cryptography across different targets.

Signed-off-by: Max Chou <max.chou@sifive.com>
Reviewed-by: Frank Chang <frank.chang@sifive.com>
Signed-off-by: Max Chou <max.chou@sifive.com>
Message-ID: <20230711165917.2629866-15-max.chou@sifive.com>
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
---
include/crypto/sm4.h | 1 +
crypto/sm4.c | 10 ++++++++++
2 files changed, 11 insertions(+)

diff --git a/include/crypto/sm4.h b/include/crypto/sm4.h
index XXXXXXX..XXXXXXX 100644
--- a/include/crypto/sm4.h
+++ b/include/crypto/sm4.h
@@ -XXX,XX +XXX,XX @@
#define QEMU_SM4_H

extern const uint8_t sm4_sbox[256];
+extern const uint32_t sm4_ck[32];

static inline uint32_t sm4_subword(uint32_t word)
{
diff --git a/crypto/sm4.c b/crypto/sm4.c
index XXXXXXX..XXXXXXX 100644
--- a/crypto/sm4.c
+++ b/crypto/sm4.c
@@ -XXX,XX +XXX,XX @@ uint8_t const sm4_sbox[] = {
0x79, 0xee, 0x5f, 0x3e, 0xd7, 0xcb, 0x39, 0x48,
};

+uint32_t const sm4_ck[] = {
+ 0x00070e15, 0x1c232a31, 0x383f464d, 0x545b6269,
+ 0x70777e85, 0x8c939aa1, 0xa8afb6bd, 0xc4cbd2d9,
+ 0xe0e7eef5, 0xfc030a11, 0x181f262d, 0x343b4249,
+ 0x50575e65, 0x6c737a81, 0x888f969d, 0xa4abb2b9,
+ 0xc0c7ced5, 0xdce3eaf1, 0xf8ff060d, 0x141b2229,
+ 0x30373e45, 0x4c535a61, 0x686f767d, 0x848b9299,
+ 0xa0a7aeb5, 0xbcc3cad1, 0xd8dfe6ed, 0xf4fb0209,
+ 0x10171e25, 0x2c333a41, 0x484f565d, 0x646b7279
+};
--
2.41.0
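For illustration, sm4_ck supplies one round constant per key-expansion step; the vsm4k.vi helper later in this series indexes it as sm4_ck[rnd * 4 + j]. A scalar sketch of the full expansion follows, under the same in-tree assumptions as the previous example, and simplified in that the SM4 specification's initial XOR of the key with the FK constants is left out:

#include <stdint.h>
#include "crypto/sm4.h"   /* sm4_subword / sm4_ck, QEMU tree assumed */

static inline uint32_t rol32(uint32_t x, unsigned k)
{
    return (x << k) | (x >> (32 - k));
}

/*
 * Expand four key words into 32 round keys with the same recurrence
 * that HELPER(vsm4k_vi) applies per element group:
 *   rk[i] = k[i] ^ T'(k[i+1] ^ k[i+2] ^ k[i+3] ^ CK[i])
 */
static void sm4_expand_key(const uint32_t k[4], uint32_t rk[32])
{
    uint32_t tmp[36];

    for (int i = 0; i < 4; i++) {
        tmp[i] = k[i];   /* spec-complete code would XOR in FK[i] here */
    }
    for (int i = 0; i < 32; i++) {
        uint32_t b = tmp[i + 1] ^ tmp[i + 2] ^ tmp[i + 3] ^ sm4_ck[i];
        uint32_t s = sm4_subword(b);

        tmp[i + 4] = tmp[i] ^ (s ^ rol32(s, 13) ^ rol32(s, 23));
        rk[i] = tmp[i + 4];
    }
}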
From: Max Chou <max.chou@sifive.com>

This commit adds support for the Zvksed vector-crypto extension, which
consists of the following instructions:

 * vsm4k.vi
 * vsm4r.[vv,vs]

Translation functions are defined in
`target/riscv/insn_trans/trans_rvvk.c.inc` and helpers are defined in
`target/riscv/vcrypto_helper.c`.

Signed-off-by: Max Chou <max.chou@sifive.com>
Reviewed-by: Frank Chang <frank.chang@sifive.com>
[lawrence.hunter@codethink.co.uk: Moved SM4 functions from
crypto_helper.c to vcrypto_helper.c]
[nazar.kazakov@codethink.co.uk: Added alignment checks, refactored code to
use macros, and minor style changes]
Signed-off-by: Max Chou <max.chou@sifive.com>
Message-ID: <20230711165917.2629866-16-max.chou@sifive.com>
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
---
target/riscv/cpu_cfg.h | 1 +
target/riscv/helper.h | 4 +
target/riscv/insn32.decode | 5 +
target/riscv/cpu.c | 5 +-
target/riscv/vcrypto_helper.c | 127 +++++++++++++++++++++++
target/riscv/insn_trans/trans_rvvk.c.inc | 43 ++++++++
6 files changed, 184 insertions(+), 1 deletion(-)

diff --git a/target/riscv/cpu_cfg.h b/target/riscv/cpu_cfg.h
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/cpu_cfg.h
+++ b/target/riscv/cpu_cfg.h
@@ -XXX,XX +XXX,XX @@ struct RISCVCPUConfig {
bool ext_zvkned;
bool ext_zvknha;
bool ext_zvknhb;
+ bool ext_zvksed;
bool ext_zvksh;
bool ext_zmmul;
bool ext_zvfbfmin;
diff --git a/target/riscv/helper.h b/target/riscv/helper.h
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/helper.h
+++ b/target/riscv/helper.h
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_5(vsm3c_vi, void, ptr, ptr, i32, env, i32)

DEF_HELPER_5(vghsh_vv, void, ptr, ptr, ptr, env, i32)
DEF_HELPER_4(vgmul_vv, void, ptr, ptr, env, i32)
+
+DEF_HELPER_5(vsm4k_vi, void, ptr, ptr, i32, env, i32)
+DEF_HELPER_4(vsm4r_vv, void, ptr, ptr, env, i32)
+DEF_HELPER_4(vsm4r_vs, void, ptr, ptr, env, i32)
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/insn32.decode
+++ b/target/riscv/insn32.decode
@@ -XXX,XX +XXX,XX @@ vsm3c_vi 101011 1 ..... ..... 010 ..... 1110111 @r_vm_1
# *** Zvkg vector crypto extension ***
vghsh_vv 101100 1 ..... ..... 010 ..... 1110111 @r_vm_1
vgmul_vv 101000 1 ..... 10001 010 ..... 1110111 @r2_vm_1
+
+# *** Zvksed vector crypto extension ***
+vsm4k_vi 100001 1 ..... ..... 010 ..... 1110111 @r_vm_1
+vsm4r_vv 101000 1 ..... 10000 010 ..... 1110111 @r2_vm_1
+vsm4r_vs 101001 1 ..... 10000 010 ..... 1110111 @r2_vm_1
diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -XXX,XX +XXX,XX @@ static const struct isa_ext_data isa_edata_arr[] = {
ISA_EXT_DATA_ENTRY(zvkned, PRIV_VERSION_1_12_0, ext_zvkned),
ISA_EXT_DATA_ENTRY(zvknha, PRIV_VERSION_1_12_0, ext_zvknha),
ISA_EXT_DATA_ENTRY(zvknhb, PRIV_VERSION_1_12_0, ext_zvknhb),
+ ISA_EXT_DATA_ENTRY(zvksed, PRIV_VERSION_1_12_0, ext_zvksed),
ISA_EXT_DATA_ENTRY(zvksh, PRIV_VERSION_1_12_0, ext_zvksh),
ISA_EXT_DATA_ENTRY(zhinx, PRIV_VERSION_1_12_0, ext_zhinx),
ISA_EXT_DATA_ENTRY(zhinxmin, PRIV_VERSION_1_12_0, ext_zhinxmin),
@@ -XXX,XX +XXX,XX @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp)
* in qemu
*/
if ((cpu->cfg.ext_zvbb || cpu->cfg.ext_zvkg || cpu->cfg.ext_zvkned ||
- cpu->cfg.ext_zvknha || cpu->cfg.ext_zvksh) && !cpu->cfg.ext_zve32f) {
+ cpu->cfg.ext_zvknha || cpu->cfg.ext_zvksed || cpu->cfg.ext_zvksh) &&
+ !cpu->cfg.ext_zve32f) {
error_setg(errp,
"Vector crypto extensions require V or Zve* extensions");
return;
@@ -XXX,XX +XXX,XX @@ static Property riscv_cpu_extensions[] = {
DEFINE_PROP_BOOL("x-zvkned", RISCVCPU, cfg.ext_zvkned, false),
DEFINE_PROP_BOOL("x-zvknha", RISCVCPU, cfg.ext_zvknha, false),
DEFINE_PROP_BOOL("x-zvknhb", RISCVCPU, cfg.ext_zvknhb, false),
+ DEFINE_PROP_BOOL("x-zvksed", RISCVCPU, cfg.ext_zvksed, false),
DEFINE_PROP_BOOL("x-zvksh", RISCVCPU, cfg.ext_zvksh, false),

DEFINE_PROP_END_OF_LIST(),
};
diff --git a/target/riscv/vcrypto_helper.c b/target/riscv/vcrypto_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/vcrypto_helper.c
+++ b/target/riscv/vcrypto_helper.c
@@ -XXX,XX +XXX,XX @@
#include "cpu.h"
#include "crypto/aes.h"
#include "crypto/aes-round.h"
+#include "crypto/sm4.h"
#include "exec/memop.h"
#include "exec/exec-all.h"
#include "exec/helper-proto.h"
@@ -XXX,XX +XXX,XX @@ void HELPER(vgmul_vv)(void *vd_vptr, void *vs2_vptr, CPURISCVState *env,
vext_set_elems_1s(vd, vta, env->vl * 4, total_elems * 4);
env->vstart = 0;
}
+
+void HELPER(vsm4k_vi)(void *vd, void *vs2, uint32_t uimm5, CPURISCVState *env,
+ uint32_t desc)
+{
+ const uint32_t egs = 4;
+ uint32_t rnd = uimm5 & 0x7;
+ uint32_t group_start = env->vstart / egs;
+ uint32_t group_end = env->vl / egs;
+ uint32_t esz = sizeof(uint32_t);
+ uint32_t total_elems = vext_get_total_elems(env, desc, esz);
+
+ for (uint32_t i = group_start; i < group_end; ++i) {
+ uint32_t vstart = i * egs;
+ uint32_t vend = (i + 1) * egs;
+ uint32_t rk[4] = {0};
+ uint32_t tmp[8] = {0};
+
+ for (uint32_t j = vstart; j < vend; ++j) {
+ rk[j - vstart] = *((uint32_t *)vs2 + H4(j));
+ }
+
+ for (uint32_t j = 0; j < egs; ++j) {
+ tmp[j] = rk[j];
+ }
+
+ for (uint32_t j = 0; j < egs; ++j) {
+ uint32_t b, s;
+ b = tmp[j + 1] ^ tmp[j + 2] ^ tmp[j + 3] ^ sm4_ck[rnd * 4 + j];
+
+ s = sm4_subword(b);
+
+ tmp[j + 4] = tmp[j] ^ (s ^ rol32(s, 13) ^ rol32(s, 23));
+ }
+
+ for (uint32_t j = vstart; j < vend; ++j) {
+ *((uint32_t *)vd + H4(j)) = tmp[egs + (j - vstart)];
+ }
+ }
+
+ env->vstart = 0;
+ /* set tail elements to 1s */
+ vext_set_elems_1s(vd, vext_vta(desc), env->vl * esz, total_elems * esz);
+}
+
+static void do_sm4_round(uint32_t *rk, uint32_t *buf)
+{
+ const uint32_t egs = 4;
+ uint32_t s, b;
+
+ for (uint32_t j = egs; j < egs * 2; ++j) {
+ b = buf[j - 3] ^ buf[j - 2] ^ buf[j - 1] ^ rk[j - 4];
+
+ s = sm4_subword(b);
+
+ buf[j] = buf[j - 4] ^ (s ^ rol32(s, 2) ^ rol32(s, 10) ^ rol32(s, 18) ^
+ rol32(s, 24));
+ }
+}
+
+void HELPER(vsm4r_vv)(void *vd, void *vs2, CPURISCVState *env, uint32_t desc)
+{
+ const uint32_t egs = 4;
+ uint32_t group_start = env->vstart / egs;
+ uint32_t group_end = env->vl / egs;
+ uint32_t esz = sizeof(uint32_t);
+ uint32_t total_elems = vext_get_total_elems(env, desc, esz);
+
+ for (uint32_t i = group_start; i < group_end; ++i) {
+ uint32_t vstart = i * egs;
+ uint32_t vend = (i + 1) * egs;
+ uint32_t rk[4] = {0};
+ uint32_t tmp[8] = {0};
+
+ for (uint32_t j = vstart; j < vend; ++j) {
+ rk[j - vstart] = *((uint32_t *)vs2 + H4(j));
+ }
+
+ for (uint32_t j = vstart; j < vend; ++j) {
+ tmp[j - vstart] = *((uint32_t *)vd + H4(j));
+ }
+
+ do_sm4_round(rk, tmp);
+
+ for (uint32_t j = vstart; j < vend; ++j) {
+ *((uint32_t *)vd + H4(j)) = tmp[egs + (j - vstart)];
+ }
+ }
+
+ env->vstart = 0;
+ /* set tail elements to 1s */
+ vext_set_elems_1s(vd, vext_vta(desc), env->vl * esz, total_elems * esz);
+}
+
+void HELPER(vsm4r_vs)(void *vd, void *vs2, CPURISCVState *env, uint32_t desc)
+{
+ const uint32_t egs = 4;
+ uint32_t group_start = env->vstart / egs;
+ uint32_t group_end = env->vl / egs;
+ uint32_t esz = sizeof(uint32_t);
+ uint32_t total_elems = vext_get_total_elems(env, desc, esz);
+
+ for (uint32_t i = group_start; i < group_end; ++i) {
+ uint32_t vstart = i * egs;
+ uint32_t vend = (i + 1) * egs;
+ uint32_t rk[4] = {0};
+ uint32_t tmp[8] = {0};
+
+ for (uint32_t j = 0; j < egs; ++j) {
+ rk[j] = *((uint32_t *)vs2 + H4(j));
+ }
+
+ for (uint32_t j = vstart; j < vend; ++j) {
+ tmp[j - vstart] = *((uint32_t *)vd + H4(j));
+ }
+
+ do_sm4_round(rk, tmp);
+
+ for (uint32_t j = vstart; j < vend; ++j) {
+ *((uint32_t *)vd + H4(j)) = tmp[egs + (j - vstart)];
+ }
+ }
+
+ env->vstart = 0;
+ /* set tail elements to 1s */
+ vext_set_elems_1s(vd, vext_vta(desc), env->vl * esz, total_elems * esz);
+}
diff --git a/target/riscv/insn_trans/trans_rvvk.c.inc b/target/riscv/insn_trans/trans_rvvk.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/insn_trans/trans_rvvk.c.inc
+++ b/target/riscv/insn_trans/trans_rvvk.c.inc
@@ -XXX,XX +XXX,XX @@ static bool vghsh_check(DisasContext *s, arg_rmrr *a)
}

GEN_VV_UNMASKED_TRANS(vghsh_vv, vghsh_check, ZVKG_EGS)
+
+/*
+ * Zvksed
+ */
+
+#define ZVKSED_EGS 4
+
+static bool zvksed_check(DisasContext *s)
+{
+ int egw_bytes = ZVKSED_EGS << s->sew;
+ return s->cfg_ptr->ext_zvksed == true &&
+ require_rvv(s) &&
+ vext_check_isa_ill(s) &&
+ MAXSZ(s) >= egw_bytes &&
+ s->sew == MO_32;
+}
+
+static bool vsm4k_vi_check(DisasContext *s, arg_rmrr *a)
+{
+ return zvksed_check(s) &&
+ require_align(a->rd, s->lmul) &&
+ require_align(a->rs2, s->lmul);
+}
+
+GEN_VI_UNMASKED_TRANS(vsm4k_vi, vsm4k_vi_check, ZVKSED_EGS)
+
+static bool vsm4r_vv_check(DisasContext *s, arg_rmr *a)
+{
+ return zvksed_check(s) &&
+ require_align(a->rd, s->lmul) &&
+ require_align(a->rs2, s->lmul);
+}
+
+GEN_V_UNMASKED_TRANS(vsm4r_vv, vsm4r_vv_check, ZVKSED_EGS)
+
+static bool vsm4r_vs_check(DisasContext *s, arg_rmr *a)
+{
+ return zvksed_check(s) &&
+ !is_overlapped(a->rd, 1 << MAX(s->lmul, 0), a->rs2, 1) &&
+ require_align(a->rd, s->lmul);
+}
+
+GEN_V_UNMASKED_TRANS(vsm4r_vs, vsm4r_vs_check, ZVKSED_EGS)
315
{
316
@@ -XXX,XX +XXX,XX @@ static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
317
RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16)
318
RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32)
319
RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64)
320
-GEN_VEXT_VV_ENV(vfmsac_vv_h)
321
-GEN_VEXT_VV_ENV(vfmsac_vv_w)
322
-GEN_VEXT_VV_ENV(vfmsac_vv_d)
323
+GEN_VEXT_VV_ENV(vfmsac_vv_h, 2)
324
+GEN_VEXT_VV_ENV(vfmsac_vv_w, 4)
325
+GEN_VEXT_VV_ENV(vfmsac_vv_d, 8)
326
RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16)
327
RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32)
328
RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64)
329
-GEN_VEXT_VF(vfmsac_vf_h)
330
-GEN_VEXT_VF(vfmsac_vf_w)
331
-GEN_VEXT_VF(vfmsac_vf_d)
332
+GEN_VEXT_VF(vfmsac_vf_h, 2)
333
+GEN_VEXT_VF(vfmsac_vf_w, 4)
334
+GEN_VEXT_VF(vfmsac_vf_d, 8)
335
336
static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
337
{
338
@@ -XXX,XX +XXX,XX @@ static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
339
RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16)
340
RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32)
341
RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64)
342
-GEN_VEXT_VV_ENV(vfnmsac_vv_h)
343
-GEN_VEXT_VV_ENV(vfnmsac_vv_w)
344
-GEN_VEXT_VV_ENV(vfnmsac_vv_d)
345
+GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2)
346
+GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4)
347
+GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8)
348
RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16)
349
RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32)
350
RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64)
351
-GEN_VEXT_VF(vfnmsac_vf_h)
352
-GEN_VEXT_VF(vfnmsac_vf_w)
353
-GEN_VEXT_VF(vfnmsac_vf_d)
354
+GEN_VEXT_VF(vfnmsac_vf_h, 2)
355
+GEN_VEXT_VF(vfnmsac_vf_w, 4)
356
+GEN_VEXT_VF(vfnmsac_vf_d, 8)
357
358
static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
359
{
360
@@ -XXX,XX +XXX,XX @@ static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
361
RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16)
362
RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32)
363
RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64)
364
-GEN_VEXT_VV_ENV(vfmadd_vv_h)
365
-GEN_VEXT_VV_ENV(vfmadd_vv_w)
366
-GEN_VEXT_VV_ENV(vfmadd_vv_d)
367
+GEN_VEXT_VV_ENV(vfmadd_vv_h, 2)
368
+GEN_VEXT_VV_ENV(vfmadd_vv_w, 4)
369
+GEN_VEXT_VV_ENV(vfmadd_vv_d, 8)
370
RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16)
371
RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32)
372
RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64)
373
-GEN_VEXT_VF(vfmadd_vf_h)
374
-GEN_VEXT_VF(vfmadd_vf_w)
375
-GEN_VEXT_VF(vfmadd_vf_d)
376
+GEN_VEXT_VF(vfmadd_vf_h, 2)
377
+GEN_VEXT_VF(vfmadd_vf_w, 4)
378
+GEN_VEXT_VF(vfmadd_vf_d, 8)
379
380
static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
381
{
382
@@ -XXX,XX +XXX,XX @@ static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
383
RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16)
384
RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32)
385
RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64)
386
-GEN_VEXT_VV_ENV(vfnmadd_vv_h)
387
-GEN_VEXT_VV_ENV(vfnmadd_vv_w)
388
-GEN_VEXT_VV_ENV(vfnmadd_vv_d)
389
+GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2)
390
+GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4)
391
+GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8)
392
RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16)
393
RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32)
394
RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64)
395
-GEN_VEXT_VF(vfnmadd_vf_h)
396
-GEN_VEXT_VF(vfnmadd_vf_w)
397
-GEN_VEXT_VF(vfnmadd_vf_d)
398
+GEN_VEXT_VF(vfnmadd_vf_h, 2)
399
+GEN_VEXT_VF(vfnmadd_vf_w, 4)
400
+GEN_VEXT_VF(vfnmadd_vf_d, 8)
401
402
static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
403
{
404
@@ -XXX,XX +XXX,XX @@ static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
405
RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16)
406
RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32)
407
RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64)
408
-GEN_VEXT_VV_ENV(vfmsub_vv_h)
409
-GEN_VEXT_VV_ENV(vfmsub_vv_w)
410
-GEN_VEXT_VV_ENV(vfmsub_vv_d)
411
+GEN_VEXT_VV_ENV(vfmsub_vv_h, 2)
412
+GEN_VEXT_VV_ENV(vfmsub_vv_w, 4)
413
+GEN_VEXT_VV_ENV(vfmsub_vv_d, 8)
414
RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16)
415
RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32)
416
RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64)
417
-GEN_VEXT_VF(vfmsub_vf_h)
418
-GEN_VEXT_VF(vfmsub_vf_w)
419
-GEN_VEXT_VF(vfmsub_vf_d)
420
+GEN_VEXT_VF(vfmsub_vf_h, 2)
421
+GEN_VEXT_VF(vfmsub_vf_w, 4)
422
+GEN_VEXT_VF(vfmsub_vf_d, 8)
423
424
static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
425
{
426
@@ -XXX,XX +XXX,XX @@ static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
427
RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16)
428
RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32)
429
RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64)
430
-GEN_VEXT_VV_ENV(vfnmsub_vv_h)
431
-GEN_VEXT_VV_ENV(vfnmsub_vv_w)
432
-GEN_VEXT_VV_ENV(vfnmsub_vv_d)
433
+GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2)
434
+GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4)
435
+GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8)
436
RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16)
437
RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32)
438
RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64)
439
-GEN_VEXT_VF(vfnmsub_vf_h)
440
-GEN_VEXT_VF(vfnmsub_vf_w)
441
-GEN_VEXT_VF(vfnmsub_vf_d)
442
+GEN_VEXT_VF(vfnmsub_vf_h, 2)
443
+GEN_VEXT_VF(vfnmsub_vf_w, 4)
444
+GEN_VEXT_VF(vfnmsub_vf_d, 8)
445
446
/* Vector Widening Floating-Point Fused Multiply-Add Instructions */
447
static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
448
@@ -XXX,XX +XXX,XX @@ static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
449
450
RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16)
451
RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32)
452
-GEN_VEXT_VV_ENV(vfwmacc_vv_h)
453
-GEN_VEXT_VV_ENV(vfwmacc_vv_w)
454
+GEN_VEXT_VV_ENV(vfwmacc_vv_h, 4)
455
+GEN_VEXT_VV_ENV(vfwmacc_vv_w, 8)
456
RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16)
457
RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32)
458
-GEN_VEXT_VF(vfwmacc_vf_h)
459
-GEN_VEXT_VF(vfwmacc_vf_w)
460
+GEN_VEXT_VF(vfwmacc_vf_h, 4)
461
+GEN_VEXT_VF(vfwmacc_vf_w, 8)
462
463
static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
464
{
465
@@ -XXX,XX +XXX,XX @@ static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
466
467
RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16)
468
RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32)
469
-GEN_VEXT_VV_ENV(vfwnmacc_vv_h)
470
-GEN_VEXT_VV_ENV(vfwnmacc_vv_w)
471
+GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 4)
472
+GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 8)
473
RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16)
474
RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32)
475
-GEN_VEXT_VF(vfwnmacc_vf_h)
476
-GEN_VEXT_VF(vfwnmacc_vf_w)
477
+GEN_VEXT_VF(vfwnmacc_vf_h, 4)
478
+GEN_VEXT_VF(vfwnmacc_vf_w, 8)
479
480
static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
481
{
482
@@ -XXX,XX +XXX,XX @@ static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
483
484
RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16)
485
RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32)
486
-GEN_VEXT_VV_ENV(vfwmsac_vv_h)
487
-GEN_VEXT_VV_ENV(vfwmsac_vv_w)
488
+GEN_VEXT_VV_ENV(vfwmsac_vv_h, 4)
489
+GEN_VEXT_VV_ENV(vfwmsac_vv_w, 8)
490
RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16)
491
RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32)
492
-GEN_VEXT_VF(vfwmsac_vf_h)
493
-GEN_VEXT_VF(vfwmsac_vf_w)
494
+GEN_VEXT_VF(vfwmsac_vf_h, 4)
495
+GEN_VEXT_VF(vfwmsac_vf_w, 8)
496
497
static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
498
{
499
@@ -XXX,XX +XXX,XX @@ static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
500
501
RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16)
502
RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32)
503
-GEN_VEXT_VV_ENV(vfwnmsac_vv_h)
504
-GEN_VEXT_VV_ENV(vfwnmsac_vv_w)
505
+GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 4)
506
+GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 8)
507
RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16)
508
RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32)
509
-GEN_VEXT_VF(vfwnmsac_vf_h)
510
-GEN_VEXT_VF(vfwnmsac_vf_w)
511
+GEN_VEXT_VF(vfwnmsac_vf_h, 4)
512
+GEN_VEXT_VF(vfwnmsac_vf_w, 8)
513
514
/* Vector Floating-Point Square-Root Instruction */
515
/* (TD, T2, TX2) */
516
@@ -XXX,XX +XXX,XX @@ static void do_##NAME(void *vd, void *vs2, int i, \
517
*((TD *)vd + HD(i)) = OP(s2, &env->fp_status); \
518
}
519
520
-#define GEN_VEXT_V_ENV(NAME) \
521
+#define GEN_VEXT_V_ENV(NAME, ESZ) \
522
void HELPER(NAME)(void *vd, void *v0, void *vs2, \
523
CPURISCVState *env, uint32_t desc) \
524
{ \
525
uint32_t vm = vext_vm(desc); \
526
uint32_t vl = env->vl; \
527
+ uint32_t total_elems = \
528
+ vext_get_total_elems(env, desc, ESZ); \
529
+ uint32_t vta = vext_vta(desc); \
530
uint32_t i; \
531
\
532
if (vl == 0) { \
533
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs2, \
534
do_##NAME(vd, vs2, i, env); \
535
} \
536
env->vstart = 0; \
537
+ vext_set_elems_1s(vd, vta, vl * ESZ, \
538
+ total_elems * ESZ); \
539
}
540
541
RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt)
542
RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt)
543
RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt)
544
-GEN_VEXT_V_ENV(vfsqrt_v_h)
545
-GEN_VEXT_V_ENV(vfsqrt_v_w)
546
-GEN_VEXT_V_ENV(vfsqrt_v_d)
547
+GEN_VEXT_V_ENV(vfsqrt_v_h, 2)
548
+GEN_VEXT_V_ENV(vfsqrt_v_w, 4)
549
+GEN_VEXT_V_ENV(vfsqrt_v_d, 8)
550
551
/*
552
* Vector Floating-Point Reciprocal Square-Root Estimate Instruction
553
@@ -XXX,XX +XXX,XX @@ static float64 frsqrt7_d(float64 f, float_status *s)
554
RVVCALL(OPFVV1, vfrsqrt7_v_h, OP_UU_H, H2, H2, frsqrt7_h)
555
RVVCALL(OPFVV1, vfrsqrt7_v_w, OP_UU_W, H4, H4, frsqrt7_s)
556
RVVCALL(OPFVV1, vfrsqrt7_v_d, OP_UU_D, H8, H8, frsqrt7_d)
557
-GEN_VEXT_V_ENV(vfrsqrt7_v_h)
558
-GEN_VEXT_V_ENV(vfrsqrt7_v_w)
559
-GEN_VEXT_V_ENV(vfrsqrt7_v_d)
560
+GEN_VEXT_V_ENV(vfrsqrt7_v_h, 2)
561
+GEN_VEXT_V_ENV(vfrsqrt7_v_w, 4)
562
+GEN_VEXT_V_ENV(vfrsqrt7_v_d, 8)
563
564
/*
565
* Vector Floating-Point Reciprocal Estimate Instruction
566
@@ -XXX,XX +XXX,XX @@ static float64 frec7_d(float64 f, float_status *s)
567
RVVCALL(OPFVV1, vfrec7_v_h, OP_UU_H, H2, H2, frec7_h)
568
RVVCALL(OPFVV1, vfrec7_v_w, OP_UU_W, H4, H4, frec7_s)
569
RVVCALL(OPFVV1, vfrec7_v_d, OP_UU_D, H8, H8, frec7_d)
570
-GEN_VEXT_V_ENV(vfrec7_v_h)
571
-GEN_VEXT_V_ENV(vfrec7_v_w)
572
-GEN_VEXT_V_ENV(vfrec7_v_d)
573
+GEN_VEXT_V_ENV(vfrec7_v_h, 2)
574
+GEN_VEXT_V_ENV(vfrec7_v_w, 4)
575
+GEN_VEXT_V_ENV(vfrec7_v_d, 8)
576
577
/* Vector Floating-Point MIN/MAX Instructions */
578
RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minimum_number)
579
RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, float32_minimum_number)
580
RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minimum_number)
581
-GEN_VEXT_VV_ENV(vfmin_vv_h)
582
-GEN_VEXT_VV_ENV(vfmin_vv_w)
583
-GEN_VEXT_VV_ENV(vfmin_vv_d)
584
+GEN_VEXT_VV_ENV(vfmin_vv_h, 2)
585
+GEN_VEXT_VV_ENV(vfmin_vv_w, 4)
586
+GEN_VEXT_VV_ENV(vfmin_vv_d, 8)
587
RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minimum_number)
588
RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minimum_number)
589
RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minimum_number)
590
-GEN_VEXT_VF(vfmin_vf_h)
591
-GEN_VEXT_VF(vfmin_vf_w)
592
-GEN_VEXT_VF(vfmin_vf_d)
593
+GEN_VEXT_VF(vfmin_vf_h, 2)
594
+GEN_VEXT_VF(vfmin_vf_w, 4)
595
+GEN_VEXT_VF(vfmin_vf_d, 8)
596
597
RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maximum_number)
598
RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maximum_number)
599
RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maximum_number)
600
-GEN_VEXT_VV_ENV(vfmax_vv_h)
601
-GEN_VEXT_VV_ENV(vfmax_vv_w)
602
-GEN_VEXT_VV_ENV(vfmax_vv_d)
603
+GEN_VEXT_VV_ENV(vfmax_vv_h, 2)
604
+GEN_VEXT_VV_ENV(vfmax_vv_w, 4)
605
+GEN_VEXT_VV_ENV(vfmax_vv_d, 8)
606
RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maximum_number)
607
RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maximum_number)
608
RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maximum_number)
609
-GEN_VEXT_VF(vfmax_vf_h)
610
-GEN_VEXT_VF(vfmax_vf_w)
611
-GEN_VEXT_VF(vfmax_vf_d)
612
+GEN_VEXT_VF(vfmax_vf_h, 2)
613
+GEN_VEXT_VF(vfmax_vf_w, 4)
614
+GEN_VEXT_VF(vfmax_vf_d, 8)
615
616
/* Vector Floating-Point Sign-Injection Instructions */
617
static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s)
618
@@ -XXX,XX +XXX,XX @@ static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s)
619
RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16)
620
RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32)
621
RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64)
622
-GEN_VEXT_VV_ENV(vfsgnj_vv_h)
623
-GEN_VEXT_VV_ENV(vfsgnj_vv_w)
624
-GEN_VEXT_VV_ENV(vfsgnj_vv_d)
625
+GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2)
626
+GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4)
627
+GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8)
628
RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16)
629
RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32)
630
RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64)
631
-GEN_VEXT_VF(vfsgnj_vf_h)
632
-GEN_VEXT_VF(vfsgnj_vf_w)
633
-GEN_VEXT_VF(vfsgnj_vf_d)
634
+GEN_VEXT_VF(vfsgnj_vf_h, 2)
635
+GEN_VEXT_VF(vfsgnj_vf_w, 4)
636
+GEN_VEXT_VF(vfsgnj_vf_d, 8)
637
638
static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s)
639
{
640
@@ -XXX,XX +XXX,XX @@ static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s)
641
RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16)
642
RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32)
643
RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64)
644
-GEN_VEXT_VV_ENV(vfsgnjn_vv_h)
645
-GEN_VEXT_VV_ENV(vfsgnjn_vv_w)
646
-GEN_VEXT_VV_ENV(vfsgnjn_vv_d)
647
+GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2)
648
+GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4)
649
+GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8)
650
RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16)
651
RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32)
652
RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64)
653
-GEN_VEXT_VF(vfsgnjn_vf_h)
654
-GEN_VEXT_VF(vfsgnjn_vf_w)
655
-GEN_VEXT_VF(vfsgnjn_vf_d)
656
+GEN_VEXT_VF(vfsgnjn_vf_h, 2)
657
+GEN_VEXT_VF(vfsgnjn_vf_w, 4)
658
+GEN_VEXT_VF(vfsgnjn_vf_d, 8)
659
660
static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s)
661
{
662
@@ -XXX,XX +XXX,XX @@ static uint64_t fsgnjx64(uint64_t a, uint64_t b, float_status *s)
663
RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16)
664
RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32)
665
RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64)
666
-GEN_VEXT_VV_ENV(vfsgnjx_vv_h)
667
-GEN_VEXT_VV_ENV(vfsgnjx_vv_w)
668
-GEN_VEXT_VV_ENV(vfsgnjx_vv_d)
669
+GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2)
670
+GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4)
671
+GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8)
672
RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16)
673
RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32)
674
RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64)
675
-GEN_VEXT_VF(vfsgnjx_vf_h)
676
-GEN_VEXT_VF(vfsgnjx_vf_w)
677
-GEN_VEXT_VF(vfsgnjx_vf_d)
678
+GEN_VEXT_VF(vfsgnjx_vf_h, 2)
679
+GEN_VEXT_VF(vfsgnjx_vf_w, 4)
680
+GEN_VEXT_VF(vfsgnjx_vf_d, 8)
681
682
/* Vector Floating-Point Compare Instructions */
683
#define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP) \
684
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
685
{ \
686
uint32_t vm = vext_vm(desc); \
687
uint32_t vl = env->vl; \
688
+ uint32_t total_elems = env_archcpu(env)->cfg.vlen; \
689
+ uint32_t vta_all_1s = vext_vta_all_1s(desc); \
690
uint32_t i; \
691
\
692
for (i = env->vstart; i < vl; i++) { \
693
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
694
DO_OP(s2, s1, &env->fp_status)); \
695
} \
696
env->vstart = 0; \
697
+ /* mask destination register are always tail-agnostic */ \
698
+ /* set tail elements to 1s */ \
699
+ if (vta_all_1s) { \
700
+ for (; i < total_elems; i++) { \
701
+ vext_set_elem_mask(vd, i, 1); \
702
+ } \
703
+ } \
704
}
705
706
GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet)
707
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
708
{ \
709
uint32_t vm = vext_vm(desc); \
710
uint32_t vl = env->vl; \
711
+ uint32_t total_elems = env_archcpu(env)->cfg.vlen; \
712
+ uint32_t vta_all_1s = vext_vta_all_1s(desc); \
713
uint32_t i; \
714
\
715
for (i = env->vstart; i < vl; i++) { \
716
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
717
DO_OP(s2, (ETYPE)s1, &env->fp_status)); \
718
} \
719
env->vstart = 0; \
720
+ /* mask destination register are always tail-agnostic */ \
721
+ /* set tail elements to 1s */ \
722
+ if (vta_all_1s) { \
723
+ for (; i < total_elems; i++) { \
724
+ vext_set_elem_mask(vd, i, 1); \
725
+ } \
726
+ } \
727
}
728
729
GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet)
730
@@ -XXX,XX +XXX,XX @@ static void do_##NAME(void *vd, void *vs2, int i) \
731
*((TD *)vd + HD(i)) = OP(s2); \
732
}
733
734
-#define GEN_VEXT_V(NAME) \
735
+#define GEN_VEXT_V(NAME, ESZ) \
736
void HELPER(NAME)(void *vd, void *v0, void *vs2, \
737
CPURISCVState *env, uint32_t desc) \
738
{ \
739
uint32_t vm = vext_vm(desc); \
740
uint32_t vl = env->vl; \
741
+ uint32_t total_elems = \
742
+ vext_get_total_elems(env, desc, ESZ); \
743
+ uint32_t vta = vext_vta(desc); \
744
uint32_t i; \
745
\
746
for (i = env->vstart; i < vl; i++) { \
747
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs2, \
748
do_##NAME(vd, vs2, i); \
749
} \
750
env->vstart = 0; \
751
+ /* set tail elements to 1s */ \
752
+ vext_set_elems_1s(vd, vta, vl * ESZ, \
753
+ total_elems * ESZ); \
754
}
755
756
target_ulong fclass_h(uint64_t frs1)
757
@@ -XXX,XX +XXX,XX @@ target_ulong fclass_d(uint64_t frs1)
758
RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h)
759
RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s)
760
RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d)
761
-GEN_VEXT_V(vfclass_v_h)
762
-GEN_VEXT_V(vfclass_v_w)
763
-GEN_VEXT_V(vfclass_v_d)
764
+GEN_VEXT_V(vfclass_v_h, 2)
765
+GEN_VEXT_V(vfclass_v_w, 4)
766
+GEN_VEXT_V(vfclass_v_d, 8)
767
768
/* Vector Floating-Point Merge Instruction */
769
+
770
#define GEN_VFMERGE_VF(NAME, ETYPE, H) \
771
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
772
CPURISCVState *env, uint32_t desc) \
773
{ \
774
uint32_t vm = vext_vm(desc); \
775
uint32_t vl = env->vl; \
776
+ uint32_t esz = sizeof(ETYPE); \
777
+ uint32_t total_elems = \
778
+ vext_get_total_elems(env, desc, esz); \
779
+ uint32_t vta = vext_vta(desc); \
780
uint32_t i; \
781
\
782
for (i = env->vstart; i < vl; i++) { \
783
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
784
= (!vm && !vext_elem_mask(v0, i) ? s2 : s1); \
785
} \
786
env->vstart = 0; \
787
+ /* set tail elements to 1s */ \
788
+ vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
789
}
790
791
GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2)
792
@@ -XXX,XX +XXX,XX @@ GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8)
793
RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16)
794
RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32)
795
RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64)
796
-GEN_VEXT_V_ENV(vfcvt_xu_f_v_h)
797
-GEN_VEXT_V_ENV(vfcvt_xu_f_v_w)
798
-GEN_VEXT_V_ENV(vfcvt_xu_f_v_d)
799
+GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2)
800
+GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4)
801
+GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8)
802
803
/* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */
804
RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16)
805
RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32)
806
RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64)
807
-GEN_VEXT_V_ENV(vfcvt_x_f_v_h)
808
-GEN_VEXT_V_ENV(vfcvt_x_f_v_w)
809
-GEN_VEXT_V_ENV(vfcvt_x_f_v_d)
810
+GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2)
811
+GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4)
812
+GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8)
813
814
/* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */
815
RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16)
816
RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32)
817
RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64)
818
-GEN_VEXT_V_ENV(vfcvt_f_xu_v_h)
819
-GEN_VEXT_V_ENV(vfcvt_f_xu_v_w)
820
-GEN_VEXT_V_ENV(vfcvt_f_xu_v_d)
821
+GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2)
822
+GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4)
823
+GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8)
824
825
/* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */
826
RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16)
827
RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32)
828
RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64)
829
-GEN_VEXT_V_ENV(vfcvt_f_x_v_h)
830
-GEN_VEXT_V_ENV(vfcvt_f_x_v_w)
831
-GEN_VEXT_V_ENV(vfcvt_f_x_v_d)
832
+GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2)
833
+GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4)
834
+GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8)
835
836
/* Widening Floating-Point/Integer Type-Convert Instructions */
837
/* (TD, T2, TX2) */
838
@@ -XXX,XX +XXX,XX @@ GEN_VEXT_V_ENV(vfcvt_f_x_v_d)
839
/* vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer.*/
840
RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32)
841
RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64)
842
-GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h)
843
-GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w)
844
+GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 4)
845
+GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 8)
846
847
/* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */
848
RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32)
849
RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64)
850
-GEN_VEXT_V_ENV(vfwcvt_x_f_v_h)
851
-GEN_VEXT_V_ENV(vfwcvt_x_f_v_w)
852
+GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 4)
853
+GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 8)
854
855
/* vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float */
856
RVVCALL(OPFVV1, vfwcvt_f_xu_v_b, WOP_UU_B, H2, H1, uint8_to_float16)
857
RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32)
858
RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64)
859
-GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b)
860
-GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h)
861
-GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w)
862
+GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b, 2)
863
+GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 4)
864
+GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 8)
865
866
/* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */
867
RVVCALL(OPFVV1, vfwcvt_f_x_v_b, WOP_UU_B, H2, H1, int8_to_float16)
868
RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32)
869
RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64)
870
-GEN_VEXT_V_ENV(vfwcvt_f_x_v_b)
871
-GEN_VEXT_V_ENV(vfwcvt_f_x_v_h)
872
-GEN_VEXT_V_ENV(vfwcvt_f_x_v_w)
873
+GEN_VEXT_V_ENV(vfwcvt_f_x_v_b, 2)
874
+GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 4)
875
+GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 8)
876
877
/*
878
* vfwcvt.f.f.v vd, vs2, vm
879
@@ -XXX,XX +XXX,XX @@ static uint32_t vfwcvtffv16(uint16_t a, float_status *s)
880
881
RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16)
882
RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64)
883
-GEN_VEXT_V_ENV(vfwcvt_f_f_v_h)
884
-GEN_VEXT_V_ENV(vfwcvt_f_f_v_w)
885
+GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 4)
886
+GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 8)
887
888
/* Narrowing Floating-Point/Integer Type-Convert Instructions */
889
/* (TD, T2, TX2) */
890
@@ -XXX,XX +XXX,XX @@ GEN_VEXT_V_ENV(vfwcvt_f_f_v_w)
891
RVVCALL(OPFVV1, vfncvt_xu_f_w_b, NOP_UU_B, H1, H2, float16_to_uint8)
892
RVVCALL(OPFVV1, vfncvt_xu_f_w_h, NOP_UU_H, H2, H4, float32_to_uint16)
893
RVVCALL(OPFVV1, vfncvt_xu_f_w_w, NOP_UU_W, H4, H8, float64_to_uint32)
894
-GEN_VEXT_V_ENV(vfncvt_xu_f_w_b)
895
-GEN_VEXT_V_ENV(vfncvt_xu_f_w_h)
896
-GEN_VEXT_V_ENV(vfncvt_xu_f_w_w)
897
+GEN_VEXT_V_ENV(vfncvt_xu_f_w_b, 1)
898
+GEN_VEXT_V_ENV(vfncvt_xu_f_w_h, 2)
899
+GEN_VEXT_V_ENV(vfncvt_xu_f_w_w, 4)
900
901
/* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */
902
RVVCALL(OPFVV1, vfncvt_x_f_w_b, NOP_UU_B, H1, H2, float16_to_int8)
903
RVVCALL(OPFVV1, vfncvt_x_f_w_h, NOP_UU_H, H2, H4, float32_to_int16)
904
RVVCALL(OPFVV1, vfncvt_x_f_w_w, NOP_UU_W, H4, H8, float64_to_int32)
905
-GEN_VEXT_V_ENV(vfncvt_x_f_w_b)
906
-GEN_VEXT_V_ENV(vfncvt_x_f_w_h)
907
-GEN_VEXT_V_ENV(vfncvt_x_f_w_w)
908
+GEN_VEXT_V_ENV(vfncvt_x_f_w_b, 1)
909
+GEN_VEXT_V_ENV(vfncvt_x_f_w_h, 2)
910
+GEN_VEXT_V_ENV(vfncvt_x_f_w_w, 4)
911
912
/* vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float */
913
RVVCALL(OPFVV1, vfncvt_f_xu_w_h, NOP_UU_H, H2, H4, uint32_to_float16)
914
RVVCALL(OPFVV1, vfncvt_f_xu_w_w, NOP_UU_W, H4, H8, uint64_to_float32)
915
-GEN_VEXT_V_ENV(vfncvt_f_xu_w_h)
916
-GEN_VEXT_V_ENV(vfncvt_f_xu_w_w)
917
+GEN_VEXT_V_ENV(vfncvt_f_xu_w_h, 2)
918
+GEN_VEXT_V_ENV(vfncvt_f_xu_w_w, 4)
919
920
/* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */
921
RVVCALL(OPFVV1, vfncvt_f_x_w_h, NOP_UU_H, H2, H4, int32_to_float16)
922
RVVCALL(OPFVV1, vfncvt_f_x_w_w, NOP_UU_W, H4, H8, int64_to_float32)
923
-GEN_VEXT_V_ENV(vfncvt_f_x_w_h)
924
-GEN_VEXT_V_ENV(vfncvt_f_x_w_w)
925
+GEN_VEXT_V_ENV(vfncvt_f_x_w_h, 2)
926
+GEN_VEXT_V_ENV(vfncvt_f_x_w_w, 4)
927
928
/* vfncvt.f.f.v vd, vs2, vm # Convert double float to single-width float. */
929
static uint16_t vfncvtffv16(uint32_t a, float_status *s)
930
@@ -XXX,XX +XXX,XX @@ static uint16_t vfncvtffv16(uint32_t a, float_status *s)
931
932
RVVCALL(OPFVV1, vfncvt_f_f_w_h, NOP_UU_H, H2, H4, vfncvtffv16)
933
RVVCALL(OPFVV1, vfncvt_f_f_w_w, NOP_UU_W, H4, H8, float64_to_float32)
934
-GEN_VEXT_V_ENV(vfncvt_f_f_w_h)
935
-GEN_VEXT_V_ENV(vfncvt_f_f_w_w)
936
+GEN_VEXT_V_ENV(vfncvt_f_f_w_h, 2)
937
+GEN_VEXT_V_ENV(vfncvt_f_f_w_w, 4)
938
939
/*
940
*** Vector Reduction Operations
941
diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc
942
index XXXXXXX..XXXXXXX 100644
943
--- a/target/riscv/insn_trans/trans_rvv.c.inc
944
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
945
@@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
946
\
947
data = FIELD_DP32(data, VDATA, VM, a->vm); \
948
data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \
949
+ data = FIELD_DP32(data, VDATA, VTA, s->vta); \
950
+ data = \
951
+ FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s);\
952
tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \
953
vreg_ofs(s, a->rs1), \
954
vreg_ofs(s, a->rs2), cpu_env, \
955
@@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
956
gen_set_rm(s, RISCV_FRM_DYN); \
957
data = FIELD_DP32(data, VDATA, VM, a->vm); \
958
data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \
959
+ data = FIELD_DP32(data, VDATA, VTA, s->vta); \
960
+ data = FIELD_DP32(data, VDATA, VTA_ALL_1S, \
961
+ s->cfg_vta_all_1s); \
962
return opfvf_trans(a->rd, a->rs1, a->rs2, data, \
963
fns[s->sew - 1], s); \
964
} \
965
@@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
966
\
967
data = FIELD_DP32(data, VDATA, VM, a->vm); \
968
data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \
969
+ data = FIELD_DP32(data, VDATA, VTA, s->vta); \
970
tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \
971
vreg_ofs(s, a->rs1), \
972
vreg_ofs(s, a->rs2), cpu_env, \
973
@@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
974
gen_set_rm(s, RISCV_FRM_DYN); \
975
data = FIELD_DP32(data, VDATA, VM, a->vm); \
976
data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \
977
+ data = FIELD_DP32(data, VDATA, VTA, s->vta); \
978
return opfvf_trans(a->rd, a->rs1, a->rs2, data, \
979
fns[s->sew - 1], s); \
980
} \
981
@@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
982
\
983
data = FIELD_DP32(data, VDATA, VM, a->vm); \
984
data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \
985
+ data = FIELD_DP32(data, VDATA, VTA, s->vta); \
986
tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \
987
vreg_ofs(s, a->rs1), \
988
vreg_ofs(s, a->rs2), cpu_env, \
989
@@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
990
gen_set_rm(s, RISCV_FRM_DYN); \
991
data = FIELD_DP32(data, VDATA, VM, a->vm); \
992
data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \
993
+ data = FIELD_DP32(data, VDATA, VTA, s->vta); \
994
return opfvf_trans(a->rd, a->rs1, a->rs2, data, \
995
fns[s->sew - 1], s); \
996
} \
997
@@ -XXX,XX +XXX,XX @@ static bool do_opfv(DisasContext *s, arg_rmr *a,
998
999
data = FIELD_DP32(data, VDATA, VM, a->vm);
1000
data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
1001
+ data = FIELD_DP32(data, VDATA, VTA, s->vta);
1002
tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),
1003
vreg_ofs(s, a->rs2), cpu_env,
1004
s->cfg_ptr->vlen / 8,
1005
@@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a) \
1006
\
1007
data = FIELD_DP32(data, VDATA, VM, a->vm); \
1008
data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \
1009
+ data = FIELD_DP32(data, VDATA, VTA, s->vta); \
1010
tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \
1011
vreg_ofs(s, a->rs2), cpu_env, \
1012
s->cfg_ptr->vlen / 8, \
1013
@@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a) \
1014
tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \
1015
\
1016
data = FIELD_DP32(data, VDATA, VM, a->vm); \
1017
+ data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \
1018
+ data = FIELD_DP32(data, VDATA, VTA, s->vta); \
1019
tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \
1020
vreg_ofs(s, a->rs2), cpu_env, \
1021
s->cfg_ptr->vlen / 8, \
1022
@@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a) \
1023
\
1024
data = FIELD_DP32(data, VDATA, VM, a->vm); \
1025
data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \
1026
+ data = FIELD_DP32(data, VDATA, VTA, s->vta); \
1027
tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \
1028
vreg_ofs(s, a->rs2), cpu_env, \
1029
s->cfg_ptr->vlen / 8, \
1030
@@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a) \
1031
tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \
1032
\
1033
data = FIELD_DP32(data, VDATA, VM, a->vm); \
1034
+ data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \
1035
+ data = FIELD_DP32(data, VDATA, VTA, s->vta); \
1036
tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \
1037
vreg_ofs(s, a->rs2), cpu_env, \
1038
s->cfg_ptr->vlen / 8, \
1039
--
291
--
1040
2.36.1
292
2.41.0
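
To make the tail-agnostic change above easier to follow, here is a stand-alone toy model of the policy the helpers now implement. It is a sketch only: vext_set_elems_1s_sketch, the register size, and all values are hypothetical stand-ins that mirror the shape, not the code, of the real QEMU helper.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Toy model (not QEMU's implementation): when the tail-agnostic bit
 * (vta) is set, bytes past vl*esz up to total_elems*esz are filled
 * with all ones, matching the "agnostic" policy in the patch above. */
static void vext_set_elems_1s_sketch(void *vd, uint32_t vta,
                                     uint32_t from, uint32_t to)
{
    if (vta) {
        memset((uint8_t *)vd + from, 0xff, to - from);
    }
}

int main(void)
{
    uint8_t vreg[16] = {0};      /* hypothetical 128-bit vector register */
    uint32_t vl = 2, esz = 4;    /* 2 active 32-bit elements */

    vext_set_elems_1s_sketch(vreg, 1, vl * esz, sizeof(vreg));
    printf("first tail byte after fill: 0x%02x\n", vreg[8]); /* 0xff */
    return 0;
}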
New patch
From: Rob Bradford <rbradford@rivosinc.com>

These are WARL fields - zero out the bits for unavailable counters and
special case the TM bit in mcountinhibit, which is hardwired to zero.
This patch achieves this by masking the value written, so that any use
of the field will see the correctly masked bits.

Tested by modifying OpenSBI to write the maximum value to these CSRs;
on a subsequent read, only the bits for the available PMU counters are
set, and the TM bit is zero in mcountinhibit.

Signed-off-by: Rob Bradford <rbradford@rivosinc.com>
Acked-by: Alistair Francis <alistair.francis@wdc.com>
Reviewed-by: Atish Patra <atishp@rivosinc.com>
Message-ID: <20230802124906.24197-1-rbradford@rivosinc.com>
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
---
 target/riscv/csr.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/target/riscv/csr.c b/target/riscv/csr.c
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/csr.c
+++ b/target/riscv/csr.c
@@ -XXX,XX +XXX,XX @@ static RISCVException write_mcountinhibit(CPURISCVState *env, int csrno,
 {
     int cidx;
     PMUCTRState *counter;
+    RISCVCPU *cpu = env_archcpu(env);

-    env->mcountinhibit = val;
+    /* WARL register - disable unavailable counters; TM bit is always 0 */
+    env->mcountinhibit =
+        val & (cpu->pmu_avail_ctrs | COUNTEREN_CY | COUNTEREN_IR);

     /* Check if any other counter is also monitoring cycles/instructions */
     for (cidx = 0; cidx < RV_MAX_MHPMCOUNTERS; cidx++) {
@@ -XXX,XX +XXX,XX @@ static RISCVException read_mcounteren(CPURISCVState *env, int csrno,
 static RISCVException write_mcounteren(CPURISCVState *env, int csrno,
                                        target_ulong val)
 {
-    env->mcounteren = val;
+    RISCVCPU *cpu = env_archcpu(env);
+
+    /* WARL register - disable unavailable counters */
+    env->mcounteren = val & (cpu->pmu_avail_ctrs | COUNTEREN_CY | COUNTEREN_TM |
+                             COUNTEREN_IR);
     return RISCV_EXCP_NONE;
 }

--
2.41.0
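
For readers unfamiliar with WARL (write any values, reads legal values) semantics, the following stand-alone sketch mimics the masking the patch introduces. The counter mask and written value are hypothetical; the COUNTEREN_* bit positions follow the RISC-V privileged spec.

#include <stdint.h>
#include <stdio.h>

#define COUNTEREN_CY (1u << 0)  /* cycle */
#define COUNTEREN_TM (1u << 1)  /* time (hardwired to 0 in mcountinhibit) */
#define COUNTEREN_IR (1u << 2)  /* instret */

int main(void)
{
    /* Hypothetical machine with 4 programmable hpmcounters (bits 3-6). */
    uint32_t pmu_avail_ctrs = 0x78;
    uint32_t val = 0xFFFFFFFFu;  /* guest writes all ones */

    /* WARL behaviour after the patch: unavailable counters and TM read 0. */
    uint32_t mcountinhibit =
        val & (pmu_avail_ctrs | COUNTEREN_CY | COUNTEREN_IR);

    printf("mcountinhibit reads back as 0x%x\n",
           (unsigned)mcountinhibit); /* 0x7d, TM bit clear */
    return 0;
}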
New patch
From: Jason Chien <jason.chien@sifive.com>

The RVA23 profile specification states:
The RVA23 profiles are intended to be used for 64-bit application
processors that will run rich OS stacks from standard binary OS
distributions and with a substantial number of third-party binary user
applications that will be supported over a considerable length of time
in the field.

Chapter 4 of the unprivileged spec introduces the Zihintntl extension,
a mandatory extension in the RVA23 profiles, whose purpose is to enable
application and operating system portability across different
implementations. Thus the DTS should contain the Zihintntl ISA string
in order to pass it to software.

The unprivileged spec states:
Like any HINTs, these instructions may be freely ignored. Hence, although
they are described in terms of cache-based memory hierarchies, they do not
mandate the provision of caches.

These instructions are encoded as otherwise-unused opcodes, e.g.
ADD x0, x0, x2, which QEMU already supports, and QEMU does not emulate
caches. Therefore these instructions can be treated as no-ops, and we
only need to add a new property for the Zihintntl extension.

Reviewed-by: Frank Chang <frank.chang@sifive.com>
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
Signed-off-by: Jason Chien <jason.chien@sifive.com>
Message-ID: <20230726074049.19505-2-jason.chien@sifive.com>
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
---
 target/riscv/cpu_cfg.h | 1 +
 target/riscv/cpu.c | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/target/riscv/cpu_cfg.h b/target/riscv/cpu_cfg.h
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/cpu_cfg.h
+++ b/target/riscv/cpu_cfg.h
@@ -XXX,XX +XXX,XX @@ struct RISCVCPUConfig {
     bool ext_icbom;
     bool ext_icboz;
     bool ext_zicond;
+    bool ext_zihintntl;
     bool ext_zihintpause;
     bool ext_smstateen;
     bool ext_sstc;
diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -XXX,XX +XXX,XX @@ static const struct isa_ext_data isa_edata_arr[] = {
     ISA_EXT_DATA_ENTRY(zicond, PRIV_VERSION_1_12_0, ext_zicond),
     ISA_EXT_DATA_ENTRY(zicsr, PRIV_VERSION_1_10_0, ext_icsr),
     ISA_EXT_DATA_ENTRY(zifencei, PRIV_VERSION_1_10_0, ext_ifencei),
+    ISA_EXT_DATA_ENTRY(zihintntl, PRIV_VERSION_1_10_0, ext_zihintntl),
     ISA_EXT_DATA_ENTRY(zihintpause, PRIV_VERSION_1_10_0, ext_zihintpause),
     ISA_EXT_DATA_ENTRY(zmmul, PRIV_VERSION_1_12_0, ext_zmmul),
     ISA_EXT_DATA_ENTRY(zawrs, PRIV_VERSION_1_12_0, ext_zawrs),
@@ -XXX,XX +XXX,XX @@ static Property riscv_cpu_extensions[] = {
     DEFINE_PROP_BOOL("sscofpmf", RISCVCPU, cfg.ext_sscofpmf, false),
     DEFINE_PROP_BOOL("Zifencei", RISCVCPU, cfg.ext_ifencei, true),
     DEFINE_PROP_BOOL("Zicsr", RISCVCPU, cfg.ext_icsr, true),
+    DEFINE_PROP_BOOL("Zihintntl", RISCVCPU, cfg.ext_zihintntl, true),
     DEFINE_PROP_BOOL("Zihintpause", RISCVCPU, cfg.ext_zihintpause, true),
     DEFINE_PROP_BOOL("Zawrs", RISCVCPU, cfg.ext_zawrs, true),
     DEFINE_PROP_BOOL("Zfa", RISCVCPU, cfg.ext_zfa, true),
--
2.41.0
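
As a companion to the encoding note in the commit message, this hedged sketch lists how the four Zihintntl hints map onto ADD-with-x0 encodings. The rs2 assignments are recalled from the ratified spec and should be verified against it before relying on them.

#include <stdio.h>

/* Sketch only: Zihintntl hints reuse "add x0, x0, rs2" encodings, so a
 * core without the extension simply retires them as no-ops. */
struct ntl_hint { const char *name; int rs2; };

int main(void)
{
    const struct ntl_hint hints[] = {
        { "ntl.p1",   2 },  /* non-temporal at the innermost level */
        { "ntl.pall", 3 },  /* non-temporal at all private levels */
        { "ntl.s1",   4 },  /* non-temporal at the shared level */
        { "ntl.all",  5 },  /* non-temporal at all levels */
    };

    for (unsigned i = 0; i < 4; i++) {
        printf("%-8s = add x0, x0, x%d\n", hints[i].name, hints[i].rs2);
    }
    return 0;
}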
New patch
From: LIU Zhiwei <zhiwei_liu@linux.alibaba.com>

Commit a47842d ("riscv: Add support for the Zfa extension") implemented
the Zfa extension, but it contained typos in fleq.d and fltq.d: both
mistakenly called the fltq.s helper function.

Fixes: a47842d ("riscv: Add support for the Zfa extension")
Signed-off-by: LIU Zhiwei <zhiwei_liu@linux.alibaba.com>
Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn>
Message-ID: <20230728003906.768-1-zhiwei_liu@linux.alibaba.com>
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
---
 target/riscv/insn_trans/trans_rvzfa.c.inc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/target/riscv/insn_trans/trans_rvzfa.c.inc b/target/riscv/insn_trans/trans_rvzfa.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/insn_trans/trans_rvzfa.c.inc
+++ b/target/riscv/insn_trans/trans_rvzfa.c.inc
@@ -XXX,XX +XXX,XX @@ bool trans_fleq_d(DisasContext *ctx, arg_fleq_d *a)
     TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
     TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2);

-    gen_helper_fltq_s(dest, cpu_env, src1, src2);
+    gen_helper_fleq_d(dest, cpu_env, src1, src2);
     gen_set_gpr(ctx, a->rd, dest);
     return true;
 }
@@ -XXX,XX +XXX,XX @@ bool trans_fltq_d(DisasContext *ctx, arg_fltq_d *a)
     TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
     TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2);

-    gen_helper_fltq_s(dest, cpu_env, src1, src2);
+    gen_helper_fltq_d(dest, cpu_env, src1, src2);
     gen_set_gpr(ctx, a->rd, dest);
     return true;
 }

--
2.41.0
New patch
From: Jason Chien <jason.chien@sifive.com>

When writing the upper half of mtime, we should keep the lower half
taken from the value given by cpu_riscv_read_rtc(), not
cpu_riscv_read_rtc_raw(). The same logic applies to writes to the
lower half.

Signed-off-by: Jason Chien <jason.chien@sifive.com>
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
Message-ID: <20230728082502.26439-1-jason.chien@sifive.com>
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
---
 hw/intc/riscv_aclint.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/hw/intc/riscv_aclint.c b/hw/intc/riscv_aclint.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/intc/riscv_aclint.c
+++ b/hw/intc/riscv_aclint.c
@@ -XXX,XX +XXX,XX @@ static void riscv_aclint_mtimer_write(void *opaque, hwaddr addr,
         return;
     } else if (addr == mtimer->time_base || addr == mtimer->time_base + 4) {
         uint64_t rtc_r = cpu_riscv_read_rtc_raw(mtimer->timebase_freq);
+        uint64_t rtc = cpu_riscv_read_rtc(mtimer);

         if (addr == mtimer->time_base) {
             if (size == 4) {
                 /* time_lo for RV32/RV64 */
-                mtimer->time_delta = ((rtc_r & ~0xFFFFFFFFULL) | value) - rtc_r;
+                mtimer->time_delta = ((rtc & ~0xFFFFFFFFULL) | value) - rtc_r;
             } else {
                 /* time for RV64 */
                 mtimer->time_delta = value - rtc_r;
@@ -XXX,XX +XXX,XX @@ static void riscv_aclint_mtimer_write(void *opaque, hwaddr addr,
         } else {
             if (size == 4) {
                 /* time_hi for RV32/RV64 */
-                mtimer->time_delta = (value << 32 | (rtc_r & 0xFFFFFFFF)) - rtc_r;
+                mtimer->time_delta = (value << 32 | (rtc & 0xFFFFFFFF)) - rtc_r;
             } else {
                 qemu_log_mask(LOG_GUEST_ERROR,
                               "aclint-mtimer: invalid time_hi write: %08x",
--
2.41.0
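
A small worked example makes the off-by-a-half bug concrete. This sketch is self-contained; all values are hypothetical, and only the two expressions mirror the before/after lines of the patch.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    /* Hypothetical values for illustration. */
    uint64_t rtc_r = 0x00000002FFFFFFFFULL; /* raw host counter        */
    int64_t  delta = 0x100000000LL;         /* current guest offset    */
    uint64_t rtc   = rtc_r + delta;         /* guest-visible mtime     */
    uint32_t value = 0x1234;                /* guest writes to time_lo */

    /* Buggy: preserves the raw counter's upper half. */
    int64_t bad  = (int64_t)(((rtc_r & ~0xFFFFFFFFULL) | value) - rtc_r);
    /* Fixed: preserves the guest-visible upper half. */
    int64_t good = (int64_t)(((rtc   & ~0xFFFFFFFFULL) | value) - rtc_r);

    printf("guest mtime (buggy) = 0x%llx\n",
           (unsigned long long)(rtc_r + bad));   /* 0x200001234 - wrong */
    printf("guest mtime (fixed) = 0x%llx\n",
           (unsigned long long)(rtc_r + good));  /* 0x300001234 - right */
    return 0;
}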
From: Jason Chien <jason.chien@sifive.com>

The variables whose values are given by cpu_riscv_read_rtc() should be
named "rtc". The variables whose values are given by
cpu_riscv_read_rtc_raw() should be named "rtc_r".

Signed-off-by: Jason Chien <jason.chien@sifive.com>
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
Message-ID: <20230728082502.26439-2-jason.chien@sifive.com>
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
---
 hw/intc/riscv_aclint.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/hw/intc/riscv_aclint.c b/hw/intc/riscv_aclint.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/intc/riscv_aclint.c
+++ b/hw/intc/riscv_aclint.c
@@ -XXX,XX +XXX,XX @@ static void riscv_aclint_mtimer_write_timecmp(RISCVAclintMTimerState *mtimer,
     uint64_t next;
     uint64_t diff;

-    uint64_t rtc_r = cpu_riscv_read_rtc(mtimer);
+    uint64_t rtc = cpu_riscv_read_rtc(mtimer);

     /* Compute the relative hartid w.r.t the socket */
     hartid = hartid - mtimer->hartid_base;

     mtimer->timecmp[hartid] = value;
-    if (mtimer->timecmp[hartid] <= rtc_r) {
+    if (mtimer->timecmp[hartid] <= rtc) {
         /*
          * If we're setting an MTIMECMP value in the "past",
          * immediately raise the timer interrupt
@@ -XXX,XX +XXX,XX @@ static void riscv_aclint_mtimer_write_timecmp(RISCVAclintMTimerState *mtimer,

     /* otherwise, set up the future timer interrupt */
     qemu_irq_lower(mtimer->timer_irqs[hartid]);
-    diff = mtimer->timecmp[hartid] - rtc_r;
+    diff = mtimer->timecmp[hartid] - rtc;
     /* back to ns (note args switched in muldiv64) */
     uint64_t ns_diff = muldiv64(diff, NANOSECONDS_PER_SECOND, timebase_freq);

From: Alistair Francis <alistair.francis@wdc.com>

Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-Id: <20220509091339.26016-1-alistair.francis@wdc.com>
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index XXXXXXX..XXXXXXX 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -XXX,XX +XXX,XX @@ Generic Loader
 M: Alistair Francis <alistair@alistair23.me>
 S: Maintained
 F: hw/core/generic-loader.c
+F: hw/core/uboot_image.h
 F: include/hw/core/generic-loader.h
 F: docs/system/generic-loader.rst
--
2.41.0
--
2.36.1
New patch
From: LIU Zhiwei <zhiwei_liu@linux.alibaba.com>

We should not use types dependent on the host architecture for
target_ucontext. This bug was found when running rv32 applications.

Signed-off-by: LIU Zhiwei <zhiwei_liu@linux.alibaba.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Message-ID: <20230811055438.1945-1-zhiwei_liu@linux.alibaba.com>
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
---
 linux-user/riscv/signal.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/linux-user/riscv/signal.c b/linux-user/riscv/signal.c
index XXXXXXX..XXXXXXX 100644
--- a/linux-user/riscv/signal.c
+++ b/linux-user/riscv/signal.c
@@ -XXX,XX +XXX,XX @@ struct target_sigcontext {
 }; /* cf. riscv-linux:arch/riscv/include/uapi/asm/ptrace.h */

 struct target_ucontext {
-    unsigned long uc_flags;
-    struct target_ucontext *uc_link;
+    abi_ulong uc_flags;
+    abi_ptr uc_link;
     target_stack_t uc_stack;
     target_sigset_t uc_sigmask;
     uint8_t __unused[1024 / 8 - sizeof(target_sigset_t)];
--
2.41.0
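
A minimal illustration of why host-typed fields break a 32-bit guest on a 64-bit host follows. The struct layouts are simplified stand-ins, not the real QEMU definitions, and abi_ulong is modelled here as a plain uint32_t for an rv32 target.

#include <stdint.h>
#include <stdio.h>

/* Simplified stand-ins (not QEMU's real types): on a 64-bit host,
 * "unsigned long" and pointers are 8 bytes, but an rv32 guest lays
 * out its ucontext with 4-byte fields. */
typedef uint32_t abi_ulong;  /* fixed-width guest type for rv32 */

struct host_typed  { unsigned long uc_flags; void *uc_link; };
struct guest_typed { abi_ulong uc_flags; abi_ulong uc_link; };

int main(void)
{
    /* On a typical 64-bit host: 16 vs 8 bytes, shifting every later
     * field the guest reads or writes. */
    printf("host-typed size:  %zu bytes\n", sizeof(struct host_typed));
    printf("guest-typed size: %zu bytes\n", sizeof(struct guest_typed));
    return 0;
}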
1
From: Jamie Iles <jamie@nuviainc.com>
1
From: Yong-Xuan Wang <yongxuan.wang@sifive.com>
2
2
3
Various loader functions return an int which limits images to 2GB which
3
In this patch, we create the APLIC and IMSIC FDT helper functions and
4
is fine for things like a BIOS/kernel image, but if we want to be able
4
remove M mode AIA devices when using KVM acceleration.
5
to load memory images or large ramdisks then any file over 2GB would
6
silently fail to load.
7
5
8
Cc: Luc Michel <lmichel@kalray.eu>
6
Signed-off-by: Yong-Xuan Wang <yongxuan.wang@sifive.com>
9
Signed-off-by: Jamie Iles <jamie@nuviainc.com>
7
Reviewed-by: Jim Shu <jim.shu@sifive.com>
10
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
8
Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
11
Reviewed-by: Luc Michel <lmichel@kalray.eu>
9
Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
12
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
10
Message-ID: <20230727102439.22554-2-yongxuan.wang@sifive.com>
13
Message-Id: <20211111141141.3295094-2-jamie@nuviainc.com>
14
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
11
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
15
---
12
---
16
include/hw/loader.h | 55 +++++++++++++--------------
13
hw/riscv/virt.c | 290 +++++++++++++++++++++++-------------------------
17
hw/arm/armv7m.c | 2 +-
14
1 file changed, 137 insertions(+), 153 deletions(-)
18
hw/arm/boot.c | 8 ++--
19
hw/core/generic-loader.c | 2 +-
20
hw/core/loader.c | 81 +++++++++++++++++++++-------------------
21
hw/i386/x86.c | 2 +-
22
hw/riscv/boot.c | 5 ++-
23
7 files changed, 80 insertions(+), 75 deletions(-)
24
15
25
diff --git a/include/hw/loader.h b/include/hw/loader.h
16
diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
26
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
27
--- a/include/hw/loader.h
18
--- a/hw/riscv/virt.c
28
+++ b/include/hw/loader.h
19
+++ b/hw/riscv/virt.c
29
@@ -XXX,XX +XXX,XX @@ ssize_t load_image_size(const char *filename, void *addr, size_t size);
20
@@ -XXX,XX +XXX,XX @@ static uint32_t imsic_num_bits(uint32_t count)
30
*
31
* Returns the size of the loaded image on success, -1 otherwise.
32
*/
33
-int load_image_targphys_as(const char *filename,
34
- hwaddr addr, uint64_t max_sz, AddressSpace *as);
35
+ssize_t load_image_targphys_as(const char *filename,
36
+ hwaddr addr, uint64_t max_sz, AddressSpace *as);
37
38
/**load_targphys_hex_as:
39
* @filename: Path to the .hex file
40
@@ -XXX,XX +XXX,XX @@ int load_image_targphys_as(const char *filename,
41
*
42
* Returns the size of the loaded .hex file on success, -1 otherwise.
43
*/
44
-int load_targphys_hex_as(const char *filename, hwaddr *entry, AddressSpace *as);
45
+ssize_t load_targphys_hex_as(const char *filename, hwaddr *entry,
46
+ AddressSpace *as);
47
48
/** load_image_targphys:
49
* Same as load_image_targphys_as(), but doesn't allow the caller to specify
50
* an AddressSpace.
51
*/
52
-int load_image_targphys(const char *filename, hwaddr,
53
- uint64_t max_sz);
54
+ssize_t load_image_targphys(const char *filename, hwaddr,
55
+ uint64_t max_sz);
56
57
/**
58
* load_image_mr: load an image into a memory region
59
@@ -XXX,XX +XXX,XX @@ int load_image_targphys(const char *filename, hwaddr,
60
* If the file is larger than the memory region's size the call will fail.
61
* Returns -1 on failure, or the size of the file.
62
*/
63
-int load_image_mr(const char *filename, MemoryRegion *mr);
64
+ssize_t load_image_mr(const char *filename, MemoryRegion *mr);
65
66
/* This is the limit on the maximum uncompressed image size that
67
* load_image_gzipped_buffer() and load_image_gzipped() will read. It prevents
68
@@ -XXX,XX +XXX,XX @@ int load_image_mr(const char *filename, MemoryRegion *mr);
69
*/
70
#define LOAD_IMAGE_MAX_GUNZIP_BYTES (256 << 20)
71
72
-int load_image_gzipped_buffer(const char *filename, uint64_t max_sz,
73
- uint8_t **buffer);
74
-int load_image_gzipped(const char *filename, hwaddr addr, uint64_t max_sz);
75
+ssize_t load_image_gzipped_buffer(const char *filename, uint64_t max_sz,
76
+ uint8_t **buffer);
77
+ssize_t load_image_gzipped(const char *filename, hwaddr addr, uint64_t max_sz);
78
79
#define ELF_LOAD_FAILED -1
80
#define ELF_LOAD_NOT_ELF -2
81
@@ -XXX,XX +XXX,XX @@ ssize_t load_elf(const char *filename,
82
*/
83
void load_elf_hdr(const char *filename, void *hdr, bool *is64, Error **errp);
84
85
-int load_aout(const char *filename, hwaddr addr, int max_sz,
86
- int bswap_needed, hwaddr target_page_size);
87
+ssize_t load_aout(const char *filename, hwaddr addr, int max_sz,
88
+ int bswap_needed, hwaddr target_page_size);
89
90
#define LOAD_UIMAGE_LOADADDR_INVALID (-1)
91
92
@@ -XXX,XX +XXX,XX @@ int load_aout(const char *filename, hwaddr addr, int max_sz,
93
*
94
* Returns the size of the loaded image on success, -1 otherwise.
95
*/
96
-int load_uimage_as(const char *filename, hwaddr *ep,
97
- hwaddr *loadaddr, int *is_linux,
98
- uint64_t (*translate_fn)(void *, uint64_t),
99
- void *translate_opaque, AddressSpace *as);
100
+ssize_t load_uimage_as(const char *filename, hwaddr *ep,
101
+ hwaddr *loadaddr, int *is_linux,
102
+ uint64_t (*translate_fn)(void *, uint64_t),
103
+ void *translate_opaque, AddressSpace *as);
104
105
/** load_uimage:
106
* Same as load_uimage_as(), but doesn't allow the caller to specify an
107
* AddressSpace.
108
*/
109
-int load_uimage(const char *filename, hwaddr *ep,
110
- hwaddr *loadaddr, int *is_linux,
111
- uint64_t (*translate_fn)(void *, uint64_t),
112
- void *translate_opaque);
113
+ssize_t load_uimage(const char *filename, hwaddr *ep,
114
+ hwaddr *loadaddr, int *is_linux,
115
+ uint64_t (*translate_fn)(void *, uint64_t),
116
+ void *translate_opaque);
117
118
/**
119
* load_ramdisk_as:
120
@@ -XXX,XX +XXX,XX @@ int load_uimage(const char *filename, hwaddr *ep,
121
*
122
* Returns the size of the loaded image on success, -1 otherwise.
123
*/
124
-int load_ramdisk_as(const char *filename, hwaddr addr, uint64_t max_sz,
125
-                        AddressSpace *as);
+ssize_t load_ramdisk_as(const char *filename, hwaddr addr, uint64_t max_sz,
+                        AddressSpace *as);

 /**
  * load_ramdisk:
  * Same as load_ramdisk_as(), but doesn't allow the caller to specify
  * an AddressSpace.
  */
-int load_ramdisk(const char *filename, hwaddr addr, uint64_t max_sz);
+ssize_t load_ramdisk(const char *filename, hwaddr addr, uint64_t max_sz);

 ssize_t gunzip(void *dst, size_t dstlen, uint8_t *src, size_t srclen);

@@ -XXX,XX +XXX,XX @@ void pstrcpy_targphys(const char *name,
 extern bool option_rom_has_mr;
 extern bool rom_file_has_mr;

-int rom_add_file(const char *file, const char *fw_dir,
-                 hwaddr addr, int32_t bootindex,
-                 bool option_rom, MemoryRegion *mr, AddressSpace *as);
+ssize_t rom_add_file(const char *file, const char *fw_dir,
+                     hwaddr addr, int32_t bootindex,
+                     bool option_rom, MemoryRegion *mr, AddressSpace *as);
 MemoryRegion *rom_add_blob(const char *name, const void *blob, size_t len,
                            size_t max_len, hwaddr addr,
                            const char *fw_file_name,
@@ -XXX,XX +XXX,XX @@ void hmp_info_roms(Monitor *mon, const QDict *qdict);
 #define rom_add_blob_fixed_as(_f, _b, _l, _a, _as)      \
     rom_add_blob(_f, _b, _l, _l, _a, NULL, NULL, NULL, _as, true)

-int rom_add_vga(const char *file);
-int rom_add_option(const char *file, int32_t bootindex);
+ssize_t rom_add_vga(const char *file);
+ssize_t rom_add_option(const char *file, int32_t bootindex);

 /* This is the usual maximum in uboot, so if a uImage overflows this, it would
  * overflow on real hardware too. */
diff --git a/hw/arm/armv7m.c b/hw/arm/armv7m.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/arm/armv7m.c
+++ b/hw/arm/armv7m.c
@@ -XXX,XX +XXX,XX @@ static void armv7m_reset(void *opaque)

 void armv7m_load_kernel(ARMCPU *cpu, const char *kernel_filename, int mem_size)
 {
-    int image_size;
+    ssize_t image_size;
     uint64_t entry;
     int big_endian;
     AddressSpace *as;
diff --git a/hw/arm/boot.c b/hw/arm/boot.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/arm/boot.c
+++ b/hw/arm/boot.c
@@ -XXX,XX +XXX,XX @@ static int do_arm_linux_init(Object *obj, void *opaque)
     return 0;
 }

-static int64_t arm_load_elf(struct arm_boot_info *info, uint64_t *pentry,
+static ssize_t arm_load_elf(struct arm_boot_info *info, uint64_t *pentry,
                             uint64_t *lowaddr, uint64_t *highaddr,
                             int elf_machine, AddressSpace *as)
 {
@@ -XXX,XX +XXX,XX @@ static int64_t arm_load_elf(struct arm_boot_info *info, uint64_t *pentry,
     } elf_header;
     int data_swab = 0;
     bool big_endian;
-    int64_t ret = -1;
+    ssize_t ret = -1;
     Error *err = NULL;


@@ -XXX,XX +XXX,XX @@ static void arm_setup_direct_kernel_boot(ARMCPU *cpu,
     /* Set up for a direct boot of a kernel image file. */
     CPUState *cs;
     AddressSpace *as = arm_boot_address_space(cpu, info);
-    int kernel_size;
+    ssize_t kernel_size;
     int initrd_size;
     int is_linux = 0;
     uint64_t elf_entry;
@@ -XXX,XX +XXX,XX @@ static void arm_setup_direct_kernel_boot(ARMCPU *cpu,

     if (kernel_size > info->ram_size) {
         error_report("kernel '%s' is too large to fit in RAM "
-                     "(kernel size %d, RAM size %" PRId64 ")",
+                     "(kernel size %zd, RAM size %" PRId64 ")",
                      info->kernel_filename, kernel_size, info->ram_size);
         exit(1);
     }
diff --git a/hw/core/generic-loader.c b/hw/core/generic-loader.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/core/generic-loader.c
+++ b/hw/core/generic-loader.c
@@ -XXX,XX +XXX,XX @@ static void generic_loader_realize(DeviceState *dev, Error **errp)
     GenericLoaderState *s = GENERIC_LOADER(dev);
     hwaddr entry;
     int big_endian;
-    int size = 0;
+    ssize_t size = 0;

     s->set_pc = false;

diff --git a/hw/core/loader.c b/hw/core/loader.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/core/loader.c
+++ b/hw/core/loader.c
@@ -XXX,XX +XXX,XX @@ ssize_t read_targphys(const char *name,
     return did;
 }

-int load_image_targphys(const char *filename,
-                        hwaddr addr, uint64_t max_sz)
+ssize_t load_image_targphys(const char *filename,
+                            hwaddr addr, uint64_t max_sz)
 {
     return load_image_targphys_as(filename, addr, max_sz, NULL);
 }

 /* return the size or -1 if error */
-int load_image_targphys_as(const char *filename,
-                           hwaddr addr, uint64_t max_sz, AddressSpace *as)
+ssize_t load_image_targphys_as(const char *filename,
+                               hwaddr addr, uint64_t max_sz, AddressSpace *as)
 {
-    int size;
+    ssize_t size;

     size = get_image_size(filename);
     if (size < 0 || size > max_sz) {
@@ -XXX,XX +XXX,XX @@ int load_image_targphys_as(const char *filename,
     return size;
 }

-int load_image_mr(const char *filename, MemoryRegion *mr)
+ssize_t load_image_mr(const char *filename, MemoryRegion *mr)
 {
-    int size;
+    ssize_t size;

     if (!memory_access_is_direct(mr, false)) {
         /* Can only load an image into RAM or ROM */
@@ -XXX,XX +XXX,XX @@ static void bswap_ahdr(struct exec *e)
     : (_N_SEGMENT_ROUND (_N_TXTENDADDR(x, target_page_size), target_page_size)))


-int load_aout(const char *filename, hwaddr addr, int max_sz,
-              int bswap_needed, hwaddr target_page_size)
+ssize_t load_aout(const char *filename, hwaddr addr, int max_sz,
+                  int bswap_needed, hwaddr target_page_size)
 {
     int fd;
     ssize_t size, ret;
@@ -XXX,XX +XXX,XX @@ toosmall:
 }

 /* Load a U-Boot image. */
-static int load_uboot_image(const char *filename, hwaddr *ep, hwaddr *loadaddr,
-                            int *is_linux, uint8_t image_type,
-                            uint64_t (*translate_fn)(void *, uint64_t),
-                            void *translate_opaque, AddressSpace *as)
+static ssize_t load_uboot_image(const char *filename, hwaddr *ep,
+                                hwaddr *loadaddr, int *is_linux,
+                                uint8_t image_type,
+                                uint64_t (*translate_fn)(void *, uint64_t),
+                                void *translate_opaque, AddressSpace *as)
 {
     int fd;
-    int size;
+    ssize_t size;
     hwaddr address;
     uboot_image_header_t h;
     uboot_image_header_t *hdr = &h;
@@ -XXX,XX +XXX,XX @@ out:
     return ret;
 }

-int load_uimage(const char *filename, hwaddr *ep, hwaddr *loadaddr,
-                int *is_linux,
-                uint64_t (*translate_fn)(void *, uint64_t),
-                void *translate_opaque)
+ssize_t load_uimage(const char *filename, hwaddr *ep, hwaddr *loadaddr,
+                    int *is_linux,
+                    uint64_t (*translate_fn)(void *, uint64_t),
+                    void *translate_opaque)
 {
     return load_uboot_image(filename, ep, loadaddr, is_linux, IH_TYPE_KERNEL,
                             translate_fn, translate_opaque, NULL);
 }

-int load_uimage_as(const char *filename, hwaddr *ep, hwaddr *loadaddr,
-                   int *is_linux,
-                   uint64_t (*translate_fn)(void *, uint64_t),
-                   void *translate_opaque, AddressSpace *as)
+ssize_t load_uimage_as(const char *filename, hwaddr *ep, hwaddr *loadaddr,
+                       int *is_linux,
+                       uint64_t (*translate_fn)(void *, uint64_t),
+                       void *translate_opaque, AddressSpace *as)
 {
     return load_uboot_image(filename, ep, loadaddr, is_linux, IH_TYPE_KERNEL,
                             translate_fn, translate_opaque, as);
 }

 /* Load a ramdisk. */
-int load_ramdisk(const char *filename, hwaddr addr, uint64_t max_sz)
+ssize_t load_ramdisk(const char *filename, hwaddr addr, uint64_t max_sz)
 {
     return load_ramdisk_as(filename, addr, max_sz, NULL);
 }

-int load_ramdisk_as(const char *filename, hwaddr addr, uint64_t max_sz,
-                    AddressSpace *as)
+ssize_t load_ramdisk_as(const char *filename, hwaddr addr, uint64_t max_sz,
+                        AddressSpace *as)
 {
     return load_uboot_image(filename, NULL, &addr, NULL, IH_TYPE_RAMDISK,
                             NULL, NULL, as);
 }

 /* Load a gzip-compressed kernel to a dynamically allocated buffer. */
-int load_image_gzipped_buffer(const char *filename, uint64_t max_sz,
-                              uint8_t **buffer)
+ssize_t load_image_gzipped_buffer(const char *filename, uint64_t max_sz,
+                                  uint8_t **buffer)
 {
     uint8_t *compressed_data = NULL;
     uint8_t *data = NULL;
@@ -XXX,XX +XXX,XX @@ int load_image_gzipped_buffer(const char *filename, uint64_t max_sz,
 }

 /* Load a gzip-compressed kernel. */
-int load_image_gzipped(const char *filename, hwaddr addr, uint64_t max_sz)
+ssize_t load_image_gzipped(const char *filename, hwaddr addr, uint64_t max_sz)
 {
-    int bytes;
+    ssize_t bytes;
     uint8_t *data;

     bytes = load_image_gzipped_buffer(filename, max_sz, &data);
@@ -XXX,XX +XXX,XX @@ static void *rom_set_mr(Rom *rom, Object *owner, const char *name, bool ro)
     return data;
 }

-int rom_add_file(const char *file, const char *fw_dir,
-                 hwaddr addr, int32_t bootindex,
-                 bool option_rom, MemoryRegion *mr,
-                 AddressSpace *as)
+ssize_t rom_add_file(const char *file, const char *fw_dir,
+                     hwaddr addr, int32_t bootindex,
+                     bool option_rom, MemoryRegion *mr,
+                     AddressSpace *as)
 {
     MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine());
     Rom *rom;
-    int rc, fd = -1;
+    ssize_t rc;
+    int fd = -1;
     char devpath[100];

     if (as && mr) {
@@ -XXX,XX +XXX,XX @@ int rom_add_file(const char *file, const char *fw_dir,
     lseek(fd, 0, SEEK_SET);
     rc = read(fd, rom->data, rom->datasize);
     if (rc != rom->datasize) {
-        fprintf(stderr, "rom: file %-20s: read error: rc=%d (expected %zd)\n",
+        fprintf(stderr, "rom: file %-20s: read error: rc=%zd (expected %zd)\n",
                 rom->name, rc, rom->datasize);
         goto err;
     }
@@ -XXX,XX +XXX,XX @@ int rom_add_elf_program(const char *name, GMappedFile *mapped_file, void *data,
     return 0;
 }

-int rom_add_vga(const char *file)
+ssize_t rom_add_vga(const char *file)
 {
     return rom_add_file(file, "vgaroms", 0, -1, true, NULL, NULL);
 }

-int rom_add_option(const char *file, int32_t bootindex)
+ssize_t rom_add_option(const char *file, int32_t bootindex)
 {
     return rom_add_file(file, "genroms", 0, bootindex, true, NULL, NULL);
 }
@@ -XXX,XX +XXX,XX @@ out:
 }

 /* return size or -1 if error */
-int load_targphys_hex_as(const char *filename, hwaddr *entry, AddressSpace *as)
+ssize_t load_targphys_hex_as(const char *filename, hwaddr *entry,
+                             AddressSpace *as)
 {
     gsize hex_blob_size;
     gchar *hex_blob;
-    int total_size = 0;
+    ssize_t total_size = 0;

     if (!g_file_get_contents(filename, &hex_blob, &hex_blob_size, NULL)) {
         return -1;
diff --git a/hw/i386/x86.c b/hw/i386/x86.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/i386/x86.c
+++ b/hw/i386/x86.c
@@ -XXX,XX +XXX,XX @@ void x86_bios_rom_init(MachineState *ms, const char *default_firmware,
     char *filename;
     MemoryRegion *bios, *isa_bios;
     int bios_size, isa_bios_size;
-    int ret;
+    ssize_t ret;

     /* BIOS load */
     bios_name = ms->firmware ?: default_firmware;
diff --git a/hw/riscv/boot.c b/hw/riscv/boot.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/riscv/boot.c
+++ b/hw/riscv/boot.c
@@ -XXX,XX +XXX,XX @@ target_ulong riscv_load_firmware(const char *firmware_filename,
                                  hwaddr firmware_load_addr,
                                  symbol_fn_t sym_cb)
 {
-    uint64_t firmware_entry, firmware_size, firmware_end;
+    uint64_t firmware_entry, firmware_end;
+    ssize_t firmware_size;

     if (load_elf_ram_sym(firmware_filename, NULL, NULL, NULL,
                          &firmware_entry, NULL, &firmware_end, NULL,
@@ -XXX,XX +XXX,XX @@ target_ulong riscv_load_kernel(const char *kernel_filename,
 hwaddr riscv_load_initrd(const char *filename, uint64_t mem_size,
                          uint64_t kernel_entry, hwaddr *start)
 {
-    int size;
+    ssize_t size;

     /*
      * We want to put the initrd far enough into RAM that when the
--
2.36.1

     return ret;
 }

-static void create_fdt_imsic(RISCVVirtState *s, const MemMapEntry *memmap,
-                             uint32_t *phandle, uint32_t *intc_phandles,
-                             uint32_t *msi_m_phandle, uint32_t *msi_s_phandle)
+static void create_fdt_one_imsic(RISCVVirtState *s, hwaddr base_addr,
+                                 uint32_t *intc_phandles, uint32_t msi_phandle,
+                                 bool m_mode, uint32_t imsic_guest_bits)
 {
     int cpu, socket;
     char *imsic_name;
     MachineState *ms = MACHINE(s);
     int socket_count = riscv_socket_count(ms);
-    uint32_t imsic_max_hart_per_socket, imsic_guest_bits;
+    uint32_t imsic_max_hart_per_socket;
     uint32_t *imsic_cells, *imsic_regs, imsic_addr, imsic_size;

-    *msi_m_phandle = (*phandle)++;
-    *msi_s_phandle = (*phandle)++;
     imsic_cells = g_new0(uint32_t, ms->smp.cpus * 2);
     imsic_regs = g_new0(uint32_t, socket_count * 4);

-    /* M-level IMSIC node */
     for (cpu = 0; cpu < ms->smp.cpus; cpu++) {
         imsic_cells[cpu * 2 + 0] = cpu_to_be32(intc_phandles[cpu]);
-        imsic_cells[cpu * 2 + 1] = cpu_to_be32(IRQ_M_EXT);
+        imsic_cells[cpu * 2 + 1] = cpu_to_be32(m_mode ? IRQ_M_EXT : IRQ_S_EXT);
     }
-    imsic_max_hart_per_socket = 0;
-    for (socket = 0; socket < socket_count; socket++) {
-        imsic_addr = memmap[VIRT_IMSIC_M].base +
-                     socket * VIRT_IMSIC_GROUP_MAX_SIZE;
-        imsic_size = IMSIC_HART_SIZE(0) * s->soc[socket].num_harts;
-        imsic_regs[socket * 4 + 0] = 0;
-        imsic_regs[socket * 4 + 1] = cpu_to_be32(imsic_addr);
-        imsic_regs[socket * 4 + 2] = 0;
-        imsic_regs[socket * 4 + 3] = cpu_to_be32(imsic_size);
-        if (imsic_max_hart_per_socket < s->soc[socket].num_harts) {
-            imsic_max_hart_per_socket = s->soc[socket].num_harts;
-        }
-    }
-    imsic_name = g_strdup_printf("/soc/imsics@%lx",
-                                 (unsigned long)memmap[VIRT_IMSIC_M].base);
-    qemu_fdt_add_subnode(ms->fdt, imsic_name);
-    qemu_fdt_setprop_string(ms->fdt, imsic_name, "compatible",
-                            "riscv,imsics");
-    qemu_fdt_setprop_cell(ms->fdt, imsic_name, "#interrupt-cells",
-                          FDT_IMSIC_INT_CELLS);
-    qemu_fdt_setprop(ms->fdt, imsic_name, "interrupt-controller",
-                     NULL, 0);
-    qemu_fdt_setprop(ms->fdt, imsic_name, "msi-controller",
-                     NULL, 0);
-    qemu_fdt_setprop(ms->fdt, imsic_name, "interrupts-extended",
-                     imsic_cells, ms->smp.cpus * sizeof(uint32_t) * 2);
-    qemu_fdt_setprop(ms->fdt, imsic_name, "reg", imsic_regs,
-                     socket_count * sizeof(uint32_t) * 4);
-    qemu_fdt_setprop_cell(ms->fdt, imsic_name, "riscv,num-ids",
-                          VIRT_IRQCHIP_NUM_MSIS);
-    if (socket_count > 1) {
-        qemu_fdt_setprop_cell(ms->fdt, imsic_name, "riscv,hart-index-bits",
-                              imsic_num_bits(imsic_max_hart_per_socket));
-        qemu_fdt_setprop_cell(ms->fdt, imsic_name, "riscv,group-index-bits",
-                              imsic_num_bits(socket_count));
-        qemu_fdt_setprop_cell(ms->fdt, imsic_name, "riscv,group-index-shift",
-                              IMSIC_MMIO_GROUP_MIN_SHIFT);
-    }
-    qemu_fdt_setprop_cell(ms->fdt, imsic_name, "phandle", *msi_m_phandle);
-
-    g_free(imsic_name);

-    /* S-level IMSIC node */
-    for (cpu = 0; cpu < ms->smp.cpus; cpu++) {
-        imsic_cells[cpu * 2 + 0] = cpu_to_be32(intc_phandles[cpu]);
-        imsic_cells[cpu * 2 + 1] = cpu_to_be32(IRQ_S_EXT);
-    }
-    imsic_guest_bits = imsic_num_bits(s->aia_guests + 1);
     imsic_max_hart_per_socket = 0;
     for (socket = 0; socket < socket_count; socket++) {
-        imsic_addr = memmap[VIRT_IMSIC_S].base +
-                     socket * VIRT_IMSIC_GROUP_MAX_SIZE;
+        imsic_addr = base_addr + socket * VIRT_IMSIC_GROUP_MAX_SIZE;
         imsic_size = IMSIC_HART_SIZE(imsic_guest_bits) *
                      s->soc[socket].num_harts;
         imsic_regs[socket * 4 + 0] = 0;
@@ -XXX,XX +XXX,XX @@ static void create_fdt_imsic(RISCVVirtState *s, const MemMapEntry *memmap,
             imsic_max_hart_per_socket = s->soc[socket].num_harts;
         }
     }
-    imsic_name = g_strdup_printf("/soc/imsics@%lx",
-                                 (unsigned long)memmap[VIRT_IMSIC_S].base);
+
+    imsic_name = g_strdup_printf("/soc/imsics@%lx", (unsigned long)base_addr);
     qemu_fdt_add_subnode(ms->fdt, imsic_name);
-    qemu_fdt_setprop_string(ms->fdt, imsic_name, "compatible",
-                            "riscv,imsics");
+    qemu_fdt_setprop_string(ms->fdt, imsic_name, "compatible", "riscv,imsics");
     qemu_fdt_setprop_cell(ms->fdt, imsic_name, "#interrupt-cells",
-                          FDT_IMSIC_INT_CELLS);
-    qemu_fdt_setprop(ms->fdt, imsic_name, "interrupt-controller",
-                     NULL, 0);
-    qemu_fdt_setprop(ms->fdt, imsic_name, "msi-controller",
-                     NULL, 0);
+                          FDT_IMSIC_INT_CELLS);
+    qemu_fdt_setprop(ms->fdt, imsic_name, "interrupt-controller", NULL, 0);
+    qemu_fdt_setprop(ms->fdt, imsic_name, "msi-controller", NULL, 0);
     qemu_fdt_setprop(ms->fdt, imsic_name, "interrupts-extended",
-                     imsic_cells, ms->smp.cpus * sizeof(uint32_t) * 2);
+                     imsic_cells, ms->smp.cpus * sizeof(uint32_t) * 2);
     qemu_fdt_setprop(ms->fdt, imsic_name, "reg", imsic_regs,
-                     socket_count * sizeof(uint32_t) * 4);
+                     socket_count * sizeof(uint32_t) * 4);
     qemu_fdt_setprop_cell(ms->fdt, imsic_name, "riscv,num-ids",
-                          VIRT_IRQCHIP_NUM_MSIS);
+                          VIRT_IRQCHIP_NUM_MSIS);
+
     if (imsic_guest_bits) {
         qemu_fdt_setprop_cell(ms->fdt, imsic_name, "riscv,guest-index-bits",
-                              imsic_guest_bits);
+                              imsic_guest_bits);
     }
+
     if (socket_count > 1) {
         qemu_fdt_setprop_cell(ms->fdt, imsic_name, "riscv,hart-index-bits",
-                              imsic_num_bits(imsic_max_hart_per_socket));
+                              imsic_num_bits(imsic_max_hart_per_socket));
         qemu_fdt_setprop_cell(ms->fdt, imsic_name, "riscv,group-index-bits",
-                              imsic_num_bits(socket_count));
+                              imsic_num_bits(socket_count));
         qemu_fdt_setprop_cell(ms->fdt, imsic_name, "riscv,group-index-shift",
-                              IMSIC_MMIO_GROUP_MIN_SHIFT);
+                              IMSIC_MMIO_GROUP_MIN_SHIFT);
     }
-    qemu_fdt_setprop_cell(ms->fdt, imsic_name, "phandle", *msi_s_phandle);
-    g_free(imsic_name);
+    qemu_fdt_setprop_cell(ms->fdt, imsic_name, "phandle", msi_phandle);

+    g_free(imsic_name);
     g_free(imsic_regs);
     g_free(imsic_cells);
 }

-static void create_fdt_socket_aplic(RISCVVirtState *s,
-                                    const MemMapEntry *memmap, int socket,
-                                    uint32_t msi_m_phandle,
-                                    uint32_t msi_s_phandle,
-                                    uint32_t *phandle,
-                                    uint32_t *intc_phandles,
-                                    uint32_t *aplic_phandles)
+static void create_fdt_imsic(RISCVVirtState *s, const MemMapEntry *memmap,
+                             uint32_t *phandle, uint32_t *intc_phandles,
+                             uint32_t *msi_m_phandle, uint32_t *msi_s_phandle)
+{
+    *msi_m_phandle = (*phandle)++;
+    *msi_s_phandle = (*phandle)++;
+
+    if (!kvm_enabled()) {
+        /* M-level IMSIC node */
+        create_fdt_one_imsic(s, memmap[VIRT_IMSIC_M].base, intc_phandles,
+                             *msi_m_phandle, true, 0);
+    }
+
+    /* S-level IMSIC node */
+    create_fdt_one_imsic(s, memmap[VIRT_IMSIC_S].base, intc_phandles,
+                         *msi_s_phandle, false,
+                         imsic_num_bits(s->aia_guests + 1));
+
+}
+
+static void create_fdt_one_aplic(RISCVVirtState *s, int socket,
+                                 unsigned long aplic_addr, uint32_t aplic_size,
+                                 uint32_t msi_phandle,
+                                 uint32_t *intc_phandles,
+                                 uint32_t aplic_phandle,
+                                 uint32_t aplic_child_phandle,
+                                 bool m_mode)
 {
     int cpu;
     char *aplic_name;
     uint32_t *aplic_cells;
-    unsigned long aplic_addr;
     MachineState *ms = MACHINE(s);
-    uint32_t aplic_m_phandle, aplic_s_phandle;

-    aplic_m_phandle = (*phandle)++;
-    aplic_s_phandle = (*phandle)++;
     aplic_cells = g_new0(uint32_t, s->soc[socket].num_harts * 2);

-    /* M-level APLIC node */
     for (cpu = 0; cpu < s->soc[socket].num_harts; cpu++) {
         aplic_cells[cpu * 2 + 0] = cpu_to_be32(intc_phandles[cpu]);
-        aplic_cells[cpu * 2 + 1] = cpu_to_be32(IRQ_M_EXT);
+        aplic_cells[cpu * 2 + 1] = cpu_to_be32(m_mode ? IRQ_M_EXT : IRQ_S_EXT);
     }
-    aplic_addr = memmap[VIRT_APLIC_M].base +
-                 (memmap[VIRT_APLIC_M].size * socket);
+
     aplic_name = g_strdup_printf("/soc/aplic@%lx", aplic_addr);
     qemu_fdt_add_subnode(ms->fdt, aplic_name);
     qemu_fdt_setprop_string(ms->fdt, aplic_name, "compatible", "riscv,aplic");
     qemu_fdt_setprop_cell(ms->fdt, aplic_name,
-                          "#interrupt-cells", FDT_APLIC_INT_CELLS);
+                          "#interrupt-cells", FDT_APLIC_INT_CELLS);
     qemu_fdt_setprop(ms->fdt, aplic_name, "interrupt-controller", NULL, 0);
+
     if (s->aia_type == VIRT_AIA_TYPE_APLIC) {
         qemu_fdt_setprop(ms->fdt, aplic_name, "interrupts-extended",
-                         aplic_cells, s->soc[socket].num_harts * sizeof(uint32_t) * 2);
+                         aplic_cells,
+                         s->soc[socket].num_harts * sizeof(uint32_t) * 2);
     } else {
-        qemu_fdt_setprop_cell(ms->fdt, aplic_name, "msi-parent",
-                              msi_m_phandle);
+        qemu_fdt_setprop_cell(ms->fdt, aplic_name, "msi-parent", msi_phandle);
     }
+
     qemu_fdt_setprop_cells(ms->fdt, aplic_name, "reg",
-                           0x0, aplic_addr, 0x0, memmap[VIRT_APLIC_M].size);
+                           0x0, aplic_addr, 0x0, aplic_size);
     qemu_fdt_setprop_cell(ms->fdt, aplic_name, "riscv,num-sources",
-                          VIRT_IRQCHIP_NUM_SOURCES);
-    qemu_fdt_setprop_cell(ms->fdt, aplic_name, "riscv,children",
-                          aplic_s_phandle);
-    qemu_fdt_setprop_cells(ms->fdt, aplic_name, "riscv,delegate",
-                           aplic_s_phandle, 0x1, VIRT_IRQCHIP_NUM_SOURCES);
+                          VIRT_IRQCHIP_NUM_SOURCES);
+
+    if (aplic_child_phandle) {
+        qemu_fdt_setprop_cell(ms->fdt, aplic_name, "riscv,children",
+                              aplic_child_phandle);
+        qemu_fdt_setprop_cells(ms->fdt, aplic_name, "riscv,delegate",
+                               aplic_child_phandle, 0x1,
+                               VIRT_IRQCHIP_NUM_SOURCES);
+    }
+
     riscv_socket_fdt_write_id(ms, aplic_name, socket);
-    qemu_fdt_setprop_cell(ms->fdt, aplic_name, "phandle", aplic_m_phandle);
+    qemu_fdt_setprop_cell(ms->fdt, aplic_name, "phandle", aplic_phandle);
+
     g_free(aplic_name);
+    g_free(aplic_cells);
+}

-    /* S-level APLIC node */
-    for (cpu = 0; cpu < s->soc[socket].num_harts; cpu++) {
-        aplic_cells[cpu * 2 + 0] = cpu_to_be32(intc_phandles[cpu]);
-        aplic_cells[cpu * 2 + 1] = cpu_to_be32(IRQ_S_EXT);
+static void create_fdt_socket_aplic(RISCVVirtState *s,
+                                    const MemMapEntry *memmap, int socket,
+                                    uint32_t msi_m_phandle,
+                                    uint32_t msi_s_phandle,
+                                    uint32_t *phandle,
+                                    uint32_t *intc_phandles,
+                                    uint32_t *aplic_phandles)
+{
+    char *aplic_name;
+    unsigned long aplic_addr;
+    MachineState *ms = MACHINE(s);
+    uint32_t aplic_m_phandle, aplic_s_phandle;
+
+    aplic_m_phandle = (*phandle)++;
+    aplic_s_phandle = (*phandle)++;
+
+    if (!kvm_enabled()) {
+        /* M-level APLIC node */
+        aplic_addr = memmap[VIRT_APLIC_M].base +
+                     (memmap[VIRT_APLIC_M].size * socket);
+        create_fdt_one_aplic(s, socket, aplic_addr, memmap[VIRT_APLIC_M].size,
+                             msi_m_phandle, intc_phandles,
+                             aplic_m_phandle, aplic_s_phandle,
+                             true);
     }
+
+    /* S-level APLIC node */
     aplic_addr = memmap[VIRT_APLIC_S].base +
                  (memmap[VIRT_APLIC_S].size * socket);
+    create_fdt_one_aplic(s, socket, aplic_addr, memmap[VIRT_APLIC_S].size,
+                         msi_s_phandle, intc_phandles,
+                         aplic_s_phandle, 0,
+                         false);
+
     aplic_name = g_strdup_printf("/soc/aplic@%lx", aplic_addr);
-    qemu_fdt_add_subnode(ms->fdt, aplic_name);
-    qemu_fdt_setprop_string(ms->fdt, aplic_name, "compatible", "riscv,aplic");
-    qemu_fdt_setprop_cell(ms->fdt, aplic_name,
-                          "#interrupt-cells", FDT_APLIC_INT_CELLS);
-    qemu_fdt_setprop(ms->fdt, aplic_name, "interrupt-controller", NULL, 0);
-    if (s->aia_type == VIRT_AIA_TYPE_APLIC) {
-        qemu_fdt_setprop(ms->fdt, aplic_name, "interrupts-extended",
-                         aplic_cells, s->soc[socket].num_harts * sizeof(uint32_t) * 2);
-    } else {
-        qemu_fdt_setprop_cell(ms->fdt, aplic_name, "msi-parent",
-                              msi_s_phandle);
-    }
-    qemu_fdt_setprop_cells(ms->fdt, aplic_name, "reg",
-                           0x0, aplic_addr, 0x0, memmap[VIRT_APLIC_S].size);
-    qemu_fdt_setprop_cell(ms->fdt, aplic_name, "riscv,num-sources",
-                          VIRT_IRQCHIP_NUM_SOURCES);
-    riscv_socket_fdt_write_id(ms, aplic_name, socket);
-    qemu_fdt_setprop_cell(ms->fdt, aplic_name, "phandle", aplic_s_phandle);

     if (!socket) {
         platform_bus_add_all_fdt_nodes(ms->fdt, aplic_name,
@@ -XXX,XX +XXX,XX @@ static void create_fdt_socket_aplic(RISCVVirtState *s,

     g_free(aplic_name);

-    g_free(aplic_cells);
     aplic_phandles[socket] = aplic_s_phandle;
 }

@@ -XXX,XX +XXX,XX @@ static DeviceState *virt_create_aia(RISCVVirtAIAType aia_type, int aia_guests,
     int i;
     hwaddr addr;
     uint32_t guest_bits;
-    DeviceState *aplic_m;
-    bool msimode = (aia_type == VIRT_AIA_TYPE_APLIC_IMSIC) ? true : false;
+    DeviceState *aplic_s = NULL;
+    DeviceState *aplic_m = NULL;
+    bool msimode = aia_type == VIRT_AIA_TYPE_APLIC_IMSIC;

     if (msimode) {
-        /* Per-socket M-level IMSICs */
-        addr = memmap[VIRT_IMSIC_M].base + socket * VIRT_IMSIC_GROUP_MAX_SIZE;
-        for (i = 0; i < hart_count; i++) {
-            riscv_imsic_create(addr + i * IMSIC_HART_SIZE(0),
-                               base_hartid + i, true, 1,
-                               VIRT_IRQCHIP_NUM_MSIS);
+        if (!kvm_enabled()) {
+            /* Per-socket M-level IMSICs */
+            addr = memmap[VIRT_IMSIC_M].base +
+                   socket * VIRT_IMSIC_GROUP_MAX_SIZE;
+            for (i = 0; i < hart_count; i++) {
+                riscv_imsic_create(addr + i * IMSIC_HART_SIZE(0),
+                                   base_hartid + i, true, 1,
+                                   VIRT_IRQCHIP_NUM_MSIS);
+            }
         }

         /* Per-socket S-level IMSICs */
@@ -XXX,XX +XXX,XX @@ static DeviceState *virt_create_aia(RISCVVirtAIAType aia_type, int aia_guests,
         }
     }

-    /* Per-socket M-level APLIC */
-    aplic_m = riscv_aplic_create(
-        memmap[VIRT_APLIC_M].base + socket * memmap[VIRT_APLIC_M].size,
-        memmap[VIRT_APLIC_M].size,
-        (msimode) ? 0 : base_hartid,
-        (msimode) ? 0 : hart_count,
-        VIRT_IRQCHIP_NUM_SOURCES,
-        VIRT_IRQCHIP_NUM_PRIO_BITS,
-        msimode, true, NULL);
-
-    if (aplic_m) {
-        /* Per-socket S-level APLIC */
-        riscv_aplic_create(
-            memmap[VIRT_APLIC_S].base + socket * memmap[VIRT_APLIC_S].size,
-            memmap[VIRT_APLIC_S].size,
-            (msimode) ? 0 : base_hartid,
-            (msimode) ? 0 : hart_count,
-            VIRT_IRQCHIP_NUM_SOURCES,
-            VIRT_IRQCHIP_NUM_PRIO_BITS,
-            msimode, false, aplic_m);
+    if (!kvm_enabled()) {
+        /* Per-socket M-level APLIC */
+        aplic_m = riscv_aplic_create(memmap[VIRT_APLIC_M].base +
+                                     socket * memmap[VIRT_APLIC_M].size,
+                                     memmap[VIRT_APLIC_M].size,
+                                     (msimode) ? 0 : base_hartid,
+                                     (msimode) ? 0 : hart_count,
+                                     VIRT_IRQCHIP_NUM_SOURCES,
+                                     VIRT_IRQCHIP_NUM_PRIO_BITS,
+                                     msimode, true, NULL);
     }

-    return aplic_m;
+    /* Per-socket S-level APLIC */
+    aplic_s = riscv_aplic_create(memmap[VIRT_APLIC_S].base +
+                                 socket * memmap[VIRT_APLIC_S].size,
+                                 memmap[VIRT_APLIC_S].size,
+                                 (msimode) ? 0 : base_hartid,
+                                 (msimode) ? 0 : hart_count,
+                                 VIRT_IRQCHIP_NUM_SOURCES,
+                                 VIRT_IRQCHIP_NUM_PRIO_BITS,
+                                 msimode, false, aplic_m);
+
+    return kvm_enabled() ? aplic_s : aplic_m;
 }

 static void create_platform_bus(RISCVVirtState *s, DeviceState *irqchip)
--
2.41.0
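A side note on the int -> ssize_t conversion above (an editorial sketch, not
part of the series): a 32-bit int cannot represent image sizes of 2 GiB or
more, so a check such as "size < 0 || size > max_sz" can pass on a value that
was already truncated. A minimal standalone illustration; the 3 GiB figure
and variable names are hypothetical:

#include <stdio.h>
#include <sys/types.h>

int main(void)
{
    long long real_size = 3LL * 1024 * 1024 * 1024;  /* a 3 GiB image */
    int as_int = (int)real_size;        /* overflows: result is unusable */
    ssize_t as_ssize = (ssize_t)real_size; /* full range on 64-bit hosts */

    printf("int: %d  ssize_t: %zd\n", as_int, as_ssize);
    return 0;
}

On an LP64 host the ssize_t value stays intact while the int does not, which
is why the loader entry points now return ssize_t throughout.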
From: Yong-Xuan Wang <yongxuan.wang@sifive.com>

We check the in-kernel irqchip support when using KVM acceleration.

Signed-off-by: Yong-Xuan Wang <yongxuan.wang@sifive.com>
Reviewed-by: Jim Shu <jim.shu@sifive.com>
Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
Message-ID: <20230727102439.22554-3-yongxuan.wang@sifive.com>
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
---
 target/riscv/kvm.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/target/riscv/kvm.c b/target/riscv/kvm.c
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/kvm.c
+++ b/target/riscv/kvm.c
@@ -XXX,XX +XXX,XX @@ int kvm_arch_init(MachineState *ms, KVMState *s)

 int kvm_arch_irqchip_create(KVMState *s)
 {
-    return 0;
+    if (kvm_kernel_irqchip_split()) {
+        error_report("-machine kernel_irqchip=split is not supported on RISC-V.");
+        exit(1);
+    }
+
+    /*
+     * We can create the VAIA using the newer device control API.
+     */
+    return kvm_check_extension(s, KVM_CAP_DEVICE_CTRL);
 }

 int kvm_arch_process_async_events(CPUState *cs)
--
2.41.0
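For context (not part of the patch): with this change a split irqchip is
rejected up front when KVM is in use. A hypothetical invocation and roughly
the resulting error, assuming a RISC-V host with KVM:

$ qemu-system-riscv64 -M virt -accel kvm,kernel-irqchip=split ...
qemu-system-riscv64: -machine kernel_irqchip=split is not supported on RISC-V.

Otherwise the function reports KVM_CAP_DEVICE_CTRL, so the in-kernel irqchip
is only advertised when the kernel supports the device control API that the
vAIA is built with.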
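Two of the patches that follow add the KVM AIA support controlled by a new
"riscv-aia" accelerator property. For reference, once the series is applied
the mode is selected on the -accel option; a hypothetical invocation (the
machine, SMP and memory values are illustrative):

$ qemu-system-riscv64 -M virt,aia=aplic-imsic -smp 4 -m 2G \
      -accel kvm,riscv-aia=hwaccel ...

With riscv-aia=auto (the default) the hardware guest IMSICs are used when
available, falling back to software emulation otherwise.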
From: eopXD <yueh.ting.chen@gmail.com>

Signed-off-by: eop Chen <eop.chen@sifive.com>
Reviewed-by: Frank Chang <frank.chang@sifive.com>
Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn>
Acked-by: Alistair Francis <alistair.francis@wdc.com>
Message-Id: <165449614532.19704.7000832880482980398-10@git.sr.ht>
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
---
 target/riscv/vector_helper.c            | 20 ++++++++++++++++++++
 target/riscv/insn_trans/trans_rvv.c.inc | 12 ++++++++----
 2 files changed, 28 insertions(+), 4 deletions(-)

diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env, \
                   uint32_t desc) \
 { \
     uint32_t vl = env->vl; \
+    uint32_t esz = sizeof(ETYPE); \
+    uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
+    uint32_t vta = vext_vta(desc); \
     uint32_t i; \
 \
     for (i = env->vstart; i < vl; i++) { \
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env, \
         *((ETYPE *)vd + H(i)) = s1; \
     } \
     env->vstart = 0; \
+    /* set tail elements to 1s */ \
+    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
 }

 GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t, H1)
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env, \
                   uint32_t desc) \
 { \
     uint32_t vl = env->vl; \
+    uint32_t esz = sizeof(ETYPE); \
+    uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
+    uint32_t vta = vext_vta(desc); \
     uint32_t i; \
 \
     for (i = env->vstart; i < vl; i++) { \
         *((ETYPE *)vd + H(i)) = (ETYPE)s1; \
     } \
     env->vstart = 0; \
+    /* set tail elements to 1s */ \
+    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
 }

 GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t, H1)
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
                   CPURISCVState *env, uint32_t desc) \
 { \
     uint32_t vl = env->vl; \
+    uint32_t esz = sizeof(ETYPE); \
+    uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
+    uint32_t vta = vext_vta(desc); \
     uint32_t i; \
 \
     for (i = env->vstart; i < vl; i++) { \
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
         *((ETYPE *)vd + H(i)) = *(vt + H(i)); \
     } \
     env->vstart = 0; \
+    /* set tail elements to 1s */ \
+    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
 }

 GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t, H1)
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
                   void *vs2, CPURISCVState *env, uint32_t desc) \
 { \
     uint32_t vl = env->vl; \
+    uint32_t esz = sizeof(ETYPE); \
+    uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
+    uint32_t vta = vext_vta(desc); \
     uint32_t i; \
 \
     for (i = env->vstart; i < vl; i++) { \
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
         *((ETYPE *)vd + H(i)) = d; \
     } \
     env->vstart = 0; \
+    /* set tail elements to 1s */ \
+    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
 }

 GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t, H1)
diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -XXX,XX +XXX,XX @@ static bool trans_vmv_v_v(DisasContext *s, arg_vmv_v_v *a)
         vext_check_isa_ill(s) &&
         /* vmv.v.v has rs2 = 0 and vm = 1 */
         vext_check_sss(s, a->rd, a->rs1, 0, 1)) {
-        if (s->vl_eq_vlmax) {
+        if (s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) {
             tcg_gen_gvec_mov(s->sew, vreg_ofs(s, a->rd),
                              vreg_ofs(s, a->rs1),
                              MAXSZ(s), MAXSZ(s));
         } else {
             uint32_t data = FIELD_DP32(0, VDATA, LMUL, s->lmul);
+            data = FIELD_DP32(data, VDATA, VTA, s->vta);
             static gen_helper_gvec_2_ptr * const fns[4] = {
                 gen_helper_vmv_v_v_b, gen_helper_vmv_v_v_h,
                 gen_helper_vmv_v_v_w, gen_helper_vmv_v_v_d,
@@ -XXX,XX +XXX,XX @@ static bool trans_vmv_v_x(DisasContext *s, arg_vmv_v_x *a)

     s1 = get_gpr(s, a->rs1, EXT_SIGN);

-    if (s->vl_eq_vlmax) {
+    if (s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) {
         tcg_gen_gvec_dup_tl(s->sew, vreg_ofs(s, a->rd),
                             MAXSZ(s), MAXSZ(s), s1);
     } else {
@@ -XXX,XX +XXX,XX @@ static bool trans_vmv_v_x(DisasContext *s, arg_vmv_v_x *a)
         TCGv_i64 s1_i64 = tcg_temp_new_i64();
         TCGv_ptr dest = tcg_temp_new_ptr();
         uint32_t data = FIELD_DP32(0, VDATA, LMUL, s->lmul);
+        data = FIELD_DP32(data, VDATA, VTA, s->vta);
         static gen_helper_vmv_vx * const fns[4] = {
             gen_helper_vmv_v_x_b, gen_helper_vmv_v_x_h,
             gen_helper_vmv_v_x_w, gen_helper_vmv_v_x_d,
@@ -XXX,XX +XXX,XX @@ static bool trans_vmv_v_i(DisasContext *s, arg_vmv_v_i *a)
         /* vmv.v.i has rs2 = 0 and vm = 1 */
         vext_check_ss(s, a->rd, 0, 1)) {
         int64_t simm = sextract64(a->rs1, 0, 5);
-        if (s->vl_eq_vlmax) {
+        if (s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) {
             tcg_gen_gvec_dup_imm(s->sew, vreg_ofs(s, a->rd),
                                  MAXSZ(s), MAXSZ(s), simm);
             mark_vs_dirty(s);
@@ -XXX,XX +XXX,XX @@ static bool trans_vmv_v_i(DisasContext *s, arg_vmv_v_i *a)
             TCGv_i64 s1;
             TCGv_ptr dest;
             uint32_t data = FIELD_DP32(0, VDATA, LMUL, s->lmul);
+            data = FIELD_DP32(data, VDATA, VTA, s->vta);
             static gen_helper_vmv_vx * const fns[4] = {
                 gen_helper_vmv_v_x_b, gen_helper_vmv_v_x_h,
                 gen_helper_vmv_v_x_w, gen_helper_vmv_v_x_d,
@@ -XXX,XX +XXX,XX @@ static bool trans_vfmv_v_f(DisasContext *s, arg_vfmv_v_f *a)

         TCGv_i64 t1;

-        if (s->vl_eq_vlmax) {
+        if (s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) {
             t1 = tcg_temp_new_i64();
             /* NaN-box f[rs1] */
             do_nanbox(s, t1, cpu_fpr[a->rs1]);
@@ -XXX,XX +XXX,XX @@ static bool trans_vfmv_v_f(DisasContext *s, arg_vfmv_v_f *a)
             TCGv_ptr dest;
             TCGv_i32 desc;
             uint32_t data = FIELD_DP32(0, VDATA, LMUL, s->lmul);
+            data = FIELD_DP32(data, VDATA, VTA, s->vta);
             static gen_helper_vmv_vx * const fns[3] = {
                 gen_helper_vmv_v_x_h,
                 gen_helper_vmv_v_x_w,
--
2.36.1

From: Yong-Xuan Wang <yongxuan.wang@sifive.com>

We create a vAIA chip by using the KVM_DEV_TYPE_RISCV_AIA and then set up
the chip with the KVM_DEV_RISCV_AIA_GRP_* APIs.
We also extend the KVM accelerator to allow specifying the KVM AIA mode.
The "riscv-aia" parameter is passed along with --accel on the QEMU command
line.
1) "riscv-aia=emul": IMSIC is emulated by hypervisor
2) "riscv-aia=hwaccel": use hardware guest IMSIC
3) "riscv-aia=auto": use the hardware guest IMSICs whenever available;
   otherwise we fall back to software emulation.

Signed-off-by: Yong-Xuan Wang <yongxuan.wang@sifive.com>
Reviewed-by: Jim Shu <jim.shu@sifive.com>
Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
Message-ID: <20230727102439.22554-4-yongxuan.wang@sifive.com>
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
---
 target/riscv/kvm_riscv.h |   4 +
 target/riscv/kvm.c       | 186 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 190 insertions(+)

diff --git a/target/riscv/kvm_riscv.h b/target/riscv/kvm_riscv.h
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/kvm_riscv.h
+++ b/target/riscv/kvm_riscv.h
@@ -XXX,XX +XXX,XX @@
 void kvm_riscv_init_user_properties(Object *cpu_obj);
 void kvm_riscv_reset_vcpu(RISCVCPU *cpu);
 void kvm_riscv_set_irq(RISCVCPU *cpu, int irq, int level);
+void kvm_riscv_aia_create(MachineState *machine, uint64_t group_shift,
+                          uint64_t aia_irq_num, uint64_t aia_msi_num,
+                          uint64_t aplic_base, uint64_t imsic_base,
+                          uint64_t guest_num);

 #endif
diff --git a/target/riscv/kvm.c b/target/riscv/kvm.c
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/kvm.c
+++ b/target/riscv/kvm.c
@@ -XXX,XX +XXX,XX @@
 #include "exec/address-spaces.h"
 #include "hw/boards.h"
 #include "hw/irq.h"
+#include "hw/intc/riscv_imsic.h"
 #include "qemu/log.h"
 #include "hw/loader.h"
 #include "kvm_riscv.h"
@@ -XXX,XX +XXX,XX @@
 #include "chardev/char-fe.h"
 #include "migration/migration.h"
 #include "sysemu/runstate.h"
+#include "hw/riscv/numa.h"

 static uint64_t kvm_riscv_reg_id(CPURISCVState *env, uint64_t type,
                                  uint64_t idx)
@@ -XXX,XX +XXX,XX @@ bool kvm_arch_cpu_check_are_resettable(void)
     return true;
 }

+static int aia_mode;
+
+static const char *kvm_aia_mode_str(uint64_t mode)
+{
+    switch (mode) {
+    case KVM_DEV_RISCV_AIA_MODE_EMUL:
+        return "emul";
+    case KVM_DEV_RISCV_AIA_MODE_HWACCEL:
+        return "hwaccel";
+    case KVM_DEV_RISCV_AIA_MODE_AUTO:
+    default:
+        return "auto";
+    };
+}
+
+static char *riscv_get_kvm_aia(Object *obj, Error **errp)
+{
+    return g_strdup(kvm_aia_mode_str(aia_mode));
+}
+
+static void riscv_set_kvm_aia(Object *obj, const char *val, Error **errp)
+{
+    if (!strcmp(val, "emul")) {
+        aia_mode = KVM_DEV_RISCV_AIA_MODE_EMUL;
+    } else if (!strcmp(val, "hwaccel")) {
+        aia_mode = KVM_DEV_RISCV_AIA_MODE_HWACCEL;
+    } else if (!strcmp(val, "auto")) {
+        aia_mode = KVM_DEV_RISCV_AIA_MODE_AUTO;
+    } else {
+        error_setg(errp, "Invalid KVM AIA mode");
+        error_append_hint(errp, "Valid values are emul, hwaccel, and auto.\n");
+    }
+}
+
 void kvm_arch_accel_class_init(ObjectClass *oc)
 {
+    object_class_property_add_str(oc, "riscv-aia", riscv_get_kvm_aia,
+                                  riscv_set_kvm_aia);
+    object_class_property_set_description(oc, "riscv-aia",
+                                          "Set KVM AIA mode. Valid values are "
+                                          "emul, hwaccel, and auto. Default "
+                                          "is auto.");
+    object_property_set_default_str(object_class_property_find(oc, "riscv-aia"),
+                                    "auto");
+}
+
+void kvm_riscv_aia_create(MachineState *machine, uint64_t group_shift,
+                          uint64_t aia_irq_num, uint64_t aia_msi_num,
+                          uint64_t aplic_base, uint64_t imsic_base,
+                          uint64_t guest_num)
+{
+    int ret, i;
+    int aia_fd = -1;
+    uint64_t default_aia_mode;
+    uint64_t socket_count = riscv_socket_count(machine);
+    uint64_t max_hart_per_socket = 0;
+    uint64_t socket, base_hart, hart_count, socket_imsic_base, imsic_addr;
+    uint64_t socket_bits, hart_bits, guest_bits;
+
+    aia_fd = kvm_create_device(kvm_state, KVM_DEV_TYPE_RISCV_AIA, false);
+
+    if (aia_fd < 0) {
+        error_report("Unable to create in-kernel irqchip");
+        exit(1);
+    }
+
+    ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_CONFIG,
+                            KVM_DEV_RISCV_AIA_CONFIG_MODE,
+                            &default_aia_mode, false, NULL);
+    if (ret < 0) {
+        error_report("KVM AIA: failed to get current KVM AIA mode");
+        exit(1);
+    }
+    qemu_log("KVM AIA: default mode is %s\n",
+             kvm_aia_mode_str(default_aia_mode));
+
+    if (default_aia_mode != aia_mode) {
+        ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_CONFIG,
+                                KVM_DEV_RISCV_AIA_CONFIG_MODE,
+                                &aia_mode, true, NULL);
+        if (ret < 0)
+            warn_report("KVM AIA: failed to set KVM AIA mode");
+        else
+            qemu_log("KVM AIA: set current mode to %s\n",
+                     kvm_aia_mode_str(aia_mode));
+    }
+
+    ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_CONFIG,
+                            KVM_DEV_RISCV_AIA_CONFIG_SRCS,
+                            &aia_irq_num, true, NULL);
+    if (ret < 0) {
+        error_report("KVM AIA: failed to set number of input irq lines");
+        exit(1);
+    }
+
+    ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_CONFIG,
+                            KVM_DEV_RISCV_AIA_CONFIG_IDS,
+                            &aia_msi_num, true, NULL);
+    if (ret < 0) {
+        error_report("KVM AIA: failed to set number of msi");
+        exit(1);
+    }
+
+    socket_bits = find_last_bit(&socket_count, BITS_PER_LONG) + 1;
+    ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_CONFIG,
+                            KVM_DEV_RISCV_AIA_CONFIG_GROUP_BITS,
+                            &socket_bits, true, NULL);
+    if (ret < 0) {
+        error_report("KVM AIA: failed to set group_bits");
+        exit(1);
+    }
+
+    ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_CONFIG,
+                            KVM_DEV_RISCV_AIA_CONFIG_GROUP_SHIFT,
+                            &group_shift, true, NULL);
+    if (ret < 0) {
+        error_report("KVM AIA: failed to set group_shift");
+        exit(1);
+    }
+
+    guest_bits = guest_num == 0 ? 0 :
+                 find_last_bit(&guest_num, BITS_PER_LONG) + 1;
+    ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_CONFIG,
+                            KVM_DEV_RISCV_AIA_CONFIG_GUEST_BITS,
+                            &guest_bits, true, NULL);
+    if (ret < 0) {
+        error_report("KVM AIA: failed to set guest_bits");
+        exit(1);
+    }
+
+    ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_ADDR,
+                            KVM_DEV_RISCV_AIA_ADDR_APLIC,
+                            &aplic_base, true, NULL);
+    if (ret < 0) {
+        error_report("KVM AIA: failed to set the base address of APLIC");
+        exit(1);
+    }
+
+    for (socket = 0; socket < socket_count; socket++) {
+        socket_imsic_base = imsic_base + socket * (1U << group_shift);
+        hart_count = riscv_socket_hart_count(machine, socket);
+        base_hart = riscv_socket_first_hartid(machine, socket);
+
+        if (max_hart_per_socket < hart_count) {
+            max_hart_per_socket = hart_count;
+        }
+
+        for (i = 0; i < hart_count; i++) {
+            imsic_addr = socket_imsic_base + i * IMSIC_HART_SIZE(guest_bits);
+            ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_ADDR,
+                                    KVM_DEV_RISCV_AIA_ADDR_IMSIC(i + base_hart),
+                                    &imsic_addr, true, NULL);
+            if (ret < 0) {
+                error_report("KVM AIA: failed to set the IMSIC address for hart %d", i);
+                exit(1);
+            }
+        }
+    }
+
+    hart_bits = find_last_bit(&max_hart_per_socket, BITS_PER_LONG) + 1;
+    ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_CONFIG,
+                            KVM_DEV_RISCV_AIA_CONFIG_HART_BITS,
+                            &hart_bits, true, NULL);
+    if (ret < 0) {
+        error_report("KVM AIA: failed to set hart_bits");
+        exit(1);
+    }
+
+    if (kvm_has_gsi_routing()) {
+        for (uint64_t idx = 0; idx < aia_irq_num + 1; ++idx) {
+            /* KVM AIA only has one APLIC instance */
+            kvm_irqchip_add_irq_route(kvm_state, idx, 0, idx);
+        }
+        kvm_gsi_routing_allowed = true;
+        kvm_irqchip_commit_routes(kvm_state);
+    }
+
+    ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_CTRL,
+                            KVM_DEV_RISCV_AIA_CTRL_INIT,
+                            NULL, true, NULL);
+    if (ret < 0) {
+        error_report("KVM AIA: initialized fail");
+        exit(1);
+    }
+
+    kvm_msi_via_irqfd_allowed = kvm_irqfds_enabled();
 }
--
2.41.0

From: Yong-Xuan Wang <yongxuan.wang@sifive.com>

KVM AIA can't emulate the APLIC only. When the "aia=aplic" parameter is
passed, the APLIC device is emulated by QEMU. For "aia=aplic-imsic", remove
the MMIO operations of the APLIC when using KVM AIA and send wired interrupt
signals via the KVM_IRQ_LINE API.
After KVM AIA is enabled, MSI messages are delivered by the KVM_SIGNAL_MSI
API when the IMSICs receive MMIO write requests.

Signed-off-by: Yong-Xuan Wang <yongxuan.wang@sifive.com>
Reviewed-by: Jim Shu <jim.shu@sifive.com>
Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
Message-ID: <20230727102439.22554-5-yongxuan.wang@sifive.com>
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
---
 hw/intc/riscv_aplic.c | 56 ++++++++++++++++++++++++++++++-------------
 hw/intc/riscv_imsic.c | 25 +++++++++++++++----
 2 files changed, 61 insertions(+), 20 deletions(-)

diff --git a/hw/intc/riscv_aplic.c b/hw/intc/riscv_aplic.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/intc/riscv_aplic.c
+++ b/hw/intc/riscv_aplic.c
@@ -XXX,XX +XXX,XX @@
 #include "hw/irq.h"
 #include "target/riscv/cpu.h"
 #include "sysemu/sysemu.h"
+#include "sysemu/kvm.h"
 #include "migration/vmstate.h"

 #define APLIC_MAX_IDC            (1UL << 14)
@@ -XXX,XX +XXX,XX @@

 #define APLIC_IDC_CLAIMI         0x1c

+/*
+ * KVM AIA only supports APLIC MSI, fallback to QEMU emulation if we want to use
+ * APLIC Wired.
+ */
+static bool is_kvm_aia(bool msimode)
+{
+    return kvm_irqchip_in_kernel() && msimode;
+}
+
 static uint32_t riscv_aplic_read_input_word(RISCVAPLICState *aplic,
                                             uint32_t word)
 {
@@ -XXX,XX +XXX,XX @@ static uint32_t riscv_aplic_idc_claimi(RISCVAPLICState *aplic, uint32_t idc)
     return topi;
 }

+static void riscv_kvm_aplic_request(void *opaque, int irq, int level)
+{
+    kvm_set_irq(kvm_state, irq, !!level);
+}
+
 static void riscv_aplic_request(void *opaque, int irq, int level)
 {
     bool update = false;
@@ -XXX,XX +XXX,XX @@ static void riscv_aplic_realize(DeviceState *dev, Error **errp)
     uint32_t i;
     RISCVAPLICState *aplic = RISCV_APLIC(dev);

-    aplic->bitfield_words = (aplic->num_irqs + 31) >> 5;
-    aplic->sourcecfg = g_new0(uint32_t, aplic->num_irqs);
-    aplic->state = g_new0(uint32_t, aplic->num_irqs);
-    aplic->target = g_new0(uint32_t, aplic->num_irqs);
-    if (!aplic->msimode) {
-        for (i = 0; i < aplic->num_irqs; i++) {
-            aplic->target[i] = 1;
+    if (!is_kvm_aia(aplic->msimode)) {
+        aplic->bitfield_words = (aplic->num_irqs + 31) >> 5;
+        aplic->sourcecfg = g_new0(uint32_t, aplic->num_irqs);
+        aplic->state = g_new0(uint32_t, aplic->num_irqs);
+        aplic->target = g_new0(uint32_t, aplic->num_irqs);
+        if (!aplic->msimode) {
+            for (i = 0; i < aplic->num_irqs; i++) {
+                aplic->target[i] = 1;
+            }
         }
-    }
-    aplic->idelivery = g_new0(uint32_t, aplic->num_harts);
-    aplic->iforce = g_new0(uint32_t, aplic->num_harts);
-    aplic->ithreshold = g_new0(uint32_t, aplic->num_harts);
+        aplic->idelivery = g_new0(uint32_t, aplic->num_harts);
+        aplic->iforce = g_new0(uint32_t, aplic->num_harts);
+        aplic->ithreshold = g_new0(uint32_t, aplic->num_harts);

-    memory_region_init_io(&aplic->mmio, OBJECT(dev), &riscv_aplic_ops, aplic,
-                          TYPE_RISCV_APLIC, aplic->aperture_size);
-    sysbus_init_mmio(SYS_BUS_DEVICE(dev), &aplic->mmio);
+        memory_region_init_io(&aplic->mmio, OBJECT(dev), &riscv_aplic_ops,
+                              aplic, TYPE_RISCV_APLIC, aplic->aperture_size);
+        sysbus_init_mmio(SYS_BUS_DEVICE(dev), &aplic->mmio);
+    }

     /*
      * Only root APLICs have hardware IRQ lines. All non-root APLICs
      * have IRQ lines delegated by their parent APLIC.
      */
     if (!aplic->parent) {
-        qdev_init_gpio_in(dev, riscv_aplic_request, aplic->num_irqs);
+        if (is_kvm_aia(aplic->msimode)) {
+            qdev_init_gpio_in(dev, riscv_kvm_aplic_request, aplic->num_irqs);
+        } else {
+            qdev_init_gpio_in(dev, riscv_aplic_request, aplic->num_irqs);
+        }
     }

     /* Create output IRQ lines for non-MSI mode */
@@ -XXX,XX +XXX,XX @@ DeviceState *riscv_aplic_create(hwaddr addr, hwaddr size,
     qdev_prop_set_bit(dev, "mmode", mmode);

     sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
-    sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, addr);
+
+    if (!is_kvm_aia(msimode)) {
+        sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, addr);
+    }

     if (parent) {
         riscv_aplic_add_child(parent, dev);
diff --git a/hw/intc/riscv_imsic.c b/hw/intc/riscv_imsic.c

From: eopXD <yueh.ting.chen@gmail.com>

`vmadc` and `vmsbc` produce a mask value, so they always operate with
a tail agnostic policy.

Signed-off-by: eop Chen <eop.chen@sifive.com>
Reviewed-by: Frank Chang <frank.chang@sifive.com>
Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn>
Acked-by: Alistair Francis <alistair.francis@wdc.com>
Message-Id: <165449614532.19704.7000832880482980398-7@git.sr.ht>
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
---
 target/riscv/internals.h                |   5 +-
 target/riscv/vector_helper.c            | 314 +++++++++++++-----------
 target/riscv/insn_trans/trans_rvv.c.inc |  13 +-
 3 files changed, 190 insertions(+), 142 deletions(-)

diff --git a/target/riscv/internals.h b/target/riscv/internals.h
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/internals.h
+++ b/target/riscv/internals.h
@@ -XXX,XX +XXX,XX @@
 FIELD(VDATA, VM, 0, 1)
 FIELD(VDATA, LMUL, 1, 3)
 FIELD(VDATA, VTA, 4, 1)
-FIELD(VDATA, NF, 5, 4)
-FIELD(VDATA, WD, 5, 1)
+FIELD(VDATA, VTA_ALL_1S, 5, 1)
+FIELD(VDATA, NF, 6, 4)
+FIELD(VDATA, WD, 6, 1)

 /* float point classify helpers */
 target_ulong fclass_h(uint64_t frs1);
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -XXX,XX +XXX,XX @@ static inline uint32_t vext_vta(uint32_t desc)
     return FIELD_EX32(simd_data(desc), VDATA, VTA);
 }

+static inline uint32_t vext_vta_all_1s(uint32_t desc)
+{
+    return FIELD_EX32(simd_data(desc), VDATA, VTA_ALL_1S);
+}
+
 /*
  * Get the maximum number of elements can be operated.
  *
@@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB)

 static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2,
                        CPURISCVState *env, uint32_t desc,
-                       opivx2_fn fn)
+                       opivx2_fn fn, uint32_t esz)
 {
     uint32_t vm = vext_vm(desc);
     uint32_t vl = env->vl;
+    uint32_t total_elems = vext_get_total_elems(env, desc, esz);
+    uint32_t vta = vext_vta(desc);
     uint32_t i;

     for (i = env->vstart; i < vl; i++) {
@@ -XXX,XX +XXX,XX @@ static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2,
         fn(vd, s1, vs2, i);
     }
     env->vstart = 0;
+    /* set tail elements to 1s */
+    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
 }

 /* generate the helpers for OPIVX */
-#define GEN_VEXT_VX(NAME) \
+#define GEN_VEXT_VX(NAME, ESZ) \
 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
                   void *vs2, CPURISCVState *env, \
                   uint32_t desc) \
 { \
     do_vext_vx(vd, v0, s1, vs2, env, desc, \
-               do_##NAME); \
-}
-
-GEN_VEXT_VX(vadd_vx_b)
-GEN_VEXT_VX(vadd_vx_h)
-GEN_VEXT_VX(vadd_vx_w)
-GEN_VEXT_VX(vadd_vx_d)
-GEN_VEXT_VX(vsub_vx_b)
-GEN_VEXT_VX(vsub_vx_h)
-GEN_VEXT_VX(vsub_vx_w)
-GEN_VEXT_VX(vsub_vx_d)
-GEN_VEXT_VX(vrsub_vx_b)
-GEN_VEXT_VX(vrsub_vx_h)
-GEN_VEXT_VX(vrsub_vx_w)
-GEN_VEXT_VX(vrsub_vx_d)
+               do_##NAME, ESZ); \
+}
+
+GEN_VEXT_VX(vadd_vx_b, 1)
+GEN_VEXT_VX(vadd_vx_h, 2)
+GEN_VEXT_VX(vadd_vx_w, 4)
+GEN_VEXT_VX(vadd_vx_d, 8)
+GEN_VEXT_VX(vsub_vx_b, 1)
+GEN_VEXT_VX(vsub_vx_h, 2)
+GEN_VEXT_VX(vsub_vx_w, 4)
+GEN_VEXT_VX(vsub_vx_d, 8)
+GEN_VEXT_VX(vrsub_vx_b, 1)
+GEN_VEXT_VX(vrsub_vx_h, 2)
+GEN_VEXT_VX(vrsub_vx_w, 4)
+GEN_VEXT_VX(vrsub_vx_d, 8)

 void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc)
 {
@@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD)
 RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB)
 RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB)
 RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB)
-GEN_VEXT_VX(vwaddu_vx_b)
-GEN_VEXT_VX(vwaddu_vx_h)
-GEN_VEXT_VX(vwaddu_vx_w)
-GEN_VEXT_VX(vwsubu_vx_b)
-GEN_VEXT_VX(vwsubu_vx_h)
-GEN_VEXT_VX(vwsubu_vx_w)
-GEN_VEXT_VX(vwadd_vx_b)
-GEN_VEXT_VX(vwadd_vx_h)
-GEN_VEXT_VX(vwadd_vx_w)
-GEN_VEXT_VX(vwsub_vx_b)
-GEN_VEXT_VX(vwsub_vx_h)
-GEN_VEXT_VX(vwsub_vx_w)
-GEN_VEXT_VX(vwaddu_wx_b)
-GEN_VEXT_VX(vwaddu_wx_h)
-GEN_VEXT_VX(vwaddu_wx_w)
-GEN_VEXT_VX(vwsubu_wx_b)
-GEN_VEXT_VX(vwsubu_wx_h)
-GEN_VEXT_VX(vwsubu_wx_w)
-GEN_VEXT_VX(vwadd_wx_b)
-GEN_VEXT_VX(vwadd_wx_h)
-GEN_VEXT_VX(vwadd_wx_w)
-GEN_VEXT_VX(vwsub_wx_b)
-GEN_VEXT_VX(vwsub_wx_h)
-GEN_VEXT_VX(vwsub_wx_w)
+GEN_VEXT_VX(vwaddu_vx_b, 2)
+GEN_VEXT_VX(vwaddu_vx_h, 4)
+GEN_VEXT_VX(vwaddu_vx_w, 8)
+GEN_VEXT_VX(vwsubu_vx_b, 2)
+GEN_VEXT_VX(vwsubu_vx_h, 4)
+GEN_VEXT_VX(vwsubu_vx_w, 8)
+GEN_VEXT_VX(vwadd_vx_b, 2)
+GEN_VEXT_VX(vwadd_vx_h, 4)
+GEN_VEXT_VX(vwadd_vx_w, 8)
+GEN_VEXT_VX(vwsub_vx_b, 2)
+GEN_VEXT_VX(vwsub_vx_h, 4)
+GEN_VEXT_VX(vwsub_vx_w, 8)
+GEN_VEXT_VX(vwaddu_wx_b, 2)
+GEN_VEXT_VX(vwaddu_wx_h, 4)
+GEN_VEXT_VX(vwaddu_wx_w, 8)
+GEN_VEXT_VX(vwsubu_wx_b, 2)
+GEN_VEXT_VX(vwsubu_wx_h, 4)
+GEN_VEXT_VX(vwsubu_wx_w, 8)
+GEN_VEXT_VX(vwadd_wx_b, 2)
+GEN_VEXT_VX(vwadd_wx_h, 4)
+GEN_VEXT_VX(vwadd_wx_w, 8)
+GEN_VEXT_VX(vwsub_wx_b, 2)
+GEN_VEXT_VX(vwsub_wx_h, 4)
+GEN_VEXT_VX(vwsub_wx_w, 8)

 /* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */
 #define DO_VADC(N, M, C) (N + M + C)
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
                   CPURISCVState *env, uint32_t desc) \
 { \
     uint32_t vl = env->vl; \
+    uint32_t esz = sizeof(ETYPE); \
+    uint32_t total_elems = \
+        vext_get_total_elems(env, desc, esz); \
+    uint32_t vta = vext_vta(desc); \
     uint32_t i; \
 \
     for (i = env->vstart; i < vl; i++) { \
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
         *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry); \
     } \
     env->vstart = 0; \
+    /* set tail elements to 1s */ \
+    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
 }

 GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t, H1, DO_VADC)
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
                   CPURISCVState *env, uint32_t desc) \
 { \
     uint32_t vl = env->vl; \
+    uint32_t esz = sizeof(ETYPE); \
+    uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
+    uint32_t vta = vext_vta(desc); \
     uint32_t i; \
 \
     for (i = env->vstart; i < vl; i++) { \
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
         *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry);\
     } \
     env->vstart = 0; \
+    /* set tail elements to 1s */ \
+    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
 }

 GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t, H1, DO_VADC)
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
 { \
     uint32_t vl = env->vl; \
     uint32_t vm = vext_vm(desc); \
+    uint32_t total_elems = env_archcpu(env)->cfg.vlen; \
+    uint32_t vta_all_1s = vext_vta_all_1s(desc); \
     uint32_t i; \
 \
     for (i = env->vstart; i < vl; i++) { \
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
         vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry)); \
     } \
     env->vstart = 0; \
+    /* mask destination register are always tail-agnostic */ \
+    /* set tail elements to 1s */ \
+    if (vta_all_1s) { \
+        for (; i < total_elems; i++) { \
+            vext_set_elem_mask(vd, i, 1); \
+        } \
+    } \
 }

 GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t, H1, DO_MADC)
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
 { \
     uint32_t vl = env->vl; \
     uint32_t vm = vext_vm(desc); \
+    uint32_t total_elems = env_archcpu(env)->cfg.vlen; \
+    uint32_t vta_all_1s = vext_vta_all_1s(desc); \
     uint32_t i; \
 \
     for (i = env->vstart; i < vl; i++) { \
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
                           DO_OP(s2, (ETYPE)(target_long)s1, carry)); \
     } \
     env->vstart = 0; \
+    /* mask destination register are always tail-agnostic */ \
+    /* set tail elements to 1s */ \
+    if (vta_all_1s) { \
+        for (; i < total_elems; i++) { \
+            vext_set_elem_mask(vd, i, 1); \
+        } \
+    } \
 }

 GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t, H1, DO_MADC)
@@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR)
 RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR)
 RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR)
 RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR)
-GEN_VEXT_VX(vand_vx_b)
-GEN_VEXT_VX(vand_vx_h)
-GEN_VEXT_VX(vand_vx_w)
-GEN_VEXT_VX(vand_vx_d)
-GEN_VEXT_VX(vor_vx_b)
-GEN_VEXT_VX(vor_vx_h)
-GEN_VEXT_VX(vor_vx_w)
-GEN_VEXT_VX(vor_vx_d)
-GEN_VEXT_VX(vxor_vx_b)
-GEN_VEXT_VX(vxor_vx_h)
-GEN_VEXT_VX(vxor_vx_w)
-GEN_VEXT_VX(vxor_vx_d)
+GEN_VEXT_VX(vand_vx_b, 1)
+GEN_VEXT_VX(vand_vx_h, 2)
+GEN_VEXT_VX(vand_vx_w, 4)
+GEN_VEXT_VX(vand_vx_d, 8)
+GEN_VEXT_VX(vor_vx_b, 1)
+GEN_VEXT_VX(vor_vx_h, 2)
+GEN_VEXT_VX(vor_vx_w, 4)
+GEN_VEXT_VX(vor_vx_d, 8)
+GEN_VEXT_VX(vxor_vx_b, 1)
+GEN_VEXT_VX(vxor_vx_h, 2)
+GEN_VEXT_VX(vxor_vx_w, 4)
+GEN_VEXT_VX(vxor_vx_d, 8)

 /* Vector Single-Width Bit Shift Instructions */
 #define DO_SLL(N, M) (N << (M))
@@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX)
 RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX)
 RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX)
 RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX)
-GEN_VEXT_VX(vminu_vx_b)
-GEN_VEXT_VX(vminu_vx_h)
-GEN_VEXT_VX(vminu_vx_w)
-GEN_VEXT_VX(vminu_vx_d)
-GEN_VEXT_VX(vmin_vx_b)
-GEN_VEXT_VX(vmin_vx_h)
-GEN_VEXT_VX(vmin_vx_w)
-GEN_VEXT_VX(vmin_vx_d)
-GEN_VEXT_VX(vmaxu_vx_b)
-GEN_VEXT_VX(vmaxu_vx_h)
-GEN_VEXT_VX(vmaxu_vx_w)
-GEN_VEXT_VX(vmaxu_vx_d)
-GEN_VEXT_VX(vmax_vx_b)
-GEN_VEXT_VX(vmax_vx_h)
-GEN_VEXT_VX(vmax_vx_w)
-GEN_VEXT_VX(vmax_vx_d)
+GEN_VEXT_VX(vminu_vx_b, 1)
+GEN_VEXT_VX(vminu_vx_h, 2)
+GEN_VEXT_VX(vminu_vx_w, 4)
+GEN_VEXT_VX(vminu_vx_d, 8)
+GEN_VEXT_VX(vmin_vx_b, 1)
+GEN_VEXT_VX(vmin_vx_h, 2)
+GEN_VEXT_VX(vmin_vx_w, 4)
+GEN_VEXT_VX(vmin_vx_d, 8)
+GEN_VEXT_VX(vmaxu_vx_b, 1)
+GEN_VEXT_VX(vmaxu_vx_h, 2)
+GEN_VEXT_VX(vmaxu_vx_w, 4)
+GEN_VEXT_VX(vmaxu_vx_d, 8)
+GEN_VEXT_VX(vmax_vx_b, 1)
+GEN_VEXT_VX(vmax_vx_h, 2)
+GEN_VEXT_VX(vmax_vx_w, 4)
+GEN_VEXT_VX(vmax_vx_d, 8)

 /* Vector Single-Width Integer Multiply Instructions */
 #define DO_MUL(N, M) (N * M)
@@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b)
 RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h)
 RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w)
 RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d)
-GEN_VEXT_VX(vmul_vx_b)
-GEN_VEXT_VX(vmul_vx_h)
-GEN_VEXT_VX(vmul_vx_w)
-GEN_VEXT_VX(vmul_vx_d)
-GEN_VEXT_VX(vmulh_vx_b)
-GEN_VEXT_VX(vmulh_vx_h)
-GEN_VEXT_VX(vmulh_vx_w)
-GEN_VEXT_VX(vmulh_vx_d)
-GEN_VEXT_VX(vmulhu_vx_b)
-GEN_VEXT_VX(vmulhu_vx_h)
-GEN_VEXT_VX(vmulhu_vx_w)
-GEN_VEXT_VX(vmulhu_vx_d)
-GEN_VEXT_VX(vmulhsu_vx_b)
-GEN_VEXT_VX(vmulhsu_vx_h)
-GEN_VEXT_VX(vmulhsu_vx_w)
-GEN_VEXT_VX(vmulhsu_vx_d)
+GEN_VEXT_VX(vmul_vx_b, 1)
+GEN_VEXT_VX(vmul_vx_h, 2)
+GEN_VEXT_VX(vmul_vx_w, 4)
+GEN_VEXT_VX(vmul_vx_d, 8)
+GEN_VEXT_VX(vmulh_vx_b, 1)
+GEN_VEXT_VX(vmulh_vx_h, 2)
+GEN_VEXT_VX(vmulh_vx_w, 4)
+GEN_VEXT_VX(vmulh_vx_d, 8)
+GEN_VEXT_VX(vmulhu_vx_b, 1)
+GEN_VEXT_VX(vmulhu_vx_h, 2)
+GEN_VEXT_VX(vmulhu_vx_w, 4)
+GEN_VEXT_VX(vmulhu_vx_d, 8)
+GEN_VEXT_VX(vmulhsu_vx_b, 1)
+GEN_VEXT_VX(vmulhsu_vx_h, 2)
+GEN_VEXT_VX(vmulhsu_vx_w, 4)
+GEN_VEXT_VX(vmulhsu_vx_d, 8)

 /* Vector Integer Divide Instructions */
 #define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M)
@@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM)
 RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM)
 RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM)
 RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM)
-GEN_VEXT_VX(vdivu_vx_b)
-GEN_VEXT_VX(vdivu_vx_h)
-GEN_VEXT_VX(vdivu_vx_w)
-GEN_VEXT_VX(vdivu_vx_d)
-GEN_VEXT_VX(vdiv_vx_b)
-GEN_VEXT_VX(vdiv_vx_h)
-GEN_VEXT_VX(vdiv_vx_w)
-GEN_VEXT_VX(vdiv_vx_d)
-GEN_VEXT_VX(vremu_vx_b)
-GEN_VEXT_VX(vremu_vx_h)
-GEN_VEXT_VX(vremu_vx_w)
-GEN_VEXT_VX(vremu_vx_d)
-GEN_VEXT_VX(vrem_vx_b)
-GEN_VEXT_VX(vrem_vx_h)
-GEN_VEXT_VX(vrem_vx_w)
-GEN_VEXT_VX(vrem_vx_d)
+GEN_VEXT_VX(vdivu_vx_b, 1)
+GEN_VEXT_VX(vdivu_vx_h, 2)
+GEN_VEXT_VX(vdivu_vx_w, 4)
+GEN_VEXT_VX(vdivu_vx_d, 8)
+GEN_VEXT_VX(vdiv_vx_b, 1)
+GEN_VEXT_VX(vdiv_vx_h, 2)
+GEN_VEXT_VX(vdiv_vx_w, 4)
+GEN_VEXT_VX(vdiv_vx_d, 8)
+GEN_VEXT_VX(vremu_vx_b, 1)
+GEN_VEXT_VX(vremu_vx_h, 2)
+GEN_VEXT_VX(vremu_vx_w, 4)
+GEN_VEXT_VX(vremu_vx_d, 8)
+GEN_VEXT_VX(vrem_vx_b, 1)
+GEN_VEXT_VX(vrem_vx_h, 2)
+GEN_VEXT_VX(vrem_vx_w, 4)
+GEN_VEXT_VX(vrem_vx_d, 8)

 /* Vector Widening Integer Multiply Instructions */
 RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL)
@@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL)
 RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL)
 RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL)
 RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL)
-GEN_VEXT_VX(vwmul_vx_b)
-GEN_VEXT_VX(vwmul_vx_h)
-GEN_VEXT_VX(vwmul_vx_w)
-GEN_VEXT_VX(vwmulu_vx_b)
-GEN_VEXT_VX(vwmulu_vx_h)
-GEN_VEXT_VX(vwmulu_vx_w)
-GEN_VEXT_VX(vwmulsu_vx_b)
-GEN_VEXT_VX(vwmulsu_vx_h)
-GEN_VEXT_VX(vwmulsu_vx_w)
+GEN_VEXT_VX(vwmul_vx_b, 2)
+GEN_VEXT_VX(vwmul_vx_h, 4)
+GEN_VEXT_VX(vwmul_vx_w, 8)
+GEN_VEXT_VX(vwmulu_vx_b, 2)
+GEN_VEXT_VX(vwmulu_vx_h, 4)
+GEN_VEXT_VX(vwmulu_vx_w, 8)
+GEN_VEXT_VX(vwmulsu_vx_b, 2)
+GEN_VEXT_VX(vwmulsu_vx_h, 4)
+GEN_VEXT_VX(vwmulsu_vx_w, 8)

 /* Vector Single-Width Integer Multiply-Add Instructions */
 #define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
@@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB)
 RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB)
 RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB)
 RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB)
-GEN_VEXT_VX(vmacc_vx_b)
-GEN_VEXT_VX(vmacc_vx_h)
-GEN_VEXT_VX(vmacc_vx_w)
-GEN_VEXT_VX(vmacc_vx_d)
-GEN_VEXT_VX(vnmsac_vx_b)
-GEN_VEXT_VX(vnmsac_vx_h)
-GEN_VEXT_VX(vnmsac_vx_w)
-GEN_VEXT_VX(vnmsac_vx_d)
-GEN_VEXT_VX(vmadd_vx_b)
-GEN_VEXT_VX(vmadd_vx_h)
-GEN_VEXT_VX(vmadd_vx_w)
-GEN_VEXT_VX(vmadd_vx_d)
-GEN_VEXT_VX(vnmsub_vx_b)
-GEN_VEXT_VX(vnmsub_vx_h)
-GEN_VEXT_VX(vnmsub_vx_w)
-GEN_VEXT_VX(vnmsub_vx_d)
+GEN_VEXT_VX(vmacc_vx_b, 1)
+GEN_VEXT_VX(vmacc_vx_h, 2)
+GEN_VEXT_VX(vmacc_vx_w, 4)
+GEN_VEXT_VX(vmacc_vx_d, 8)
+GEN_VEXT_VX(vnmsac_vx_b, 1)
+GEN_VEXT_VX(vnmsac_vx_h, 2)
+GEN_VEXT_VX(vnmsac_vx_w, 4)
+GEN_VEXT_VX(vnmsac_vx_d, 8)
+GEN_VEXT_VX(vmadd_vx_b, 1)
+GEN_VEXT_VX(vmadd_vx_h, 2)
+GEN_VEXT_VX(vmadd_vx_w, 4)
+GEN_VEXT_VX(vmadd_vx_d, 8)
+GEN_VEXT_VX(vnmsub_vx_b, 1)
+GEN_VEXT_VX(vnmsub_vx_h, 2)
+GEN_VEXT_VX(vnmsub_vx_w, 4)
+GEN_VEXT_VX(vnmsub_vx_d, 8)

 /* Vector Widening Integer Multiply-Add Instructions */
 RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC)
@@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC)
 RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC)
467
RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC)
468
RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC)
469
-GEN_VEXT_VX(vwmaccu_vx_b)
470
-GEN_VEXT_VX(vwmaccu_vx_h)
471
-GEN_VEXT_VX(vwmaccu_vx_w)
472
-GEN_VEXT_VX(vwmacc_vx_b)
473
-GEN_VEXT_VX(vwmacc_vx_h)
474
-GEN_VEXT_VX(vwmacc_vx_w)
475
-GEN_VEXT_VX(vwmaccsu_vx_b)
476
-GEN_VEXT_VX(vwmaccsu_vx_h)
477
-GEN_VEXT_VX(vwmaccsu_vx_w)
478
-GEN_VEXT_VX(vwmaccus_vx_b)
479
-GEN_VEXT_VX(vwmaccus_vx_h)
480
-GEN_VEXT_VX(vwmaccus_vx_w)
481
+GEN_VEXT_VX(vwmaccu_vx_b, 2)
482
+GEN_VEXT_VX(vwmaccu_vx_h, 4)
483
+GEN_VEXT_VX(vwmaccu_vx_w, 8)
484
+GEN_VEXT_VX(vwmacc_vx_b, 2)
485
+GEN_VEXT_VX(vwmacc_vx_h, 4)
486
+GEN_VEXT_VX(vwmacc_vx_w, 8)
487
+GEN_VEXT_VX(vwmaccsu_vx_b, 2)
488
+GEN_VEXT_VX(vwmaccsu_vx_h, 4)
489
+GEN_VEXT_VX(vwmaccsu_vx_w, 8)
490
+GEN_VEXT_VX(vwmaccus_vx_b, 2)
491
+GEN_VEXT_VX(vwmaccus_vx_h, 4)
492
+GEN_VEXT_VX(vwmaccus_vx_w, 8)
493
494
/* Vector Integer Merge and Move Instructions */
495
#define GEN_VEXT_VMV_VV(NAME, ETYPE, H) \
496
diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc
497
index XXXXXXX..XXXXXXX 100644
125
index XXXXXXX..XXXXXXX 100644
498
--- a/target/riscv/insn_trans/trans_rvv.c.inc
126
--- a/hw/intc/riscv_imsic.c
499
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
127
+++ b/hw/intc/riscv_imsic.c
500
@@ -XXX,XX +XXX,XX @@ static bool opivx_trans(uint32_t vd, uint32_t rs1, uint32_t vs2, uint32_t vm,
128
@@ -XXX,XX +XXX,XX @@
501
129
#include "target/riscv/cpu.h"
502
data = FIELD_DP32(data, VDATA, VM, vm);
130
#include "target/riscv/cpu_bits.h"
503
data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
131
#include "sysemu/sysemu.h"
504
+ data = FIELD_DP32(data, VDATA, VTA, s->vta);
132
+#include "sysemu/kvm.h"
505
+ data = FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s);
133
#include "migration/vmstate.h"
506
desc = tcg_constant_i32(simd_desc(s->cfg_ptr->vlen / 8,
134
507
s->cfg_ptr->vlen / 8, data));
135
#define IMSIC_MMIO_PAGE_LE 0x00
508
136
@@ -XXX,XX +XXX,XX @@ static void riscv_imsic_write(void *opaque, hwaddr addr, uint64_t value,
509
@@ -XXX,XX +XXX,XX @@ do_opivx_gvec(DisasContext *s, arg_rmrr *a, GVecGen2sFn *gvec_fn,
137
goto err;
510
return false;
511
}
138
}
512
139
513
- if (a->vm && s->vl_eq_vlmax) {
140
+#if defined(CONFIG_KVM)
514
+ if (a->vm && s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) {
141
+ if (kvm_irqchip_in_kernel()) {
515
TCGv_i64 src1 = tcg_temp_new_i64();
142
+ struct kvm_msi msi;
516
143
+
517
tcg_gen_ext_tl_i64(src1, get_gpr(s, a->rs1, EXT_SIGN));
144
+ msi.address_lo = extract64(imsic->mmio.addr + addr, 0, 32);
518
@@ -XXX,XX +XXX,XX @@ static bool opivi_trans(uint32_t vd, uint32_t imm, uint32_t vs2, uint32_t vm,
145
+ msi.address_hi = extract64(imsic->mmio.addr + addr, 32, 32);
519
146
+ msi.data = le32_to_cpu(value);
520
data = FIELD_DP32(data, VDATA, VM, vm);
147
+
521
data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
148
+ kvm_vm_ioctl(kvm_state, KVM_SIGNAL_MSI, &msi);
522
+ data = FIELD_DP32(data, VDATA, VTA, s->vta);
149
+
523
+ data = FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s);
150
+ return;
524
desc = tcg_constant_i32(simd_desc(s->cfg_ptr->vlen / 8,
151
+ }
525
s->cfg_ptr->vlen / 8, data));
152
+#endif
526
153
+
527
@@ -XXX,XX +XXX,XX @@ do_opivi_gvec(DisasContext *s, arg_rmrr *a, GVecGen2iFn *gvec_fn,
154
/* Writes only supported for MSI little-endian registers */
528
return false;
155
page = addr >> IMSIC_MMIO_PAGE_SHIFT;
529
}
156
if ((addr & (IMSIC_MMIO_PAGE_SZ - 1)) == IMSIC_MMIO_PAGE_LE) {
530
157
@@ -XXX,XX +XXX,XX @@ static void riscv_imsic_realize(DeviceState *dev, Error **errp)
531
- if (a->vm && s->vl_eq_vlmax) {
158
CPUState *cpu = cpu_by_arch_id(imsic->hartid);
532
+ if (a->vm && s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) {
159
CPURISCVState *env = cpu ? cpu->env_ptr : NULL;
533
gvec_fn(s->sew, vreg_ofs(s, a->rd), vreg_ofs(s, a->rs2),
160
534
extract_imm(s, a->rs1, imm_mode), MAXSZ(s), MAXSZ(s));
161
- imsic->num_eistate = imsic->num_pages * imsic->num_irqs;
535
mark_vs_dirty(s);
162
- imsic->eidelivery = g_new0(uint32_t, imsic->num_pages);
536
@@ -XXX,XX +XXX,XX @@ static bool do_opivv_widen(DisasContext *s, arg_rmrr *a,
163
- imsic->eithreshold = g_new0(uint32_t, imsic->num_pages);
537
164
- imsic->eistate = g_new0(uint32_t, imsic->num_eistate);
538
data = FIELD_DP32(data, VDATA, VM, a->vm);
165
+ if (!kvm_irqchip_in_kernel()) {
539
data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
166
+ imsic->num_eistate = imsic->num_pages * imsic->num_irqs;
540
+ data = FIELD_DP32(data, VDATA, VTA, s->vta);
167
+ imsic->eidelivery = g_new0(uint32_t, imsic->num_pages);
541
tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),
168
+ imsic->eithreshold = g_new0(uint32_t, imsic->num_pages);
542
vreg_ofs(s, a->rs1),
169
+ imsic->eistate = g_new0(uint32_t, imsic->num_eistate);
543
vreg_ofs(s, a->rs2),
170
+ }
544
@@ -XXX,XX +XXX,XX @@ static bool do_opiwv_widen(DisasContext *s, arg_rmrr *a,
171
545
172
memory_region_init_io(&imsic->mmio, OBJECT(dev), &riscv_imsic_ops,
546
data = FIELD_DP32(data, VDATA, VM, a->vm);
173
imsic, TYPE_RISCV_IMSIC,
547
data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
548
+ data = FIELD_DP32(data, VDATA, VTA, s->vta);
549
tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),
550
vreg_ofs(s, a->rs1),
551
vreg_ofs(s, a->rs2),
552
@@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
553
\
554
data = FIELD_DP32(data, VDATA, VM, a->vm); \
555
data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \
556
+ data = FIELD_DP32(data, VDATA, VTA, s->vta); \
557
+ data = \
558
+ FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s);\
559
tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \
560
vreg_ofs(s, a->rs1), \
561
vreg_ofs(s, a->rs2), cpu_env, \
562
--
174
--
563
2.36.1
175
2.41.0
From: Atish Patra <atishp@rivosinc.com>

The fw_cfg DT node is generated after create_fdt() without checking
whether the DT is being loaded from the command line. This results in
an FDT_ERR_EXISTS error if a dtb is loaded from the command line.

Generate the fw_cfg node only if the DT is not loaded from the
command line.

Signed-off-by: Atish Patra <atishp@rivosinc.com>
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
Message-Id: <20220526203500.847165-1-atishp@rivosinc.com>
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
---
 hw/riscv/virt.c | 28 ++++++++++++++++++----------
 1 file changed, 18 insertions(+), 10 deletions(-)

diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/riscv/virt.c
+++ b/hw/riscv/virt.c
@@ -XXX,XX +XXX,XX @@ static void create_fdt_flash(RISCVVirtState *s, const MemMapEntry *memmap)
     g_free(name);
 }
 
+static void create_fdt_fw_cfg(RISCVVirtState *s, const MemMapEntry *memmap)
+{
+    char *nodename;
+    MachineState *mc = MACHINE(s);
+    hwaddr base = memmap[VIRT_FW_CFG].base;
+    hwaddr size = memmap[VIRT_FW_CFG].size;
+
+    nodename = g_strdup_printf("/fw-cfg@%" PRIx64, base);
+    qemu_fdt_add_subnode(mc->fdt, nodename);
+    qemu_fdt_setprop_string(mc->fdt, nodename,
+                            "compatible", "qemu,fw-cfg-mmio");
+    qemu_fdt_setprop_sized_cells(mc->fdt, nodename, "reg",
+                                 2, base, 2, size);
+    qemu_fdt_setprop(mc->fdt, nodename, "dma-coherent", NULL, 0);
+    g_free(nodename);
+}
+
 static void create_fdt(RISCVVirtState *s, const MemMapEntry *memmap,
                        uint64_t mem_size, const char *cmdline, bool is_32_bit)
 {
@@ -XXX,XX +XXX,XX @@ static void create_fdt(RISCVVirtState *s, const MemMapEntry *memmap,
     create_fdt_rtc(s, memmap, irq_mmio_phandle);
 
     create_fdt_flash(s, memmap);
+    create_fdt_fw_cfg(s, memmap);
 
 update_bootargs:
     if (cmdline && *cmdline) {
@@ -XXX,XX +XXX,XX @@ static inline DeviceState *gpex_pcie_init(MemoryRegion *sys_mem,
 static FWCfgState *create_fw_cfg(const MachineState *mc)
 {
     hwaddr base = virt_memmap[VIRT_FW_CFG].base;
-    hwaddr size = virt_memmap[VIRT_FW_CFG].size;
     FWCfgState *fw_cfg;
-    char *nodename;
 
     fw_cfg = fw_cfg_init_mem_wide(base + 8, base, 8, base + 16,
                                   &address_space_memory);
     fw_cfg_add_i16(fw_cfg, FW_CFG_NB_CPUS, (uint16_t)mc->smp.cpus);
 
-    nodename = g_strdup_printf("/fw-cfg@%" PRIx64, base);
-    qemu_fdt_add_subnode(mc->fdt, nodename);
-    qemu_fdt_setprop_string(mc->fdt, nodename,
-                            "compatible", "qemu,fw-cfg-mmio");
-    qemu_fdt_setprop_sized_cells(mc->fdt, nodename, "reg",
-                                 2, base, 2, size);
-    qemu_fdt_setprop(mc->fdt, nodename, "dma-coherent", NULL, 0);
-    g_free(nodename);
     return fw_cfg;
 }
 
--
2.36.1

From: Yong-Xuan Wang <yongxuan.wang@sifive.com>

Select KVM AIA when the host kernel has in-kernel AIA chip support.
Since KVM AIA only has one APLIC instance, we map the QEMU APLIC
devices to KVM APLIC.

Signed-off-by: Yong-Xuan Wang <yongxuan.wang@sifive.com>
Reviewed-by: Jim Shu <jim.shu@sifive.com>
Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
Message-ID: <20230727102439.22554-6-yongxuan.wang@sifive.com>
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
---
 hw/riscv/virt.c | 94 +++++++++++++++++++++++++++++++++----------------
 1 file changed, 63 insertions(+), 31 deletions(-)

diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/riscv/virt.c
+++ b/hw/riscv/virt.c
@@ -XXX,XX +XXX,XX @@
 #include "hw/riscv/virt.h"
 #include "hw/riscv/boot.h"
 #include "hw/riscv/numa.h"
+#include "kvm_riscv.h"
 #include "hw/intc/riscv_aclint.h"
 #include "hw/intc/riscv_aplic.h"
 #include "hw/intc/riscv_imsic.h"
@@ -XXX,XX +XXX,XX @@
 #error "Can't accommodate all IMSIC groups in address space"
 #endif
 
+/* KVM AIA only supports APLIC MSI. APLIC Wired is always emulated by QEMU. */
+static bool virt_use_kvm_aia(RISCVVirtState *s)
+{
+    return kvm_irqchip_in_kernel() && s->aia_type == VIRT_AIA_TYPE_APLIC_IMSIC;
+}
+
 static const MemMapEntry virt_memmap[] = {
     [VIRT_DEBUG] = { 0x0, 0x100 },
     [VIRT_MROM] = { 0x1000, 0xf000 },
@@ -XXX,XX +XXX,XX @@ static void create_fdt_one_aplic(RISCVVirtState *s, int socket,
                                  uint32_t *intc_phandles,
                                  uint32_t aplic_phandle,
                                  uint32_t aplic_child_phandle,
-                                 bool m_mode)
+                                 bool m_mode, int num_harts)
 {
     int cpu;
     char *aplic_name;
     uint32_t *aplic_cells;
     MachineState *ms = MACHINE(s);
 
-    aplic_cells = g_new0(uint32_t, s->soc[socket].num_harts * 2);
+    aplic_cells = g_new0(uint32_t, num_harts * 2);
 
-    for (cpu = 0; cpu < s->soc[socket].num_harts; cpu++) {
+    for (cpu = 0; cpu < num_harts; cpu++) {
         aplic_cells[cpu * 2 + 0] = cpu_to_be32(intc_phandles[cpu]);
         aplic_cells[cpu * 2 + 1] = cpu_to_be32(m_mode ? IRQ_M_EXT : IRQ_S_EXT);
     }
@@ -XXX,XX +XXX,XX @@ static void create_fdt_one_aplic(RISCVVirtState *s, int socket,
 
     if (s->aia_type == VIRT_AIA_TYPE_APLIC) {
         qemu_fdt_setprop(ms->fdt, aplic_name, "interrupts-extended",
-                         aplic_cells,
-                         s->soc[socket].num_harts * sizeof(uint32_t) * 2);
+                         aplic_cells, num_harts * sizeof(uint32_t) * 2);
     } else {
         qemu_fdt_setprop_cell(ms->fdt, aplic_name, "msi-parent", msi_phandle);
     }
@@ -XXX,XX +XXX,XX @@ static void create_fdt_socket_aplic(RISCVVirtState *s,
                                     uint32_t msi_s_phandle,
                                     uint32_t *phandle,
                                     uint32_t *intc_phandles,
-                                    uint32_t *aplic_phandles)
+                                    uint32_t *aplic_phandles,
+                                    int num_harts)
 {
     char *aplic_name;
     unsigned long aplic_addr;
@@ -XXX,XX +XXX,XX @@ static void create_fdt_socket_aplic(RISCVVirtState *s,
     create_fdt_one_aplic(s, socket, aplic_addr, memmap[VIRT_APLIC_M].size,
                          msi_m_phandle, intc_phandles,
                          aplic_m_phandle, aplic_s_phandle,
-                         true);
+                         true, num_harts);
 }
 
 /* S-level APLIC node */
@@ -XXX,XX +XXX,XX @@ static void create_fdt_socket_aplic(RISCVVirtState *s,
     create_fdt_one_aplic(s, socket, aplic_addr, memmap[VIRT_APLIC_S].size,
                          msi_s_phandle, intc_phandles,
                          aplic_s_phandle, 0,
-                         false);
+                         false, num_harts);
 
     aplic_name = g_strdup_printf("/soc/aplic@%lx", aplic_addr);
 
@@ -XXX,XX +XXX,XX @@ static void create_fdt_sockets(RISCVVirtState *s, const MemMapEntry *memmap,
         *msi_pcie_phandle = msi_s_phandle;
     }
 
-    phandle_pos = ms->smp.cpus;
-    for (socket = (socket_count - 1); socket >= 0; socket--) {
-        phandle_pos -= s->soc[socket].num_harts;
-
-        if (s->aia_type == VIRT_AIA_TYPE_NONE) {
-            create_fdt_socket_plic(s, memmap, socket, phandle,
-                                   &intc_phandles[phandle_pos], xplic_phandles);
-        } else {
-            create_fdt_socket_aplic(s, memmap, socket,
-                                    msi_m_phandle, msi_s_phandle, phandle,
-                                    &intc_phandles[phandle_pos], xplic_phandles);
+    /* KVM AIA only has one APLIC instance */
+    if (virt_use_kvm_aia(s)) {
+        create_fdt_socket_aplic(s, memmap, 0,
+                                msi_m_phandle, msi_s_phandle, phandle,
+                                &intc_phandles[0], xplic_phandles,
+                                ms->smp.cpus);
+    } else {
+        phandle_pos = ms->smp.cpus;
+        for (socket = (socket_count - 1); socket >= 0; socket--) {
+            phandle_pos -= s->soc[socket].num_harts;
+
+            if (s->aia_type == VIRT_AIA_TYPE_NONE) {
+                create_fdt_socket_plic(s, memmap, socket, phandle,
+                                       &intc_phandles[phandle_pos],
+                                       xplic_phandles);
+            } else {
+                create_fdt_socket_aplic(s, memmap, socket,
+                                        msi_m_phandle, msi_s_phandle, phandle,
+                                        &intc_phandles[phandle_pos],
+                                        xplic_phandles,
+                                        s->soc[socket].num_harts);
+            }
         }
     }
 
     g_free(intc_phandles);
 
-    for (socket = 0; socket < socket_count; socket++) {
-        if (socket == 0) {
-            *irq_mmio_phandle = xplic_phandles[socket];
-            *irq_virtio_phandle = xplic_phandles[socket];
-            *irq_pcie_phandle = xplic_phandles[socket];
-        }
-        if (socket == 1) {
-            *irq_virtio_phandle = xplic_phandles[socket];
-            *irq_pcie_phandle = xplic_phandles[socket];
-        }
-        if (socket == 2) {
-            *irq_pcie_phandle = xplic_phandles[socket];
+    if (virt_use_kvm_aia(s)) {
+        *irq_mmio_phandle = xplic_phandles[0];
+        *irq_virtio_phandle = xplic_phandles[0];
+        *irq_pcie_phandle = xplic_phandles[0];
+    } else {
+        for (socket = 0; socket < socket_count; socket++) {
+            if (socket == 0) {
+                *irq_mmio_phandle = xplic_phandles[socket];
+                *irq_virtio_phandle = xplic_phandles[socket];
+                *irq_pcie_phandle = xplic_phandles[socket];
+            }
+            if (socket == 1) {
+                *irq_virtio_phandle = xplic_phandles[socket];
+                *irq_pcie_phandle = xplic_phandles[socket];
+            }
+            if (socket == 2) {
+                *irq_pcie_phandle = xplic_phandles[socket];
+            }
         }
     }
 
@@ -XXX,XX +XXX,XX @@ static void virt_machine_init(MachineState *machine)
         }
     }
 
+    if (virt_use_kvm_aia(s)) {
+        kvm_riscv_aia_create(machine, IMSIC_MMIO_GROUP_MIN_SHIFT,
+                             VIRT_IRQCHIP_NUM_SOURCES, VIRT_IRQCHIP_NUM_MSIS,
+                             memmap[VIRT_APLIC_S].base,
+                             memmap[VIRT_IMSIC_S].base,
+                             s->aia_guests);
+    }
+
     if (riscv_is_32bit(&s->soc[0])) {
 #if HOST_LONG_BITS == 64
         /* limit RAM size in a 32-bit system */
--
2.41.0
From: Conor Dooley <conor.dooley@microchip.com>

On a dtb dumped from the virt machine, dt-validate complains:
soc: pmu: {'riscv,event-to-mhpmcounters': [[1, 1, 524281], [2, 2, 524284], [65561, 65561, 524280], [65563, 65563, 524280], [65569, 65569, 524280]], 'compatible': ['riscv,pmu']} should not be valid under {'type': 'object'}
from schema $id: http://devicetree.org/schemas/simple-bus.yaml#
That's pretty cryptic, but running the dtb back through dtc produces
something a lot more reasonable:
Warning (simple_bus_reg): /soc/pmu: missing or empty reg/ranges property

Moving the riscv,pmu node out of the soc bus solves the problem.

Signed-off-by: Conor Dooley <conor.dooley@microchip.com>
Acked-by: Alistair Francis <alistair.francis@wdc.com>
Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
Message-ID: <20230727-groom-decline-2c57ce42841c@spud>
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
---
 hw/riscv/virt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/riscv/virt.c
+++ b/hw/riscv/virt.c
@@ -XXX,XX +XXX,XX @@ static void create_fdt_pmu(RISCVVirtState *s)
     MachineState *ms = MACHINE(s);
     RISCVCPU hart = s->soc[0].harts[0];
 
-    pmu_name = g_strdup_printf("/soc/pmu");
+    pmu_name = g_strdup_printf("/pmu");
     qemu_fdt_add_subnode(ms->fdt, pmu_name);
     qemu_fdt_setprop_string(ms->fdt, pmu_name, "compatible", "riscv,pmu");
     riscv_pmu_generate_fdt_node(ms->fdt, hart.cfg.pmu_num, pmu_name);
--
2.41.0
From: Weiwei Li <liweiwei@iscas.ac.cn>

The Svadu specification updated the name of the *envcfg bit from
HADE to ADUE.

Signed-off-by: Weiwei Li <liweiwei@iscas.ac.cn>
Signed-off-by: Junqiang Wang <wangjunqiang@iscas.ac.cn>
Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
Message-ID: <20230816141916.66898-1-liweiwei@iscas.ac.cn>
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
---
 target/riscv/cpu_bits.h   |  8 ++++----
 target/riscv/cpu.c        |  4 ++--
 target/riscv/cpu_helper.c |  6 +++---
 target/riscv/csr.c        | 12 ++++++------
 4 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/target/riscv/cpu_bits.h b/target/riscv/cpu_bits.h
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/cpu_bits.h
+++ b/target/riscv/cpu_bits.h
@@ -XXX,XX +XXX,XX @@ typedef enum RISCVException {
 #define MENVCFG_CBIE (3UL << 4)
 #define MENVCFG_CBCFE BIT(6)
 #define MENVCFG_CBZE BIT(7)
-#define MENVCFG_HADE (1ULL << 61)
+#define MENVCFG_ADUE (1ULL << 61)
 #define MENVCFG_PBMTE (1ULL << 62)
 #define MENVCFG_STCE (1ULL << 63)
 
 /* For RV32 */
-#define MENVCFGH_HADE BIT(29)
+#define MENVCFGH_ADUE BIT(29)
 #define MENVCFGH_PBMTE BIT(30)
 #define MENVCFGH_STCE BIT(31)
 
@@ -XXX,XX +XXX,XX @@ typedef enum RISCVException {
 #define HENVCFG_CBIE MENVCFG_CBIE
 #define HENVCFG_CBCFE MENVCFG_CBCFE
 #define HENVCFG_CBZE MENVCFG_CBZE
-#define HENVCFG_HADE MENVCFG_HADE
+#define HENVCFG_ADUE MENVCFG_ADUE
 #define HENVCFG_PBMTE MENVCFG_PBMTE
 #define HENVCFG_STCE MENVCFG_STCE
 
 /* For RV32 */
-#define HENVCFGH_HADE MENVCFGH_HADE
+#define HENVCFGH_ADUE MENVCFGH_ADUE
 #define HENVCFGH_PBMTE MENVCFGH_PBMTE
 #define HENVCFGH_STCE MENVCFGH_STCE
 
diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -XXX,XX +XXX,XX @@ static void riscv_cpu_reset_hold(Object *obj)
     env->two_stage_lookup = false;
 
     env->menvcfg = (cpu->cfg.ext_svpbmt ? MENVCFG_PBMTE : 0) |
-                   (cpu->cfg.ext_svadu ? MENVCFG_HADE : 0);
+                   (cpu->cfg.ext_svadu ? MENVCFG_ADUE : 0);
     env->henvcfg = (cpu->cfg.ext_svpbmt ? HENVCFG_PBMTE : 0) |
-                   (cpu->cfg.ext_svadu ? HENVCFG_HADE : 0);
+                   (cpu->cfg.ext_svadu ? HENVCFG_ADUE : 0);
 
     /* Initialized default priorities of local interrupts. */
     for (i = 0; i < ARRAY_SIZE(env->miprio); i++) {
diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/cpu_helper.c
+++ b/target/riscv/cpu_helper.c
@@ -XXX,XX +XXX,XX @@ static int get_physical_address(CPURISCVState *env, hwaddr *physical,
     }
 
     bool pbmte = env->menvcfg & MENVCFG_PBMTE;
-    bool hade = env->menvcfg & MENVCFG_HADE;
+    bool adue = env->menvcfg & MENVCFG_ADUE;
 
     if (first_stage && two_stage && env->virt_enabled) {
         pbmte = pbmte && (env->henvcfg & HENVCFG_PBMTE);
-        hade = hade && (env->henvcfg & HENVCFG_HADE);
+        adue = adue && (env->henvcfg & HENVCFG_ADUE);
     }
 
     int ptshift = (levels - 1) * ptidxbits;
@@ -XXX,XX +XXX,XX @@ restart:
 
         /* Page table updates need to be atomic with MTTCG enabled */
         if (updated_pte != pte && !is_debug) {
-            if (!hade) {
+            if (!adue) {
                 return TRANSLATE_FAIL;
             }
 
diff --git a/target/riscv/csr.c b/target/riscv/csr.c
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/csr.c
+++ b/target/riscv/csr.c
@@ -XXX,XX +XXX,XX @@ static RISCVException write_menvcfg(CPURISCVState *env, int csrno,
     if (riscv_cpu_mxl(env) == MXL_RV64) {
         mask |= (cfg->ext_svpbmt ? MENVCFG_PBMTE : 0) |
                 (cfg->ext_sstc ? MENVCFG_STCE : 0) |
-                (cfg->ext_svadu ? MENVCFG_HADE : 0);
+                (cfg->ext_svadu ? MENVCFG_ADUE : 0);
     }
     env->menvcfg = (env->menvcfg & ~mask) | (val & mask);
 
@@ -XXX,XX +XXX,XX @@ static RISCVException write_menvcfgh(CPURISCVState *env, int csrno,
     const RISCVCPUConfig *cfg = riscv_cpu_cfg(env);
     uint64_t mask = (cfg->ext_svpbmt ? MENVCFG_PBMTE : 0) |
                     (cfg->ext_sstc ? MENVCFG_STCE : 0) |
-                    (cfg->ext_svadu ? MENVCFG_HADE : 0);
+                    (cfg->ext_svadu ? MENVCFG_ADUE : 0);
     uint64_t valh = (uint64_t)val << 32;
 
     env->menvcfg = (env->menvcfg & ~mask) | (valh & mask);
@@ -XXX,XX +XXX,XX @@ static RISCVException read_henvcfg(CPURISCVState *env, int csrno,
      * henvcfg.stce is read_only 0 when menvcfg.stce = 0
      * henvcfg.hade is read_only 0 when menvcfg.hade = 0
      */
-    *val = env->henvcfg & (~(HENVCFG_PBMTE | HENVCFG_STCE | HENVCFG_HADE) |
+    *val = env->henvcfg & (~(HENVCFG_PBMTE | HENVCFG_STCE | HENVCFG_ADUE) |
                            env->menvcfg);
     return RISCV_EXCP_NONE;
 }
@@ -XXX,XX +XXX,XX @@ static RISCVException write_henvcfg(CPURISCVState *env, int csrno,
     }
 
     if (riscv_cpu_mxl(env) == MXL_RV64) {
-        mask |= env->menvcfg & (HENVCFG_PBMTE | HENVCFG_STCE | HENVCFG_HADE);
+        mask |= env->menvcfg & (HENVCFG_PBMTE | HENVCFG_STCE | HENVCFG_ADUE);
     }
 
     env->henvcfg = (env->henvcfg & ~mask) | (val & mask);
@@ -XXX,XX +XXX,XX @@ static RISCVException read_henvcfgh(CPURISCVState *env, int csrno,
         return ret;
     }
 
-    *val = (env->henvcfg & (~(HENVCFG_PBMTE | HENVCFG_STCE | HENVCFG_HADE) |
+    *val = (env->henvcfg & (~(HENVCFG_PBMTE | HENVCFG_STCE | HENVCFG_ADUE) |
                             env->menvcfg)) >> 32;
     return RISCV_EXCP_NONE;
 }
@@ -XXX,XX +XXX,XX @@ static RISCVException write_henvcfgh(CPURISCVState *env, int csrno,
                                      target_ulong val)
 {
     uint64_t mask = env->menvcfg & (HENVCFG_PBMTE | HENVCFG_STCE |
-                                    HENVCFG_HADE);
+                                    HENVCFG_ADUE);
     uint64_t valh = (uint64_t)val << 32;
     RISCVException ret;
 
--
2.41.0

From: Andrew Bresticker <abrestic@rivosinc.com>

Whether or not VSEIP is pending isn't reflected in env->mip and must
instead be determined from hstatus.vgein and hgeip. As a result a
CPU in WFI won't wake on a VSEIP, which violates the WFI behavior as
specified in the privileged ISA. Just use riscv_cpu_all_pending()
instead, which already accounts for VSEIP.

Signed-off-by: Andrew Bresticker <abrestic@rivosinc.com>
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
Message-Id: <20220531210544.181322-1-abrestic@rivosinc.com>
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
---
 target/riscv/cpu.h        | 1 +
 target/riscv/cpu.c        | 2 +-
 target/riscv/cpu_helper.c | 2 +-
 3 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -XXX,XX +XXX,XX @@ int riscv_cpu_gdb_read_register(CPUState *cpu, GByteArray *buf, int reg);
 int riscv_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg);
 int riscv_cpu_hviprio_index2irq(int index, int *out_irq, int *out_rdzero);
 uint8_t riscv_cpu_default_priority(int irq);
+uint64_t riscv_cpu_all_pending(CPURISCVState *env);
 int riscv_cpu_mirq_pending(CPURISCVState *env);
 int riscv_cpu_sirq_pending(CPURISCVState *env);
 int riscv_cpu_vsirq_pending(CPURISCVState *env);
diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -XXX,XX +XXX,XX @@ static bool riscv_cpu_has_work(CPUState *cs)
      * Definition of the WFI instruction requires it to ignore the privilege
      * mode and delegation registers, but respect individual enables
      */
-    return (env->mip & env->mie) != 0;
+    return riscv_cpu_all_pending(env) != 0;
 #else
     return true;
 #endif
 }
 
diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/cpu_helper.c
+++ b/target/riscv/cpu_helper.c
@@ -XXX,XX +XXX,XX @@ static int riscv_cpu_pending_to_irq(CPURISCVState *env,
     return best_irq;
 }
 
-static uint64_t riscv_cpu_all_pending(CPURISCVState *env)
+uint64_t riscv_cpu_all_pending(CPURISCVState *env)
 {
     uint32_t gein = get_field(env->hstatus, HSTATUS_VGEIN);
     uint64_t vsgein = (env->hgeip & (1ULL << gein)) ? MIP_VSEIP : 0;
--
2.36.1
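For illustration, a simplified sketch of why env->mip alone misses VSEIP
(this is not the complete helper; the real riscv_cpu_all_pending() folds
in more state, but the VGEIN/hgeip lines are exactly the patch context
above):

static uint64_t all_pending_sketch(CPURISCVState *env)
{
    uint32_t gein = get_field(env->hstatus, HSTATUS_VGEIN);
    uint64_t vsgein = (env->hgeip & (1ULL << gein)) ? MIP_VSEIP : 0;

    /* VSEIP is synthesized and ORed in; it is never stored in env->mip */
    return env->mip | vsgein;
}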
From: Alistair Francis <alistair.francis@wdc.com>

There are currently two types of RISC-V CPUs:
 - Generic CPUs (base or any) that allow complete customisation
 - "Named" CPUs that match existing hardware

Users can use the base CPUs to customise the extensions that they want, for
example -cpu rv64,v=true.

We originally exposed these as part of the named CPUs as well, but that was
by accident.

Exposing the CPU properties to named CPUs means that we accidentally
enable extensions that don't exist on the CPUs by default. For example
the SiFive E CPU currently supports the zba extension, which is a bug.

This patch instead only exposes the CPU extensions to the generic CPUs.

Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
Reviewed-by: Bin Meng <bmeng.cn@gmail.com>
Message-Id: <20220608061437.314434-1-alistair.francis@opensource.wdc.com>
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
---
 target/riscv/cpu.c | 57 +++++++++++++++++++++++++++++++++---------
 1 file changed, 46 insertions(+), 11 deletions(-)

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -XXX,XX +XXX,XX @@ static const char * const riscv_intr_names[] = {
     "reserved"
 };
 
+static void register_cpu_props(DeviceState *dev);
+
 const char *riscv_cpu_get_trap_name(target_ulong cause, bool async)
 {
     if (async) {
@@ -XXX,XX +XXX,XX @@ static void riscv_any_cpu_init(Object *obj)
     set_misa(env, MXL_RV64, RVI | RVM | RVA | RVF | RVD | RVC | RVU);
 #endif
     set_priv_version(env, PRIV_VERSION_1_12_0);
+    register_cpu_props(DEVICE(obj));
 }
 
 #if defined(TARGET_RISCV64)
@@ -XXX,XX +XXX,XX @@ static void rv64_base_cpu_init(Object *obj)
     CPURISCVState *env = &RISCV_CPU(obj)->env;
     /* We set this in the realise function */
     set_misa(env, MXL_RV64, 0);
+    register_cpu_props(DEVICE(obj));
 }
 
 static void rv64_sifive_u_cpu_init(Object *obj)
@@ -XXX,XX +XXX,XX @@ static void rv64_sifive_u_cpu_init(Object *obj)
 static void rv64_sifive_e_cpu_init(Object *obj)
 {
     CPURISCVState *env = &RISCV_CPU(obj)->env;
+    RISCVCPU *cpu = RISCV_CPU(obj);
+
     set_misa(env, MXL_RV64, RVI | RVM | RVA | RVC | RVU);
     set_priv_version(env, PRIV_VERSION_1_10_0);
-    qdev_prop_set_bit(DEVICE(obj), "mmu", false);
+    cpu->cfg.mmu = false;
 }
 
 static void rv128_base_cpu_init(Object *obj)
@@ -XXX,XX +XXX,XX @@ static void rv128_base_cpu_init(Object *obj)
     CPURISCVState *env = &RISCV_CPU(obj)->env;
     /* We set this in the realise function */
     set_misa(env, MXL_RV128, 0);
+    register_cpu_props(DEVICE(obj));
 }
 #else
 static void rv32_base_cpu_init(Object *obj)
@@ -XXX,XX +XXX,XX @@ static void rv32_base_cpu_init(Object *obj)
     CPURISCVState *env = &RISCV_CPU(obj)->env;
     /* We set this in the realise function */
     set_misa(env, MXL_RV32, 0);
+    register_cpu_props(DEVICE(obj));
 }
 
 static void rv32_sifive_u_cpu_init(Object *obj)
@@ -XXX,XX +XXX,XX @@ static void rv32_sifive_u_cpu_init(Object *obj)
 static void rv32_sifive_e_cpu_init(Object *obj)
 {
     CPURISCVState *env = &RISCV_CPU(obj)->env;
+    RISCVCPU *cpu = RISCV_CPU(obj);
+
     set_misa(env, MXL_RV32, RVI | RVM | RVA | RVC | RVU);
     set_priv_version(env, PRIV_VERSION_1_10_0);
-    qdev_prop_set_bit(DEVICE(obj), "mmu", false);
+    cpu->cfg.mmu = false;
 }
 
 static void rv32_ibex_cpu_init(Object *obj)
 {
     CPURISCVState *env = &RISCV_CPU(obj)->env;
+    RISCVCPU *cpu = RISCV_CPU(obj);
+
     set_misa(env, MXL_RV32, RVI | RVM | RVC | RVU);
     set_priv_version(env, PRIV_VERSION_1_10_0);
-    qdev_prop_set_bit(DEVICE(obj), "mmu", false);
-    qdev_prop_set_bit(DEVICE(obj), "x-epmp", true);
+    cpu->cfg.mmu = false;
+    cpu->cfg.epmp = true;
 }
 
 static void rv32_imafcu_nommu_cpu_init(Object *obj)
 {
     CPURISCVState *env = &RISCV_CPU(obj)->env;
+    RISCVCPU *cpu = RISCV_CPU(obj);
+
     set_misa(env, MXL_RV32, RVI | RVM | RVA | RVF | RVC | RVU);
     set_priv_version(env, PRIV_VERSION_1_10_0);
     set_resetvec(env, DEFAULT_RSTVEC);
-    qdev_prop_set_bit(DEVICE(obj), "mmu", false);
+    cpu->cfg.mmu = false;
 }
 #endif
 
@@ -XXX,XX +XXX,XX @@ static void riscv_host_cpu_init(Object *obj)
 #elif defined(TARGET_RISCV64)
     set_misa(env, MXL_RV64, 0);
 #endif
+    register_cpu_props(DEVICE(obj));
 }
 #endif
 
@@ -XXX,XX +XXX,XX @@ static void riscv_cpu_init(Object *obj)
 {
     RISCVCPU *cpu = RISCV_CPU(obj);
 
+    cpu->cfg.ext_counters = true;
+    cpu->cfg.ext_ifencei = true;
+    cpu->cfg.ext_icsr = true;
+    cpu->cfg.mmu = true;
+    cpu->cfg.pmp = true;
+
     cpu_set_cpustate_pointers(cpu);
 
 #ifndef CONFIG_USER_ONLY
@@ -XXX,XX +XXX,XX @@ static void riscv_cpu_init(Object *obj)
 #endif /* CONFIG_USER_ONLY */
 }
 
-static Property riscv_cpu_properties[] = {
+static Property riscv_cpu_extensions[] = {
     /* Defaults for standard extensions */
     DEFINE_PROP_BOOL("i", RISCVCPU, cfg.ext_i, true),
     DEFINE_PROP_BOOL("e", RISCVCPU, cfg.ext_e, false),
@@ -XXX,XX +XXX,XX @@ static Property riscv_cpu_properties[] = {
     DEFINE_PROP_BOOL("Zve64f", RISCVCPU, cfg.ext_zve64f, false),
     DEFINE_PROP_BOOL("mmu", RISCVCPU, cfg.mmu, true),
     DEFINE_PROP_BOOL("pmp", RISCVCPU, cfg.pmp, true),
-    DEFINE_PROP_BOOL("debug", RISCVCPU, cfg.debug, true),
 
     DEFINE_PROP_STRING("priv_spec", RISCVCPU, cfg.priv_spec),
     DEFINE_PROP_STRING("vext_spec", RISCVCPU, cfg.vext_spec),
     DEFINE_PROP_UINT16("vlen", RISCVCPU, cfg.vlen, 128),
     DEFINE_PROP_UINT16("elen", RISCVCPU, cfg.elen, 64),
 
-    DEFINE_PROP_UINT32("mvendorid", RISCVCPU, cfg.mvendorid, 0),
-    DEFINE_PROP_UINT64("marchid", RISCVCPU, cfg.marchid, RISCV_CPU_MARCHID),
-    DEFINE_PROP_UINT64("mimpid", RISCVCPU, cfg.mimpid, RISCV_CPU_MIMPID),
-
     DEFINE_PROP_BOOL("svinval", RISCVCPU, cfg.ext_svinval, false),
     DEFINE_PROP_BOOL("svnapot", RISCVCPU, cfg.ext_svnapot, false),
     DEFINE_PROP_BOOL("svpbmt", RISCVCPU, cfg.ext_svpbmt, false),
@@ -XXX,XX +XXX,XX @@ static Property riscv_cpu_properties[] = {
     DEFINE_PROP_BOOL("x-epmp", RISCVCPU, cfg.epmp, false),
     DEFINE_PROP_BOOL("x-aia", RISCVCPU, cfg.aia, false),
 
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static void register_cpu_props(DeviceState *dev)
+{
+    Property *prop;
+
+    for (prop = riscv_cpu_extensions; prop && prop->name; prop++) {
+        qdev_property_add_static(dev, prop);
+    }
+}
+
+static Property riscv_cpu_properties[] = {
+    DEFINE_PROP_BOOL("debug", RISCVCPU, cfg.debug, true),
+
+    DEFINE_PROP_UINT32("mvendorid", RISCVCPU, cfg.mvendorid, 0),
+    DEFINE_PROP_UINT64("marchid", RISCVCPU, cfg.marchid, RISCV_CPU_MARCHID),
+    DEFINE_PROP_UINT64("mimpid", RISCVCPU, cfg.mimpid, RISCV_CPU_MIMPID),
+
     DEFINE_PROP_UINT64("resetvec", RISCVCPU, cfg.resetvec, DEFAULT_RSTVEC),
 
     DEFINE_PROP_BOOL("short-isa-string", RISCVCPU, cfg.short_isa_string, false),
--
2.36.1

From: Daniel Henrique Barboza <dbarboza@ventanamicro.com>

In the same emulated RISC-V host, the 'host' KVM CPU takes 4 times
longer to boot than the 'rv64' KVM CPU.

The reason is an unintended behavior of riscv_cpu_satp_mode_finalize()
when satp_mode.supported = 0, i.e. when cpu_init() does not set
satp_mode_max_supported(). satp_mode_max_from_map(map) does:

31 - __builtin_clz(map)

This means that, if satp_mode.supported = 0, satp_mode_supported_max
will be '31 - 32'. But this is C, so satp_mode_supported_max will
gladly wrap around to UINT_MAX (4294967295). After that, if the user
didn't set a satp_mode, set_satp_mode_default_map(cpu) will make

cfg.satp_mode.map = cfg.satp_mode.supported

So satp_mode.map = 0. And then satp_mode_map_max will be set to
satp_mode_max_from_map(cpu->cfg.satp_mode.map), i.e. also UINT_MAX. The
guard "satp_mode_map_max > satp_mode_supported_max" doesn't protect us
here since both are UINT_MAX.

And finally we have 2 loops:

for (int i = satp_mode_map_max - 1; i >= 0; --i) {

Which are, in fact, 2 loops from UINT_MAX - 1 to -1. This is where the
extra delay when booting the 'host' CPU is coming from.

Commit 43d1de32f8 already set a precedent for satp_mode.supported = 0
in a different manner. We're doing the same here. If supported == 0,
interpret it as 'the CPU wants the OS to handle satp mode alone' and
skip satp_mode_finalize().

We'll also put a guard in satp_mode_max_from_map() to assert out if map
is 0 since the function is not ready to deal with it.

Cc: Alexandre Ghiti <alexghiti@rivosinc.com>
Fixes: 6f23aaeb9b ("riscv: Allow user to set the satp mode")
Signed-off-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
Message-ID: <20230817152903.694926-1-dbarboza@ventanamicro.com>
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
---
 target/riscv/cpu.c | 23 ++++++++++++++++++++---
 1 file changed, 20 insertions(+), 3 deletions(-)

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -XXX,XX +XXX,XX @@ static uint8_t satp_mode_from_str(const char *satp_mode_str)
 
 uint8_t satp_mode_max_from_map(uint32_t map)
 {
+    /*
+     * 'map = 0' will make us return (31 - 32), which C will
+     * happily overflow to UINT_MAX. There's no good result to
+     * return if 'map = 0' (e.g. returning 0 will be ambiguous
+     * with the result for 'map = 1').
+     *
+     * Assert out if map = 0. Callers will have to deal with
+     * it outside of this function.
+     */
+    g_assert(map > 0);
+
     /* map here has at least one bit set, so no problem with clz */
     return 31 - __builtin_clz(map);
 }
@@ -XXX,XX +XXX,XX @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp)
 static void riscv_cpu_satp_mode_finalize(RISCVCPU *cpu, Error **errp)
 {
     bool rv32 = riscv_cpu_mxl(&cpu->env) == MXL_RV32;
-    uint8_t satp_mode_map_max;
-    uint8_t satp_mode_supported_max =
-        satp_mode_max_from_map(cpu->cfg.satp_mode.supported);
+    uint8_t satp_mode_map_max, satp_mode_supported_max;
+
+    /* The CPU wants the OS to decide which satp mode to use */
+    if (cpu->cfg.satp_mode.supported == 0) {
+        return;
+    }
+
+    satp_mode_supported_max =
+        satp_mode_max_from_map(cpu->cfg.satp_mode.supported);
 
     if (cpu->cfg.satp_mode.map == 0) {
         if (cpu->cfg.satp_mode.init == 0) {
--
2.41.0
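A standalone illustration of the wraparound described in the commit
message above (plain C, not QEMU code):

#include <stdio.h>

int main(void)
{
    unsigned int supported_max = 31 - 32;   /* int -1 wraps to UINT_MAX */

    printf("%u\n", supported_max);          /* prints 4294967295 */

    /* a descending loop bounded by this value runs ~2^32 iterations,
     * which is the boot delay observed on the 'host' CPU */
    return 0;
}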
From: Vineet Gupta <vineetg@rivosinc.com>

zicond is now supported by codegen in both llvm and gcc.

This change allows seamless enabling/testing of zicond in downstream
projects. e.g. currently riscv-gnu-toolchain parses elf attributes
to create a cmdline for qemu but falls short of enabling it because of
the "x-" prefix.

Signed-off-by: Vineet Gupta <vineetg@rivosinc.com>
Message-ID: <20230808181715.436395-1-vineetg@rivosinc.com>
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
---
 target/riscv/cpu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -XXX,XX +XXX,XX @@ static Property riscv_cpu_extensions[] = {
     DEFINE_PROP_BOOL("zcf", RISCVCPU, cfg.ext_zcf, false),
     DEFINE_PROP_BOOL("zcmp", RISCVCPU, cfg.ext_zcmp, false),
     DEFINE_PROP_BOOL("zcmt", RISCVCPU, cfg.ext_zcmt, false),
+    DEFINE_PROP_BOOL("zicond", RISCVCPU, cfg.ext_zicond, false),
 
     /* Vendor-specific custom extensions */
     DEFINE_PROP_BOOL("xtheadba", RISCVCPU, cfg.ext_xtheadba, false),
@@ -XXX,XX +XXX,XX @@ static Property riscv_cpu_extensions[] = {
     DEFINE_PROP_BOOL("xventanacondops", RISCVCPU, cfg.ext_XVentanaCondOps, false),
 
     /* These are experimental so mark with 'x-' */
-    DEFINE_PROP_BOOL("x-zicond", RISCVCPU, cfg.ext_zicond, false),
 
     /* ePMP 0.9.3 */
     DEFINE_PROP_BOOL("x-epmp", RISCVCPU, cfg.epmp, false),
--
2.41.0

From: Weiwei Li <liweiwei@iscas.ac.cn>

Add support for the zmmul extension v0.1. This extension includes all
multiplication operations from the M extension but not the divide ops.

Signed-off-by: Weiwei Li <liweiwei@iscas.ac.cn>
Signed-off-by: Junqiang Wang <wangjunqiang@iscas.ac.cn>
Reviewed-by: Víctor Colombo <victor.colombo@eldorado.org.br>
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
Message-Id: <20220531030732.3850-1-liweiwei@iscas.ac.cn>
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
---
 target/riscv/cpu.h                      |  1 +
 target/riscv/cpu.c                      |  7 +++++++
 target/riscv/insn_trans/trans_rvm.c.inc | 18 ++++++++++++------
 3 files changed, 20 insertions(+), 6 deletions(-)

diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -XXX,XX +XXX,XX @@ struct RISCVCPUConfig {
     bool ext_zhinxmin;
     bool ext_zve32f;
     bool ext_zve64f;
+    bool ext_zmmul;
 
     uint32_t mvendorid;
     uint64_t marchid;
diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -XXX,XX +XXX,XX @@ static void riscv_cpu_realize(DeviceState *dev, Error **errp)
             cpu->cfg.ext_ifencei = true;
         }
 
+        if (cpu->cfg.ext_m && cpu->cfg.ext_zmmul) {
+            warn_report("Zmmul will override M");
+            cpu->cfg.ext_m = false;
+        }
+
         if (cpu->cfg.ext_i && cpu->cfg.ext_e) {
             error_setg(errp,
                        "I and E extensions are incompatible");
@@ -XXX,XX +XXX,XX @@ static Property riscv_cpu_properties[] = {
 
     /* These are experimental so mark with 'x-' */
     DEFINE_PROP_BOOL("x-j", RISCVCPU, cfg.ext_j, false),
+    DEFINE_PROP_BOOL("x-zmmul", RISCVCPU, cfg.ext_zmmul, false),
     /* ePMP 0.9.3 */
     DEFINE_PROP_BOOL("x-epmp", RISCVCPU, cfg.epmp, false),
     DEFINE_PROP_BOOL("x-aia", RISCVCPU, cfg.aia, false),
@@ -XXX,XX +XXX,XX @@ static void riscv_isa_string_ext(RISCVCPU *cpu, char **isa_str, int max_str_len)
     struct isa_ext_data isa_edata_arr[] = {
         ISA_EDATA_ENTRY(zicsr, ext_icsr),
         ISA_EDATA_ENTRY(zifencei, ext_ifencei),
+        ISA_EDATA_ENTRY(zmmul, ext_zmmul),
         ISA_EDATA_ENTRY(zfh, ext_zfh),
         ISA_EDATA_ENTRY(zfhmin, ext_zfhmin),
         ISA_EDATA_ENTRY(zfinx, ext_zfinx),
diff --git a/target/riscv/insn_trans/trans_rvm.c.inc b/target/riscv/insn_trans/trans_rvm.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/insn_trans/trans_rvm.c.inc
+++ b/target/riscv/insn_trans/trans_rvm.c.inc
@@ -XXX,XX +XXX,XX @@
  * this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
+#define REQUIRE_M_OR_ZMMUL(ctx) do {                      \
+    if (!ctx->cfg_ptr->ext_zmmul && !has_ext(ctx, RVM)) { \
+        return false;                                     \
+    }                                                     \
+} while (0)
+
 static void gen_mulhu_i128(TCGv r2, TCGv r3, TCGv al, TCGv ah, TCGv bl, TCGv bh)
 {
     TCGv tmpl = tcg_temp_new();
@@ -XXX,XX +XXX,XX @@ static void gen_mul_i128(TCGv rl, TCGv rh,
 
 static bool trans_mul(DisasContext *ctx, arg_mul *a)
 {
-    REQUIRE_EXT(ctx, RVM);
+    REQUIRE_M_OR_ZMMUL(ctx);
     return gen_arith(ctx, a, EXT_NONE, tcg_gen_mul_tl, gen_mul_i128);
 }
 
@@ -XXX,XX +XXX,XX @@ static void gen_mulh_w(TCGv ret, TCGv s1, TCGv s2)
 
 static bool trans_mulh(DisasContext *ctx, arg_mulh *a)
 {
-    REQUIRE_EXT(ctx, RVM);
+    REQUIRE_M_OR_ZMMUL(ctx);
     return gen_arith_per_ol(ctx, a, EXT_SIGN, gen_mulh, gen_mulh_w,
                             gen_mulh_i128);
 }
@@ -XXX,XX +XXX,XX @@ static void gen_mulhsu_w(TCGv ret, TCGv arg1, TCGv arg2)
 
 static bool trans_mulhsu(DisasContext *ctx, arg_mulhsu *a)
 {
-    REQUIRE_EXT(ctx, RVM);
+    REQUIRE_M_OR_ZMMUL(ctx);
     return gen_arith_per_ol(ctx, a, EXT_NONE, gen_mulhsu, gen_mulhsu_w,
                             gen_mulhsu_i128);
 }
@@ -XXX,XX +XXX,XX @@ static void gen_mulhu(TCGv ret, TCGv s1, TCGv s2)
 
 static bool trans_mulhu(DisasContext *ctx, arg_mulhu *a)
 {
-    REQUIRE_EXT(ctx, RVM);
+    REQUIRE_M_OR_ZMMUL(ctx);
     /* gen_mulh_w works for either sign as input. */
     return gen_arith_per_ol(ctx, a, EXT_ZERO, gen_mulhu, gen_mulh_w,
                             gen_mulhu_i128);
@@ -XXX,XX +XXX,XX @@ static bool trans_remu(DisasContext *ctx, arg_remu *a)
 static bool trans_mulw(DisasContext *ctx, arg_mulw *a)
 {
     REQUIRE_64_OR_128BIT(ctx);
-    REQUIRE_EXT(ctx, RVM);
+    REQUIRE_M_OR_ZMMUL(ctx);
     ctx->ol = MXL_RV32;
     return gen_arith(ctx, a, EXT_NONE, tcg_gen_mul_tl, NULL);
 }
@@ -XXX,XX +XXX,XX @@ static bool trans_remuw(DisasContext *ctx, arg_remuw *a)
 static bool trans_muld(DisasContext *ctx, arg_muld *a)
 {
     REQUIRE_128BIT(ctx);
-    REQUIRE_EXT(ctx, RVM);
+    REQUIRE_M_OR_ZMMUL(ctx);
     ctx->ol = MXL_RV64;
     return gen_arith(ctx, a, EXT_SIGN, tcg_gen_mul_tl, NULL);
 }
--
2.36.1
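As a guest-side sketch of the resulting behaviour (hypothetical test
code, not part of the patch): with Zmmul enabled and M disabled,
multiply instructions decode while divide instructions raise an
illegal-instruction exception.

#include <stdint.h>

static inline uint64_t mul64(uint64_t a, uint64_t b)
{
    uint64_t r;
    /* gated by REQUIRE_M_OR_ZMMUL in trans_mul(), so this executes */
    asm volatile("mul %0, %1, %2" : "=r"(r) : "r"(a), "r"(b));
    return r;
}

static inline uint64_t div64(uint64_t a, uint64_t b)
{
    uint64_t r;
    /* still requires the full M extension; traps with Zmmul alone */
    asm volatile("div %0, %1, %2" : "=r"(r) : "r"(a), "r"(b));
    return r;
}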
From: Daniel Henrique Barboza <dbarboza@ventanamicro.com>

A build with --enable-debug and without KVM will fail as follows:

/usr/bin/ld: libqemu-riscv64-softmmu.fa.p/hw_riscv_virt.c.o: in function `virt_machine_init':
./qemu/build/../hw/riscv/virt.c:1465: undefined reference to `kvm_riscv_aia_create'

This happens because the code block with "if virt_use_kvm_aia(s)" isn't
being ignored by the debug build, resulting in an undefined reference to
a KVM-only function.

Adding a 'kvm_enabled()' conditional together with virt_use_kvm_aia()
will make the compiler crop the kvm_riscv_aia_create() call entirely
from a non-KVM build. Note that adding the 'kvm_enabled()' conditional
inside virt_use_kvm_aia() won't fix the build because this function
would need to be inlined multiple times to make the compiler zero out
the entire block.

While we're at it, use kvm_enabled() in all instances where
virt_use_kvm_aia() is checked to allow the compiler to elide these other
kvm-only instances as well.

Suggested-by: Richard Henderson <richard.henderson@linaro.org>
Fixes: dbdb99948e ("target/riscv: select KVM AIA in riscv virt machine")
Signed-off-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-ID: <20230830133503.711138-2-dbarboza@ventanamicro.com>
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
---
 hw/riscv/virt.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/riscv/virt.c
+++ b/hw/riscv/virt.c
@@ -XXX,XX +XXX,XX @@ static void create_fdt_sockets(RISCVVirtState *s, const MemMapEntry *memmap,
     }
 
     /* KVM AIA only has one APLIC instance */
-    if (virt_use_kvm_aia(s)) {
+    if (kvm_enabled() && virt_use_kvm_aia(s)) {
         create_fdt_socket_aplic(s, memmap, 0,
                                 msi_m_phandle, msi_s_phandle, phandle,
                                 &intc_phandles[0], xplic_phandles,
@@ -XXX,XX +XXX,XX @@ static void create_fdt_sockets(RISCVVirtState *s, const MemMapEntry *memmap,
 
     g_free(intc_phandles);
 
-    if (virt_use_kvm_aia(s)) {
+    if (kvm_enabled() && virt_use_kvm_aia(s)) {
         *irq_mmio_phandle = xplic_phandles[0];
         *irq_virtio_phandle = xplic_phandles[0];
         *irq_pcie_phandle = xplic_phandles[0];
@@ -XXX,XX +XXX,XX @@ static void virt_machine_init(MachineState *machine)
         }
     }
 
-    if (virt_use_kvm_aia(s)) {
+    if (kvm_enabled() && virt_use_kvm_aia(s)) {
         kvm_riscv_aia_create(machine, IMSIC_MMIO_GROUP_MIN_SHIFT,
                              VIRT_IRQCHIP_NUM_SOURCES, VIRT_IRQCHIP_NUM_MSIS,
                              memmap[VIRT_APLIC_S].base,
--
2.41.0

From: Alistair Francis <alistair.francis@wdc.com>

Since commit ad40be27 "target/riscv: Support start kernel directly by
KVM" we have been overflowing the addr_config on "M,MS..."
configurations, as reported in
https://gitlab.com/qemu-project/qemu/-/issues/1050.

This commit changes the loop in sifive_plic_create() from iterating over
the number of harts to just iterating over the addr_config. The
addr_config is based on the hart_config, and will contain interrupt
details for all harts. This way we can't iterate past the end of
addr_config.

Fixes: ad40be27084536 ("target/riscv: Support start kernel directly by KVM")
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1050
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
Reviewed-by: Mingwang Li <limingwang@huawei.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-Id: <20220601013631.196854-1-alistair.francis@opensource.wdc.com>
---
 hw/intc/sifive_plic.c | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/hw/intc/sifive_plic.c b/hw/intc/sifive_plic.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/intc/sifive_plic.c
+++ b/hw/intc/sifive_plic.c
@@ -XXX,XX +XXX,XX @@ DeviceState *sifive_plic_create(hwaddr addr, char *hart_config,
     uint32_t context_stride, uint32_t aperture_size)
 {
     DeviceState *dev = qdev_new(TYPE_SIFIVE_PLIC);
-    int i, j = 0;
+    int i;
     SiFivePLICState *plic;
 
     assert(enable_stride == (enable_stride & -enable_stride));
@@ -XXX,XX +XXX,XX @@ DeviceState *sifive_plic_create(hwaddr addr, char *hart_config,
     sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, addr);
 
     plic = SIFIVE_PLIC(dev);
-    for (i = 0; i < num_harts; i++) {
-        CPUState *cpu = qemu_get_cpu(hartid_base + i);
 
-        if (plic->addr_config[j].mode == PLICMode_M) {
-            j++;
-            qdev_connect_gpio_out(dev, num_harts + i,
+    for (i = 0; i < plic->num_addrs; i++) {
+        int cpu_num = plic->addr_config[i].hartid;
+        CPUState *cpu = qemu_get_cpu(hartid_base + cpu_num);
+
+        if (plic->addr_config[i].mode == PLICMode_M) {
+            qdev_connect_gpio_out(dev, num_harts + cpu_num,
                                   qdev_get_gpio_in(DEVICE(cpu), IRQ_M_EXT));
         }
-
-        if (plic->addr_config[j].mode == PLICMode_S) {
-            j++;
-            qdev_connect_gpio_out(dev, i,
+        if (plic->addr_config[i].mode == PLICMode_S) {
+            qdev_connect_gpio_out(dev, cpu_num,
                                   qdev_get_gpio_in(DEVICE(cpu), IRQ_S_EXT));
         }
     }
--
2.36.1
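A toy model of the indexing problem the PLIC fix addresses (illustrative
C, not QEMU code): with hart_config "M,MS" there are two harts but three
(hartid, mode) entries, so a loop keyed to hart count can walk past the
final entry, while the fixed loop is keyed to the array itself.

#include <stdio.h>

struct addr_entry { int hartid; char mode; };

int main(void)
{
    struct addr_entry addr_config[] = {
        { 0, 'M' },             /* hart 0: "M"  */
        { 1, 'M' }, { 1, 'S' }  /* hart 1: "MS" */
    };
    int num_addrs = sizeof(addr_config) / sizeof(addr_config[0]);

    /* iterate over addr_config itself, as the fix does */
    for (int i = 0; i < num_addrs; i++) {
        printf("hart %d, mode %c\n",
               addr_config[i].hartid, addr_config[i].mode);
    }
    return 0;
}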
1
From: eopXD <yueh.ting.chen@gmail.com>
1
From: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
2
2
3
Compares write mask registers, and so always operate under a tail-
3
Commit 6df0b37e2ab breaks a --enable-debug build in a non-KVM
4
agnostic policy.
4
environment with the following error:
5
5
6
Signed-off-by: eop Chen <eop.chen@sifive.com>
6
/usr/bin/ld: libqemu-riscv64-softmmu.fa.p/hw_intc_riscv_aplic.c.o: in function `riscv_kvm_aplic_request':
7
Reviewed-by: Frank Chang <frank.chang@sifive.com>
7
./qemu/build/../hw/intc/riscv_aplic.c:486: undefined reference to `kvm_set_irq'
8
Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn>
8
collect2: error: ld returned 1 exit status
9
Acked-by: Alistair Francis <alistair.francis@wdc.com>
9
10
Message-Id: <165449614532.19704.7000832880482980398-9@git.sr.ht>
10
This happens because the debug build will poke into the
11
'if (is_kvm_aia(aplic->msimode))' block and fail to find a reference to
12
the KVM only function riscv_kvm_aplic_request().
13
14
There are multiple solutions to fix this. We'll go with the same
15
solution from the previous patch, i.e. add a kvm_enabled() conditional
16
to filter out the block. But there's a catch: riscv_kvm_aplic_request()
17
is a local function that would end up being used if the compiler crops
18
the block, and this won't work. Quoting Richard Henderson's explanation
19
in [1]:
20
21
"(...) the compiler won't eliminate entire unused functions with -O0"
22
23
We'll solve it by moving riscv_kvm_aplic_request() to kvm.c and add its
24
declaration in kvm_riscv.h, where all other KVM specific public
25
functions are already declared. Other archs handles KVM specific code in
26
this manner and we expect to do the same from now on.
27
28
[1] https://lore.kernel.org/qemu-riscv/d2f1ad02-eb03-138f-9d08-db676deeed05@linaro.org/
29
30
Signed-off-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
31
Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
32
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-ID: <20230830133503.711138-3-dbarboza@ventanamicro.com>
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
---
target/riscv/kvm_riscv.h | 1 +
hw/intc/riscv_aplic.c | 8 ++------
target/riscv/kvm.c | 5 +++++
3 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/target/riscv/kvm_riscv.h b/target/riscv/kvm_riscv.h
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/kvm_riscv.h
+++ b/target/riscv/kvm_riscv.h
@@ -XXX,XX +XXX,XX @@ void kvm_riscv_aia_create(MachineState *machine, uint64_t group_shift,
uint64_t aia_irq_num, uint64_t aia_msi_num,
uint64_t aplic_base, uint64_t imsic_base,
uint64_t guest_num);
+void riscv_kvm_aplic_request(void *opaque, int irq, int level);

#endif
diff --git a/hw/intc/riscv_aplic.c b/hw/intc/riscv_aplic.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/intc/riscv_aplic.c
+++ b/hw/intc/riscv_aplic.c
@@ -XXX,XX +XXX,XX @@
#include "target/riscv/cpu.h"
#include "sysemu/sysemu.h"
#include "sysemu/kvm.h"
+#include "kvm_riscv.h"
#include "migration/vmstate.h"

#define APLIC_MAX_IDC (1UL << 14)
@@ -XXX,XX +XXX,XX @@ static uint32_t riscv_aplic_idc_claimi(RISCVAPLICState *aplic, uint32_t idc)
return topi;
}

-static void riscv_kvm_aplic_request(void *opaque, int irq, int level)
-{
- kvm_set_irq(kvm_state, irq, !!level);
-}
-
static void riscv_aplic_request(void *opaque, int irq, int level)
{
bool update = false;
@@ -XXX,XX +XXX,XX @@ static void riscv_aplic_realize(DeviceState *dev, Error **errp)
* have IRQ lines delegated by their parent APLIC.
*/
if (!aplic->parent) {
- if (is_kvm_aia(aplic->msimode)) {
+ if (kvm_enabled() && is_kvm_aia(aplic->msimode)) {
qdev_init_gpio_in(dev, riscv_kvm_aplic_request, aplic->num_irqs);
} else {
qdev_init_gpio_in(dev, riscv_aplic_request, aplic->num_irqs);
diff --git a/target/riscv/kvm.c b/target/riscv/kvm.c
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/kvm.c
+++ b/target/riscv/kvm.c
@@ -XXX,XX +XXX,XX @@
#include "sysemu/runstate.h"
#include "hw/riscv/numa.h"

+void riscv_kvm_aplic_request(void *opaque, int irq, int level)
+{
+ kvm_set_irq(kvm_state, irq, !!level);
+}
+
static uint64_t kvm_riscv_reg_id(CPURISCVState *env, uint64_t type,
uint64_t idx)
{
--
2.41.0
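
The kvm_enabled() guard added above is what keeps builds without KVM working: the condition folds to a compile-time constant 0, so the compiler can drop the branch and with it the reference to the KVM-only callback. A minimal sketch of the pattern in plain C, where ACCEL_ENABLED and accel_only_request() are illustrative stand-ins rather than QEMU identifiers:

    /* Sketch of a constant-folded accelerator guard; not QEMU code. */
    #define ACCEL_ENABLED 0  /* stand-in for kvm_enabled() without KVM */

    extern void accel_only_request(int irq, int level); /* accel-only symbol */

    void request(int irq, int level)
    {
        if (ACCEL_ENABLED) {
            /* dead once the guard folds to 0, so the symbol above
             * does not need to be linked into this build */
            accel_only_request(irq, level);
        }
    }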
From: Robbin Ehn <rehn@rivosinc.com>

This patch adds the new extensions from Linux 6.5 to the
hwprobe syscall.

It also fixes the RVC check to OR in the correct value. The
previous code ORed in pair->value, which happened to contain 0,
so the check only worked by accident.

Signed-off-by: Robbin Ehn <rehn@rivosinc.com>
Acked-by: Richard Henderson <richard.henderson@linaro.org>
Acked-by: Alistair Francis <alistair.francis@wdc.com>
Message-ID: <bc82203b72d7efb30f1b4a8f9eb3d94699799dc8.camel@rivosinc.com>
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
---
linux-user/syscall.c | 14 +++++++++++++-
1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index XXXXXXX..XXXXXXX 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -XXX,XX +XXX,XX @@ static int do_getdents64(abi_long dirfd, abi_long arg2, abi_long count)
#define RISCV_HWPROBE_KEY_IMA_EXT_0 4
#define RISCV_HWPROBE_IMA_FD (1 << 0)
#define RISCV_HWPROBE_IMA_C (1 << 1)
+#define RISCV_HWPROBE_IMA_V (1 << 2)
+#define RISCV_HWPROBE_EXT_ZBA (1 << 3)
+#define RISCV_HWPROBE_EXT_ZBB (1 << 4)
+#define RISCV_HWPROBE_EXT_ZBS (1 << 5)

#define RISCV_HWPROBE_KEY_CPUPERF_0 5
#define RISCV_HWPROBE_MISALIGNED_UNKNOWN (0 << 0)
@@ -XXX,XX +XXX,XX @@ static void risc_hwprobe_fill_pairs(CPURISCVState *env,
riscv_has_ext(env, RVD) ?
RISCV_HWPROBE_IMA_FD : 0;
value |= riscv_has_ext(env, RVC) ?
- RISCV_HWPROBE_IMA_C : pair->value;
+ RISCV_HWPROBE_IMA_C : 0;
+ value |= riscv_has_ext(env, RVV) ?
+ RISCV_HWPROBE_IMA_V : 0;
+ value |= cfg->ext_zba ?
+ RISCV_HWPROBE_EXT_ZBA : 0;
+ value |= cfg->ext_zbb ?
+ RISCV_HWPROBE_EXT_ZBB : 0;
+ value |= cfg->ext_zbs ?
+ RISCV_HWPROBE_EXT_ZBS : 0;
__put_user(value, &pair->value);
break;
case RISCV_HWPROBE_KEY_CPUPERF_0:
--
2.41.0
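
For illustration, a hedged user-space sketch of querying the new bits; it assumes a riscv64 target where __NR_riscv_hwprobe is 258 and a kernel (or QEMU's linux-user emulation) recent enough to implement the key:

    #include <stdint.h>
    #include <stdio.h>
    #include <unistd.h>
    #include <sys/syscall.h>

    struct riscv_hwprobe { int64_t key; uint64_t value; };

    #define RISCV_HWPROBE_KEY_IMA_EXT_0 4
    #define RISCV_HWPROBE_IMA_V   (1 << 2)
    #define RISCV_HWPROBE_EXT_ZBA (1 << 3)

    int main(void)
    {
        /* one key/value pair; the kernel fills in value for the key */
        struct riscv_hwprobe pair = { .key = RISCV_HWPROBE_KEY_IMA_EXT_0 };

        /* riscv_hwprobe(pairs, pair_count, cpu_count, cpus, flags) */
        if (syscall(258, &pair, 1, 0, NULL, 0) != 0) {
            return 1;
        }
        printf("V:   %s\n", (pair.value & RISCV_HWPROBE_IMA_V) ? "yes" : "no");
        printf("Zba: %s\n", (pair.value & RISCV_HWPROBE_EXT_ZBA) ? "yes" : "no");
        return 0;
    }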
From: Ard Biesheuvel <ardb@kernel.org>

Use the accelerated SubBytes/ShiftRows/AddRoundKey AES helper to
implement the first half of the key schedule derivation. This does not
actually involve shifting rows, so clone the same value into all four
columns of the AES vector to counter that operation.

Cc: Richard Henderson <richard.henderson@linaro.org>
Cc: Philippe Mathieu-Daudé <philmd@linaro.org>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Alistair Francis <alistair.francis@wdc.com>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-ID: <20230831154118.138727-1-ardb@kernel.org>
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
---
target/riscv/crypto_helper.c | 17 +++++------------
1 file changed, 5 insertions(+), 12 deletions(-)

diff --git a/target/riscv/crypto_helper.c b/target/riscv/crypto_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/crypto_helper.c
+++ b/target/riscv/crypto_helper.c
@@ -XXX,XX +XXX,XX @@ target_ulong HELPER(aes64ks1i)(target_ulong rs1, target_ulong rnum)

uint8_t enc_rnum = rnum;
uint32_t temp = (RS1 >> 32) & 0xFFFFFFFF;
- uint8_t rcon_ = 0;
- target_ulong result;
+ AESState t, rc = {};

if (enc_rnum != 0xA) {
temp = ror32(temp, 8); /* Rotate right by 8 */
- rcon_ = round_consts[enc_rnum];
+ rc.w[0] = rc.w[1] = round_consts[enc_rnum];
}

- temp = ((uint32_t)AES_sbox[(temp >> 24) & 0xFF] << 24) |
- ((uint32_t)AES_sbox[(temp >> 16) & 0xFF] << 16) |
- ((uint32_t)AES_sbox[(temp >> 8) & 0xFF] << 8) |
- ((uint32_t)AES_sbox[(temp >> 0) & 0xFF] << 0);
+ t.w[0] = t.w[1] = t.w[2] = t.w[3] = temp;
+ aesenc_SB_SR_AK(&t, &t, &rc, false);

- temp ^= rcon_;
-
- result = ((uint64_t)temp << 32) | temp;
-
- return result;
+ return t.d[0];
}

target_ulong HELPER(aes64im)(target_ulong rs1)
--
2.41.0
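
The column-cloning trick works because ShiftRows only rotates each row across columns, which is an identity permutation when all four columns hold the same word. A small self-contained check of that property in plain C, independent of QEMU's AESState:

    #include <assert.h>
    #include <stdint.h>
    #include <string.h>

    /* AES state as 4 columns x 4 rows of bytes, column-major. */
    static void shift_rows(uint8_t s[4][4], uint8_t out[4][4])
    {
        for (int r = 0; r < 4; r++) {
            for (int c = 0; c < 4; c++) {
                out[c][r] = s[(c + r) % 4][r]; /* row r rotates by r columns */
            }
        }
    }

    int main(void)
    {
        const uint8_t col[4] = { 0xde, 0xad, 0xbe, 0xef };
        uint8_t s[4][4], out[4][4];

        for (int c = 0; c < 4; c++) {
            memcpy(s[c], col, 4); /* clone the same word into every column */
        }
        shift_rows(s, out);
        assert(memcmp(s, out, 16) == 0); /* ShiftRows was a no-op */
        return 0;
    }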
From: Akihiko Odaki <akihiko.odaki@daynix.com>

riscv_trigger_init() had been called on reset events that can happen
several times for a CPU and it allocated timers for itrigger. If old
timers were present, they were simply overwritten by the new timers,
resulting in a memory leak.

Divide riscv_trigger_init() into two functions, namely
riscv_trigger_realize() and riscv_trigger_reset_hold(), and call them
at the appropriate times. The timer allocation now happens only once
per CPU, in riscv_trigger_realize().

Fixes: 5a4ae64cac ("target/riscv: Add itrigger support when icount is enabled")
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Reviewed-by: LIU Zhiwei <zhiwei_liu@linux.alibaba.com>
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
Message-ID: <20230818034059.9146-1-akihiko.odaki@daynix.com>
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
---
target/riscv/debug.h | 3 ++-
target/riscv/cpu.c | 8 +++++++-
target/riscv/debug.c | 15 ++++++++++++---
3 files changed, 21 insertions(+), 5 deletions(-)

diff --git a/target/riscv/debug.h b/target/riscv/debug.h
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/debug.h
+++ b/target/riscv/debug.h
@@ -XXX,XX +XXX,XX @@ void riscv_cpu_debug_excp_handler(CPUState *cs);
bool riscv_cpu_debug_check_breakpoint(CPUState *cs);
bool riscv_cpu_debug_check_watchpoint(CPUState *cs, CPUWatchpoint *wp);

-void riscv_trigger_init(CPURISCVState *env);
+void riscv_trigger_realize(CPURISCVState *env);
+void riscv_trigger_reset_hold(CPURISCVState *env);

bool riscv_itrigger_enabled(CPURISCVState *env);
void riscv_itrigger_update_priv(CPURISCVState *env);
diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -XXX,XX +XXX,XX @@ static void riscv_cpu_reset_hold(Object *obj)

#ifndef CONFIG_USER_ONLY
if (cpu->cfg.debug) {
- riscv_trigger_init(env);
+ riscv_trigger_reset_hold(env);
}

if (kvm_enabled()) {
@@ -XXX,XX +XXX,XX @@ static void riscv_cpu_realize(DeviceState *dev, Error **errp)

riscv_cpu_register_gdb_regs_for_features(cs);

+#ifndef CONFIG_USER_ONLY
+ if (cpu->cfg.debug) {
+ riscv_trigger_realize(&cpu->env);
+ }
+#endif
+
qemu_init_vcpu(cs);
cpu_reset(cs);

diff --git a/target/riscv/debug.c b/target/riscv/debug.c
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/debug.c
+++ b/target/riscv/debug.c
@@ -XXX,XX +XXX,XX @@ bool riscv_cpu_debug_check_watchpoint(CPUState *cs, CPUWatchpoint *wp)
return false;
}

-void riscv_trigger_init(CPURISCVState *env)
+void riscv_trigger_realize(CPURISCVState *env)
+{
+ int i;
+
+ for (i = 0; i < RV_MAX_TRIGGERS; i++) {
+ env->itrigger_timer[i] = timer_new_ns(QEMU_CLOCK_VIRTUAL,
+ riscv_itrigger_timer_cb, env);
+ }
+}
+
+void riscv_trigger_reset_hold(CPURISCVState *env)
{
target_ulong tdata1 = build_tdata1(env, TRIGGER_TYPE_AD_MATCH, 0, 0);
int i;
@@ -XXX,XX +XXX,XX @@ void riscv_trigger_init(CPURISCVState *env)
env->tdata3[i] = 0;
env->cpu_breakpoint[i] = NULL;
env->cpu_watchpoint[i] = NULL;
- env->itrigger_timer[i] = timer_new_ns(QEMU_CLOCK_VIRTUAL,
- riscv_itrigger_timer_cb, env);
+ timer_del(env->itrigger_timer[i]);
}
}
--
2.41.0
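
The fix follows the usual realize/reset split: realize runs once and owns allocation, while reset can run arbitrarily often and must only return state to a known value. A condensed sketch of that rule in plain C, with Device and Timer as illustrative stand-ins for QEMU's types:

    #include <stdbool.h>
    #include <stdlib.h>

    #define N_TRIGGERS 2

    typedef struct { bool armed; } Timer;

    typedef struct {
        Timer *itrigger_timer[N_TRIGGERS];
    } Device;

    /* realize: runs exactly once per device, owns all allocation */
    void device_realize(Device *d)
    {
        for (int i = 0; i < N_TRIGGERS; i++) {
            d->itrigger_timer[i] = calloc(1, sizeof(Timer));
        }
    }

    /* reset: may run many times; it quiesces the existing timers
     * instead of allocating new ones, which is what closes the leak */
    void device_reset(Device *d)
    {
        for (int i = 0; i < N_TRIGGERS; i++) {
            d->itrigger_timer[i]->armed = false;
        }
    }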
From: Leon Schuermann <leons@opentitan.org>

When the rule-lock bypass (RLB) bit is set in the mseccfg CSR, the PMP
configuration lock bits must not apply. While this behavior is
implemented for the pmpcfgX CSRs, this bit is not respected for
changes to the pmpaddrX CSRs. This patch ensures that pmpaddrX CSR
writes work even on locked regions when the global rule-lock bypass is
enabled.

Signed-off-by: Leon Schuermann <leons@opentitan.org>
Reviewed-by: Mayuresh Chitale <mchitale@ventanamicro.com>
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
Message-ID: <20230829215046.1430463-1-leon@is.currently.online>
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
---
target/riscv/pmp.c | 4 ++++
1 file changed, 4 insertions(+)

diff --git a/target/riscv/pmp.c b/target/riscv/pmp.c
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/pmp.c
+++ b/target/riscv/pmp.c
@@ -XXX,XX +XXX,XX @@ static inline uint8_t pmp_get_a_field(uint8_t cfg)
*/
static inline int pmp_is_locked(CPURISCVState *env, uint32_t pmp_index)
{
+ /* mseccfg.RLB is set */
+ if (MSECCFG_RLB_ISSET(env)) {
+ return 0;
+ }

if (env->pmp_state.pmp[pmp_index].cfg_reg & PMP_LOCK) {
return 1;
--
2.41.0
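
Stated as a single predicate, the precedence the patch establishes is: mseccfg.RLB overrides per-entry locks for any PMP CSR write. A minimal sketch in plain C; the names are illustrative, and only PMP_LOCK's position as bit 7 of a pmpXcfg byte comes from the PMP spec:

    #include <stdbool.h>
    #include <stdint.h>

    #define PMP_LOCK 0x80 /* L bit: bit 7 of a pmpXcfg byte */

    /* May this entry's pmpcfgX/pmpaddrX fields be written? */
    bool pmp_entry_writable(bool mseccfg_rlb, uint8_t cfg_reg)
    {
        if (mseccfg_rlb) {
            return true; /* rule-lock bypass: lock bits are ignored */
        }
        return !(cfg_reg & PMP_LOCK);
    }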
From: Tommy Wu <tommy.wu@sifive.com>

According to the new spec, when vsiselect has a reserved value, attempts
from M-mode or HS-mode to access vsireg, or from VS-mode to access
sireg, should preferably raise an illegal instruction exception.

Signed-off-by: Tommy Wu <tommy.wu@sifive.com>
Reviewed-by: Frank Chang <frank.chang@sifive.com>
Message-ID: <20230816061647.600672-1-tommy.wu@sifive.com>
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
---
target/riscv/csr.c | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/target/riscv/csr.c b/target/riscv/csr.c
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/csr.c
+++ b/target/riscv/csr.c
@@ -XXX,XX +XXX,XX @@ static int rmw_iprio(target_ulong xlen,
static int rmw_xireg(CPURISCVState *env, int csrno, target_ulong *val,
target_ulong new_val, target_ulong wr_mask)
{
- bool virt;
+ bool virt, isel_reserved;
uint8_t *iprio;
int ret = -EINVAL;
target_ulong priv, isel, vgein;
@@ -XXX,XX +XXX,XX @@ static int rmw_xireg(CPURISCVState *env, int csrno, target_ulong *val,

/* Decode register details from CSR number */
virt = false;
+ isel_reserved = false;
switch (csrno) {
case CSR_MIREG:
iprio = env->miprio;
@@ -XXX,XX +XXX,XX @@ static int rmw_xireg(CPURISCVState *env, int csrno, target_ulong *val,
riscv_cpu_mxl_bits(env)),
val, new_val, wr_mask);
}
+ } else {
+ isel_reserved = true;
}

done:
if (ret) {
- return (env->virt_enabled && virt) ?
+ return (env->virt_enabled && virt && !isel_reserved) ?
RISCV_EXCP_VIRT_INSTRUCTION_FAULT : RISCV_EXCP_ILLEGAL_INST;
}
return RISCV_EXCP_NONE;
--
2.41.0
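
The final hunk's effect, restated as a standalone predicate; a hedged sketch with illustrative names, where only the two exception outcomes and the conditions are taken from the patch:

    #include <stdbool.h>

    /* Which exception does a failed indirect-CSR access raise? Reserved
     * isel values now force illegal-instruction even from a virtualized
     * context, rather than a virtual-instruction fault. */
    enum excp { EXCP_ILLEGAL_INST, EXCP_VIRT_INSTRUCTION_FAULT };

    enum excp xireg_fault(bool virt_enabled, bool virt, bool isel_reserved)
    {
        if (virt_enabled && virt && !isel_reserved) {
            return EXCP_VIRT_INSTRUCTION_FAULT;
        }
        return EXCP_ILLEGAL_INST;
    }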
From: Nikita Shubin <n.shubin@yadro.com>

As per ISA:

"For CSRRWI, if rd=x0, then the instruction shall not read the CSR and
shall not cause any of the side effects that might occur on a CSR read."

trans_csrrwi() and trans_csrrw() call do_csrw() when rd=x0; do_csrw()
then reaches riscv_csrrw_do64() via helper_csrw(), passing NULL as
*ret_value.

Signed-off-by: Nikita Shubin <n.shubin@yadro.com>
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
Message-ID: <20230808090914.17634-1-nikita.shubin@maquefel.me>
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
---
target/riscv/csr.c | 24 +++++++++++++---------
1 file changed, 15 insertions(+), 9 deletions(-)

diff --git a/target/riscv/csr.c b/target/riscv/csr.c
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/csr.c
+++ b/target/riscv/csr.c
@@ -XXX,XX +XXX,XX @@ static RISCVException riscv_csrrw_do64(CPURISCVState *env, int csrno,
target_ulong write_mask)
{
RISCVException ret;
- target_ulong old_value;
+ target_ulong old_value = 0;

/* execute combined read/write operation if it exists */
if (csr_ops[csrno].op) {
return csr_ops[csrno].op(env, csrno, ret_value, new_value, write_mask);
}

- /* if no accessor exists then return failure */
- if (!csr_ops[csrno].read) {
- return RISCV_EXCP_ILLEGAL_INST;
- }
- /* read old value */
- ret = csr_ops[csrno].read(env, csrno, &old_value);
- if (ret != RISCV_EXCP_NONE) {
- return ret;
+ /*
+ * ret_value == NULL means that rd=x0 and we're coming from helper_csrw()
+ * and we can't throw side effects caused by CSR reads.
+ */
+ if (ret_value) {
+ /* if no accessor exists then return failure */
+ if (!csr_ops[csrno].read) {
+ return RISCV_EXCP_ILLEGAL_INST;
+ }
+ /* read old value */
+ ret = csr_ops[csrno].read(env, csrno, &old_value);
+ if (ret != RISCV_EXCP_NONE) {
+ return ret;
+ }
}

/* write value if writable and write mask set, otherwise drop writes */
--
2.41.0
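
The convention the patch relies on can be captured in a few lines: a NULL result pointer marks a write-only access (rd == x0), so the read accessor and any read side effects are skipped entirely. A hedged sketch with illustrative types, not QEMU's csr_ops table:

    #include <stddef.h>

    typedef int (*csr_read_fn)(void *env, int csrno, unsigned long *val);

    /* returns 0 on success, a nonzero exception code otherwise */
    int csr_rw(void *env, int csrno, csr_read_fn read,
               unsigned long *ret_value, unsigned long new_value)
    {
        unsigned long old_value = 0; /* defined even when the read is skipped */

        if (ret_value) {        /* rd != x0: the read is architecturally required */
            if (!read) {
                return -1;      /* no accessor: treat as illegal instruction */
            }
            int ret = read(env, csrno, &old_value);
            if (ret) {
                return ret;
            }
            *ret_value = old_value;
        }
        /* ... mask and apply new_value via the write accessor here ... */
        (void)new_value;
        return 0;
    }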