1 | From: Alistair Francis <alistair.francis@wdc.com> | 1 | The following changes since commit c5ea91da443b458352c1b629b490ee6631775cb4: |
---|---|---|---|
2 | 2 | ||
3 | The following changes since commit 9cc1bf1ebca550f8d90f967ccd2b6d2e00e81387: | 3 | Merge tag 'pull-trivial-patches' of https://gitlab.com/mjt0k/qemu into staging (2023-09-08 10:06:25 -0400) |
4 | |||
5 | Merge tag 'pull-xen-20220609' of https://xenbits.xen.org/git-http/people/aperard/qemu-dm into staging (2022-06-09 08:25:17 -0700) | ||
6 | 4 | ||
7 | are available in the Git repository at: | 5 | are available in the Git repository at: |
8 | 6 | ||
9 | git@github.com:alistair23/qemu.git tags/pull-riscv-to-apply-20220610 | 7 | https://github.com/alistair23/qemu.git tags/pull-riscv-to-apply-20230911 |
10 | 8 | ||
11 | for you to fetch changes up to 07314158f6aa4d2589520c194a7531b9364a8d54: | 9 | for you to fetch changes up to e7a03409f29e2da59297d55afbaec98c96e43e3a: |
12 | 10 | ||
13 | target/riscv: trans_rvv: Avoid assert for RV32 and e64 (2022-06-10 09:42:12 +1000) | 11 | target/riscv: don't read CSR in riscv_csrrw_do64 (2023-09-11 11:45:55 +1000) |
14 | 12 | ||
15 | ---------------------------------------------------------------- | 13 | ---------------------------------------------------------------- |
16 | Fourth RISC-V PR for QEMU 7.1 | 14 | First RISC-V PR for 8.2 |
17 | 15 | ||
18 | * Update MAINTAINERS | 16 | * Remove 'host' CPU from TCG |
19 | * Add support for Zmmul extension | 17 | * riscv_htif Fixup printing on big endian hosts |
20 | * Fixup FDT errors when supplying device tree from the command line for virt machine | 18 | * Add zmmul isa string |
21 | * Avoid overflowing the addr_config buffer in the SiFive PLIC | 19 | * Add smepmp isa string |
22 | * Support -device loader addresses above 2GB | 20 | * Fix page_check_range use in fault-only-first |
23 | * Correctly wake from WFI on VS-level external interrupts | 21 | * Use existing lookup tables for MixColumns |
24 | * Fixes for RV128 support | 22 | * Add RISC-V vector cryptographic instruction set support |
25 | * Support Vector extension tail agnostic setting elements' bits to all 1s | 23 | * Implement WARL behaviour for mcountinhibit/mcounteren |
26 | * Don't expose the CPU properties on named CPUs | 24 | * Add Zihintntl extension ISA string to DTS |
27 | * Fix vector extension assert for RV32 | 25 | * Fix zfa fleq.d and fltq.d |
26 | * Fix upper/lower mtime write calculation | ||
27 | * Make rtc variable names consistent | ||
28 | * Use abi type for linux-user target_ucontext | ||
29 | * Add RISC-V KVM AIA Support | ||
30 | * Fix riscv,pmu DT node path in the virt machine | ||
31 | * Update CSR bits name for svadu extension | ||
32 | * Mark zicond non-experimental | ||
33 | * Fix satp_mode_finalize() when satp_mode.supported = 0 | ||
34 | * Fix non-KVM --enable-debug build | ||
35 | * Add new extensions to hwprobe | ||
36 | * Use accelerated helper for AES64KS1I | ||
37 | * Allocate itrigger timers only once | ||
38 | * Respect mseccfg.RLB for pmpaddrX changes | ||
39 | * Align the AIA model to v1.0 ratified spec | ||
40 | * Don't read the CSR in riscv_csrrw_do64 | ||
28 | 41 | ||
29 | ---------------------------------------------------------------- | 42 | ---------------------------------------------------------------- |
30 | Alistair Francis (4): | 43 | Akihiko Odaki (1): |
31 | MAINTAINERS: Cover hw/core/uboot_image.h within Generic Loader section | 44 | target/riscv: Allocate itrigger timers only once |
32 | hw/intc: sifive_plic: Avoid overflowing the addr_config buffer | ||
33 | target/riscv: Don't expose the CPU properties on names CPUs | ||
34 | target/riscv: trans_rvv: Avoid assert for RV32 and e64 | ||
35 | 45 | ||
36 | Andrew Bresticker (1): | 46 | Ard Biesheuvel (2): |
37 | target/riscv: Wake on VS-level external interrupts | 47 | target/riscv: Use existing lookup tables for MixColumns |
48 | target/riscv: Use accelerated helper for AES64KS1I | ||
38 | 49 | ||
39 | Atish Patra (1): | 50 | Conor Dooley (1): |
40 | hw/riscv: virt: Generate fw_cfg DT node correctly | 51 | hw/riscv: virt: Fix riscv,pmu DT node path |
41 | 52 | ||
42 | Frédéric Pétrot (1): | 53 | Daniel Henrique Barboza (6): |
43 | target/riscv/debug.c: keep experimental rv128 support working | 54 | target/riscv/cpu.c: do not run 'host' CPU with TCG |
55 | target/riscv/cpu.c: add zmmul isa string | ||
56 | target/riscv/cpu.c: add smepmp isa string | ||
57 | target/riscv: fix satp_mode_finalize() when satp_mode.supported = 0 | ||
58 | hw/riscv/virt.c: fix non-KVM --enable-debug build | ||
59 | hw/intc/riscv_aplic.c fix non-KVM --enable-debug build | ||
44 | 60 | ||
45 | Jamie Iles (1): | 61 | Dickon Hood (2): |
46 | hw/core/loader: return image sizes as ssize_t | 62 | target/riscv: Refactor translation of vector-widening instruction |
63 | target/riscv: Add Zvbb ISA extension support | ||
64 | |||
65 | Jason Chien (3): | ||
66 | target/riscv: Add Zihintntl extension ISA string to DTS | ||
67 | hw/intc: Fix upper/lower mtime write calculation | ||
68 | hw/intc: Make rtc variable names consistent | ||
69 | |||
70 | Kiran Ostrolenk (4): | ||
71 | target/riscv: Refactor some of the generic vector functionality | ||
72 | target/riscv: Refactor vector-vector translation macro | ||
73 | target/riscv: Refactor some of the generic vector functionality | ||
74 | target/riscv: Add Zvknh ISA extension support | ||
75 | |||
76 | LIU Zhiwei (3): | ||
77 | target/riscv: Fix page_check_range use in fault-only-first | ||
78 | target/riscv: Fix zfa fleq.d and fltq.d | ||
79 | linux-user/riscv: Use abi type for target_ucontext | ||
80 | |||
81 | Lawrence Hunter (2): | ||
82 | target/riscv: Add Zvbc ISA extension support | ||
83 | target/riscv: Add Zvksh ISA extension support | ||
84 | |||
85 | Leon Schuermann (1): | ||
86 | target/riscv/pmp.c: respect mseccfg.RLB for pmpaddrX changes | ||
87 | |||
88 | Max Chou (3): | ||
89 | crypto: Create sm4_subword | ||
90 | crypto: Add SM4 constant parameter CK | ||
91 | target/riscv: Add Zvksed ISA extension support | ||
92 | |||
93 | Nazar Kazakov (4): | ||
94 | target/riscv: Remove redundant "cpu_vl == 0" checks | ||
95 | target/riscv: Move vector translation checks | ||
96 | target/riscv: Add Zvkned ISA extension support | ||
97 | target/riscv: Add Zvkg ISA extension support | ||
98 | |||
99 | Nikita Shubin (1): | ||
100 | target/riscv: don't read CSR in riscv_csrrw_do64 | ||
101 | |||
102 | Rob Bradford (1): | ||
103 | target/riscv: Implement WARL behaviour for mcountinhibit/mcounteren | ||
104 | |||
105 | Robbin Ehn (1): | ||
106 | linux-user/riscv: Add new extensions to hwprobe | ||
107 | |||
108 | Thomas Huth (2): | ||
109 | hw/char/riscv_htif: Fix printing of console characters on big endian hosts | ||
110 | hw/char/riscv_htif: Fix the console syscall on big endian hosts | ||
111 | |||
112 | Tommy Wu (1): | ||
113 | target/riscv: Align the AIA model to v1.0 ratified spec | ||
114 | |||
115 | Vineet Gupta (1): | ||
116 | riscv: zicond: make non-experimental | ||
47 | 117 | ||
48 | Weiwei Li (1): | 118 | Weiwei Li (1): |
49 | target/riscv: add support for zmmul extension v0.1 | 119 | target/riscv: Update CSR bits name for svadu extension |
50 | 120 | ||
51 | eopXD (16): | 121 | Yong-Xuan Wang (5): |
52 | target/riscv: rvv: Prune redundant ESZ, DSZ parameter passed | 122 | target/riscv: support the AIA device emulation with KVM enabled |
53 | target/riscv: rvv: Prune redundant access_type parameter passed | 123 | target/riscv: check the in-kernel irqchip support |
54 | target/riscv: rvv: Rename ambiguous esz | 124 | target/riscv: Create an KVM AIA irqchip |
55 | target/riscv: rvv: Early exit when vstart >= vl | 125 | target/riscv: update APLIC and IMSIC to support KVM AIA |
56 | target/riscv: rvv: Add tail agnostic for vv instructions | 126 | target/riscv: select KVM AIA in riscv virt machine |
57 | target/riscv: rvv: Add tail agnostic for vector load / store instructions | ||
58 | target/riscv: rvv: Add tail agnostic for vx, vvm, vxm instructions | ||
59 | target/riscv: rvv: Add tail agnostic for vector integer shift instructions | ||
60 | target/riscv: rvv: Add tail agnostic for vector integer comparison instructions | ||
61 | target/riscv: rvv: Add tail agnostic for vector integer merge and move instructions | ||
62 | target/riscv: rvv: Add tail agnostic for vector fix-point arithmetic instructions | ||
63 | target/riscv: rvv: Add tail agnostic for vector floating-point instructions | ||
64 | target/riscv: rvv: Add tail agnostic for vector reduction instructions | ||
65 | target/riscv: rvv: Add tail agnostic for vector mask instructions | ||
66 | target/riscv: rvv: Add tail agnostic for vector permutation instructions | ||
67 | target/riscv: rvv: Add option 'rvv_ta_all_1s' to enable optional tail agnostic behavior | ||
68 | 127 | ||
69 | include/hw/loader.h | 55 +- | 128 | include/crypto/aes.h | 7 + |
70 | target/riscv/cpu.h | 4 + | 129 | include/crypto/sm4.h | 9 + |
71 | target/riscv/internals.h | 6 +- | 130 | target/riscv/cpu_bits.h | 8 +- |
72 | hw/arm/armv7m.c | 2 +- | 131 | target/riscv/cpu_cfg.h | 9 + |
73 | hw/arm/boot.c | 8 +- | 132 | target/riscv/debug.h | 3 +- |
74 | hw/core/generic-loader.c | 2 +- | 133 | target/riscv/helper.h | 98 +++ |
75 | hw/core/loader.c | 81 +- | 134 | target/riscv/kvm_riscv.h | 5 + |
76 | hw/i386/x86.c | 2 +- | 135 | target/riscv/vector_internals.h | 228 +++++++ |
77 | hw/intc/sifive_plic.c | 19 +- | 136 | target/riscv/insn32.decode | 58 ++ |
78 | hw/riscv/boot.c | 5 +- | 137 | crypto/aes.c | 4 +- |
79 | hw/riscv/virt.c | 28 +- | 138 | crypto/sm4.c | 10 + |
80 | target/riscv/cpu.c | 68 +- | 139 | hw/char/riscv_htif.c | 12 +- |
81 | target/riscv/cpu_helper.c | 4 +- | 140 | hw/intc/riscv_aclint.c | 11 +- |
82 | target/riscv/debug.c | 2 + | 141 | hw/intc/riscv_aplic.c | 52 +- |
83 | target/riscv/translate.c | 4 + | 142 | hw/intc/riscv_imsic.c | 25 +- |
84 | target/riscv/vector_helper.c | 1588 +++++++++++++++++++------------ | 143 | hw/riscv/virt.c | 374 ++++++------ |
85 | target/riscv/insn_trans/trans_rvm.c.inc | 18 +- | 144 | linux-user/riscv/signal.c | 4 +- |
86 | target/riscv/insn_trans/trans_rvv.c.inc | 106 ++- | 145 | linux-user/syscall.c | 14 +- |
87 | MAINTAINERS | 1 + | 146 | target/arm/tcg/crypto_helper.c | 10 +- |
88 | 19 files changed, 1244 insertions(+), 759 deletions(-) | 147 | target/riscv/cpu.c | 83 ++- |
148 | target/riscv/cpu_helper.c | 6 +- | ||
149 | target/riscv/crypto_helper.c | 51 +- | ||
150 | target/riscv/csr.c | 54 +- | ||
151 | target/riscv/debug.c | 15 +- | ||
152 | target/riscv/kvm.c | 201 ++++++- | ||
153 | target/riscv/pmp.c | 4 + | ||
154 | target/riscv/translate.c | 1 + | ||
155 | target/riscv/vcrypto_helper.c | 970 ++++++++++++++++++++++++++++++ | ||
156 | target/riscv/vector_helper.c | 245 +------- | ||
157 | target/riscv/vector_internals.c | 81 +++ | ||
158 | target/riscv/insn_trans/trans_rvv.c.inc | 171 +++--- | ||
159 | target/riscv/insn_trans/trans_rvvk.c.inc | 606 +++++++++++++++++++ | ||
160 | target/riscv/insn_trans/trans_rvzfa.c.inc | 4 +- | ||
161 | target/riscv/meson.build | 4 +- | ||
162 | 34 files changed, 2785 insertions(+), 652 deletions(-) | ||
163 | create mode 100644 target/riscv/vector_internals.h | ||
164 | create mode 100644 target/riscv/vcrypto_helper.c | ||
165 | create mode 100644 target/riscv/vector_internals.c | ||
166 | create mode 100644 target/riscv/insn_trans/trans_rvvk.c.inc | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Daniel Henrique Barboza <dbarboza@ventanamicro.com> | ||
1 | 2 | ||
3 | The 'host' CPU is available in a CONFIG_KVM build and it's currently | ||
4 | available for all accels, but is a KVM only CPU. This means that in a | ||
5 | RISC-V KVM capable host we can do things like this: | ||
6 | |||
7 | $ ./build/qemu-system-riscv64 -M virt,accel=tcg -cpu host --nographic | ||
8 | qemu-system-riscv64: H extension requires priv spec 1.12.0 | ||
9 | |||
10 | This CPU does not have a priv spec because we don't filter its extensions | ||
11 | via priv spec. We shouldn't be reaching riscv_cpu_realize_tcg() at all | ||
12 | with the 'host' CPU. | ||
13 | |||
14 | We don't have a way to filter the 'host' CPU out of the available CPU | ||
15 | options (-cpu help) if the build includes both KVM and TCG. What we can | ||
16 | do is to error out during riscv_cpu_realize_tcg() if the user chooses | ||
17 | the 'host' CPU with accel=tcg: | ||
18 | |||
19 | $ ./build/qemu-system-riscv64 -M virt,accel=tcg -cpu host --nographic | ||
20 | qemu-system-riscv64: 'host' CPU is not compatible with TCG acceleration | ||
21 | |||
22 | Signed-off-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> | ||
23 | Reviewed-by: Alistair Francis <alistair.francis@wdc.com> | ||
24 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
25 | Message-Id: <20230721133411.474105-1-dbarboza@ventanamicro.com> | ||
26 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | ||
27 | --- | ||
28 | target/riscv/cpu.c | 5 +++++ | ||
29 | 1 file changed, 5 insertions(+) | ||
30 | |||
31 | diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c | ||
32 | index XXXXXXX..XXXXXXX 100644 | ||
33 | --- a/target/riscv/cpu.c | ||
34 | +++ b/target/riscv/cpu.c | ||
35 | @@ -XXX,XX +XXX,XX @@ static void riscv_cpu_realize_tcg(DeviceState *dev, Error **errp) | ||
36 | CPURISCVState *env = &cpu->env; | ||
37 | Error *local_err = NULL; | ||
38 | |||
39 | + if (object_dynamic_cast(OBJECT(dev), TYPE_RISCV_CPU_HOST)) { | ||
40 | + error_setg(errp, "'host' CPU is not compatible with TCG acceleration"); | ||
41 | + return; | ||
42 | + } | ||
43 | + | ||
44 | riscv_cpu_validate_misa_mxl(cpu, &local_err); | ||
45 | if (local_err != NULL) { | ||
46 | error_propagate(errp, local_err); | ||
47 | -- | ||
48 | 2.41.0 | ||
49 | |||
50 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Thomas Huth <thuth@redhat.com> | ||
1 | 2 | ||
3 | The character that should be printed is stored in the 64 bit "payload" | ||
4 | variable. The code currently tries to print it by taking the address | ||
5 | of the variable and passing this pointer to qemu_chr_fe_write(). However, | ||
6 | this only works on little endian hosts where the least significant bits | ||
7 | are stored on the lowest address. To do this in a portable way, we have | ||
8 | to store the value in an uint8_t variable instead. | ||
9 | |||
10 | Fixes: 5033606780 ("RISC-V HTIF Console") | ||
11 | Signed-off-by: Thomas Huth <thuth@redhat.com> | ||
12 | Reviewed-by: Alistair Francis <alistair.francis@wdc.com> | ||
13 | Reviewed-by: Bin Meng <bmeng@tinylab.org> | ||
14 | Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> | ||
15 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
16 | Message-Id: <20230721094720.902454-2-thuth@redhat.com> | ||
17 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | ||
18 | --- | ||
19 | hw/char/riscv_htif.c | 3 ++- | ||
20 | 1 file changed, 2 insertions(+), 1 deletion(-) | ||
21 | |||
22 | diff --git a/hw/char/riscv_htif.c b/hw/char/riscv_htif.c | ||
23 | index XXXXXXX..XXXXXXX 100644 | ||
24 | --- a/hw/char/riscv_htif.c | ||
25 | +++ b/hw/char/riscv_htif.c | ||
26 | @@ -XXX,XX +XXX,XX @@ static void htif_handle_tohost_write(HTIFState *s, uint64_t val_written) | ||
27 | s->tohost = 0; /* clear to indicate we read */ | ||
28 | return; | ||
29 | } else if (cmd == HTIF_CONSOLE_CMD_PUTC) { | ||
30 | - qemu_chr_fe_write(&s->chr, (uint8_t *)&payload, 1); | ||
31 | + uint8_t ch = (uint8_t)payload; | ||
32 | + qemu_chr_fe_write(&s->chr, &ch, 1); | ||
33 | resp = 0x100 | (uint8_t)payload; | ||
34 | } else { | ||
35 | qemu_log("HTIF device %d: unknown command\n", device); | ||
36 | -- | ||
37 | 2.41.0 | ||
38 | |||
39 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Thomas Huth <thuth@redhat.com> | ||
1 | 2 | ||
3 | Values that have been read via cpu_physical_memory_read() from the | ||
4 | guest's memory have to be swapped in case the host endianess differs | ||
5 | from the guest. | ||
6 | |||
7 | Fixes: a6e13e31d5 ("riscv_htif: Support console output via proxy syscall") | ||
8 | Signed-off-by: Thomas Huth <thuth@redhat.com> | ||
9 | Reviewed-by: Alistair Francis <alistair.francis@wdc.com> | ||
10 | Reviewed-by: Bin Meng <bmeng@tinylab.org> | ||
11 | Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> | ||
12 | Message-Id: <20230721094720.902454-3-thuth@redhat.com> | ||
13 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | ||
14 | --- | ||
15 | hw/char/riscv_htif.c | 9 +++++---- | ||
16 | 1 file changed, 5 insertions(+), 4 deletions(-) | ||
17 | |||
18 | diff --git a/hw/char/riscv_htif.c b/hw/char/riscv_htif.c | ||
19 | index XXXXXXX..XXXXXXX 100644 | ||
20 | --- a/hw/char/riscv_htif.c | ||
21 | +++ b/hw/char/riscv_htif.c | ||
22 | @@ -XXX,XX +XXX,XX @@ | ||
23 | #include "qemu/timer.h" | ||
24 | #include "qemu/error-report.h" | ||
25 | #include "exec/address-spaces.h" | ||
26 | +#include "exec/tswap.h" | ||
27 | #include "sysemu/dma.h" | ||
28 | |||
29 | #define RISCV_DEBUG_HTIF 0 | ||
30 | @@ -XXX,XX +XXX,XX @@ static void htif_handle_tohost_write(HTIFState *s, uint64_t val_written) | ||
31 | } else { | ||
32 | uint64_t syscall[8]; | ||
33 | cpu_physical_memory_read(payload, syscall, sizeof(syscall)); | ||
34 | - if (syscall[0] == PK_SYS_WRITE && | ||
35 | - syscall[1] == HTIF_DEV_CONSOLE && | ||
36 | - syscall[3] == HTIF_CONSOLE_CMD_PUTC) { | ||
37 | + if (tswap64(syscall[0]) == PK_SYS_WRITE && | ||
38 | + tswap64(syscall[1]) == HTIF_DEV_CONSOLE && | ||
39 | + tswap64(syscall[3]) == HTIF_CONSOLE_CMD_PUTC) { | ||
40 | uint8_t ch; | ||
41 | - cpu_physical_memory_read(syscall[2], &ch, 1); | ||
42 | + cpu_physical_memory_read(tswap64(syscall[2]), &ch, 1); | ||
43 | qemu_chr_fe_write(&s->chr, &ch, 1); | ||
44 | resp = 0x100 | (uint8_t)payload; | ||
45 | } else { | ||
46 | -- | ||
47 | 2.41.0 | diff view generated by jsdifflib |
1 | From: eopXD <yueh.ting.chen@gmail.com> | 1 | From: Daniel Henrique Barboza <dbarboza@ventanamicro.com> |
---|---|---|---|
2 | 2 | ||
3 | No functional change intended in this commit. | 3 | zmmul was promoted from experimental to ratified in commit 6d00ffad4e95. |
4 | Add a riscv,isa string for it. | ||
4 | 5 | ||
5 | Signed-off-by: eop Chen <eop.chen@sifive.com> | 6 | Fixes: 6d00ffad4e95 ("target/riscv: move zmmul out of the experimental properties") |
6 | Reviewed-by: Frank Chang <frank.chang@sifive.com> | 7 | Signed-off-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> |
7 | Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn> | 8 | Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn> |
8 | Reviewed-by: Alistair Francis <alistair.francis@wdc.com> | 9 | Reviewed-by: Alistair Francis <alistair.francis@wdc.com> |
9 | Message-Id: <165449614532.19704.7000832880482980398-3@git.sr.ht> | 10 | Message-Id: <20230720132424.371132-2-dbarboza@ventanamicro.com> |
10 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | 11 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> |
11 | --- | 12 | --- |
12 | target/riscv/vector_helper.c | 76 ++++++++++++++++++------------------ | 13 | target/riscv/cpu.c | 1 + |
13 | 1 file changed, 38 insertions(+), 38 deletions(-) | 14 | 1 file changed, 1 insertion(+) |
14 | 15 | ||
15 | diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c | 16 | diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c |
16 | index XXXXXXX..XXXXXXX 100644 | 17 | index XXXXXXX..XXXXXXX 100644 |
17 | --- a/target/riscv/vector_helper.c | 18 | --- a/target/riscv/cpu.c |
18 | +++ b/target/riscv/vector_helper.c | 19 | +++ b/target/riscv/cpu.c |
19 | @@ -XXX,XX +XXX,XX @@ static inline int32_t vext_lmul(uint32_t desc) | 20 | @@ -XXX,XX +XXX,XX @@ static const struct isa_ext_data isa_edata_arr[] = { |
20 | /* | 21 | ISA_EXT_DATA_ENTRY(zicsr, PRIV_VERSION_1_10_0, ext_icsr), |
21 | * Get the maximum number of elements can be operated. | 22 | ISA_EXT_DATA_ENTRY(zifencei, PRIV_VERSION_1_10_0, ext_ifencei), |
22 | * | 23 | ISA_EXT_DATA_ENTRY(zihintpause, PRIV_VERSION_1_10_0, ext_zihintpause), |
23 | - * esz: log2 of element size in bytes. | 24 | + ISA_EXT_DATA_ENTRY(zmmul, PRIV_VERSION_1_12_0, ext_zmmul), |
24 | + * log2_esz: log2 of element size in bytes. | 25 | ISA_EXT_DATA_ENTRY(zawrs, PRIV_VERSION_1_12_0, ext_zawrs), |
25 | */ | 26 | ISA_EXT_DATA_ENTRY(zfa, PRIV_VERSION_1_12_0, ext_zfa), |
26 | -static inline uint32_t vext_max_elems(uint32_t desc, uint32_t esz) | 27 | ISA_EXT_DATA_ENTRY(zfbfmin, PRIV_VERSION_1_12_0, ext_zfbfmin), |
27 | +static inline uint32_t vext_max_elems(uint32_t desc, uint32_t log2_esz) | ||
28 | { | ||
29 | /* | ||
30 | * As simd_desc support at most 2048 bytes, the max vlen is 1024 bits. | ||
31 | @@ -XXX,XX +XXX,XX @@ static inline uint32_t vext_max_elems(uint32_t desc, uint32_t esz) | ||
32 | uint32_t vlenb = simd_maxsz(desc); | ||
33 | |||
34 | /* Return VLMAX */ | ||
35 | - int scale = vext_lmul(desc) - esz; | ||
36 | + int scale = vext_lmul(desc) - log2_esz; | ||
37 | return scale < 0 ? vlenb >> -scale : vlenb << scale; | ||
38 | } | ||
39 | |||
40 | @@ -XXX,XX +XXX,XX @@ vext_ldst_stride(void *vd, void *v0, target_ulong base, | ||
41 | target_ulong stride, CPURISCVState *env, | ||
42 | uint32_t desc, uint32_t vm, | ||
43 | vext_ldst_elem_fn *ldst_elem, | ||
44 | - uint32_t esz, uintptr_t ra) | ||
45 | + uint32_t log2_esz, uintptr_t ra) | ||
46 | { | ||
47 | uint32_t i, k; | ||
48 | uint32_t nf = vext_nf(desc); | ||
49 | - uint32_t max_elems = vext_max_elems(desc, esz); | ||
50 | + uint32_t max_elems = vext_max_elems(desc, log2_esz); | ||
51 | |||
52 | for (i = env->vstart; i < env->vl; i++, env->vstart++) { | ||
53 | if (!vm && !vext_elem_mask(v0, i)) { | ||
54 | @@ -XXX,XX +XXX,XX @@ vext_ldst_stride(void *vd, void *v0, target_ulong base, | ||
55 | |||
56 | k = 0; | ||
57 | while (k < nf) { | ||
58 | - target_ulong addr = base + stride * i + (k << esz); | ||
59 | + target_ulong addr = base + stride * i + (k << log2_esz); | ||
60 | ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra); | ||
61 | k++; | ||
62 | } | ||
63 | @@ -XXX,XX +XXX,XX @@ GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d) | ||
64 | /* unmasked unit-stride load and store operation*/ | ||
65 | static void | ||
66 | vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, | ||
67 | - vext_ldst_elem_fn *ldst_elem, uint32_t esz, uint32_t evl, | ||
68 | + vext_ldst_elem_fn *ldst_elem, uint32_t log2_esz, uint32_t evl, | ||
69 | uintptr_t ra) | ||
70 | { | ||
71 | uint32_t i, k; | ||
72 | uint32_t nf = vext_nf(desc); | ||
73 | - uint32_t max_elems = vext_max_elems(desc, esz); | ||
74 | + uint32_t max_elems = vext_max_elems(desc, log2_esz); | ||
75 | |||
76 | /* load bytes from guest memory */ | ||
77 | for (i = env->vstart; i < evl; i++, env->vstart++) { | ||
78 | k = 0; | ||
79 | while (k < nf) { | ||
80 | - target_ulong addr = base + ((i * nf + k) << esz); | ||
81 | + target_ulong addr = base + ((i * nf + k) << log2_esz); | ||
82 | ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra); | ||
83 | k++; | ||
84 | } | ||
85 | @@ -XXX,XX +XXX,XX @@ vext_ldst_index(void *vd, void *v0, target_ulong base, | ||
86 | void *vs2, CPURISCVState *env, uint32_t desc, | ||
87 | vext_get_index_addr get_index_addr, | ||
88 | vext_ldst_elem_fn *ldst_elem, | ||
89 | - uint32_t esz, uintptr_t ra) | ||
90 | + uint32_t log2_esz, uintptr_t ra) | ||
91 | { | ||
92 | uint32_t i, k; | ||
93 | uint32_t nf = vext_nf(desc); | ||
94 | uint32_t vm = vext_vm(desc); | ||
95 | - uint32_t max_elems = vext_max_elems(desc, esz); | ||
96 | + uint32_t max_elems = vext_max_elems(desc, log2_esz); | ||
97 | |||
98 | /* load bytes from guest memory */ | ||
99 | for (i = env->vstart; i < env->vl; i++, env->vstart++) { | ||
100 | @@ -XXX,XX +XXX,XX @@ vext_ldst_index(void *vd, void *v0, target_ulong base, | ||
101 | |||
102 | k = 0; | ||
103 | while (k < nf) { | ||
104 | - abi_ptr addr = get_index_addr(base, i, vs2) + (k << esz); | ||
105 | + abi_ptr addr = get_index_addr(base, i, vs2) + (k << log2_esz); | ||
106 | ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra); | ||
107 | k++; | ||
108 | } | ||
109 | @@ -XXX,XX +XXX,XX @@ static inline void | ||
110 | vext_ldff(void *vd, void *v0, target_ulong base, | ||
111 | CPURISCVState *env, uint32_t desc, | ||
112 | vext_ldst_elem_fn *ldst_elem, | ||
113 | - uint32_t esz, uintptr_t ra) | ||
114 | + uint32_t log2_esz, uintptr_t ra) | ||
115 | { | ||
116 | void *host; | ||
117 | uint32_t i, k, vl = 0; | ||
118 | uint32_t nf = vext_nf(desc); | ||
119 | uint32_t vm = vext_vm(desc); | ||
120 | - uint32_t max_elems = vext_max_elems(desc, esz); | ||
121 | + uint32_t max_elems = vext_max_elems(desc, log2_esz); | ||
122 | target_ulong addr, offset, remain; | ||
123 | |||
124 | /* probe every access*/ | ||
125 | @@ -XXX,XX +XXX,XX @@ vext_ldff(void *vd, void *v0, target_ulong base, | ||
126 | if (!vm && !vext_elem_mask(v0, i)) { | ||
127 | continue; | ||
128 | } | ||
129 | - addr = adjust_addr(env, base + i * (nf << esz)); | ||
130 | + addr = adjust_addr(env, base + i * (nf << log2_esz)); | ||
131 | if (i == 0) { | ||
132 | - probe_pages(env, addr, nf << esz, ra, MMU_DATA_LOAD); | ||
133 | + probe_pages(env, addr, nf << log2_esz, ra, MMU_DATA_LOAD); | ||
134 | } else { | ||
135 | /* if it triggers an exception, no need to check watchpoint */ | ||
136 | - remain = nf << esz; | ||
137 | + remain = nf << log2_esz; | ||
138 | while (remain > 0) { | ||
139 | offset = -(addr | TARGET_PAGE_MASK); | ||
140 | host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD, | ||
141 | @@ -XXX,XX +XXX,XX @@ ProbeSuccess: | ||
142 | continue; | ||
143 | } | ||
144 | while (k < nf) { | ||
145 | - target_ulong addr = base + ((i * nf + k) << esz); | ||
146 | + target_ulong addr = base + ((i * nf + k) << log2_esz); | ||
147 | ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra); | ||
148 | k++; | ||
149 | } | ||
150 | @@ -XXX,XX +XXX,XX @@ GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d) | ||
151 | */ | ||
152 | static void | ||
153 | vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, | ||
154 | - vext_ldst_elem_fn *ldst_elem, uint32_t esz, uintptr_t ra) | ||
155 | + vext_ldst_elem_fn *ldst_elem, uint32_t log2_esz, uintptr_t ra) | ||
156 | { | ||
157 | uint32_t i, k, off, pos; | ||
158 | uint32_t nf = vext_nf(desc); | ||
159 | uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3; | ||
160 | - uint32_t max_elems = vlenb >> esz; | ||
161 | + uint32_t max_elems = vlenb >> log2_esz; | ||
162 | |||
163 | k = env->vstart / max_elems; | ||
164 | off = env->vstart % max_elems; | ||
165 | @@ -XXX,XX +XXX,XX @@ vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, | ||
166 | if (off) { | ||
167 | /* load/store rest of elements of current segment pointed by vstart */ | ||
168 | for (pos = off; pos < max_elems; pos++, env->vstart++) { | ||
169 | - target_ulong addr = base + ((pos + k * max_elems) << esz); | ||
170 | + target_ulong addr = base + ((pos + k * max_elems) << log2_esz); | ||
171 | ldst_elem(env, adjust_addr(env, addr), pos + k * max_elems, vd, ra); | ||
172 | } | ||
173 | k++; | ||
174 | @@ -XXX,XX +XXX,XX @@ vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, | ||
175 | /* load/store elements for rest of segments */ | ||
176 | for (; k < nf; k++) { | ||
177 | for (i = 0; i < max_elems; i++, env->vstart++) { | ||
178 | - target_ulong addr = base + ((i + k * max_elems) << esz); | ||
179 | + target_ulong addr = base + ((i + k * max_elems) << log2_esz); | ||
180 | ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra); | ||
181 | } | ||
182 | } | ||
183 | @@ -XXX,XX +XXX,XX @@ GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2) | ||
184 | GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4) | ||
185 | GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8) | ||
186 | |||
187 | -#define GEN_VEXT_VSLIE1UP(ESZ, H) \ | ||
188 | -static void vslide1up_##ESZ(void *vd, void *v0, target_ulong s1, void *vs2, \ | ||
189 | - CPURISCVState *env, uint32_t desc) \ | ||
190 | +#define GEN_VEXT_VSLIE1UP(BITWIDTH, H) \ | ||
191 | +static void vslide1up_##BITWIDTH(void *vd, void *v0, target_ulong s1, \ | ||
192 | + void *vs2, CPURISCVState *env, uint32_t desc) \ | ||
193 | { \ | ||
194 | - typedef uint##ESZ##_t ETYPE; \ | ||
195 | + typedef uint##BITWIDTH##_t ETYPE; \ | ||
196 | uint32_t vm = vext_vm(desc); \ | ||
197 | uint32_t vl = env->vl; \ | ||
198 | uint32_t i; \ | ||
199 | @@ -XXX,XX +XXX,XX @@ GEN_VEXT_VSLIE1UP(16, H2) | ||
200 | GEN_VEXT_VSLIE1UP(32, H4) | ||
201 | GEN_VEXT_VSLIE1UP(64, H8) | ||
202 | |||
203 | -#define GEN_VEXT_VSLIDE1UP_VX(NAME, ESZ) \ | ||
204 | +#define GEN_VEXT_VSLIDE1UP_VX(NAME, BITWIDTH) \ | ||
205 | void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ | ||
206 | CPURISCVState *env, uint32_t desc) \ | ||
207 | { \ | ||
208 | - vslide1up_##ESZ(vd, v0, s1, vs2, env, desc); \ | ||
209 | + vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc); \ | ||
210 | } | ||
211 | |||
212 | /* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */ | ||
213 | @@ -XXX,XX +XXX,XX @@ GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, 16) | ||
214 | GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, 32) | ||
215 | GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, 64) | ||
216 | |||
217 | -#define GEN_VEXT_VSLIDE1DOWN(ESZ, H) \ | ||
218 | -static void vslide1down_##ESZ(void *vd, void *v0, target_ulong s1, void *vs2, \ | ||
219 | - CPURISCVState *env, uint32_t desc) \ | ||
220 | +#define GEN_VEXT_VSLIDE1DOWN(BITWIDTH, H) \ | ||
221 | +static void vslide1down_##BITWIDTH(void *vd, void *v0, target_ulong s1, \ | ||
222 | + void *vs2, CPURISCVState *env, uint32_t desc) \ | ||
223 | { \ | ||
224 | - typedef uint##ESZ##_t ETYPE; \ | ||
225 | + typedef uint##BITWIDTH##_t ETYPE; \ | ||
226 | uint32_t vm = vext_vm(desc); \ | ||
227 | uint32_t vl = env->vl; \ | ||
228 | uint32_t i; \ | ||
229 | @@ -XXX,XX +XXX,XX @@ GEN_VEXT_VSLIDE1DOWN(16, H2) | ||
230 | GEN_VEXT_VSLIDE1DOWN(32, H4) | ||
231 | GEN_VEXT_VSLIDE1DOWN(64, H8) | ||
232 | |||
233 | -#define GEN_VEXT_VSLIDE1DOWN_VX(NAME, ESZ) \ | ||
234 | +#define GEN_VEXT_VSLIDE1DOWN_VX(NAME, BITWIDTH) \ | ||
235 | void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ | ||
236 | CPURISCVState *env, uint32_t desc) \ | ||
237 | { \ | ||
238 | - vslide1down_##ESZ(vd, v0, s1, vs2, env, desc); \ | ||
239 | + vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc); \ | ||
240 | } | ||
241 | |||
242 | /* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */ | ||
243 | @@ -XXX,XX +XXX,XX @@ GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, 32) | ||
244 | GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, 64) | ||
245 | |||
246 | /* Vector Floating-Point Slide Instructions */ | ||
247 | -#define GEN_VEXT_VFSLIDE1UP_VF(NAME, ESZ) \ | ||
248 | +#define GEN_VEXT_VFSLIDE1UP_VF(NAME, BITWIDTH) \ | ||
249 | void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ | ||
250 | CPURISCVState *env, uint32_t desc) \ | ||
251 | { \ | ||
252 | - vslide1up_##ESZ(vd, v0, s1, vs2, env, desc); \ | ||
253 | + vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc); \ | ||
254 | } | ||
255 | |||
256 | /* vfslide1up.vf vd, vs2, rs1, vm # vd[0]=f[rs1], vd[i+1] = vs2[i] */ | ||
257 | @@ -XXX,XX +XXX,XX @@ GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_h, 16) | ||
258 | GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_w, 32) | ||
259 | GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_d, 64) | ||
260 | |||
261 | -#define GEN_VEXT_VFSLIDE1DOWN_VF(NAME, ESZ) \ | ||
262 | +#define GEN_VEXT_VFSLIDE1DOWN_VF(NAME, BITWIDTH) \ | ||
263 | void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ | ||
264 | CPURISCVState *env, uint32_t desc) \ | ||
265 | { \ | ||
266 | - vslide1down_##ESZ(vd, v0, s1, vs2, env, desc); \ | ||
267 | + vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc); \ | ||
268 | } | ||
269 | |||
270 | /* vfslide1down.vf vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=f[rs1] */ | ||
271 | -- | 28 | -- |
272 | 2.36.1 | 29 | 2.41.0 | diff view generated by jsdifflib |
1 | From: eopXD <eop.chen@sifive.com> | 1 | From: Daniel Henrique Barboza <dbarboza@ventanamicro.com> |
---|---|---|---|
2 | 2 | ||
3 | According to v-spec, tail agnostic behavior can be either kept as | 3 | The cpu->cfg.epmp extension is still experimental, but it already has a |
4 | undisturbed or set elements' bits to all 1s. To distinguish the | 4 | 'smepmp' riscv,isa string. Add it. |
5 | difference of tail policies, QEMU should be able to simulate the tail | ||
6 | agnostic behavior as "set tail elements' bits to all 1s". | ||
7 | 5 | ||
8 | There are multiple possibility for agnostic elements according to | 6 | Signed-off-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> |
9 | v-spec. The main intent of this patch-set tries to add option that | ||
10 | can distinguish between tail policies. Setting agnostic elements to | ||
11 | all 1s allows QEMU to express this. | ||
12 | |||
13 | This commit adds option 'rvv_ta_all_1s' is added to enable the | ||
14 | behavior, it is default as disabled. | ||
15 | |||
16 | Signed-off-by: eop Chen <eop.chen@sifive.com> | ||
17 | Reviewed-by: Frank Chang <frank.chang@sifive.com> | ||
18 | Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn> | 7 | Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn> |
19 | Reviewed-by: Alistair Francis <alistair.francis@wdc.com> | 8 | Reviewed-by: Alistair Francis <alistair.francis@wdc.com> |
20 | Message-Id: <165449614532.19704.7000832880482980398-16@git.sr.ht> | 9 | Message-Id: <20230720132424.371132-3-dbarboza@ventanamicro.com> |
21 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | 10 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> |
22 | --- | 11 | --- |
23 | target/riscv/cpu.c | 2 ++ | 12 | target/riscv/cpu.c | 1 + |
24 | 1 file changed, 2 insertions(+) | 13 | 1 file changed, 1 insertion(+) |
25 | 14 | ||
26 | diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c | 15 | diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c |
27 | index XXXXXXX..XXXXXXX 100644 | 16 | index XXXXXXX..XXXXXXX 100644 |
28 | --- a/target/riscv/cpu.c | 17 | --- a/target/riscv/cpu.c |
29 | +++ b/target/riscv/cpu.c | 18 | +++ b/target/riscv/cpu.c |
30 | @@ -XXX,XX +XXX,XX @@ static Property riscv_cpu_properties[] = { | 19 | @@ -XXX,XX +XXX,XX @@ static const struct isa_ext_data isa_edata_arr[] = { |
31 | DEFINE_PROP_UINT64("resetvec", RISCVCPU, cfg.resetvec, DEFAULT_RSTVEC), | 20 | ISA_EXT_DATA_ENTRY(zhinx, PRIV_VERSION_1_12_0, ext_zhinx), |
32 | 21 | ISA_EXT_DATA_ENTRY(zhinxmin, PRIV_VERSION_1_12_0, ext_zhinxmin), | |
33 | DEFINE_PROP_BOOL("short-isa-string", RISCVCPU, cfg.short_isa_string, false), | 22 | ISA_EXT_DATA_ENTRY(smaia, PRIV_VERSION_1_12_0, ext_smaia), |
34 | + | 23 | + ISA_EXT_DATA_ENTRY(smepmp, PRIV_VERSION_1_12_0, epmp), |
35 | + DEFINE_PROP_BOOL("rvv_ta_all_1s", RISCVCPU, cfg.rvv_ta_all_1s, false), | 24 | ISA_EXT_DATA_ENTRY(smstateen, PRIV_VERSION_1_12_0, ext_smstateen), |
36 | DEFINE_PROP_END_OF_LIST(), | 25 | ISA_EXT_DATA_ENTRY(ssaia, PRIV_VERSION_1_12_0, ext_ssaia), |
37 | }; | 26 | ISA_EXT_DATA_ENTRY(sscofpmf, PRIV_VERSION_1_12_0, ext_sscofpmf), |
38 | |||
39 | -- | 27 | -- |
40 | 2.36.1 | 28 | 2.41.0 | diff view generated by jsdifflib |
1 | From: eopXD <yueh.ting.chen@gmail.com> | 1 | From: LIU Zhiwei <zhiwei_liu@linux.alibaba.com> |
---|---|---|---|
2 | 2 | ||
3 | The tail elements in the destination mask register are updated under | 3 | Commit bef6f008b98(accel/tcg: Return bool from page_check_range) converts |
4 | a tail-agnostic policy. | 4 | integer return value to bool type. However, it wrongly converted the use |
5 | of the API in riscv fault-only-first, where page_check_range < = 0, should | ||
6 | be converted to !page_check_range. | ||
5 | 7 | ||
6 | Signed-off-by: eop Chen <eop.chen@sifive.com> | 8 | Signed-off-by: LIU Zhiwei <zhiwei_liu@linux.alibaba.com> |
7 | Reviewed-by: Frank Chang <frank.chang@sifive.com> | 9 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
8 | Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn> | 10 | Message-ID: <20230729031618.821-1-zhiwei_liu@linux.alibaba.com> |
9 | Acked-by: Alistair Francis <alistair.francis@wdc.com> | ||
10 | Message-Id: <165449614532.19704.7000832880482980398-14@git.sr.ht> | ||
11 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | 11 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> |
12 | --- | 12 | --- |
13 | target/riscv/vector_helper.c | 30 +++++++++++++++++++++++++ | 13 | target/riscv/vector_helper.c | 2 +- |
14 | target/riscv/insn_trans/trans_rvv.c.inc | 6 +++++ | 14 | 1 file changed, 1 insertion(+), 1 deletion(-) |
15 | 2 files changed, 36 insertions(+) | ||
16 | 15 | ||
17 | diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c | 16 | diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c |
18 | index XXXXXXX..XXXXXXX 100644 | 17 | index XXXXXXX..XXXXXXX 100644 |
19 | --- a/target/riscv/vector_helper.c | 18 | --- a/target/riscv/vector_helper.c |
20 | +++ b/target/riscv/vector_helper.c | 19 | +++ b/target/riscv/vector_helper.c |
21 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, \ | 20 | @@ -XXX,XX +XXX,XX @@ vext_ldff(void *vd, void *v0, target_ulong base, |
22 | uint32_t desc) \ | 21 | cpu_mmu_index(env, false)); |
23 | { \ | 22 | if (host) { |
24 | uint32_t vl = env->vl; \ | 23 | #ifdef CONFIG_USER_ONLY |
25 | + uint32_t total_elems = env_archcpu(env)->cfg.vlen; \ | 24 | - if (page_check_range(addr, offset, PAGE_READ)) { |
26 | + uint32_t vta_all_1s = vext_vta_all_1s(desc); \ | 25 | + if (!page_check_range(addr, offset, PAGE_READ)) { |
27 | uint32_t i; \ | 26 | vl = i; |
28 | int a, b; \ | 27 | goto ProbeSuccess; |
29 | \ | 28 | } |
30 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, \ | ||
31 | vext_set_elem_mask(vd, i, OP(b, a)); \ | ||
32 | } \ | ||
33 | env->vstart = 0; \ | ||
34 | + /* mask destination register are always tail- \ | ||
35 | + * agnostic \ | ||
36 | + */ \ | ||
37 | + /* set tail elements to 1s */ \ | ||
38 | + if (vta_all_1s) { \ | ||
39 | + for (; i < total_elems; i++) { \ | ||
40 | + vext_set_elem_mask(vd, i, 1); \ | ||
41 | + } \ | ||
42 | + } \ | ||
43 | } | ||
44 | |||
45 | #define DO_NAND(N, M) (!(N & M)) | ||
46 | @@ -XXX,XX +XXX,XX @@ static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env, | ||
47 | { | ||
48 | uint32_t vm = vext_vm(desc); | ||
49 | uint32_t vl = env->vl; | ||
50 | + uint32_t total_elems = env_archcpu(env)->cfg.vlen; | ||
51 | + uint32_t vta_all_1s = vext_vta_all_1s(desc); | ||
52 | int i; | ||
53 | bool first_mask_bit = false; | ||
54 | |||
55 | @@ -XXX,XX +XXX,XX @@ static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env, | ||
56 | } | ||
57 | } | ||
58 | env->vstart = 0; | ||
59 | + /* mask destination register are always tail-agnostic */ | ||
60 | + /* set tail elements to 1s */ | ||
61 | + if (vta_all_1s) { | ||
62 | + for (; i < total_elems; i++) { | ||
63 | + vext_set_elem_mask(vd, i, 1); | ||
64 | + } | ||
65 | + } | ||
66 | } | ||
67 | |||
68 | void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, | ||
69 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env, \ | ||
70 | { \ | ||
71 | uint32_t vm = vext_vm(desc); \ | ||
72 | uint32_t vl = env->vl; \ | ||
73 | + uint32_t esz = sizeof(ETYPE); \ | ||
74 | + uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ | ||
75 | + uint32_t vta = vext_vta(desc); \ | ||
76 | uint32_t sum = 0; \ | ||
77 | int i; \ | ||
78 | \ | ||
79 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env, \ | ||
80 | } \ | ||
81 | } \ | ||
82 | env->vstart = 0; \ | ||
83 | + /* set tail elements to 1s */ \ | ||
84 | + vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ | ||
85 | } | ||
86 | |||
87 | GEN_VEXT_VIOTA_M(viota_m_b, uint8_t, H1) | ||
88 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc) \ | ||
89 | { \ | ||
90 | uint32_t vm = vext_vm(desc); \ | ||
91 | uint32_t vl = env->vl; \ | ||
92 | + uint32_t esz = sizeof(ETYPE); \ | ||
93 | + uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ | ||
94 | + uint32_t vta = vext_vta(desc); \ | ||
95 | int i; \ | ||
96 | \ | ||
97 | for (i = env->vstart; i < vl; i++) { \ | ||
98 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc) \ | ||
99 | *((ETYPE *)vd + H(i)) = i; \ | ||
100 | } \ | ||
101 | env->vstart = 0; \ | ||
102 | + /* set tail elements to 1s */ \ | ||
103 | + vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ | ||
104 | } | ||
105 | |||
106 | GEN_VEXT_VID_V(vid_v_b, uint8_t, H1) | ||
107 | diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc | ||
108 | index XXXXXXX..XXXXXXX 100644 | ||
109 | --- a/target/riscv/insn_trans/trans_rvv.c.inc | ||
110 | +++ b/target/riscv/insn_trans/trans_rvv.c.inc | ||
111 | @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_r *a) \ | ||
112 | tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ | ||
113 | \ | ||
114 | data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ | ||
115 | + data = \ | ||
116 | + FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s);\ | ||
117 | tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ | ||
118 | vreg_ofs(s, a->rs1), \ | ||
119 | vreg_ofs(s, a->rs2), cpu_env, \ | ||
120 | @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ | ||
121 | \ | ||
122 | data = FIELD_DP32(data, VDATA, VM, a->vm); \ | ||
123 | data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ | ||
124 | + data = \ | ||
125 | + FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s);\ | ||
126 | tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), \ | ||
127 | vreg_ofs(s, 0), vreg_ofs(s, a->rs2), \ | ||
128 | cpu_env, s->cfg_ptr->vlen / 8, \ | ||
129 | @@ -XXX,XX +XXX,XX @@ static bool trans_viota_m(DisasContext *s, arg_viota_m *a) | ||
130 | |||
131 | data = FIELD_DP32(data, VDATA, VM, a->vm); | ||
132 | data = FIELD_DP32(data, VDATA, LMUL, s->lmul); | ||
133 | + data = FIELD_DP32(data, VDATA, VTA, s->vta); | ||
134 | static gen_helper_gvec_3_ptr * const fns[4] = { | ||
135 | gen_helper_viota_m_b, gen_helper_viota_m_h, | ||
136 | gen_helper_viota_m_w, gen_helper_viota_m_d, | ||
137 | @@ -XXX,XX +XXX,XX @@ static bool trans_vid_v(DisasContext *s, arg_vid_v *a) | ||
138 | |||
139 | data = FIELD_DP32(data, VDATA, VM, a->vm); | ||
140 | data = FIELD_DP32(data, VDATA, LMUL, s->lmul); | ||
141 | + data = FIELD_DP32(data, VDATA, VTA, s->vta); | ||
142 | static gen_helper_gvec_2_ptr * const fns[4] = { | ||
143 | gen_helper_vid_v_b, gen_helper_vid_v_h, | ||
144 | gen_helper_vid_v_w, gen_helper_vid_v_d, | ||
145 | -- | 29 | -- |
146 | 2.36.1 | 30 | 2.41.0 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Ard Biesheuvel <ardb@kernel.org> | ||
1 | 2 | ||
3 | The AES MixColumns and InvMixColumns operations are relatively | ||
4 | expensive 4x4 matrix multiplications in GF(2^8), which is why C | ||
5 | implementations usually rely on precomputed lookup tables rather than | ||
6 | performing the calculations on demand. | ||
7 | |||
8 | Given that we already carry those tables in QEMU, we can just grab the | ||
9 | right value in the implementation of the RISC-V AES32 instructions. Note | ||
10 | that the tables in question are permuted according to the respective | ||
11 | Sbox, so we can omit the Sbox lookup as well in this case. | ||
12 | |||
13 | Cc: Richard Henderson <richard.henderson@linaro.org> | ||
14 | Cc: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
15 | Cc: Zewen Ye <lustrew@foxmail.com> | ||
16 | Cc: Weiwei Li <liweiwei@iscas.ac.cn> | ||
17 | Cc: Junqiang Wang <wangjunqiang@iscas.ac.cn> | ||
18 | Signed-off-by: Ard Biesheuvel <ardb@kernel.org> | ||
19 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
20 | Message-ID: <20230731084043.1791984-1-ardb@kernel.org> | ||
21 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | ||
22 | --- | ||
23 | include/crypto/aes.h | 7 +++++++ | ||
24 | crypto/aes.c | 4 ++-- | ||
25 | target/riscv/crypto_helper.c | 34 ++++------------------------------ | ||
26 | 3 files changed, 13 insertions(+), 32 deletions(-) | ||
27 | |||
28 | diff --git a/include/crypto/aes.h b/include/crypto/aes.h | ||
29 | index XXXXXXX..XXXXXXX 100644 | ||
30 | --- a/include/crypto/aes.h | ||
31 | +++ b/include/crypto/aes.h | ||
32 | @@ -XXX,XX +XXX,XX @@ void AES_decrypt(const unsigned char *in, unsigned char *out, | ||
33 | extern const uint8_t AES_sbox[256]; | ||
34 | extern const uint8_t AES_isbox[256]; | ||
35 | |||
36 | +/* | ||
37 | +AES_Te0[x] = S [x].[02, 01, 01, 03]; | ||
38 | +AES_Td0[x] = Si[x].[0e, 09, 0d, 0b]; | ||
39 | +*/ | ||
40 | + | ||
41 | +extern const uint32_t AES_Te0[256], AES_Td0[256]; | ||
42 | + | ||
43 | #endif | ||
44 | diff --git a/crypto/aes.c b/crypto/aes.c | ||
45 | index XXXXXXX..XXXXXXX 100644 | ||
46 | --- a/crypto/aes.c | ||
47 | +++ b/crypto/aes.c | ||
48 | @@ -XXX,XX +XXX,XX @@ AES_Td3[x] = Si[x].[09, 0d, 0b, 0e]; | ||
49 | AES_Td4[x] = Si[x].[01, 01, 01, 01]; | ||
50 | */ | ||
51 | |||
52 | -static const uint32_t AES_Te0[256] = { | ||
53 | +const uint32_t AES_Te0[256] = { | ||
54 | 0xc66363a5U, 0xf87c7c84U, 0xee777799U, 0xf67b7b8dU, | ||
55 | 0xfff2f20dU, 0xd66b6bbdU, 0xde6f6fb1U, 0x91c5c554U, | ||
56 | 0x60303050U, 0x02010103U, 0xce6767a9U, 0x562b2b7dU, | ||
57 | @@ -XXX,XX +XXX,XX @@ static const uint32_t AES_Te4[256] = { | ||
58 | 0xb0b0b0b0U, 0x54545454U, 0xbbbbbbbbU, 0x16161616U, | ||
59 | }; | ||
60 | |||
61 | -static const uint32_t AES_Td0[256] = { | ||
62 | +const uint32_t AES_Td0[256] = { | ||
63 | 0x51f4a750U, 0x7e416553U, 0x1a17a4c3U, 0x3a275e96U, | ||
64 | 0x3bab6bcbU, 0x1f9d45f1U, 0xacfa58abU, 0x4be30393U, | ||
65 | 0x2030fa55U, 0xad766df6U, 0x88cc7691U, 0xf5024c25U, | ||
66 | diff --git a/target/riscv/crypto_helper.c b/target/riscv/crypto_helper.c | ||
67 | index XXXXXXX..XXXXXXX 100644 | ||
68 | --- a/target/riscv/crypto_helper.c | ||
69 | +++ b/target/riscv/crypto_helper.c | ||
70 | @@ -XXX,XX +XXX,XX @@ | ||
71 | #include "crypto/aes-round.h" | ||
72 | #include "crypto/sm4.h" | ||
73 | |||
74 | -#define AES_XTIME(a) \ | ||
75 | - ((a << 1) ^ ((a & 0x80) ? 0x1b : 0)) | ||
76 | - | ||
77 | -#define AES_GFMUL(a, b) (( \ | ||
78 | - (((b) & 0x1) ? (a) : 0) ^ \ | ||
79 | - (((b) & 0x2) ? AES_XTIME(a) : 0) ^ \ | ||
80 | - (((b) & 0x4) ? AES_XTIME(AES_XTIME(a)) : 0) ^ \ | ||
81 | - (((b) & 0x8) ? AES_XTIME(AES_XTIME(AES_XTIME(a))) : 0)) & 0xFF) | ||
82 | - | ||
83 | -static inline uint32_t aes_mixcolumn_byte(uint8_t x, bool fwd) | ||
84 | -{ | ||
85 | - uint32_t u; | ||
86 | - | ||
87 | - if (fwd) { | ||
88 | - u = (AES_GFMUL(x, 3) << 24) | (x << 16) | (x << 8) | | ||
89 | - (AES_GFMUL(x, 2) << 0); | ||
90 | - } else { | ||
91 | - u = (AES_GFMUL(x, 0xb) << 24) | (AES_GFMUL(x, 0xd) << 16) | | ||
92 | - (AES_GFMUL(x, 0x9) << 8) | (AES_GFMUL(x, 0xe) << 0); | ||
93 | - } | ||
94 | - return u; | ||
95 | -} | ||
96 | - | ||
97 | #define sext32_xlen(x) (target_ulong)(int32_t)(x) | ||
98 | |||
99 | static inline target_ulong aes32_operation(target_ulong shamt, | ||
100 | @@ -XXX,XX +XXX,XX @@ static inline target_ulong aes32_operation(target_ulong shamt, | ||
101 | bool enc, bool mix) | ||
102 | { | ||
103 | uint8_t si = rs2 >> shamt; | ||
104 | - uint8_t so; | ||
105 | uint32_t mixed; | ||
106 | target_ulong res; | ||
107 | |||
108 | if (enc) { | ||
109 | - so = AES_sbox[si]; | ||
110 | if (mix) { | ||
111 | - mixed = aes_mixcolumn_byte(so, true); | ||
112 | + mixed = be32_to_cpu(AES_Te0[si]); | ||
113 | } else { | ||
114 | - mixed = so; | ||
115 | + mixed = AES_sbox[si]; | ||
116 | } | ||
117 | } else { | ||
118 | - so = AES_isbox[si]; | ||
119 | if (mix) { | ||
120 | - mixed = aes_mixcolumn_byte(so, false); | ||
121 | + mixed = be32_to_cpu(AES_Td0[si]); | ||
122 | } else { | ||
123 | - mixed = so; | ||
124 | + mixed = AES_isbox[si]; | ||
125 | } | ||
126 | } | ||
127 | mixed = rol32(mixed, shamt); | ||
128 | -- | ||
129 | 2.41.0 | ||
130 | |||
131 | diff view generated by jsdifflib |
1 | From: eopXD <eop.chen@sifive.com> | 1 | From: Kiran Ostrolenk <kiran.ostrolenk@codethink.co.uk> |
---|---|---|---|
2 | 2 | ||
3 | According to v-spec, tail agnostic behavior can be either kept as | 3 | Take some functions/macros out of `vector_helper` and put them in a new |
4 | undisturbed or set elements' bits to all 1s. To distinguish the | 4 | module called `vector_internals`. This ensures they can be used by both |
5 | difference of tail policies, QEMU should be able to simulate the tail | 5 | vector and vector-crypto helpers (latter implemented in proceeding |
6 | agnostic behavior as "set tail elements' bits to all 1s". | 6 | commits). |
7 | 7 | ||
8 | There are multiple possibility for agnostic elements according to | 8 | Signed-off-by: Kiran Ostrolenk <kiran.ostrolenk@codethink.co.uk> |
9 | v-spec. The main intent of this patch-set tries to add option that | ||
10 | can distinguish between tail policies. Setting agnostic elements to | ||
11 | all 1s allows QEMU to express this. | ||
12 | |||
13 | This is the first commit regarding the optional tail agnostic | ||
14 | behavior. Follow-up commits will add this optional behavior | ||
15 | for all rvv instructions. | ||
16 | |||
17 | Signed-off-by: eop Chen <eop.chen@sifive.com> | ||
18 | Reviewed-by: Frank Chang <frank.chang@sifive.com> | ||
19 | Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn> | 9 | Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn> |
10 | Signed-off-by: Max Chou <max.chou@sifive.com> | ||
20 | Acked-by: Alistair Francis <alistair.francis@wdc.com> | 11 | Acked-by: Alistair Francis <alistair.francis@wdc.com> |
21 | Message-Id: <165449614532.19704.7000832880482980398-5@git.sr.ht> | 12 | Message-ID: <20230711165917.2629866-2-max.chou@sifive.com> |
22 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | 13 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> |
23 | --- | 14 | --- |
24 | target/riscv/cpu.h | 2 + | 15 | target/riscv/vector_internals.h | 182 +++++++++++++++++++++++++++++ |
25 | target/riscv/internals.h | 5 +- | 16 | target/riscv/vector_helper.c | 201 +------------------------------- |
26 | target/riscv/cpu_helper.c | 2 + | 17 | target/riscv/vector_internals.c | 81 +++++++++++++ |
27 | target/riscv/translate.c | 2 + | 18 | target/riscv/meson.build | 1 + |
28 | target/riscv/vector_helper.c | 296 +++++++++++++----------- | 19 | 4 files changed, 265 insertions(+), 200 deletions(-) |
29 | target/riscv/insn_trans/trans_rvv.c.inc | 3 +- | 20 | create mode 100644 target/riscv/vector_internals.h |
30 | 6 files changed, 178 insertions(+), 132 deletions(-) | 21 | create mode 100644 target/riscv/vector_internals.c |
31 | 22 | ||
32 | diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h | 23 | diff --git a/target/riscv/vector_internals.h b/target/riscv/vector_internals.h |
33 | index XXXXXXX..XXXXXXX 100644 | 24 | new file mode 100644 |
34 | --- a/target/riscv/cpu.h | 25 | index XXXXXXX..XXXXXXX |
35 | +++ b/target/riscv/cpu.h | 26 | --- /dev/null |
36 | @@ -XXX,XX +XXX,XX @@ struct RISCVCPUConfig { | 27 | +++ b/target/riscv/vector_internals.h |
37 | bool ext_zve32f; | ||
38 | bool ext_zve64f; | ||
39 | bool ext_zmmul; | ||
40 | + bool rvv_ta_all_1s; | ||
41 | |||
42 | uint32_t mvendorid; | ||
43 | uint64_t marchid; | ||
44 | @@ -XXX,XX +XXX,XX @@ FIELD(TB_FLAGS, XL, 20, 2) | ||
45 | /* If PointerMasking should be applied */ | ||
46 | FIELD(TB_FLAGS, PM_MASK_ENABLED, 22, 1) | ||
47 | FIELD(TB_FLAGS, PM_BASE_ENABLED, 23, 1) | ||
48 | +FIELD(TB_FLAGS, VTA, 24, 1) | ||
49 | |||
50 | #ifdef TARGET_RISCV32 | ||
51 | #define riscv_cpu_mxl(env) ((void)(env), MXL_RV32) | ||
52 | diff --git a/target/riscv/internals.h b/target/riscv/internals.h | ||
53 | index XXXXXXX..XXXXXXX 100644 | ||
54 | --- a/target/riscv/internals.h | ||
55 | +++ b/target/riscv/internals.h | ||
56 | @@ -XXX,XX +XXX,XX @@ | 28 | @@ -XXX,XX +XXX,XX @@ |
57 | /* share data between vector helpers and decode code */ | 29 | +/* |
58 | FIELD(VDATA, VM, 0, 1) | 30 | + * RISC-V Vector Extension Internals |
59 | FIELD(VDATA, LMUL, 1, 3) | 31 | + * |
60 | -FIELD(VDATA, NF, 4, 4) | 32 | + * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved. |
61 | -FIELD(VDATA, WD, 4, 1) | 33 | + * |
62 | +FIELD(VDATA, VTA, 4, 1) | 34 | + * This program is free software; you can redistribute it and/or modify it |
63 | +FIELD(VDATA, NF, 5, 4) | 35 | + * under the terms and conditions of the GNU General Public License, |
64 | +FIELD(VDATA, WD, 5, 1) | 36 | + * version 2 or later, as published by the Free Software Foundation. |
65 | 37 | + * | |
66 | /* float point classify helpers */ | 38 | + * This program is distributed in the hope it will be useful, but WITHOUT |
67 | target_ulong fclass_h(uint64_t frs1); | 39 | + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
68 | diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c | 40 | + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for |
69 | index XXXXXXX..XXXXXXX 100644 | 41 | + * more details. |
70 | --- a/target/riscv/cpu_helper.c | 42 | + * |
71 | +++ b/target/riscv/cpu_helper.c | 43 | + * You should have received a copy of the GNU General Public License along with |
72 | @@ -XXX,XX +XXX,XX @@ void cpu_get_tb_cpu_state(CPURISCVState *env, target_ulong *pc, | 44 | + * this program. If not, see <http://www.gnu.org/licenses/>. |
73 | flags = FIELD_DP32(flags, TB_FLAGS, LMUL, | 45 | + */ |
74 | FIELD_EX64(env->vtype, VTYPE, VLMUL)); | 46 | + |
75 | flags = FIELD_DP32(flags, TB_FLAGS, VL_EQ_VLMAX, vl_eq_vlmax); | 47 | +#ifndef TARGET_RISCV_VECTOR_INTERNALS_H |
76 | + flags = FIELD_DP32(flags, TB_FLAGS, VTA, | 48 | +#define TARGET_RISCV_VECTOR_INTERNALS_H |
77 | + FIELD_EX64(env->vtype, VTYPE, VTA)); | 49 | + |
78 | } else { | 50 | +#include "qemu/osdep.h" |
79 | flags = FIELD_DP32(flags, TB_FLAGS, VILL, 1); | 51 | +#include "qemu/bitops.h" |
80 | } | 52 | +#include "cpu.h" |
81 | diff --git a/target/riscv/translate.c b/target/riscv/translate.c | 53 | +#include "tcg/tcg-gvec-desc.h" |
82 | index XXXXXXX..XXXXXXX 100644 | 54 | +#include "internals.h" |
83 | --- a/target/riscv/translate.c | 55 | + |
84 | +++ b/target/riscv/translate.c | 56 | +static inline uint32_t vext_nf(uint32_t desc) |
85 | @@ -XXX,XX +XXX,XX @@ typedef struct DisasContext { | 57 | +{ |
86 | */ | 58 | + return FIELD_EX32(simd_data(desc), VDATA, NF); |
87 | int8_t lmul; | 59 | +} |
88 | uint8_t sew; | 60 | + |
89 | + uint8_t vta; | 61 | +/* |
90 | target_ulong vstart; | 62 | + * Note that vector data is stored in host-endian 64-bit chunks, |
91 | bool vl_eq_vlmax; | 63 | + * so addressing units smaller than that needs a host-endian fixup. |
92 | uint8_t ntemp; | 64 | + */ |
93 | @@ -XXX,XX +XXX,XX @@ static void riscv_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) | 65 | +#if HOST_BIG_ENDIAN |
94 | ctx->vill = FIELD_EX32(tb_flags, TB_FLAGS, VILL); | 66 | +#define H1(x) ((x) ^ 7) |
95 | ctx->sew = FIELD_EX32(tb_flags, TB_FLAGS, SEW); | 67 | +#define H1_2(x) ((x) ^ 6) |
96 | ctx->lmul = sextract32(FIELD_EX32(tb_flags, TB_FLAGS, LMUL), 0, 3); | 68 | +#define H1_4(x) ((x) ^ 4) |
97 | + ctx->vta = FIELD_EX32(tb_flags, TB_FLAGS, VTA) && cpu->cfg.rvv_ta_all_1s; | 69 | +#define H2(x) ((x) ^ 3) |
98 | ctx->vstart = env->vstart; | 70 | +#define H4(x) ((x) ^ 1) |
99 | ctx->vl_eq_vlmax = FIELD_EX32(tb_flags, TB_FLAGS, VL_EQ_VLMAX); | 71 | +#define H8(x) ((x)) |
100 | ctx->misa_mxl_max = env->misa_mxl_max; | 72 | +#else |
73 | +#define H1(x) (x) | ||
74 | +#define H1_2(x) (x) | ||
75 | +#define H1_4(x) (x) | ||
76 | +#define H2(x) (x) | ||
77 | +#define H4(x) (x) | ||
78 | +#define H8(x) (x) | ||
79 | +#endif | ||
80 | + | ||
81 | +/* | ||
82 | + * Encode LMUL to lmul as following: | ||
83 | + * LMUL vlmul lmul | ||
84 | + * 1 000 0 | ||
85 | + * 2 001 1 | ||
86 | + * 4 010 2 | ||
87 | + * 8 011 3 | ||
88 | + * - 100 - | ||
89 | + * 1/8 101 -3 | ||
90 | + * 1/4 110 -2 | ||
91 | + * 1/2 111 -1 | ||
92 | + */ | ||
93 | +static inline int32_t vext_lmul(uint32_t desc) | ||
94 | +{ | ||
95 | + return sextract32(FIELD_EX32(simd_data(desc), VDATA, LMUL), 0, 3); | ||
96 | +} | ||
97 | + | ||
98 | +static inline uint32_t vext_vm(uint32_t desc) | ||
99 | +{ | ||
100 | + return FIELD_EX32(simd_data(desc), VDATA, VM); | ||
101 | +} | ||
102 | + | ||
103 | +static inline uint32_t vext_vma(uint32_t desc) | ||
104 | +{ | ||
105 | + return FIELD_EX32(simd_data(desc), VDATA, VMA); | ||
106 | +} | ||
107 | + | ||
108 | +static inline uint32_t vext_vta(uint32_t desc) | ||
109 | +{ | ||
110 | + return FIELD_EX32(simd_data(desc), VDATA, VTA); | ||
111 | +} | ||
112 | + | ||
113 | +static inline uint32_t vext_vta_all_1s(uint32_t desc) | ||
114 | +{ | ||
115 | + return FIELD_EX32(simd_data(desc), VDATA, VTA_ALL_1S); | ||
116 | +} | ||
117 | + | ||
118 | +/* | ||
119 | + * Earlier designs (pre-0.9) had a varying number of bits | ||
120 | + * per mask value (MLEN). In the 0.9 design, MLEN=1. | ||
121 | + * (Section 4.5) | ||
122 | + */ | ||
123 | +static inline int vext_elem_mask(void *v0, int index) | ||
124 | +{ | ||
125 | + int idx = index / 64; | ||
126 | + int pos = index % 64; | ||
127 | + return (((uint64_t *)v0)[idx] >> pos) & 1; | ||
128 | +} | ||
129 | + | ||
130 | +/* | ||
131 | + * Get number of total elements, including prestart, body and tail elements. | ||
132 | + * Note that when LMUL < 1, the tail includes the elements past VLMAX that | ||
133 | + * are held in the same vector register. | ||
134 | + */ | ||
135 | +static inline uint32_t vext_get_total_elems(CPURISCVState *env, uint32_t desc, | ||
136 | + uint32_t esz) | ||
137 | +{ | ||
138 | + uint32_t vlenb = simd_maxsz(desc); | ||
139 | + uint32_t sew = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW); | ||
140 | + int8_t emul = ctzl(esz) - ctzl(sew) + vext_lmul(desc) < 0 ? 0 : | ||
141 | + ctzl(esz) - ctzl(sew) + vext_lmul(desc); | ||
142 | + return (vlenb << emul) / esz; | ||
143 | +} | ||
144 | + | ||
145 | +/* set agnostic elements to 1s */ | ||
146 | +void vext_set_elems_1s(void *base, uint32_t is_agnostic, uint32_t cnt, | ||
147 | + uint32_t tot); | ||
148 | + | ||
149 | +/* expand macro args before macro */ | ||
150 | +#define RVVCALL(macro, ...) macro(__VA_ARGS__) | ||
151 | + | ||
152 | +/* (TD, T1, T2, TX1, TX2) */ | ||
153 | +#define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t | ||
154 | +#define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t | ||
155 | +#define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t | ||
156 | +#define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t | ||
157 | + | ||
158 | +/* operation of two vector elements */ | ||
159 | +typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i); | ||
160 | + | ||
161 | +#define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ | ||
162 | +static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \ | ||
163 | +{ \ | ||
164 | + TX1 s1 = *((T1 *)vs1 + HS1(i)); \ | ||
165 | + TX2 s2 = *((T2 *)vs2 + HS2(i)); \ | ||
166 | + *((TD *)vd + HD(i)) = OP(s2, s1); \ | ||
167 | +} | ||
168 | + | ||
169 | +void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2, | ||
170 | + CPURISCVState *env, uint32_t desc, | ||
171 | + opivv2_fn *fn, uint32_t esz); | ||
172 | + | ||
173 | +/* generate the helpers for OPIVV */ | ||
174 | +#define GEN_VEXT_VV(NAME, ESZ) \ | ||
175 | +void HELPER(NAME)(void *vd, void *v0, void *vs1, \ | ||
176 | + void *vs2, CPURISCVState *env, \ | ||
177 | + uint32_t desc) \ | ||
178 | +{ \ | ||
179 | + do_vext_vv(vd, v0, vs1, vs2, env, desc, \ | ||
180 | + do_##NAME, ESZ); \ | ||
181 | +} | ||
182 | + | ||
183 | +typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i); | ||
184 | + | ||
185 | +/* | ||
186 | + * (T1)s1 gives the real operator type. | ||
187 | + * (TX1)(T1)s1 expands the operator type of widen or narrow operations. | ||
188 | + */ | ||
189 | +#define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ | ||
190 | +static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \ | ||
191 | +{ \ | ||
192 | + TX2 s2 = *((T2 *)vs2 + HS2(i)); \ | ||
193 | + *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1); \ | ||
194 | +} | ||
195 | + | ||
196 | +void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2, | ||
197 | + CPURISCVState *env, uint32_t desc, | ||
198 | + opivx2_fn fn, uint32_t esz); | ||
199 | + | ||
200 | +/* generate the helpers for OPIVX */ | ||
201 | +#define GEN_VEXT_VX(NAME, ESZ) \ | ||
202 | +void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ | ||
203 | + void *vs2, CPURISCVState *env, \ | ||
204 | + uint32_t desc) \ | ||
205 | +{ \ | ||
206 | + do_vext_vx(vd, v0, s1, vs2, env, desc, \ | ||
207 | + do_##NAME, ESZ); \ | ||
208 | +} | ||
209 | + | ||
210 | +#endif /* TARGET_RISCV_VECTOR_INTERNALS_H */ | ||
101 | diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c | 211 | diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c |
102 | index XXXXXXX..XXXXXXX 100644 | 212 | index XXXXXXX..XXXXXXX 100644 |
103 | --- a/target/riscv/vector_helper.c | 213 | --- a/target/riscv/vector_helper.c |
104 | +++ b/target/riscv/vector_helper.c | 214 | +++ b/target/riscv/vector_helper.c |
105 | @@ -XXX,XX +XXX,XX @@ static inline int32_t vext_lmul(uint32_t desc) | 215 | @@ -XXX,XX +XXX,XX @@ |
106 | return sextract32(FIELD_EX32(simd_data(desc), VDATA, LMUL), 0, 3); | 216 | #include "fpu/softfloat.h" |
217 | #include "tcg/tcg-gvec-desc.h" | ||
218 | #include "internals.h" | ||
219 | +#include "vector_internals.h" | ||
220 | #include <math.h> | ||
221 | |||
222 | target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1, | ||
223 | @@ -XXX,XX +XXX,XX @@ target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1, | ||
224 | return vl; | ||
107 | } | 225 | } |
108 | 226 | ||
109 | +static inline uint32_t vext_vta(uint32_t desc) | 227 | -/* |
110 | +{ | 228 | - * Note that vector data is stored in host-endian 64-bit chunks, |
111 | + return FIELD_EX32(simd_data(desc), VDATA, VTA); | 229 | - * so addressing units smaller than that needs a host-endian fixup. |
112 | +} | 230 | - */ |
113 | + | 231 | -#if HOST_BIG_ENDIAN |
232 | -#define H1(x) ((x) ^ 7) | ||
233 | -#define H1_2(x) ((x) ^ 6) | ||
234 | -#define H1_4(x) ((x) ^ 4) | ||
235 | -#define H2(x) ((x) ^ 3) | ||
236 | -#define H4(x) ((x) ^ 1) | ||
237 | -#define H8(x) ((x)) | ||
238 | -#else | ||
239 | -#define H1(x) (x) | ||
240 | -#define H1_2(x) (x) | ||
241 | -#define H1_4(x) (x) | ||
242 | -#define H2(x) (x) | ||
243 | -#define H4(x) (x) | ||
244 | -#define H8(x) (x) | ||
245 | -#endif | ||
246 | - | ||
247 | -static inline uint32_t vext_nf(uint32_t desc) | ||
248 | -{ | ||
249 | - return FIELD_EX32(simd_data(desc), VDATA, NF); | ||
250 | -} | ||
251 | - | ||
252 | -static inline uint32_t vext_vm(uint32_t desc) | ||
253 | -{ | ||
254 | - return FIELD_EX32(simd_data(desc), VDATA, VM); | ||
255 | -} | ||
256 | - | ||
257 | -/* | ||
258 | - * Encode LMUL to lmul as following: | ||
259 | - * LMUL vlmul lmul | ||
260 | - * 1 000 0 | ||
261 | - * 2 001 1 | ||
262 | - * 4 010 2 | ||
263 | - * 8 011 3 | ||
264 | - * - 100 - | ||
265 | - * 1/8 101 -3 | ||
266 | - * 1/4 110 -2 | ||
267 | - * 1/2 111 -1 | ||
268 | - */ | ||
269 | -static inline int32_t vext_lmul(uint32_t desc) | ||
270 | -{ | ||
271 | - return sextract32(FIELD_EX32(simd_data(desc), VDATA, LMUL), 0, 3); | ||
272 | -} | ||
273 | - | ||
274 | -static inline uint32_t vext_vta(uint32_t desc) | ||
275 | -{ | ||
276 | - return FIELD_EX32(simd_data(desc), VDATA, VTA); | ||
277 | -} | ||
278 | - | ||
279 | -static inline uint32_t vext_vma(uint32_t desc) | ||
280 | -{ | ||
281 | - return FIELD_EX32(simd_data(desc), VDATA, VMA); | ||
282 | -} | ||
283 | - | ||
284 | -static inline uint32_t vext_vta_all_1s(uint32_t desc) | ||
285 | -{ | ||
286 | - return FIELD_EX32(simd_data(desc), VDATA, VTA_ALL_1S); | ||
287 | -} | ||
288 | - | ||
114 | /* | 289 | /* |
115 | * Get the maximum number of elements can be operated. | 290 | * Get the maximum number of elements can be operated. |
116 | * | 291 | * |
117 | @@ -XXX,XX +XXX,XX @@ static inline uint32_t vext_max_elems(uint32_t desc, uint32_t log2_esz) | 292 | @@ -XXX,XX +XXX,XX @@ static inline uint32_t vext_max_elems(uint32_t desc, uint32_t log2_esz) |
118 | return scale < 0 ? vlenb >> -scale : vlenb << scale; | 293 | return scale < 0 ? vlenb >> -scale : vlenb << scale; |
119 | } | 294 | } |
120 | 295 | ||
121 | +/* | 296 | -/* |
122 | + * Get number of total elements, including prestart, body and tail elements. | 297 | - * Get number of total elements, including prestart, body and tail elements. |
123 | + * Note that when LMUL < 1, the tail includes the elements past VLMAX that | 298 | - * Note that when LMUL < 1, the tail includes the elements past VLMAX that |
124 | + * are held in the same vector register. | 299 | - * are held in the same vector register. |
125 | + */ | 300 | - */ |
126 | +static inline uint32_t vext_get_total_elems(CPURISCVState *env, uint32_t desc, | 301 | -static inline uint32_t vext_get_total_elems(CPURISCVState *env, uint32_t desc, |
127 | + uint32_t esz) | 302 | - uint32_t esz) |
128 | +{ | 303 | -{ |
129 | + uint32_t vlenb = simd_maxsz(desc); | 304 | - uint32_t vlenb = simd_maxsz(desc); |
130 | + uint32_t sew = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW); | 305 | - uint32_t sew = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW); |
131 | + int8_t emul = ctzl(esz) - ctzl(sew) + vext_lmul(desc) < 0 ? 0 : | 306 | - int8_t emul = ctzl(esz) - ctzl(sew) + vext_lmul(desc) < 0 ? 0 : |
132 | + ctzl(esz) - ctzl(sew) + vext_lmul(desc); | 307 | - ctzl(esz) - ctzl(sew) + vext_lmul(desc); |
133 | + return (vlenb << emul) / esz; | 308 | - return (vlenb << emul) / esz; |
134 | +} | 309 | -} |
135 | + | 310 | - |
136 | static inline target_ulong adjust_addr(CPURISCVState *env, target_ulong addr) | 311 | static inline target_ulong adjust_addr(CPURISCVState *env, target_ulong addr) |
137 | { | 312 | { |
138 | return (addr & env->cur_pmmask) | env->cur_pmbase; | 313 | return (addr & ~env->cur_pmmask) | env->cur_pmbase; |
139 | @@ -XXX,XX +XXX,XX @@ static void probe_pages(CPURISCVState *env, target_ulong addr, | 314 | @@ -XXX,XX +XXX,XX @@ static void probe_pages(CPURISCVState *env, target_ulong addr, |
140 | } | 315 | } |
141 | } | 316 | } |
142 | 317 | ||
318 | -/* set agnostic elements to 1s */ | ||
319 | -static void vext_set_elems_1s(void *base, uint32_t is_agnostic, uint32_t cnt, | ||
320 | - uint32_t tot) | ||
321 | -{ | ||
322 | - if (is_agnostic == 0) { | ||
323 | - /* policy undisturbed */ | ||
324 | - return; | ||
325 | - } | ||
326 | - if (tot - cnt == 0) { | ||
327 | - return; | ||
328 | - } | ||
329 | - memset(base + cnt, -1, tot - cnt); | ||
330 | -} | ||
331 | - | ||
332 | static inline void vext_set_elem_mask(void *v0, int index, | ||
333 | uint8_t value) | ||
334 | { | ||
335 | @@ -XXX,XX +XXX,XX @@ static inline void vext_set_elem_mask(void *v0, int index, | ||
336 | ((uint64_t *)v0)[idx] = deposit64(old, pos, 1, value); | ||
337 | } | ||
338 | |||
339 | -/* | ||
340 | - * Earlier designs (pre-0.9) had a varying number of bits | ||
341 | - * per mask value (MLEN). In the 0.9 design, MLEN=1. | ||
342 | - * (Section 4.5) | ||
343 | - */ | ||
344 | -static inline int vext_elem_mask(void *v0, int index) | ||
345 | -{ | ||
346 | - int idx = index / 64; | ||
347 | - int pos = index % 64; | ||
348 | - return (((uint64_t *)v0)[idx] >> pos) & 1; | ||
349 | -} | ||
350 | - | ||
351 | /* elements operations for load and store */ | ||
352 | typedef void vext_ldst_elem_fn(CPURISCVState *env, abi_ptr addr, | ||
353 | uint32_t idx, void *vd, uintptr_t retaddr); | ||
354 | @@ -XXX,XX +XXX,XX @@ GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b) | ||
355 | * Vector Integer Arithmetic Instructions | ||
356 | */ | ||
357 | |||
358 | -/* expand macro args before macro */ | ||
359 | -#define RVVCALL(macro, ...) macro(__VA_ARGS__) | ||
360 | - | ||
361 | /* (TD, T1, T2, TX1, TX2) */ | ||
362 | #define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t | ||
363 | #define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t | ||
364 | #define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t | ||
365 | #define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t | ||
366 | -#define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t | ||
367 | -#define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t | ||
368 | -#define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t | ||
369 | -#define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t | ||
370 | #define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t | ||
371 | #define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t | ||
372 | #define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t | ||
373 | @@ -XXX,XX +XXX,XX @@ GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b) | ||
374 | #define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t | ||
375 | #define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t | ||
376 | |||
377 | -/* operation of two vector elements */ | ||
378 | -typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i); | ||
379 | - | ||
380 | -#define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ | ||
381 | -static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \ | ||
382 | -{ \ | ||
383 | - TX1 s1 = *((T1 *)vs1 + HS1(i)); \ | ||
384 | - TX2 s2 = *((T2 *)vs2 + HS2(i)); \ | ||
385 | - *((TD *)vd + HD(i)) = OP(s2, s1); \ | ||
386 | -} | ||
387 | #define DO_SUB(N, M) (N - M) | ||
388 | #define DO_RSUB(N, M) (M - N) | ||
389 | |||
390 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB) | ||
391 | RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB) | ||
392 | RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB) | ||
393 | |||
394 | -static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2, | ||
395 | - CPURISCVState *env, uint32_t desc, | ||
396 | - opivv2_fn *fn, uint32_t esz) | ||
397 | -{ | ||
398 | - uint32_t vm = vext_vm(desc); | ||
399 | - uint32_t vl = env->vl; | ||
400 | - uint32_t total_elems = vext_get_total_elems(env, desc, esz); | ||
401 | - uint32_t vta = vext_vta(desc); | ||
402 | - uint32_t vma = vext_vma(desc); | ||
403 | - uint32_t i; | ||
404 | - | ||
405 | - for (i = env->vstart; i < vl; i++) { | ||
406 | - if (!vm && !vext_elem_mask(v0, i)) { | ||
407 | - /* set masked-off elements to 1s */ | ||
408 | - vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); | ||
409 | - continue; | ||
410 | - } | ||
411 | - fn(vd, vs1, vs2, i); | ||
412 | - } | ||
413 | - env->vstart = 0; | ||
414 | - /* set tail elements to 1s */ | ||
415 | - vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); | ||
416 | -} | ||
417 | - | ||
418 | -/* generate the helpers for OPIVV */ | ||
419 | -#define GEN_VEXT_VV(NAME, ESZ) \ | ||
420 | -void HELPER(NAME)(void *vd, void *v0, void *vs1, \ | ||
421 | - void *vs2, CPURISCVState *env, \ | ||
422 | - uint32_t desc) \ | ||
423 | -{ \ | ||
424 | - do_vext_vv(vd, v0, vs1, vs2, env, desc, \ | ||
425 | - do_##NAME, ESZ); \ | ||
426 | -} | ||
427 | - | ||
428 | GEN_VEXT_VV(vadd_vv_b, 1) | ||
429 | GEN_VEXT_VV(vadd_vv_h, 2) | ||
430 | GEN_VEXT_VV(vadd_vv_w, 4) | ||
431 | @@ -XXX,XX +XXX,XX @@ GEN_VEXT_VV(vsub_vv_h, 2) | ||
432 | GEN_VEXT_VV(vsub_vv_w, 4) | ||
433 | GEN_VEXT_VV(vsub_vv_d, 8) | ||
434 | |||
435 | -typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i); | ||
436 | - | ||
437 | -/* | ||
438 | - * (T1)s1 gives the real operator type. | ||
439 | - * (TX1)(T1)s1 expands the operator type of widen or narrow operations. | ||
440 | - */ | ||
441 | -#define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ | ||
442 | -static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \ | ||
443 | -{ \ | ||
444 | - TX2 s2 = *((T2 *)vs2 + HS2(i)); \ | ||
445 | - *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1); \ | ||
446 | -} | ||
447 | |||
448 | RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD) | ||
449 | RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD) | ||
450 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB) | ||
451 | RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB) | ||
452 | RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB) | ||
453 | |||
454 | -static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2, | ||
455 | - CPURISCVState *env, uint32_t desc, | ||
456 | - opivx2_fn fn, uint32_t esz) | ||
457 | -{ | ||
458 | - uint32_t vm = vext_vm(desc); | ||
459 | - uint32_t vl = env->vl; | ||
460 | - uint32_t total_elems = vext_get_total_elems(env, desc, esz); | ||
461 | - uint32_t vta = vext_vta(desc); | ||
462 | - uint32_t vma = vext_vma(desc); | ||
463 | - uint32_t i; | ||
464 | - | ||
465 | - for (i = env->vstart; i < vl; i++) { | ||
466 | - if (!vm && !vext_elem_mask(v0, i)) { | ||
467 | - /* set masked-off elements to 1s */ | ||
468 | - vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); | ||
469 | - continue; | ||
470 | - } | ||
471 | - fn(vd, s1, vs2, i); | ||
472 | - } | ||
473 | - env->vstart = 0; | ||
474 | - /* set tail elements to 1s */ | ||
475 | - vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); | ||
476 | -} | ||
477 | - | ||
478 | -/* generate the helpers for OPIVX */ | ||
479 | -#define GEN_VEXT_VX(NAME, ESZ) \ | ||
480 | -void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ | ||
481 | - void *vs2, CPURISCVState *env, \ | ||
482 | - uint32_t desc) \ | ||
483 | -{ \ | ||
484 | - do_vext_vx(vd, v0, s1, vs2, env, desc, \ | ||
485 | - do_##NAME, ESZ); \ | ||
486 | -} | ||
487 | - | ||
488 | GEN_VEXT_VX(vadd_vx_b, 1) | ||
489 | GEN_VEXT_VX(vadd_vx_h, 2) | ||
490 | GEN_VEXT_VX(vadd_vx_w, 4) | ||
491 | diff --git a/target/riscv/vector_internals.c b/target/riscv/vector_internals.c | ||
492 | new file mode 100644 | ||
493 | index XXXXXXX..XXXXXXX | ||
494 | --- /dev/null | ||
495 | +++ b/target/riscv/vector_internals.c | ||
496 | @@ -XXX,XX +XXX,XX @@ | ||
497 | +/* | ||
498 | + * RISC-V Vector Extension Internals | ||
499 | + * | ||
500 | + * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved. | ||
501 | + * | ||
502 | + * This program is free software; you can redistribute it and/or modify it | ||
503 | + * under the terms and conditions of the GNU General Public License, | ||
504 | + * version 2 or later, as published by the Free Software Foundation. | ||
505 | + * | ||
506 | + * This program is distributed in the hope it will be useful, but WITHOUT | ||
507 | + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
508 | + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
509 | + * more details. | ||
510 | + * | ||
511 | + * You should have received a copy of the GNU General Public License along with | ||
512 | + * this program. If not, see <http://www.gnu.org/licenses/>. | ||
513 | + */ | ||
514 | + | ||
515 | +#include "vector_internals.h" | ||
516 | + | ||
143 | +/* set agnostic elements to 1s */ | 517 | +/* set agnostic elements to 1s */ |
144 | +static void vext_set_elems_1s(void *base, uint32_t is_agnostic, uint32_t cnt, | 518 | +void vext_set_elems_1s(void *base, uint32_t is_agnostic, uint32_t cnt, |
145 | + uint32_t tot) | 519 | + uint32_t tot) |
146 | +{ | 520 | +{ |
147 | + if (is_agnostic == 0) { | 521 | + if (is_agnostic == 0) { |
148 | + /* policy undisturbed */ | 522 | + /* policy undisturbed */ |
149 | + return; | 523 | + return; |
150 | + } | 524 | + } |
151 | + if (tot - cnt == 0) { | 525 | + if (tot - cnt == 0) { |
152 | + return ; | 526 | + return ; |
153 | + } | 527 | + } |
154 | + memset(base + cnt, -1, tot - cnt); | 528 | + memset(base + cnt, -1, tot - cnt); |
155 | +} | 529 | +} |
156 | + | 530 | + |
157 | static inline void vext_set_elem_mask(void *v0, int index, | 531 | +void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2, |
158 | uint8_t value) | 532 | + CPURISCVState *env, uint32_t desc, |
159 | { | 533 | + opivv2_fn *fn, uint32_t esz) |
160 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB) | 534 | +{ |
161 | 535 | + uint32_t vm = vext_vm(desc); | |
162 | static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2, | 536 | + uint32_t vl = env->vl; |
163 | CPURISCVState *env, uint32_t desc, | ||
164 | - opivv2_fn *fn) | ||
165 | + opivv2_fn *fn, uint32_t esz) | ||
166 | { | ||
167 | uint32_t vm = vext_vm(desc); | ||
168 | uint32_t vl = env->vl; | ||
169 | + uint32_t total_elems = vext_get_total_elems(env, desc, esz); | 537 | + uint32_t total_elems = vext_get_total_elems(env, desc, esz); |
170 | + uint32_t vta = vext_vta(desc); | 538 | + uint32_t vta = vext_vta(desc); |
171 | uint32_t i; | 539 | + uint32_t vma = vext_vma(desc); |
172 | 540 | + uint32_t i; | |
173 | for (i = env->vstart; i < vl; i++) { | 541 | + |
174 | @@ -XXX,XX +XXX,XX @@ static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2, | 542 | + for (i = env->vstart; i < vl; i++) { |
175 | fn(vd, vs1, vs2, i); | 543 | + if (!vm && !vext_elem_mask(v0, i)) { |
176 | } | 544 | + /* set masked-off elements to 1s */ |
177 | env->vstart = 0; | 545 | + vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); |
546 | + continue; | ||
547 | + } | ||
548 | + fn(vd, vs1, vs2, i); | ||
549 | + } | ||
550 | + env->vstart = 0; | ||
178 | + /* set tail elements to 1s */ | 551 | + /* set tail elements to 1s */ |
179 | + vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); | 552 | + vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); |
180 | } | 553 | +} |
181 | 554 | + | |
182 | /* generate the helpers for OPIVV */ | 555 | +void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2, |
183 | -#define GEN_VEXT_VV(NAME) \ | 556 | + CPURISCVState *env, uint32_t desc, |
184 | +#define GEN_VEXT_VV(NAME, ESZ) \ | 557 | + opivx2_fn fn, uint32_t esz) |
185 | void HELPER(NAME)(void *vd, void *v0, void *vs1, \ | 558 | +{ |
186 | void *vs2, CPURISCVState *env, \ | 559 | + uint32_t vm = vext_vm(desc); |
187 | uint32_t desc) \ | 560 | + uint32_t vl = env->vl; |
188 | { \ | 561 | + uint32_t total_elems = vext_get_total_elems(env, desc, esz); |
189 | do_vext_vv(vd, v0, vs1, vs2, env, desc, \ | 562 | + uint32_t vta = vext_vta(desc); |
190 | - do_##NAME); \ | 563 | + uint32_t vma = vext_vma(desc); |
191 | + do_##NAME, ESZ); \ | 564 | + uint32_t i; |
192 | } | 565 | + |
193 | 566 | + for (i = env->vstart; i < vl; i++) { | |
194 | -GEN_VEXT_VV(vadd_vv_b) | 567 | + if (!vm && !vext_elem_mask(v0, i)) { |
195 | -GEN_VEXT_VV(vadd_vv_h) | 568 | + /* set masked-off elements to 1s */ |
196 | -GEN_VEXT_VV(vadd_vv_w) | 569 | + vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); |
197 | -GEN_VEXT_VV(vadd_vv_d) | 570 | + continue; |
198 | -GEN_VEXT_VV(vsub_vv_b) | 571 | + } |
199 | -GEN_VEXT_VV(vsub_vv_h) | 572 | + fn(vd, s1, vs2, i); |
200 | -GEN_VEXT_VV(vsub_vv_w) | 573 | + } |
201 | -GEN_VEXT_VV(vsub_vv_d) | 574 | + env->vstart = 0; |
202 | +GEN_VEXT_VV(vadd_vv_b, 1) | 575 | + /* set tail elements to 1s */ |
203 | +GEN_VEXT_VV(vadd_vv_h, 2) | 576 | + vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); |
204 | +GEN_VEXT_VV(vadd_vv_w, 4) | 577 | +} |
205 | +GEN_VEXT_VV(vadd_vv_d, 8) | 578 | diff --git a/target/riscv/meson.build b/target/riscv/meson.build |
206 | +GEN_VEXT_VV(vsub_vv_b, 1) | ||
207 | +GEN_VEXT_VV(vsub_vv_h, 2) | ||
208 | +GEN_VEXT_VV(vsub_vv_w, 4) | ||
209 | +GEN_VEXT_VV(vsub_vv_d, 8) | ||
210 | |||
211 | typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i); | ||
212 | |||
213 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD) | ||
214 | RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB) | ||
215 | RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB) | ||
216 | RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB) | ||
217 | -GEN_VEXT_VV(vwaddu_vv_b) | ||
218 | -GEN_VEXT_VV(vwaddu_vv_h) | ||
219 | -GEN_VEXT_VV(vwaddu_vv_w) | ||
220 | -GEN_VEXT_VV(vwsubu_vv_b) | ||
221 | -GEN_VEXT_VV(vwsubu_vv_h) | ||
222 | -GEN_VEXT_VV(vwsubu_vv_w) | ||
223 | -GEN_VEXT_VV(vwadd_vv_b) | ||
224 | -GEN_VEXT_VV(vwadd_vv_h) | ||
225 | -GEN_VEXT_VV(vwadd_vv_w) | ||
226 | -GEN_VEXT_VV(vwsub_vv_b) | ||
227 | -GEN_VEXT_VV(vwsub_vv_h) | ||
228 | -GEN_VEXT_VV(vwsub_vv_w) | ||
229 | -GEN_VEXT_VV(vwaddu_wv_b) | ||
230 | -GEN_VEXT_VV(vwaddu_wv_h) | ||
231 | -GEN_VEXT_VV(vwaddu_wv_w) | ||
232 | -GEN_VEXT_VV(vwsubu_wv_b) | ||
233 | -GEN_VEXT_VV(vwsubu_wv_h) | ||
234 | -GEN_VEXT_VV(vwsubu_wv_w) | ||
235 | -GEN_VEXT_VV(vwadd_wv_b) | ||
236 | -GEN_VEXT_VV(vwadd_wv_h) | ||
237 | -GEN_VEXT_VV(vwadd_wv_w) | ||
238 | -GEN_VEXT_VV(vwsub_wv_b) | ||
239 | -GEN_VEXT_VV(vwsub_wv_h) | ||
240 | -GEN_VEXT_VV(vwsub_wv_w) | ||
241 | +GEN_VEXT_VV(vwaddu_vv_b, 2) | ||
242 | +GEN_VEXT_VV(vwaddu_vv_h, 4) | ||
243 | +GEN_VEXT_VV(vwaddu_vv_w, 8) | ||
244 | +GEN_VEXT_VV(vwsubu_vv_b, 2) | ||
245 | +GEN_VEXT_VV(vwsubu_vv_h, 4) | ||
246 | +GEN_VEXT_VV(vwsubu_vv_w, 8) | ||
247 | +GEN_VEXT_VV(vwadd_vv_b, 2) | ||
248 | +GEN_VEXT_VV(vwadd_vv_h, 4) | ||
249 | +GEN_VEXT_VV(vwadd_vv_w, 8) | ||
250 | +GEN_VEXT_VV(vwsub_vv_b, 2) | ||
251 | +GEN_VEXT_VV(vwsub_vv_h, 4) | ||
252 | +GEN_VEXT_VV(vwsub_vv_w, 8) | ||
253 | +GEN_VEXT_VV(vwaddu_wv_b, 2) | ||
254 | +GEN_VEXT_VV(vwaddu_wv_h, 4) | ||
255 | +GEN_VEXT_VV(vwaddu_wv_w, 8) | ||
256 | +GEN_VEXT_VV(vwsubu_wv_b, 2) | ||
257 | +GEN_VEXT_VV(vwsubu_wv_h, 4) | ||
258 | +GEN_VEXT_VV(vwsubu_wv_w, 8) | ||
259 | +GEN_VEXT_VV(vwadd_wv_b, 2) | ||
260 | +GEN_VEXT_VV(vwadd_wv_h, 4) | ||
261 | +GEN_VEXT_VV(vwadd_wv_w, 8) | ||
262 | +GEN_VEXT_VV(vwsub_wv_b, 2) | ||
263 | +GEN_VEXT_VV(vwsub_wv_h, 4) | ||
264 | +GEN_VEXT_VV(vwsub_wv_w, 8) | ||
265 | |||
266 | RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD) | ||
267 | RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD) | ||
268 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR) | ||
269 | RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR) | ||
270 | RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR) | ||
271 | RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR) | ||
272 | -GEN_VEXT_VV(vand_vv_b) | ||
273 | -GEN_VEXT_VV(vand_vv_h) | ||
274 | -GEN_VEXT_VV(vand_vv_w) | ||
275 | -GEN_VEXT_VV(vand_vv_d) | ||
276 | -GEN_VEXT_VV(vor_vv_b) | ||
277 | -GEN_VEXT_VV(vor_vv_h) | ||
278 | -GEN_VEXT_VV(vor_vv_w) | ||
279 | -GEN_VEXT_VV(vor_vv_d) | ||
280 | -GEN_VEXT_VV(vxor_vv_b) | ||
281 | -GEN_VEXT_VV(vxor_vv_h) | ||
282 | -GEN_VEXT_VV(vxor_vv_w) | ||
283 | -GEN_VEXT_VV(vxor_vv_d) | ||
284 | +GEN_VEXT_VV(vand_vv_b, 1) | ||
285 | +GEN_VEXT_VV(vand_vv_h, 2) | ||
286 | +GEN_VEXT_VV(vand_vv_w, 4) | ||
287 | +GEN_VEXT_VV(vand_vv_d, 8) | ||
288 | +GEN_VEXT_VV(vor_vv_b, 1) | ||
289 | +GEN_VEXT_VV(vor_vv_h, 2) | ||
290 | +GEN_VEXT_VV(vor_vv_w, 4) | ||
291 | +GEN_VEXT_VV(vor_vv_d, 8) | ||
292 | +GEN_VEXT_VV(vxor_vv_b, 1) | ||
293 | +GEN_VEXT_VV(vxor_vv_h, 2) | ||
294 | +GEN_VEXT_VV(vxor_vv_w, 4) | ||
295 | +GEN_VEXT_VV(vxor_vv_d, 8) | ||
296 | |||
297 | RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND) | ||
298 | RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND) | ||
299 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX) | ||
300 | RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX) | ||
301 | RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX) | ||
302 | RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX) | ||
303 | -GEN_VEXT_VV(vminu_vv_b) | ||
304 | -GEN_VEXT_VV(vminu_vv_h) | ||
305 | -GEN_VEXT_VV(vminu_vv_w) | ||
306 | -GEN_VEXT_VV(vminu_vv_d) | ||
307 | -GEN_VEXT_VV(vmin_vv_b) | ||
308 | -GEN_VEXT_VV(vmin_vv_h) | ||
309 | -GEN_VEXT_VV(vmin_vv_w) | ||
310 | -GEN_VEXT_VV(vmin_vv_d) | ||
311 | -GEN_VEXT_VV(vmaxu_vv_b) | ||
312 | -GEN_VEXT_VV(vmaxu_vv_h) | ||
313 | -GEN_VEXT_VV(vmaxu_vv_w) | ||
314 | -GEN_VEXT_VV(vmaxu_vv_d) | ||
315 | -GEN_VEXT_VV(vmax_vv_b) | ||
316 | -GEN_VEXT_VV(vmax_vv_h) | ||
317 | -GEN_VEXT_VV(vmax_vv_w) | ||
318 | -GEN_VEXT_VV(vmax_vv_d) | ||
319 | +GEN_VEXT_VV(vminu_vv_b, 1) | ||
320 | +GEN_VEXT_VV(vminu_vv_h, 2) | ||
321 | +GEN_VEXT_VV(vminu_vv_w, 4) | ||
322 | +GEN_VEXT_VV(vminu_vv_d, 8) | ||
323 | +GEN_VEXT_VV(vmin_vv_b, 1) | ||
324 | +GEN_VEXT_VV(vmin_vv_h, 2) | ||
325 | +GEN_VEXT_VV(vmin_vv_w, 4) | ||
326 | +GEN_VEXT_VV(vmin_vv_d, 8) | ||
327 | +GEN_VEXT_VV(vmaxu_vv_b, 1) | ||
328 | +GEN_VEXT_VV(vmaxu_vv_h, 2) | ||
329 | +GEN_VEXT_VV(vmaxu_vv_w, 4) | ||
330 | +GEN_VEXT_VV(vmaxu_vv_d, 8) | ||
331 | +GEN_VEXT_VV(vmax_vv_b, 1) | ||
332 | +GEN_VEXT_VV(vmax_vv_h, 2) | ||
333 | +GEN_VEXT_VV(vmax_vv_w, 4) | ||
334 | +GEN_VEXT_VV(vmax_vv_d, 8) | ||
335 | |||
336 | RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN) | ||
337 | RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN) | ||
338 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL) | ||
339 | RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL) | ||
340 | RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL) | ||
341 | RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL) | ||
342 | -GEN_VEXT_VV(vmul_vv_b) | ||
343 | -GEN_VEXT_VV(vmul_vv_h) | ||
344 | -GEN_VEXT_VV(vmul_vv_w) | ||
345 | -GEN_VEXT_VV(vmul_vv_d) | ||
346 | +GEN_VEXT_VV(vmul_vv_b, 1) | ||
347 | +GEN_VEXT_VV(vmul_vv_h, 2) | ||
348 | +GEN_VEXT_VV(vmul_vv_w, 4) | ||
349 | +GEN_VEXT_VV(vmul_vv_d, 8) | ||
350 | |||
351 | static int8_t do_mulh_b(int8_t s2, int8_t s1) | ||
352 | { | ||
353 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b) | ||
354 | RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h) | ||
355 | RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w) | ||
356 | RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d) | ||
357 | -GEN_VEXT_VV(vmulh_vv_b) | ||
358 | -GEN_VEXT_VV(vmulh_vv_h) | ||
359 | -GEN_VEXT_VV(vmulh_vv_w) | ||
360 | -GEN_VEXT_VV(vmulh_vv_d) | ||
361 | -GEN_VEXT_VV(vmulhu_vv_b) | ||
362 | -GEN_VEXT_VV(vmulhu_vv_h) | ||
363 | -GEN_VEXT_VV(vmulhu_vv_w) | ||
364 | -GEN_VEXT_VV(vmulhu_vv_d) | ||
365 | -GEN_VEXT_VV(vmulhsu_vv_b) | ||
366 | -GEN_VEXT_VV(vmulhsu_vv_h) | ||
367 | -GEN_VEXT_VV(vmulhsu_vv_w) | ||
368 | -GEN_VEXT_VV(vmulhsu_vv_d) | ||
369 | +GEN_VEXT_VV(vmulh_vv_b, 1) | ||
370 | +GEN_VEXT_VV(vmulh_vv_h, 2) | ||
371 | +GEN_VEXT_VV(vmulh_vv_w, 4) | ||
372 | +GEN_VEXT_VV(vmulh_vv_d, 8) | ||
373 | +GEN_VEXT_VV(vmulhu_vv_b, 1) | ||
374 | +GEN_VEXT_VV(vmulhu_vv_h, 2) | ||
375 | +GEN_VEXT_VV(vmulhu_vv_w, 4) | ||
376 | +GEN_VEXT_VV(vmulhu_vv_d, 8) | ||
377 | +GEN_VEXT_VV(vmulhsu_vv_b, 1) | ||
378 | +GEN_VEXT_VV(vmulhsu_vv_h, 2) | ||
379 | +GEN_VEXT_VV(vmulhsu_vv_w, 4) | ||
380 | +GEN_VEXT_VV(vmulhsu_vv_d, 8) | ||
381 | |||
382 | RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL) | ||
383 | RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL) | ||
384 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM) | ||
385 | RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM) | ||
386 | RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM) | ||
387 | RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM) | ||
388 | -GEN_VEXT_VV(vdivu_vv_b) | ||
389 | -GEN_VEXT_VV(vdivu_vv_h) | ||
390 | -GEN_VEXT_VV(vdivu_vv_w) | ||
391 | -GEN_VEXT_VV(vdivu_vv_d) | ||
392 | -GEN_VEXT_VV(vdiv_vv_b) | ||
393 | -GEN_VEXT_VV(vdiv_vv_h) | ||
394 | -GEN_VEXT_VV(vdiv_vv_w) | ||
395 | -GEN_VEXT_VV(vdiv_vv_d) | ||
396 | -GEN_VEXT_VV(vremu_vv_b) | ||
397 | -GEN_VEXT_VV(vremu_vv_h) | ||
398 | -GEN_VEXT_VV(vremu_vv_w) | ||
399 | -GEN_VEXT_VV(vremu_vv_d) | ||
400 | -GEN_VEXT_VV(vrem_vv_b) | ||
401 | -GEN_VEXT_VV(vrem_vv_h) | ||
402 | -GEN_VEXT_VV(vrem_vv_w) | ||
403 | -GEN_VEXT_VV(vrem_vv_d) | ||
404 | +GEN_VEXT_VV(vdivu_vv_b, 1) | ||
405 | +GEN_VEXT_VV(vdivu_vv_h, 2) | ||
406 | +GEN_VEXT_VV(vdivu_vv_w, 4) | ||
407 | +GEN_VEXT_VV(vdivu_vv_d, 8) | ||
408 | +GEN_VEXT_VV(vdiv_vv_b, 1) | ||
409 | +GEN_VEXT_VV(vdiv_vv_h, 2) | ||
410 | +GEN_VEXT_VV(vdiv_vv_w, 4) | ||
411 | +GEN_VEXT_VV(vdiv_vv_d, 8) | ||
412 | +GEN_VEXT_VV(vremu_vv_b, 1) | ||
413 | +GEN_VEXT_VV(vremu_vv_h, 2) | ||
414 | +GEN_VEXT_VV(vremu_vv_w, 4) | ||
415 | +GEN_VEXT_VV(vremu_vv_d, 8) | ||
416 | +GEN_VEXT_VV(vrem_vv_b, 1) | ||
417 | +GEN_VEXT_VV(vrem_vv_h, 2) | ||
418 | +GEN_VEXT_VV(vrem_vv_w, 4) | ||
419 | +GEN_VEXT_VV(vrem_vv_d, 8) | ||
420 | |||
421 | RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU) | ||
422 | RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU) | ||
423 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL) | ||
424 | RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL) | ||
425 | RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL) | ||
426 | RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL) | ||
427 | -GEN_VEXT_VV(vwmul_vv_b) | ||
428 | -GEN_VEXT_VV(vwmul_vv_h) | ||
429 | -GEN_VEXT_VV(vwmul_vv_w) | ||
430 | -GEN_VEXT_VV(vwmulu_vv_b) | ||
431 | -GEN_VEXT_VV(vwmulu_vv_h) | ||
432 | -GEN_VEXT_VV(vwmulu_vv_w) | ||
433 | -GEN_VEXT_VV(vwmulsu_vv_b) | ||
434 | -GEN_VEXT_VV(vwmulsu_vv_h) | ||
435 | -GEN_VEXT_VV(vwmulsu_vv_w) | ||
436 | +GEN_VEXT_VV(vwmul_vv_b, 2) | ||
437 | +GEN_VEXT_VV(vwmul_vv_h, 4) | ||
438 | +GEN_VEXT_VV(vwmul_vv_w, 8) | ||
439 | +GEN_VEXT_VV(vwmulu_vv_b, 2) | ||
440 | +GEN_VEXT_VV(vwmulu_vv_h, 4) | ||
441 | +GEN_VEXT_VV(vwmulu_vv_w, 8) | ||
442 | +GEN_VEXT_VV(vwmulsu_vv_b, 2) | ||
443 | +GEN_VEXT_VV(vwmulsu_vv_h, 4) | ||
444 | +GEN_VEXT_VV(vwmulsu_vv_w, 8) | ||
445 | |||
446 | RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL) | ||
447 | RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL) | ||
448 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB) | ||
449 | RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB) | ||
450 | RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB) | ||
451 | RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB) | ||
452 | -GEN_VEXT_VV(vmacc_vv_b) | ||
453 | -GEN_VEXT_VV(vmacc_vv_h) | ||
454 | -GEN_VEXT_VV(vmacc_vv_w) | ||
455 | -GEN_VEXT_VV(vmacc_vv_d) | ||
456 | -GEN_VEXT_VV(vnmsac_vv_b) | ||
457 | -GEN_VEXT_VV(vnmsac_vv_h) | ||
458 | -GEN_VEXT_VV(vnmsac_vv_w) | ||
459 | -GEN_VEXT_VV(vnmsac_vv_d) | ||
460 | -GEN_VEXT_VV(vmadd_vv_b) | ||
461 | -GEN_VEXT_VV(vmadd_vv_h) | ||
462 | -GEN_VEXT_VV(vmadd_vv_w) | ||
463 | -GEN_VEXT_VV(vmadd_vv_d) | ||
464 | -GEN_VEXT_VV(vnmsub_vv_b) | ||
465 | -GEN_VEXT_VV(vnmsub_vv_h) | ||
466 | -GEN_VEXT_VV(vnmsub_vv_w) | ||
467 | -GEN_VEXT_VV(vnmsub_vv_d) | ||
468 | +GEN_VEXT_VV(vmacc_vv_b, 1) | ||
469 | +GEN_VEXT_VV(vmacc_vv_h, 2) | ||
470 | +GEN_VEXT_VV(vmacc_vv_w, 4) | ||
471 | +GEN_VEXT_VV(vmacc_vv_d, 8) | ||
472 | +GEN_VEXT_VV(vnmsac_vv_b, 1) | ||
473 | +GEN_VEXT_VV(vnmsac_vv_h, 2) | ||
474 | +GEN_VEXT_VV(vnmsac_vv_w, 4) | ||
475 | +GEN_VEXT_VV(vnmsac_vv_d, 8) | ||
476 | +GEN_VEXT_VV(vmadd_vv_b, 1) | ||
477 | +GEN_VEXT_VV(vmadd_vv_h, 2) | ||
478 | +GEN_VEXT_VV(vmadd_vv_w, 4) | ||
479 | +GEN_VEXT_VV(vmadd_vv_d, 8) | ||
480 | +GEN_VEXT_VV(vnmsub_vv_b, 1) | ||
481 | +GEN_VEXT_VV(vnmsub_vv_h, 2) | ||
482 | +GEN_VEXT_VV(vnmsub_vv_w, 4) | ||
483 | +GEN_VEXT_VV(vnmsub_vv_d, 8) | ||
484 | |||
485 | #define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ | ||
486 | static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \ | ||
487 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC) | ||
488 | RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC) | ||
489 | RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC) | ||
490 | RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC) | ||
491 | -GEN_VEXT_VV(vwmaccu_vv_b) | ||
492 | -GEN_VEXT_VV(vwmaccu_vv_h) | ||
493 | -GEN_VEXT_VV(vwmaccu_vv_w) | ||
494 | -GEN_VEXT_VV(vwmacc_vv_b) | ||
495 | -GEN_VEXT_VV(vwmacc_vv_h) | ||
496 | -GEN_VEXT_VV(vwmacc_vv_w) | ||
497 | -GEN_VEXT_VV(vwmaccsu_vv_b) | ||
498 | -GEN_VEXT_VV(vwmaccsu_vv_h) | ||
499 | -GEN_VEXT_VV(vwmaccsu_vv_w) | ||
500 | +GEN_VEXT_VV(vwmaccu_vv_b, 2) | ||
501 | +GEN_VEXT_VV(vwmaccu_vv_h, 4) | ||
502 | +GEN_VEXT_VV(vwmaccu_vv_w, 8) | ||
503 | +GEN_VEXT_VV(vwmacc_vv_b, 2) | ||
504 | +GEN_VEXT_VV(vwmacc_vv_h, 4) | ||
505 | +GEN_VEXT_VV(vwmacc_vv_w, 8) | ||
506 | +GEN_VEXT_VV(vwmaccsu_vv_b, 2) | ||
507 | +GEN_VEXT_VV(vwmaccsu_vv_h, 4) | ||
508 | +GEN_VEXT_VV(vwmaccsu_vv_w, 8) | ||
509 | |||
510 | RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC) | ||
511 | RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC) | ||
512 | diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc | ||
513 | index XXXXXXX..XXXXXXX 100644 | 579 | index XXXXXXX..XXXXXXX 100644 |
514 | --- a/target/riscv/insn_trans/trans_rvv.c.inc | 580 | --- a/target/riscv/meson.build |
515 | +++ b/target/riscv/insn_trans/trans_rvv.c.inc | 581 | +++ b/target/riscv/meson.build |
516 | @@ -XXX,XX +XXX,XX @@ do_opivv_gvec(DisasContext *s, arg_rmrr *a, GVecGen3Fn *gvec_fn, | 582 | @@ -XXX,XX +XXX,XX @@ riscv_ss.add(files( |
517 | tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); | 583 | 'gdbstub.c', |
518 | tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); | 584 | 'op_helper.c', |
519 | 585 | 'vector_helper.c', | |
520 | - if (a->vm && s->vl_eq_vlmax) { | 586 | + 'vector_internals.c', |
521 | + if (a->vm && s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) { | 587 | 'bitmanip_helper.c', |
522 | gvec_fn(s->sew, vreg_ofs(s, a->rd), | 588 | 'translate.c', |
523 | vreg_ofs(s, a->rs2), vreg_ofs(s, a->rs1), | 589 | 'm128_helper.c', |
524 | MAXSZ(s), MAXSZ(s)); | ||
525 | @@ -XXX,XX +XXX,XX @@ do_opivv_gvec(DisasContext *s, arg_rmrr *a, GVecGen3Fn *gvec_fn, | ||
526 | |||
527 | data = FIELD_DP32(data, VDATA, VM, a->vm); | ||
528 | data = FIELD_DP32(data, VDATA, LMUL, s->lmul); | ||
529 | + data = FIELD_DP32(data, VDATA, VTA, s->vta); | ||
530 | tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), | ||
531 | vreg_ofs(s, a->rs1), vreg_ofs(s, a->rs2), | ||
532 | cpu_env, s->cfg_ptr->vlen / 8, | ||
533 | -- | 590 | -- |
534 | 2.36.1 | 591 | 2.41.0 | diff view generated by jsdifflib |
1 | From: Alistair Francis <alistair.francis@wdc.com> | 1 | From: Kiran Ostrolenk <kiran.ostrolenk@codethink.co.uk> |
---|---|---|---|
2 | 2 | ||
3 | When running a 32-bit guest, with a e64 vmv.v.x and vl_eq_vlmax set to | 3 | Refactor the non SEW-specific stuff out of `GEN_OPIVV_TRANS` into |
4 | true the `tcg_debug_assert(vece <= MO_32)` will be triggered inside | 4 | function `opivv_trans` (similar to `opivi_trans`). `opivv_trans` will be |
5 | tcg_gen_gvec_dup_i32(). | 5 | used in proceeding vector-crypto commits. |
6 | 6 | ||
7 | This patch checks that condition and instead uses tcg_gen_gvec_dup_i64() | 7 | Signed-off-by: Kiran Ostrolenk <kiran.ostrolenk@codethink.co.uk> |
8 | is required. | ||
9 | |||
10 | Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1028 | ||
11 | Suggested-by: Robert Bu <robert.bu@gmail.com> | ||
12 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | ||
13 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 8 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
14 | Message-Id: <20220608234701.369536-1-alistair.francis@opensource.wdc.com> | 9 | Reviewed-by: Alistair Francis <alistair.francis@wdc.com> |
10 | Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn> | ||
11 | Signed-off-by: Max Chou <max.chou@sifive.com> | ||
12 | Message-ID: <20230711165917.2629866-3-max.chou@sifive.com> | ||
15 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | 13 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> |
16 | --- | 14 | --- |
17 | target/riscv/insn_trans/trans_rvv.c.inc | 12 ++++++++++-- | 15 | target/riscv/insn_trans/trans_rvv.c.inc | 62 +++++++++++++------------ |
18 | 1 file changed, 10 insertions(+), 2 deletions(-) | 16 | 1 file changed, 32 insertions(+), 30 deletions(-) |
19 | 17 | ||
20 | diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc | 18 | diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc |
21 | index XXXXXXX..XXXXXXX 100644 | 19 | index XXXXXXX..XXXXXXX 100644 |
22 | --- a/target/riscv/insn_trans/trans_rvv.c.inc | 20 | --- a/target/riscv/insn_trans/trans_rvv.c.inc |
23 | +++ b/target/riscv/insn_trans/trans_rvv.c.inc | 21 | +++ b/target/riscv/insn_trans/trans_rvv.c.inc |
24 | @@ -XXX,XX +XXX,XX @@ static bool trans_vmv_v_x(DisasContext *s, arg_vmv_v_x *a) | 22 | @@ -XXX,XX +XXX,XX @@ GEN_OPIWX_WIDEN_TRANS(vwadd_wx) |
25 | s1 = get_gpr(s, a->rs1, EXT_SIGN); | 23 | GEN_OPIWX_WIDEN_TRANS(vwsubu_wx) |
26 | 24 | GEN_OPIWX_WIDEN_TRANS(vwsub_wx) | |
27 | if (s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) { | 25 | |
28 | - tcg_gen_gvec_dup_tl(s->sew, vreg_ofs(s, a->rd), | 26 | +static bool opivv_trans(uint32_t vd, uint32_t vs1, uint32_t vs2, uint32_t vm, |
29 | - MAXSZ(s), MAXSZ(s), s1); | 27 | + gen_helper_gvec_4_ptr *fn, DisasContext *s) |
30 | + if (get_xl(s) == MXL_RV32 && s->sew == MO_64) { | 28 | +{ |
31 | + TCGv_i64 s1_i64 = tcg_temp_new_i64(); | 29 | + uint32_t data = 0; |
32 | + tcg_gen_ext_tl_i64(s1_i64, s1); | 30 | + TCGLabel *over = gen_new_label(); |
33 | + tcg_gen_gvec_dup_i64(s->sew, vreg_ofs(s, a->rd), | 31 | + tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); |
34 | + MAXSZ(s), MAXSZ(s), s1_i64); | 32 | + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); |
35 | + tcg_temp_free_i64(s1_i64); | 33 | + |
36 | + } else { | 34 | + data = FIELD_DP32(data, VDATA, VM, vm); |
37 | + tcg_gen_gvec_dup_tl(s->sew, vreg_ofs(s, a->rd), | 35 | + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); |
38 | + MAXSZ(s), MAXSZ(s), s1); | 36 | + data = FIELD_DP32(data, VDATA, VTA, s->vta); |
39 | + } | 37 | + data = FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s); |
40 | } else { | 38 | + data = FIELD_DP32(data, VDATA, VMA, s->vma); |
41 | TCGv_i32 desc; | 39 | + tcg_gen_gvec_4_ptr(vreg_ofs(s, vd), vreg_ofs(s, 0), vreg_ofs(s, vs1), |
42 | TCGv_i64 s1_i64 = tcg_temp_new_i64(); | 40 | + vreg_ofs(s, vs2), cpu_env, s->cfg_ptr->vlen / 8, |
41 | + s->cfg_ptr->vlen / 8, data, fn); | ||
42 | + mark_vs_dirty(s); | ||
43 | + gen_set_label(over); | ||
44 | + return true; | ||
45 | +} | ||
46 | + | ||
47 | /* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */ | ||
48 | /* OPIVV without GVEC IR */ | ||
49 | -#define GEN_OPIVV_TRANS(NAME, CHECK) \ | ||
50 | -static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ | ||
51 | -{ \ | ||
52 | - if (CHECK(s, a)) { \ | ||
53 | - uint32_t data = 0; \ | ||
54 | - static gen_helper_gvec_4_ptr * const fns[4] = { \ | ||
55 | - gen_helper_##NAME##_b, gen_helper_##NAME##_h, \ | ||
56 | - gen_helper_##NAME##_w, gen_helper_##NAME##_d, \ | ||
57 | - }; \ | ||
58 | - TCGLabel *over = gen_new_label(); \ | ||
59 | - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ | ||
60 | - tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ | ||
61 | - \ | ||
62 | - data = FIELD_DP32(data, VDATA, VM, a->vm); \ | ||
63 | - data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ | ||
64 | - data = FIELD_DP32(data, VDATA, VTA, s->vta); \ | ||
65 | - data = \ | ||
66 | - FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s);\ | ||
67 | - data = FIELD_DP32(data, VDATA, VMA, s->vma); \ | ||
68 | - tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ | ||
69 | - vreg_ofs(s, a->rs1), \ | ||
70 | - vreg_ofs(s, a->rs2), cpu_env, \ | ||
71 | - s->cfg_ptr->vlen / 8, \ | ||
72 | - s->cfg_ptr->vlen / 8, data, \ | ||
73 | - fns[s->sew]); \ | ||
74 | - mark_vs_dirty(s); \ | ||
75 | - gen_set_label(over); \ | ||
76 | - return true; \ | ||
77 | - } \ | ||
78 | - return false; \ | ||
79 | +#define GEN_OPIVV_TRANS(NAME, CHECK) \ | ||
80 | +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ | ||
81 | +{ \ | ||
82 | + if (CHECK(s, a)) { \ | ||
83 | + static gen_helper_gvec_4_ptr * const fns[4] = { \ | ||
84 | + gen_helper_##NAME##_b, gen_helper_##NAME##_h, \ | ||
85 | + gen_helper_##NAME##_w, gen_helper_##NAME##_d, \ | ||
86 | + }; \ | ||
87 | + return opivv_trans(a->rd, a->rs1, a->rs2, a->vm, fns[s->sew], s);\ | ||
88 | + } \ | ||
89 | + return false; \ | ||
90 | } | ||
91 | |||
92 | /* | ||
43 | -- | 93 | -- |
44 | 2.36.1 | 94 | 2.41.0 | diff view generated by jsdifflib |
1 | From: eopXD <yueh.ting.chen@gmail.com> | 1 | From: Nazar Kazakov <nazar.kazakov@codethink.co.uk> |
---|---|---|---|
2 | 2 | ||
3 | According to v-spec (section 5.4): | 3 | Remove the redundant "vl == 0" check which is already included within the vstart >= vl check, when vl == 0. |
4 | When vstart ≥ vl, there are no body elements, and no elements are | ||
5 | updated in any destination vector register group, including that | ||
6 | no tail elements are updated with agnostic values. | ||
7 | 4 | ||
8 | vmsbf.m, vmsif.m, vmsof.m, viota.m, vcompress instructions themselves | 5 | Signed-off-by: Nazar Kazakov <nazar.kazakov@codethink.co.uk> |
9 | require vstart to be zero. So they don't need the early exit. | ||
10 | |||
11 | Signed-off-by: eop Chen <eop.chen@sifive.com> | ||
12 | Reviewed-by: Frank Chang <frank.chang@sifive.com> | ||
13 | Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn> | 6 | Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn> |
7 | Signed-off-by: Max Chou <max.chou@sifive.com> | ||
14 | Acked-by: Alistair Francis <alistair.francis@wdc.com> | 8 | Acked-by: Alistair Francis <alistair.francis@wdc.com> |
15 | Message-Id: <165449614532.19704.7000832880482980398-4@git.sr.ht> | 9 | Message-ID: <20230711165917.2629866-4-max.chou@sifive.com> |
16 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | 10 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> |
17 | --- | 11 | --- |
18 | target/riscv/insn_trans/trans_rvv.c.inc | 27 +++++++++++++++++++++++++ | 12 | target/riscv/insn_trans/trans_rvv.c.inc | 31 +------------------------ |
19 | 1 file changed, 27 insertions(+) | 13 | 1 file changed, 1 insertion(+), 30 deletions(-) |
20 | 14 | ||
21 | diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc | 15 | diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc |
22 | index XXXXXXX..XXXXXXX 100644 | 16 | index XXXXXXX..XXXXXXX 100644 |
23 | --- a/target/riscv/insn_trans/trans_rvv.c.inc | 17 | --- a/target/riscv/insn_trans/trans_rvv.c.inc |
24 | +++ b/target/riscv/insn_trans/trans_rvv.c.inc | 18 | +++ b/target/riscv/insn_trans/trans_rvv.c.inc |
25 | @@ -XXX,XX +XXX,XX @@ static bool ldst_us_trans(uint32_t vd, uint32_t rs1, uint32_t data, | 19 | @@ -XXX,XX +XXX,XX @@ static bool ldst_us_trans(uint32_t vd, uint32_t rs1, uint32_t data, |
26 | 20 | TCGv_i32 desc; | |
27 | TCGLabel *over = gen_new_label(); | 21 | |
28 | tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); | 22 | TCGLabel *over = gen_new_label(); |
29 | + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); | 23 | - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); |
30 | 24 | tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); | |
31 | dest = tcg_temp_new_ptr(); | 25 | |
32 | mask = tcg_temp_new_ptr(); | 26 | dest = tcg_temp_new_ptr(); |
33 | @@ -XXX,XX +XXX,XX @@ static bool ldst_stride_trans(uint32_t vd, uint32_t rs1, uint32_t rs2, | 27 | @@ -XXX,XX +XXX,XX @@ static bool ldst_stride_trans(uint32_t vd, uint32_t rs1, uint32_t rs2, |
34 | 28 | TCGv_i32 desc; | |
35 | TCGLabel *over = gen_new_label(); | 29 | |
36 | tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); | 30 | TCGLabel *over = gen_new_label(); |
37 | + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); | 31 | - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); |
38 | 32 | tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); | |
39 | dest = tcg_temp_new_ptr(); | 33 | |
40 | mask = tcg_temp_new_ptr(); | 34 | dest = tcg_temp_new_ptr(); |
41 | @@ -XXX,XX +XXX,XX @@ static bool ldst_index_trans(uint32_t vd, uint32_t rs1, uint32_t vs2, | 35 | @@ -XXX,XX +XXX,XX @@ static bool ldst_index_trans(uint32_t vd, uint32_t rs1, uint32_t vs2, |
42 | 36 | TCGv_i32 desc; | |
43 | TCGLabel *over = gen_new_label(); | 37 | |
44 | tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); | 38 | TCGLabel *over = gen_new_label(); |
45 | + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); | 39 | - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); |
46 | 40 | tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); | |
47 | dest = tcg_temp_new_ptr(); | 41 | |
48 | mask = tcg_temp_new_ptr(); | 42 | dest = tcg_temp_new_ptr(); |
49 | @@ -XXX,XX +XXX,XX @@ static bool ldff_trans(uint32_t vd, uint32_t rs1, uint32_t data, | 43 | @@ -XXX,XX +XXX,XX @@ static bool ldff_trans(uint32_t vd, uint32_t rs1, uint32_t data, |
50 | 44 | TCGv_i32 desc; | |
51 | TCGLabel *over = gen_new_label(); | 45 | |
52 | tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); | 46 | TCGLabel *over = gen_new_label(); |
53 | + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); | 47 | - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); |
54 | 48 | tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); | |
55 | dest = tcg_temp_new_ptr(); | 49 | |
56 | mask = tcg_temp_new_ptr(); | 50 | dest = tcg_temp_new_ptr(); |
57 | @@ -XXX,XX +XXX,XX @@ do_opivv_gvec(DisasContext *s, arg_rmrr *a, GVecGen3Fn *gvec_fn, | 51 | @@ -XXX,XX +XXX,XX @@ do_opivv_gvec(DisasContext *s, arg_rmrr *a, GVecGen3Fn *gvec_fn, |
52 | return false; | ||
58 | } | 53 | } |
59 | 54 | ||
60 | tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); | 55 | - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); |
61 | + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); | 56 | tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); |
62 | 57 | ||
63 | if (a->vm && s->vl_eq_vlmax) { | 58 | if (a->vm && s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) { |
64 | gvec_fn(s->sew, vreg_ofs(s, a->rd), | ||
65 | @@ -XXX,XX +XXX,XX @@ static bool opivx_trans(uint32_t vd, uint32_t rs1, uint32_t vs2, uint32_t vm, | 59 | @@ -XXX,XX +XXX,XX @@ static bool opivx_trans(uint32_t vd, uint32_t rs1, uint32_t vs2, uint32_t vm, |
66 | 60 | uint32_t data = 0; | |
67 | TCGLabel *over = gen_new_label(); | 61 | |
68 | tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); | 62 | TCGLabel *over = gen_new_label(); |
69 | + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); | 63 | - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); |
70 | 64 | tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); | |
71 | dest = tcg_temp_new_ptr(); | 65 | |
72 | mask = tcg_temp_new_ptr(); | 66 | dest = tcg_temp_new_ptr(); |
73 | @@ -XXX,XX +XXX,XX @@ static bool opivi_trans(uint32_t vd, uint32_t imm, uint32_t vs2, uint32_t vm, | 67 | @@ -XXX,XX +XXX,XX @@ static bool opivi_trans(uint32_t vd, uint32_t imm, uint32_t vs2, uint32_t vm, |
74 | 68 | uint32_t data = 0; | |
75 | TCGLabel *over = gen_new_label(); | 69 | |
76 | tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); | 70 | TCGLabel *over = gen_new_label(); |
77 | + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); | 71 | - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); |
78 | 72 | tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); | |
79 | dest = tcg_temp_new_ptr(); | 73 | |
80 | mask = tcg_temp_new_ptr(); | 74 | dest = tcg_temp_new_ptr(); |
81 | @@ -XXX,XX +XXX,XX @@ static bool do_opivv_widen(DisasContext *s, arg_rmrr *a, | 75 | @@ -XXX,XX +XXX,XX @@ static bool do_opivv_widen(DisasContext *s, arg_rmrr *a, |
82 | uint32_t data = 0; | 76 | if (checkfn(s, a)) { |
83 | TCGLabel *over = gen_new_label(); | 77 | uint32_t data = 0; |
84 | tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); | 78 | TCGLabel *over = gen_new_label(); |
85 | + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); | 79 | - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); |
86 | 80 | tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); | |
87 | data = FIELD_DP32(data, VDATA, VM, a->vm); | 81 | |
88 | data = FIELD_DP32(data, VDATA, LMUL, s->lmul); | 82 | data = FIELD_DP32(data, VDATA, VM, a->vm); |
89 | @@ -XXX,XX +XXX,XX @@ static bool do_opiwv_widen(DisasContext *s, arg_rmrr *a, | 83 | @@ -XXX,XX +XXX,XX @@ static bool do_opiwv_widen(DisasContext *s, arg_rmrr *a, |
90 | uint32_t data = 0; | 84 | if (opiwv_widen_check(s, a)) { |
91 | TCGLabel *over = gen_new_label(); | 85 | uint32_t data = 0; |
92 | tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); | 86 | TCGLabel *over = gen_new_label(); |
93 | + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); | 87 | - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); |
94 | 88 | tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); | |
95 | data = FIELD_DP32(data, VDATA, VM, a->vm); | 89 | |
96 | data = FIELD_DP32(data, VDATA, LMUL, s->lmul); | 90 | data = FIELD_DP32(data, VDATA, VM, a->vm); |
91 | @@ -XXX,XX +XXX,XX @@ static bool opivv_trans(uint32_t vd, uint32_t vs1, uint32_t vs2, uint32_t vm, | ||
92 | { | ||
93 | uint32_t data = 0; | ||
94 | TCGLabel *over = gen_new_label(); | ||
95 | - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); | ||
96 | tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); | ||
97 | |||
98 | data = FIELD_DP32(data, VDATA, VM, vm); | ||
97 | @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ | 99 | @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ |
98 | }; \ | 100 | gen_helper_##NAME##_w, \ |
99 | TCGLabel *over = gen_new_label(); \ | 101 | }; \ |
100 | tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ | 102 | TCGLabel *over = gen_new_label(); \ |
101 | + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ | 103 | - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ |
102 | \ | 104 | tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ |
103 | data = FIELD_DP32(data, VDATA, VM, a->vm); \ | 105 | \ |
104 | data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ | 106 | data = FIELD_DP32(data, VDATA, VM, a->vm); \ |
105 | @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ | ||
106 | }; \ | ||
107 | TCGLabel *over = gen_new_label(); \ | ||
108 | tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ | ||
109 | + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ | ||
110 | \ | ||
111 | data = FIELD_DP32(data, VDATA, VM, a->vm); \ | ||
112 | data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ | ||
113 | @@ -XXX,XX +XXX,XX @@ static bool trans_vmv_v_v(DisasContext *s, arg_vmv_v_v *a) | 107 | @@ -XXX,XX +XXX,XX @@ static bool trans_vmv_v_v(DisasContext *s, arg_vmv_v_v *a) |
108 | gen_helper_vmv_v_v_w, gen_helper_vmv_v_v_d, | ||
114 | }; | 109 | }; |
115 | TCGLabel *over = gen_new_label(); | 110 | TCGLabel *over = gen_new_label(); |
116 | tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); | 111 | - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); |
117 | + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); | 112 | tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); |
118 | 113 | ||
119 | tcg_gen_gvec_2_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, a->rs1), | 114 | tcg_gen_gvec_2_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, a->rs1), |
120 | cpu_env, s->cfg_ptr->vlen / 8, | ||
121 | @@ -XXX,XX +XXX,XX @@ static bool trans_vmv_v_x(DisasContext *s, arg_vmv_v_x *a) | 115 | @@ -XXX,XX +XXX,XX @@ static bool trans_vmv_v_x(DisasContext *s, arg_vmv_v_x *a) |
116 | vext_check_ss(s, a->rd, 0, 1)) { | ||
122 | TCGv s1; | 117 | TCGv s1; |
123 | TCGLabel *over = gen_new_label(); | 118 | TCGLabel *over = gen_new_label(); |
124 | tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); | 119 | - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); |
125 | + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); | 120 | tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); |
126 | 121 | ||
127 | s1 = get_gpr(s, a->rs1, EXT_SIGN); | 122 | s1 = get_gpr(s, a->rs1, EXT_SIGN); |
128 | |||
129 | @@ -XXX,XX +XXX,XX @@ static bool trans_vmv_v_i(DisasContext *s, arg_vmv_v_i *a) | 123 | @@ -XXX,XX +XXX,XX @@ static bool trans_vmv_v_i(DisasContext *s, arg_vmv_v_i *a) |
124 | gen_helper_vmv_v_x_w, gen_helper_vmv_v_x_d, | ||
130 | }; | 125 | }; |
131 | TCGLabel *over = gen_new_label(); | 126 | TCGLabel *over = gen_new_label(); |
132 | tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); | 127 | - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); |
133 | + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); | 128 | tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); |
134 | 129 | ||
135 | s1 = tcg_constant_i64(simm); | 130 | s1 = tcg_constant_i64(simm); |
136 | dest = tcg_temp_new_ptr(); | ||
137 | @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ | 131 | @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ |
132 | }; \ | ||
138 | TCGLabel *over = gen_new_label(); \ | 133 | TCGLabel *over = gen_new_label(); \ |
139 | gen_set_rm(s, RISCV_FRM_DYN); \ | 134 | gen_set_rm(s, RISCV_FRM_DYN); \ |
140 | tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ | 135 | - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ |
141 | + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ | 136 | tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ |
142 | \ | 137 | \ |
143 | data = FIELD_DP32(data, VDATA, VM, a->vm); \ | 138 | data = FIELD_DP32(data, VDATA, VM, a->vm); \ |
144 | data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ | ||
145 | @@ -XXX,XX +XXX,XX @@ static bool opfvf_trans(uint32_t vd, uint32_t rs1, uint32_t vs2, | 139 | @@ -XXX,XX +XXX,XX @@ static bool opfvf_trans(uint32_t vd, uint32_t rs1, uint32_t vs2, |
146 | 140 | TCGv_i64 t1; | |
147 | TCGLabel *over = gen_new_label(); | 141 | |
148 | tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); | 142 | TCGLabel *over = gen_new_label(); |
149 | + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); | 143 | - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); |
150 | 144 | tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); | |
151 | dest = tcg_temp_new_ptr(); | 145 | |
152 | mask = tcg_temp_new_ptr(); | 146 | dest = tcg_temp_new_ptr(); |
153 | @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ | 147 | @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ |
148 | }; \ | ||
154 | TCGLabel *over = gen_new_label(); \ | 149 | TCGLabel *over = gen_new_label(); \ |
155 | gen_set_rm(s, RISCV_FRM_DYN); \ | 150 | gen_set_rm(s, RISCV_FRM_DYN); \ |
156 | tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ | 151 | - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ |
157 | + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);\ | 152 | tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);\ |
158 | \ | 153 | \ |
159 | data = FIELD_DP32(data, VDATA, VM, a->vm); \ | 154 | data = FIELD_DP32(data, VDATA, VM, a->vm); \ |
160 | data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ | ||
161 | @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ | 155 | @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ |
156 | }; \ | ||
162 | TCGLabel *over = gen_new_label(); \ | 157 | TCGLabel *over = gen_new_label(); \ |
163 | gen_set_rm(s, RISCV_FRM_DYN); \ | 158 | gen_set_rm(s, RISCV_FRM_DYN); \ |
164 | tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ | 159 | - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ |
165 | + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ | 160 | tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ |
166 | \ | 161 | \ |
167 | data = FIELD_DP32(data, VDATA, VM, a->vm); \ | 162 | data = FIELD_DP32(data, VDATA, VM, a->vm); \ |
168 | data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ | ||
169 | @@ -XXX,XX +XXX,XX @@ static bool do_opfv(DisasContext *s, arg_rmr *a, | 163 | @@ -XXX,XX +XXX,XX @@ static bool do_opfv(DisasContext *s, arg_rmr *a, |
170 | TCGLabel *over = gen_new_label(); | 164 | uint32_t data = 0; |
171 | gen_set_rm(s, rm); | 165 | TCGLabel *over = gen_new_label(); |
172 | tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); | 166 | gen_set_rm_chkfrm(s, rm); |
173 | + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); | 167 | - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); |
174 | 168 | tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); | |
175 | data = FIELD_DP32(data, VDATA, VM, a->vm); | 169 | |
176 | data = FIELD_DP32(data, VDATA, LMUL, s->lmul); | 170 | data = FIELD_DP32(data, VDATA, VM, a->vm); |
177 | @@ -XXX,XX +XXX,XX @@ static bool trans_vfmv_v_f(DisasContext *s, arg_vfmv_v_f *a) | 171 | @@ -XXX,XX +XXX,XX @@ static bool trans_vfmv_v_f(DisasContext *s, arg_vfmv_v_f *a) |
172 | gen_helper_vmv_v_x_d, | ||
178 | }; | 173 | }; |
179 | TCGLabel *over = gen_new_label(); | 174 | TCGLabel *over = gen_new_label(); |
180 | tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); | 175 | - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); |
181 | + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); | 176 | tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); |
182 | 177 | ||
183 | t1 = tcg_temp_new_i64(); | 178 | t1 = tcg_temp_new_i64(); |
184 | /* NaN-box f[rs1] */ | 179 | @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ |
185 | @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ | 180 | }; \ |
186 | TCGLabel *over = gen_new_label(); \ | 181 | TCGLabel *over = gen_new_label(); \ |
187 | gen_set_rm(s, FRM); \ | 182 | gen_set_rm_chkfrm(s, FRM); \ |
188 | tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ | 183 | - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ |
189 | + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ | 184 | tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ |
190 | \ | 185 | \ |
191 | data = FIELD_DP32(data, VDATA, VM, a->vm); \ | 186 | data = FIELD_DP32(data, VDATA, VM, a->vm); \ |
187 | @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ | ||
188 | }; \ | ||
189 | TCGLabel *over = gen_new_label(); \ | ||
190 | gen_set_rm(s, RISCV_FRM_DYN); \ | ||
191 | - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ | ||
192 | tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ | ||
193 | \ | ||
194 | data = FIELD_DP32(data, VDATA, VM, a->vm); \ | ||
195 | @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ | ||
196 | }; \ | ||
197 | TCGLabel *over = gen_new_label(); \ | ||
198 | gen_set_rm_chkfrm(s, FRM); \ | ||
199 | - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ | ||
200 | tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ | ||
201 | \ | ||
202 | data = FIELD_DP32(data, VDATA, VM, a->vm); \ | ||
203 | @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ | ||
204 | }; \ | ||
205 | TCGLabel *over = gen_new_label(); \ | ||
206 | gen_set_rm_chkfrm(s, FRM); \ | ||
207 | - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ | ||
208 | tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ | ||
209 | \ | ||
210 | data = FIELD_DP32(data, VDATA, VM, a->vm); \ | ||
211 | @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_r *a) \ | ||
212 | uint32_t data = 0; \ | ||
213 | gen_helper_gvec_4_ptr *fn = gen_helper_##NAME; \ | ||
214 | TCGLabel *over = gen_new_label(); \ | ||
215 | - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ | ||
216 | tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ | ||
217 | \ | ||
192 | data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ | 218 | data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ |
193 | @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ | ||
194 | TCGLabel *over = gen_new_label(); \ | ||
195 | gen_set_rm(s, RISCV_FRM_DYN); \ | ||
196 | tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ | ||
197 | + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ | ||
198 | \ | ||
199 | data = FIELD_DP32(data, VDATA, VM, a->vm); \ | ||
200 | tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ | ||
201 | @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ | ||
202 | TCGLabel *over = gen_new_label(); \ | ||
203 | gen_set_rm(s, FRM); \ | ||
204 | tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ | ||
205 | + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ | ||
206 | \ | ||
207 | data = FIELD_DP32(data, VDATA, VM, a->vm); \ | ||
208 | data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ | ||
209 | @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ | ||
210 | TCGLabel *over = gen_new_label(); \ | ||
211 | gen_set_rm(s, FRM); \ | ||
212 | tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ | ||
213 | + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ | ||
214 | \ | ||
215 | data = FIELD_DP32(data, VDATA, VM, a->vm); \ | ||
216 | tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ | ||
217 | @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_r *a) \ | ||
218 | gen_helper_gvec_4_ptr *fn = gen_helper_##NAME; \ | ||
219 | TCGLabel *over = gen_new_label(); \ | ||
220 | tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ | ||
221 | + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ | ||
222 | \ | ||
223 | data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ | ||
224 | tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ | ||
225 | @@ -XXX,XX +XXX,XX @@ static bool trans_vid_v(DisasContext *s, arg_vid_v *a) | 219 | @@ -XXX,XX +XXX,XX @@ static bool trans_vid_v(DisasContext *s, arg_vid_v *a) |
226 | uint32_t data = 0; | 220 | require_vm(a->vm, a->rd)) { |
227 | TCGLabel *over = gen_new_label(); | 221 | uint32_t data = 0; |
228 | tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); | 222 | TCGLabel *over = gen_new_label(); |
229 | + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); | 223 | - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); |
230 | 224 | tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); | |
231 | data = FIELD_DP32(data, VDATA, VM, a->vm); | 225 | |
232 | data = FIELD_DP32(data, VDATA, LMUL, s->lmul); | 226 | data = FIELD_DP32(data, VDATA, VM, a->vm); |
227 | @@ -XXX,XX +XXX,XX @@ static bool trans_vmv_s_x(DisasContext *s, arg_vmv_s_x *a) | ||
228 | TCGv s1; | ||
229 | TCGLabel *over = gen_new_label(); | ||
230 | |||
231 | - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); | ||
232 | tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); | ||
233 | |||
234 | t1 = tcg_temp_new_i64(); | ||
235 | @@ -XXX,XX +XXX,XX @@ static bool trans_vfmv_s_f(DisasContext *s, arg_vfmv_s_f *a) | ||
236 | TCGv_i64 t1; | ||
237 | TCGLabel *over = gen_new_label(); | ||
238 | |||
239 | - /* if vl == 0 or vstart >= vl, skip vector register write back */ | ||
240 | - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); | ||
241 | + /* if vstart >= vl, skip vector register write back */ | ||
242 | tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); | ||
243 | |||
244 | /* NaN-box f[rs1] */ | ||
233 | @@ -XXX,XX +XXX,XX @@ static bool int_ext_op(DisasContext *s, arg_rmr *a, uint8_t seq) | 245 | @@ -XXX,XX +XXX,XX @@ static bool int_ext_op(DisasContext *s, arg_rmr *a, uint8_t seq) |
246 | uint32_t data = 0; | ||
234 | gen_helper_gvec_3_ptr *fn; | 247 | gen_helper_gvec_3_ptr *fn; |
235 | TCGLabel *over = gen_new_label(); | 248 | TCGLabel *over = gen_new_label(); |
236 | tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); | 249 | - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); |
237 | + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); | 250 | tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); |
238 | 251 | ||
239 | static gen_helper_gvec_3_ptr * const fns[6][4] = { | 252 | static gen_helper_gvec_3_ptr * const fns[6][4] = { |
240 | { | ||
241 | -- | 253 | -- |
242 | 2.36.1 | 254 | 2.41.0 | diff view generated by jsdifflib |
1 | From: eopXD <yueh.ting.chen@gmail.com> | 1 | From: Lawrence Hunter <lawrence.hunter@codethink.co.uk> |
---|---|---|---|
2 | 2 | ||
3 | Destination registers of unit-stride mask load and store instructions are | 3 | This commit adds support for the Zvbc vector-crypto extension, which
4 | always written with a tail-agnostic policy. | 4 | consists of the following instructions: |
5 | 5 | ||
6 | A vector segment load / store instruction may contain fractional lmul | 6 | * vclmulh.[vx,vv] |
7 | with nf * lmul > 1. The rest of the elements in the last register should | 7 | * vclmul.[vx,vv] |
8 | be treated as tail elements. | 8 | |
9 | 9 | Translation functions are defined in | |
10 | Signed-off-by: eop Chen <eop.chen@sifive.com> | 10 | `target/riscv/insn_trans/trans_rvvk.c.inc` and helpers are defined in |
11 | Reviewed-by: Frank Chang <frank.chang@sifive.com> | 11 | `target/riscv/vcrypto_helper.c`. |
12 | Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn> | 12 | |
13 | Acked-by: Alistair Francis <alistair.francis@wdc.com> | 13 | Co-authored-by: Nazar Kazakov <nazar.kazakov@codethink.co.uk> |
14 | Message-Id: <165449614532.19704.7000832880482980398-6@git.sr.ht> | 14 | Co-authored-by: Max Chou <max.chou@sifive.com> |
15 | Signed-off-by: Nazar Kazakov <nazar.kazakov@codethink.co.uk> | ||
16 | Signed-off-by: Lawrence Hunter <lawrence.hunter@codethink.co.uk> | ||
17 | Signed-off-by: Max Chou <max.chou@sifive.com> | ||
18 | [max.chou@sifive.com: Exposed x-zvbc property] | ||
19 | Message-ID: <20230711165917.2629866-5-max.chou@sifive.com> | ||
15 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | 20 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> |
16 | --- | 21 | --- |
17 | target/riscv/translate.c | 2 + | 22 | target/riscv/cpu_cfg.h | 1 + |
18 | target/riscv/vector_helper.c | 60 +++++++++++++++++++++++++ | 23 | target/riscv/helper.h | 6 +++ |
19 | target/riscv/insn_trans/trans_rvv.c.inc | 6 +++ | 24 | target/riscv/insn32.decode | 6 +++ |
20 | 3 files changed, 68 insertions(+) | 25 | target/riscv/cpu.c | 9 ++++ |
21 | 26 | target/riscv/translate.c | 1 + | |
27 | target/riscv/vcrypto_helper.c | 59 ++++++++++++++++++++++ | ||
28 | target/riscv/insn_trans/trans_rvvk.c.inc | 62 ++++++++++++++++++++++++ | ||
29 | target/riscv/meson.build | 3 +- | ||
30 | 8 files changed, 146 insertions(+), 1 deletion(-) | ||
31 | create mode 100644 target/riscv/vcrypto_helper.c | ||
32 | create mode 100644 target/riscv/insn_trans/trans_rvvk.c.inc | ||
33 | |||
34 | diff --git a/target/riscv/cpu_cfg.h b/target/riscv/cpu_cfg.h | ||
35 | index XXXXXXX..XXXXXXX 100644 | ||
36 | --- a/target/riscv/cpu_cfg.h | ||
37 | +++ b/target/riscv/cpu_cfg.h | ||
38 | @@ -XXX,XX +XXX,XX @@ struct RISCVCPUConfig { | ||
39 | bool ext_zve32f; | ||
40 | bool ext_zve64f; | ||
41 | bool ext_zve64d; | ||
42 | + bool ext_zvbc; | ||
43 | bool ext_zmmul; | ||
44 | bool ext_zvfbfmin; | ||
45 | bool ext_zvfbfwma; | ||
46 | diff --git a/target/riscv/helper.h b/target/riscv/helper.h | ||
47 | index XXXXXXX..XXXXXXX 100644 | ||
48 | --- a/target/riscv/helper.h | ||
49 | +++ b/target/riscv/helper.h | ||
50 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_5(vfwcvtbf16_f_f_v, void, ptr, ptr, ptr, env, i32) | ||
51 | |||
52 | DEF_HELPER_6(vfwmaccbf16_vv, void, ptr, ptr, ptr, ptr, env, i32) | ||
53 | DEF_HELPER_6(vfwmaccbf16_vf, void, ptr, ptr, i64, ptr, env, i32) | ||
54 | + | ||
55 | +/* Vector crypto functions */ | ||
56 | +DEF_HELPER_6(vclmul_vv, void, ptr, ptr, ptr, ptr, env, i32) | ||
57 | +DEF_HELPER_6(vclmul_vx, void, ptr, ptr, tl, ptr, env, i32) | ||
58 | +DEF_HELPER_6(vclmulh_vv, void, ptr, ptr, ptr, ptr, env, i32) | ||
59 | +DEF_HELPER_6(vclmulh_vx, void, ptr, ptr, tl, ptr, env, i32) | ||
60 | diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode | ||
61 | index XXXXXXX..XXXXXXX 100644 | ||
62 | --- a/target/riscv/insn32.decode | ||
63 | +++ b/target/riscv/insn32.decode | ||
64 | @@ -XXX,XX +XXX,XX @@ vfwcvtbf16_f_f_v 010010 . ..... 01101 001 ..... 1010111 @r2_vm | ||
65 | # *** Zvfbfwma Standard Extension *** | ||
66 | vfwmaccbf16_vv 111011 . ..... ..... 001 ..... 1010111 @r_vm | ||
67 | vfwmaccbf16_vf 111011 . ..... ..... 101 ..... 1010111 @r_vm | ||
68 | + | ||
69 | +# *** Zvbc vector crypto extension *** | ||
70 | +vclmul_vv 001100 . ..... ..... 010 ..... 1010111 @r_vm | ||
71 | +vclmul_vx 001100 . ..... ..... 110 ..... 1010111 @r_vm | ||
72 | +vclmulh_vv 001101 . ..... ..... 010 ..... 1010111 @r_vm | ||
73 | +vclmulh_vx 001101 . ..... ..... 110 ..... 1010111 @r_vm | ||
74 | diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c | ||
75 | index XXXXXXX..XXXXXXX 100644 | ||
76 | --- a/target/riscv/cpu.c | ||
77 | +++ b/target/riscv/cpu.c | ||
78 | @@ -XXX,XX +XXX,XX @@ static const struct isa_ext_data isa_edata_arr[] = { | ||
79 | ISA_EXT_DATA_ENTRY(zksed, PRIV_VERSION_1_12_0, ext_zksed), | ||
80 | ISA_EXT_DATA_ENTRY(zksh, PRIV_VERSION_1_12_0, ext_zksh), | ||
81 | ISA_EXT_DATA_ENTRY(zkt, PRIV_VERSION_1_12_0, ext_zkt), | ||
82 | + ISA_EXT_DATA_ENTRY(zvbc, PRIV_VERSION_1_12_0, ext_zvbc), | ||
83 | ISA_EXT_DATA_ENTRY(zve32f, PRIV_VERSION_1_10_0, ext_zve32f), | ||
84 | ISA_EXT_DATA_ENTRY(zve64f, PRIV_VERSION_1_10_0, ext_zve64f), | ||
85 | ISA_EXT_DATA_ENTRY(zve64d, PRIV_VERSION_1_10_0, ext_zve64d), | ||
86 | @@ -XXX,XX +XXX,XX @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp) | ||
87 | return; | ||
88 | } | ||
89 | |||
90 | + if (cpu->cfg.ext_zvbc && !cpu->cfg.ext_zve64f) { | ||
91 | + error_setg(errp, "Zvbc extension requires V or Zve64{f,d} extensions"); | ||
92 | + return; | ||
93 | + } | ||
94 | + | ||
95 | if (cpu->cfg.ext_zk) { | ||
96 | cpu->cfg.ext_zkn = true; | ||
97 | cpu->cfg.ext_zkr = true; | ||
98 | @@ -XXX,XX +XXX,XX @@ static Property riscv_cpu_extensions[] = { | ||
99 | DEFINE_PROP_BOOL("x-zvfbfmin", RISCVCPU, cfg.ext_zvfbfmin, false), | ||
100 | DEFINE_PROP_BOOL("x-zvfbfwma", RISCVCPU, cfg.ext_zvfbfwma, false), | ||
101 | |||
102 | + /* Vector cryptography extensions */ | ||
103 | + DEFINE_PROP_BOOL("x-zvbc", RISCVCPU, cfg.ext_zvbc, false), | ||
104 | + | ||
105 | DEFINE_PROP_END_OF_LIST(), | ||
106 | }; | ||
107 | |||
22 | diff --git a/target/riscv/translate.c b/target/riscv/translate.c | 108 | diff --git a/target/riscv/translate.c b/target/riscv/translate.c |
23 | index XXXXXXX..XXXXXXX 100644 | 109 | index XXXXXXX..XXXXXXX 100644 |
24 | --- a/target/riscv/translate.c | 110 | --- a/target/riscv/translate.c |
25 | +++ b/target/riscv/translate.c | 111 | +++ b/target/riscv/translate.c |
26 | @@ -XXX,XX +XXX,XX @@ typedef struct DisasContext { | 112 | @@ -XXX,XX +XXX,XX @@ static uint32_t opcode_at(DisasContextBase *dcbase, target_ulong pc) |
27 | int8_t lmul; | 113 | #include "insn_trans/trans_rvzfa.c.inc" |
28 | uint8_t sew; | 114 | #include "insn_trans/trans_rvzfh.c.inc" |
29 | uint8_t vta; | 115 | #include "insn_trans/trans_rvk.c.inc" |
30 | + bool cfg_vta_all_1s; | 116 | +#include "insn_trans/trans_rvvk.c.inc" |
31 | target_ulong vstart; | 117 | #include "insn_trans/trans_privileged.c.inc" |
32 | bool vl_eq_vlmax; | 118 | #include "insn_trans/trans_svinval.c.inc" |
33 | uint8_t ntemp; | 119 | #include "insn_trans/trans_rvbf16.c.inc" |
34 | @@ -XXX,XX +XXX,XX @@ static void riscv_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) | 120 | diff --git a/target/riscv/vcrypto_helper.c b/target/riscv/vcrypto_helper.c |
35 | ctx->sew = FIELD_EX32(tb_flags, TB_FLAGS, SEW); | 121 | new file mode 100644 |
36 | ctx->lmul = sextract32(FIELD_EX32(tb_flags, TB_FLAGS, LMUL), 0, 3); | 122 | index XXXXXXX..XXXXXXX |
37 | ctx->vta = FIELD_EX32(tb_flags, TB_FLAGS, VTA) && cpu->cfg.rvv_ta_all_1s; | 123 | --- /dev/null |
38 | + ctx->cfg_vta_all_1s = cpu->cfg.rvv_ta_all_1s; | 124 | +++ b/target/riscv/vcrypto_helper.c |
39 | ctx->vstart = env->vstart; | 125 | @@ -XXX,XX +XXX,XX @@ |
40 | ctx->vl_eq_vlmax = FIELD_EX32(tb_flags, TB_FLAGS, VL_EQ_VLMAX); | 126 | +/* |
41 | ctx->misa_mxl_max = env->misa_mxl_max; | 127 | + * RISC-V Vector Crypto Extension Helpers for QEMU. |
42 | diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c | 128 | + * |
43 | index XXXXXXX..XXXXXXX 100644 | 129 | + * Copyright (C) 2023 SiFive, Inc. |
44 | --- a/target/riscv/vector_helper.c | 130 | + * Written by Codethink Ltd and SiFive. |
45 | +++ b/target/riscv/vector_helper.c | 131 | + * |
46 | @@ -XXX,XX +XXX,XX @@ vext_ldst_stride(void *vd, void *v0, target_ulong base, | 132 | + * This program is free software; you can redistribute it and/or modify it |
47 | uint32_t i, k; | 133 | + * under the terms and conditions of the GNU General Public License, |
48 | uint32_t nf = vext_nf(desc); | 134 | + * version 2 or later, as published by the Free Software Foundation. |
49 | uint32_t max_elems = vext_max_elems(desc, log2_esz); | 135 | + * |
50 | + uint32_t esz = 1 << log2_esz; | 136 | + * This program is distributed in the hope it will be useful, but WITHOUT |
51 | + uint32_t total_elems = vext_get_total_elems(env, desc, esz); | 137 | + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
52 | + uint32_t vta = vext_vta(desc); | 138 | + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for |
53 | 139 | + * more details. | |
54 | for (i = env->vstart; i < env->vl; i++, env->vstart++) { | 140 | + * |
55 | if (!vm && !vext_elem_mask(v0, i)) { | 141 | + * You should have received a copy of the GNU General Public License along with |
56 | @@ -XXX,XX +XXX,XX @@ vext_ldst_stride(void *vd, void *v0, target_ulong base, | 142 | + * this program. If not, see <http://www.gnu.org/licenses/>. |
57 | } | 143 | + */ |
58 | } | 144 | + |
59 | env->vstart = 0; | 145 | +#include "qemu/osdep.h" |
60 | + /* set tail elements to 1s */ | 146 | +#include "qemu/host-utils.h" |
61 | + for (k = 0; k < nf; ++k) { | 147 | +#include "qemu/bitops.h" |
62 | + vext_set_elems_1s(vd, vta, (k * max_elems + env->vl) * esz, | 148 | +#include "cpu.h" |
63 | + (k * max_elems + max_elems) * esz); | 149 | +#include "exec/memop.h" |
64 | + } | 150 | +#include "exec/exec-all.h" |
65 | + if (nf * max_elems % total_elems != 0) { | 151 | +#include "exec/helper-proto.h" |
66 | + uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3; | 152 | +#include "internals.h" |
67 | + uint32_t registers_used = | 153 | +#include "vector_internals.h" |
68 | + ((nf * max_elems) * esz + (vlenb - 1)) / vlenb; | 154 | + |
69 | + vext_set_elems_1s(vd, vta, (nf * max_elems) * esz, | 155 | +static uint64_t clmul64(uint64_t y, uint64_t x) |
70 | + registers_used * vlenb); | 156 | +{ |
71 | + } | 157 | + uint64_t result = 0; |
72 | } | 158 | + for (int j = 63; j >= 0; j--) { |
73 | 159 | + if ((y >> j) & 1) { | |
74 | #define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN) \ | 160 | + result ^= (x << j); |
75 | @@ -XXX,XX +XXX,XX @@ vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, | 161 | + } |
76 | uint32_t i, k; | 162 | + } |
77 | uint32_t nf = vext_nf(desc); | 163 | + return result; |
78 | uint32_t max_elems = vext_max_elems(desc, log2_esz); | 164 | +} |
79 | + uint32_t esz = 1 << log2_esz; | 165 | + |
80 | + uint32_t total_elems = vext_get_total_elems(env, desc, esz); | 166 | +static uint64_t clmulh64(uint64_t y, uint64_t x) |
81 | + uint32_t vta = vext_vta(desc); | 167 | +{ |
82 | 168 | + uint64_t result = 0; | |
83 | /* load bytes from guest memory */ | 169 | + for (int j = 63; j >= 1; j--) { |
84 | for (i = env->vstart; i < evl; i++, env->vstart++) { | 170 | + if ((y >> j) & 1) { |
85 | @@ -XXX,XX +XXX,XX @@ vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, | 171 | + result ^= (x >> (64 - j)); |
86 | } | 172 | + } |
87 | } | 173 | + } |
88 | env->vstart = 0; | 174 | + return result; |
89 | + /* set tail elements to 1s */ | 175 | +} |
90 | + for (k = 0; k < nf; ++k) { | 176 | + |
91 | + vext_set_elems_1s(vd, vta, (k * max_elems + evl) * esz, | 177 | +RVVCALL(OPIVV2, vclmul_vv, OP_UUU_D, H8, H8, H8, clmul64) |
92 | + (k * max_elems + max_elems) * esz); | 178 | +GEN_VEXT_VV(vclmul_vv, 8) |
93 | + } | 179 | +RVVCALL(OPIVX2, vclmul_vx, OP_UUU_D, H8, H8, clmul64) |
94 | + if (nf * max_elems % total_elems != 0) { | 180 | +GEN_VEXT_VX(vclmul_vx, 8) |
95 | + uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3; | 181 | +RVVCALL(OPIVV2, vclmulh_vv, OP_UUU_D, H8, H8, H8, clmulh64) |
96 | + uint32_t registers_used = | 182 | +GEN_VEXT_VV(vclmulh_vv, 8) |
97 | + ((nf * max_elems) * esz + (vlenb - 1)) / vlenb; | 183 | +RVVCALL(OPIVX2, vclmulh_vx, OP_UUU_D, H8, H8, clmulh64) |
98 | + vext_set_elems_1s(vd, vta, (nf * max_elems) * esz, | 184 | +GEN_VEXT_VX(vclmulh_vx, 8) |
99 | + registers_used * vlenb); | 185 | diff --git a/target/riscv/insn_trans/trans_rvvk.c.inc b/target/riscv/insn_trans/trans_rvvk.c.inc |
100 | + } | 186 | new file mode 100644 |
101 | } | 187 | index XXXXXXX..XXXXXXX |
102 | 188 | --- /dev/null | |
103 | /* | 189 | +++ b/target/riscv/insn_trans/trans_rvvk.c.inc |
104 | @@ -XXX,XX +XXX,XX @@ vext_ldst_index(void *vd, void *v0, target_ulong base, | 190 | @@ -XXX,XX +XXX,XX @@ |
105 | uint32_t nf = vext_nf(desc); | 191 | +/* |
106 | uint32_t vm = vext_vm(desc); | 192 | + * RISC-V translation routines for the vector crypto extension. |
107 | uint32_t max_elems = vext_max_elems(desc, log2_esz); | 193 | + * |
108 | + uint32_t esz = 1 << log2_esz; | 194 | + * Copyright (C) 2023 SiFive, Inc. |
109 | + uint32_t total_elems = vext_get_total_elems(env, desc, esz); | 195 | + * Written by Codethink Ltd and SiFive. |
110 | + uint32_t vta = vext_vta(desc); | 196 | + * |
111 | 197 | + * This program is free software; you can redistribute it and/or modify it | |
112 | /* load bytes from guest memory */ | 198 | + * under the terms and conditions of the GNU General Public License, |
113 | for (i = env->vstart; i < env->vl; i++, env->vstart++) { | 199 | + * version 2 or later, as published by the Free Software Foundation. |
114 | @@ -XXX,XX +XXX,XX @@ vext_ldst_index(void *vd, void *v0, target_ulong base, | 200 | + * |
115 | } | 201 | + * This program is distributed in the hope it will be useful, but WITHOUT |
116 | } | 202 | + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
117 | env->vstart = 0; | 203 | + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for |
118 | + /* set tail elements to 1s */ | 204 | + * more details. |
119 | + for (k = 0; k < nf; ++k) { | 205 | + * |
120 | + vext_set_elems_1s(vd, vta, (k * max_elems + env->vl) * esz, | 206 | + * You should have received a copy of the GNU General Public License along with |
121 | + (k * max_elems + max_elems) * esz); | 207 | + * this program. If not, see <http://www.gnu.org/licenses/>. |
122 | + } | 208 | + */ |
123 | + if (nf * max_elems % total_elems != 0) { | 209 | + |
124 | + uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3; | 210 | +/* |
125 | + uint32_t registers_used = | 211 | + * Zvbc |
126 | + ((nf * max_elems) * esz + (vlenb - 1)) / vlenb; | 212 | + */ |
127 | + vext_set_elems_1s(vd, vta, (nf * max_elems) * esz, | 213 | + |
128 | + registers_used * vlenb); | 214 | +#define GEN_VV_MASKED_TRANS(NAME, CHECK) \ |
129 | + } | 215 | + static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ |
130 | } | 216 | + { \ |
131 | 217 | + if (CHECK(s, a)) { \ | |
132 | #define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN) \ | 218 | + return opivv_trans(a->rd, a->rs1, a->rs2, a->vm, \ |
133 | @@ -XXX,XX +XXX,XX @@ vext_ldff(void *vd, void *v0, target_ulong base, | 219 | + gen_helper_##NAME, s); \ |
134 | uint32_t nf = vext_nf(desc); | 220 | + } \ |
135 | uint32_t vm = vext_vm(desc); | 221 | + return false; \ |
136 | uint32_t max_elems = vext_max_elems(desc, log2_esz); | 222 | + } |
137 | + uint32_t esz = 1 << log2_esz; | 223 | + |
138 | + uint32_t total_elems = vext_get_total_elems(env, desc, esz); | 224 | +static bool vclmul_vv_check(DisasContext *s, arg_rmrr *a) |
139 | + uint32_t vta = vext_vta(desc); | 225 | +{ |
140 | target_ulong addr, offset, remain; | 226 | + return opivv_check(s, a) && |
141 | 227 | + s->cfg_ptr->ext_zvbc == true && | |
142 | /* probe every access*/ | 228 | + s->sew == MO_64; |
143 | @@ -XXX,XX +XXX,XX @@ ProbeSuccess: | 229 | +} |
144 | } | 230 | + |
145 | } | 231 | +GEN_VV_MASKED_TRANS(vclmul_vv, vclmul_vv_check) |
146 | env->vstart = 0; | 232 | +GEN_VV_MASKED_TRANS(vclmulh_vv, vclmul_vv_check) |
147 | + /* set tail elements to 1s */ | 233 | + |
148 | + for (k = 0; k < nf; ++k) { | 234 | +#define GEN_VX_MASKED_TRANS(NAME, CHECK) \ |
149 | + vext_set_elems_1s(vd, vta, (k * max_elems + env->vl) * esz, | 235 | + static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ |
150 | + (k * max_elems + max_elems) * esz); | 236 | + { \ |
151 | + } | 237 | + if (CHECK(s, a)) { \ |
152 | + if (nf * max_elems % total_elems != 0) { | 238 | + return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, \ |
153 | + uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3; | 239 | + gen_helper_##NAME, s); \ |
154 | + uint32_t registers_used = | 240 | + } \ |
155 | + ((nf * max_elems) * esz + (vlenb - 1)) / vlenb; | 241 | + return false; \ |
156 | + vext_set_elems_1s(vd, vta, (nf * max_elems) * esz, | 242 | + } |
157 | + registers_used * vlenb); | 243 | + |
158 | + } | 244 | +static bool vclmul_vx_check(DisasContext *s, arg_rmrr *a) |
159 | } | 245 | +{ |
160 | 246 | + return opivx_check(s, a) && | |
161 | #define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN) \ | 247 | + s->cfg_ptr->ext_zvbc == true && |
162 | diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc | 248 | + s->sew == MO_64; |
163 | index XXXXXXX..XXXXXXX 100644 | 249 | +} |
164 | --- a/target/riscv/insn_trans/trans_rvv.c.inc | 250 | + |
165 | +++ b/target/riscv/insn_trans/trans_rvv.c.inc | 251 | +GEN_VX_MASKED_TRANS(vclmul_vx, vclmul_vx_check) |
166 | @@ -XXX,XX +XXX,XX @@ static bool ld_us_op(DisasContext *s, arg_r2nfvm *a, uint8_t eew) | 252 | +GEN_VX_MASKED_TRANS(vclmulh_vx, vclmul_vx_check) |
167 | data = FIELD_DP32(data, VDATA, VM, a->vm); | 253 | diff --git a/target/riscv/meson.build b/target/riscv/meson.build |
168 | data = FIELD_DP32(data, VDATA, LMUL, emul); | 254 | index XXXXXXX..XXXXXXX 100644 |
169 | data = FIELD_DP32(data, VDATA, NF, a->nf); | 255 | --- a/target/riscv/meson.build |
170 | + data = FIELD_DP32(data, VDATA, VTA, s->vta); | 256 | +++ b/target/riscv/meson.build |
171 | return ldst_us_trans(a->rd, a->rs1, data, fn, s, false); | 257 | @@ -XXX,XX +XXX,XX @@ riscv_ss.add(files( |
172 | } | 258 | 'translate.c', |
173 | 259 | 'm128_helper.c', | |
174 | @@ -XXX,XX +XXX,XX @@ static bool ld_us_mask_op(DisasContext *s, arg_vlm_v *a, uint8_t eew) | 260 | 'crypto_helper.c', |
175 | /* EMUL = 1, NFIELDS = 1 */ | 261 | - 'zce_helper.c' |
176 | data = FIELD_DP32(data, VDATA, LMUL, 0); | 262 | + 'zce_helper.c', |
177 | data = FIELD_DP32(data, VDATA, NF, 1); | 263 | + 'vcrypto_helper.c' |
178 | + /* Mask destination register are always tail-agnostic */ | 264 | )) |
179 | + data = FIELD_DP32(data, VDATA, VTA, s->cfg_vta_all_1s); | 265 | riscv_ss.add(when: 'CONFIG_KVM', if_true: files('kvm.c'), if_false: files('kvm-stub.c')) |
180 | return ldst_us_trans(a->rd, a->rs1, data, fn, s, false); | ||
181 | } | ||
182 | |||
183 | @@ -XXX,XX +XXX,XX @@ static bool ld_stride_op(DisasContext *s, arg_rnfvm *a, uint8_t eew) | ||
184 | data = FIELD_DP32(data, VDATA, VM, a->vm); | ||
185 | data = FIELD_DP32(data, VDATA, LMUL, emul); | ||
186 | data = FIELD_DP32(data, VDATA, NF, a->nf); | ||
187 | + data = FIELD_DP32(data, VDATA, VTA, s->vta); | ||
188 | return ldst_stride_trans(a->rd, a->rs1, a->rs2, data, fn, s, false); | ||
189 | } | ||
190 | |||
191 | @@ -XXX,XX +XXX,XX @@ static bool ld_index_op(DisasContext *s, arg_rnfvm *a, uint8_t eew) | ||
192 | data = FIELD_DP32(data, VDATA, VM, a->vm); | ||
193 | data = FIELD_DP32(data, VDATA, LMUL, emul); | ||
194 | data = FIELD_DP32(data, VDATA, NF, a->nf); | ||
195 | + data = FIELD_DP32(data, VDATA, VTA, s->vta); | ||
196 | return ldst_index_trans(a->rd, a->rs1, a->rs2, data, fn, s, false); | ||
197 | } | ||
198 | |||
199 | @@ -XXX,XX +XXX,XX @@ static bool ldff_op(DisasContext *s, arg_r2nfvm *a, uint8_t eew) | ||
200 | data = FIELD_DP32(data, VDATA, VM, a->vm); | ||
201 | data = FIELD_DP32(data, VDATA, LMUL, emul); | ||
202 | data = FIELD_DP32(data, VDATA, NF, a->nf); | ||
203 | + data = FIELD_DP32(data, VDATA, VTA, s->vta); | ||
204 | return ldff_trans(a->rd, a->rs1, data, fn, s); | ||
205 | } | ||
206 | 266 | ||
207 | -- | 267 | -- |
208 | 2.36.1 | 268 | 2.41.0 | diff view generated by jsdifflib |
1 | From: eopXD <yueh.ting.chen@gmail.com> | 1 | From: Nazar Kazakov <nazar.kazakov@codethink.co.uk> |
---|---|---|---|
2 | 2 | ||
3 | Signed-off-by: eop Chen <eop.chen@sifive.com> | 3 | Move the checks out of `do_opiv{v,x,i}_gvec{,_shift}` functions |
4 | Reviewed-by: Frank Chang <frank.chang@sifive.com> | 4 | and into the corresponding macros. This enables the functions to be |
5 | reused in proceeding commits without check duplication. | ||
6 | |||
7 | Signed-off-by: Nazar Kazakov <nazar.kazakov@codethink.co.uk> | ||
8 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
5 | Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn> | 9 | Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn> |
6 | Acked-by: Alistair Francis <alistair.francis@wdc.com> | 10 | Signed-off-by: Max Chou <max.chou@sifive.com> |
7 | Message-Id: <165449614532.19704.7000832880482980398-8@git.sr.ht> | 11 | Message-ID: <20230711165917.2629866-6-max.chou@sifive.com> |
8 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | 12 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> |
9 | --- | 13 | --- |
10 | target/riscv/vector_helper.c | 11 +++++++++++ | 14 | target/riscv/insn_trans/trans_rvv.c.inc | 28 +++++++++++-------------- |
11 | target/riscv/insn_trans/trans_rvv.c.inc | 3 ++- | 15 | 1 file changed, 12 insertions(+), 16 deletions(-) |
12 | 2 files changed, 13 insertions(+), 1 deletion(-) | ||
13 | 16 | ||
14 | diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c | ||
15 | index XXXXXXX..XXXXXXX 100644 | ||
16 | --- a/target/riscv/vector_helper.c | ||
17 | +++ b/target/riscv/vector_helper.c | ||
18 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, \ | ||
19 | { \ | ||
20 | uint32_t vm = vext_vm(desc); \ | ||
21 | uint32_t vl = env->vl; \ | ||
22 | + uint32_t esz = sizeof(TS1); \ | ||
23 | + uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ | ||
24 | + uint32_t vta = vext_vta(desc); \ | ||
25 | uint32_t i; \ | ||
26 | \ | ||
27 | for (i = env->vstart; i < vl; i++) { \ | ||
28 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, \ | ||
29 | *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK); \ | ||
30 | } \ | ||
31 | env->vstart = 0; \ | ||
32 | + /* set tail elements to 1s */ \ | ||
33 | + vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ | ||
34 | } | ||
35 | |||
36 | GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t, uint8_t, H1, H1, DO_SLL, 0x7) | ||
37 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ | ||
38 | { \ | ||
39 | uint32_t vm = vext_vm(desc); \ | ||
40 | uint32_t vl = env->vl; \ | ||
41 | + uint32_t esz = sizeof(TD); \ | ||
42 | + uint32_t total_elems = \ | ||
43 | + vext_get_total_elems(env, desc, esz); \ | ||
44 | + uint32_t vta = vext_vta(desc); \ | ||
45 | uint32_t i; \ | ||
46 | \ | ||
47 | for (i = env->vstart; i < vl; i++) { \ | ||
48 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ | ||
49 | *((TD *)vd + HD(i)) = OP(s2, s1 & MASK); \ | ||
50 | } \ | ||
51 | env->vstart = 0; \ | ||
52 | + /* set tail elements to 1s */ \ | ||
53 | + vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);\ | ||
54 | } | ||
55 | |||
56 | GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7) | ||
57 | diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc | 17 | diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc |
58 | index XXXXXXX..XXXXXXX 100644 | 18 | index XXXXXXX..XXXXXXX 100644 |
59 | --- a/target/riscv/insn_trans/trans_rvv.c.inc | 19 | --- a/target/riscv/insn_trans/trans_rvv.c.inc |
60 | +++ b/target/riscv/insn_trans/trans_rvv.c.inc | 20 | +++ b/target/riscv/insn_trans/trans_rvv.c.inc |
61 | @@ -XXX,XX +XXX,XX @@ do_opivx_gvec_shift(DisasContext *s, arg_rmrr *a, GVecGen2sFn32 *gvec_fn, | 21 | @@ -XXX,XX +XXX,XX @@ do_opivv_gvec(DisasContext *s, arg_rmrr *a, GVecGen3Fn *gvec_fn, |
62 | return false; | 22 | gen_helper_gvec_4_ptr *fn) |
63 | } | 23 | { |
64 | 24 | TCGLabel *over = gen_new_label(); | |
65 | - if (a->vm && s->vl_eq_vlmax) { | 25 | - if (!opivv_check(s, a)) { |
66 | + if (a->vm && s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) { | 26 | - return false; |
27 | - } | ||
28 | |||
29 | tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); | ||
30 | |||
31 | @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ | ||
32 | gen_helper_##NAME##_b, gen_helper_##NAME##_h, \ | ||
33 | gen_helper_##NAME##_w, gen_helper_##NAME##_d, \ | ||
34 | }; \ | ||
35 | + if (!opivv_check(s, a)) { \ | ||
36 | + return false; \ | ||
37 | + } \ | ||
38 | return do_opivv_gvec(s, a, tcg_gen_gvec_##SUF, fns[s->sew]); \ | ||
39 | } | ||
40 | |||
41 | @@ -XXX,XX +XXX,XX @@ static inline bool | ||
42 | do_opivx_gvec(DisasContext *s, arg_rmrr *a, GVecGen2sFn *gvec_fn, | ||
43 | gen_helper_opivx *fn) | ||
44 | { | ||
45 | - if (!opivx_check(s, a)) { | ||
46 | - return false; | ||
47 | - } | ||
48 | - | ||
49 | if (a->vm && s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) { | ||
50 | TCGv_i64 src1 = tcg_temp_new_i64(); | ||
51 | |||
52 | @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ | ||
53 | gen_helper_##NAME##_b, gen_helper_##NAME##_h, \ | ||
54 | gen_helper_##NAME##_w, gen_helper_##NAME##_d, \ | ||
55 | }; \ | ||
56 | + if (!opivx_check(s, a)) { \ | ||
57 | + return false; \ | ||
58 | + } \ | ||
59 | return do_opivx_gvec(s, a, tcg_gen_gvec_##SUF, fns[s->sew]); \ | ||
60 | } | ||
61 | |||
62 | @@ -XXX,XX +XXX,XX @@ static inline bool | ||
63 | do_opivi_gvec(DisasContext *s, arg_rmrr *a, GVecGen2iFn *gvec_fn, | ||
64 | gen_helper_opivx *fn, imm_mode_t imm_mode) | ||
65 | { | ||
66 | - if (!opivx_check(s, a)) { | ||
67 | - return false; | ||
68 | - } | ||
69 | - | ||
70 | if (a->vm && s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) { | ||
71 | gvec_fn(s->sew, vreg_ofs(s, a->rd), vreg_ofs(s, a->rs2), | ||
72 | extract_imm(s, a->rs1, imm_mode), MAXSZ(s), MAXSZ(s)); | ||
73 | @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ | ||
74 | gen_helper_##OPIVX##_b, gen_helper_##OPIVX##_h, \ | ||
75 | gen_helper_##OPIVX##_w, gen_helper_##OPIVX##_d, \ | ||
76 | }; \ | ||
77 | + if (!opivx_check(s, a)) { \ | ||
78 | + return false; \ | ||
79 | + } \ | ||
80 | return do_opivi_gvec(s, a, tcg_gen_gvec_##SUF, \ | ||
81 | fns[s->sew], IMM_MODE); \ | ||
82 | } | ||
83 | @@ -XXX,XX +XXX,XX @@ static inline bool | ||
84 | do_opivx_gvec_shift(DisasContext *s, arg_rmrr *a, GVecGen2sFn32 *gvec_fn, | ||
85 | gen_helper_opivx *fn) | ||
86 | { | ||
87 | - if (!opivx_check(s, a)) { | ||
88 | - return false; | ||
89 | - } | ||
90 | - | ||
91 | if (a->vm && s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) { | ||
67 | TCGv_i32 src1 = tcg_temp_new_i32(); | 92 | TCGv_i32 src1 = tcg_temp_new_i32(); |
68 | 93 | ||
69 | tcg_gen_trunc_tl_i32(src1, get_gpr(s, a->rs1, EXT_NONE)); | 94 | @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ |
70 | @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ | 95 | gen_helper_##NAME##_b, gen_helper_##NAME##_h, \ |
71 | \ | 96 | gen_helper_##NAME##_w, gen_helper_##NAME##_d, \ |
72 | data = FIELD_DP32(data, VDATA, VM, a->vm); \ | 97 | }; \ |
73 | data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ | 98 | - \ |
74 | + data = FIELD_DP32(data, VDATA, VTA, s->vta); \ | 99 | + if (!opivx_check(s, a)) { \ |
75 | tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ | 100 | + return false; \ |
76 | vreg_ofs(s, a->rs1), \ | 101 | + } \ |
77 | vreg_ofs(s, a->rs2), cpu_env, \ | 102 | return do_opivx_gvec_shift(s, a, tcg_gen_gvec_##SUF, fns[s->sew]); \ |
103 | } | ||
104 | |||
78 | -- | 105 | -- |
79 | 2.36.1 | 106 | 2.41.0 | diff view generated by jsdifflib |
1 | From: eopXD <yueh.ting.chen@gmail.com> | 1 | From: Dickon Hood <dickon.hood@codethink.co.uk> |
---|---|---|---|
2 | 2 | ||
3 | No functional change intended in this commit. | 3 | Zvbb (implemented in later commit) has a widening instruction, which |
4 | requires an extra check on the enabled extensions. Refactor | ||
5 | GEN_OPIVX_WIDEN_TRANS() to take a check function to avoid reimplementing | ||
6 | it. | ||
4 | 7 | ||
5 | Signed-off-by: eop Chen <eop.chen@sifive.com> | 8 | Signed-off-by: Dickon Hood <dickon.hood@codethink.co.uk> |
6 | Reviewed-by: Frank Chang <frank.chang@sifive.com> | 9 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
7 | Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn> | 10 | Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn> |
8 | Reviewed-by: Alistair Francis <alistair.francis@wdc.com> | 11 | Signed-off-by: Max Chou <max.chou@sifive.com> |
9 | Message-Id: <165449614532.19704.7000832880482980398-1@git.sr.ht> | 12 | Message-ID: <20230711165917.2629866-7-max.chou@sifive.com> |
10 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | 13 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> |
11 | --- | 14 | --- |
12 | target/riscv/vector_helper.c | 1132 +++++++++++++++++----------------- | 15 | target/riscv/insn_trans/trans_rvv.c.inc | 52 +++++++++++-------------- |
13 | 1 file changed, 565 insertions(+), 567 deletions(-) | 16 | 1 file changed, 23 insertions(+), 29 deletions(-) |
14 | 17 | ||
15 | diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c | 18 | diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc |
16 | index XXXXXXX..XXXXXXX 100644 | 19 | index XXXXXXX..XXXXXXX 100644 |
17 | --- a/target/riscv/vector_helper.c | 20 | --- a/target/riscv/insn_trans/trans_rvv.c.inc |
18 | +++ b/target/riscv/vector_helper.c | 21 | +++ b/target/riscv/insn_trans/trans_rvv.c.inc |
19 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB) | 22 | @@ -XXX,XX +XXX,XX @@ static bool opivx_widen_check(DisasContext *s, arg_rmrr *a) |
20 | 23 | vext_check_ds(s, a->rd, a->rs2, a->vm); | |
21 | static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2, | ||
22 | CPURISCVState *env, uint32_t desc, | ||
23 | - uint32_t esz, uint32_t dsz, | ||
24 | opivv2_fn *fn) | ||
25 | { | ||
26 | uint32_t vm = vext_vm(desc); | ||
27 | @@ -XXX,XX +XXX,XX @@ static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2, | ||
28 | } | 24 | } |
29 | 25 | ||
30 | /* generate the helpers for OPIVV */ | 26 | -static bool do_opivx_widen(DisasContext *s, arg_rmrr *a, |
31 | -#define GEN_VEXT_VV(NAME, ESZ, DSZ) \ | 27 | - gen_helper_opivx *fn) |
32 | +#define GEN_VEXT_VV(NAME) \ | 28 | -{ |
33 | void HELPER(NAME)(void *vd, void *v0, void *vs1, \ | 29 | - if (opivx_widen_check(s, a)) { |
34 | void *vs2, CPURISCVState *env, \ | 30 | - return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fn, s); |
35 | uint32_t desc) \ | 31 | - } |
36 | { \ | 32 | - return false; |
37 | - do_vext_vv(vd, v0, vs1, vs2, env, desc, ESZ, DSZ, \ | 33 | -} |
38 | + do_vext_vv(vd, v0, vs1, vs2, env, desc, \ | 34 | - |
39 | do_##NAME); \ | 35 | -#define GEN_OPIVX_WIDEN_TRANS(NAME) \ |
36 | -static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ | ||
37 | -{ \ | ||
38 | - static gen_helper_opivx * const fns[3] = { \ | ||
39 | - gen_helper_##NAME##_b, \ | ||
40 | - gen_helper_##NAME##_h, \ | ||
41 | - gen_helper_##NAME##_w \ | ||
42 | - }; \ | ||
43 | - return do_opivx_widen(s, a, fns[s->sew]); \ | ||
44 | +#define GEN_OPIVX_WIDEN_TRANS(NAME, CHECK) \ | ||
45 | +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ | ||
46 | +{ \ | ||
47 | + if (CHECK(s, a)) { \ | ||
48 | + static gen_helper_opivx * const fns[3] = { \ | ||
49 | + gen_helper_##NAME##_b, \ | ||
50 | + gen_helper_##NAME##_h, \ | ||
51 | + gen_helper_##NAME##_w \ | ||
52 | + }; \ | ||
53 | + return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fns[s->sew], s); \ | ||
54 | + } \ | ||
55 | + return false; \ | ||
40 | } | 56 | } |
41 | 57 | ||
42 | -GEN_VEXT_VV(vadd_vv_b, 1, 1) | 58 | -GEN_OPIVX_WIDEN_TRANS(vwaddu_vx) |
43 | -GEN_VEXT_VV(vadd_vv_h, 2, 2) | 59 | -GEN_OPIVX_WIDEN_TRANS(vwadd_vx) |
44 | -GEN_VEXT_VV(vadd_vv_w, 4, 4) | 60 | -GEN_OPIVX_WIDEN_TRANS(vwsubu_vx) |
45 | -GEN_VEXT_VV(vadd_vv_d, 8, 8) | 61 | -GEN_OPIVX_WIDEN_TRANS(vwsub_vx) |
46 | -GEN_VEXT_VV(vsub_vv_b, 1, 1) | 62 | +GEN_OPIVX_WIDEN_TRANS(vwaddu_vx, opivx_widen_check) |
47 | -GEN_VEXT_VV(vsub_vv_h, 2, 2) | 63 | +GEN_OPIVX_WIDEN_TRANS(vwadd_vx, opivx_widen_check) |
48 | -GEN_VEXT_VV(vsub_vv_w, 4, 4) | 64 | +GEN_OPIVX_WIDEN_TRANS(vwsubu_vx, opivx_widen_check) |
49 | -GEN_VEXT_VV(vsub_vv_d, 8, 8) | 65 | +GEN_OPIVX_WIDEN_TRANS(vwsub_vx, opivx_widen_check) |
50 | +GEN_VEXT_VV(vadd_vv_b) | 66 | |
51 | +GEN_VEXT_VV(vadd_vv_h) | 67 | /* WIDEN OPIVV with WIDEN */ |
52 | +GEN_VEXT_VV(vadd_vv_w) | 68 | static bool opiwv_widen_check(DisasContext *s, arg_rmrr *a) |
53 | +GEN_VEXT_VV(vadd_vv_d) | 69 | @@ -XXX,XX +XXX,XX @@ GEN_OPIVX_TRANS(vrem_vx, opivx_check) |
54 | +GEN_VEXT_VV(vsub_vv_b) | 70 | GEN_OPIVV_WIDEN_TRANS(vwmul_vv, opivv_widen_check) |
55 | +GEN_VEXT_VV(vsub_vv_h) | 71 | GEN_OPIVV_WIDEN_TRANS(vwmulu_vv, opivv_widen_check) |
56 | +GEN_VEXT_VV(vsub_vv_w) | 72 | GEN_OPIVV_WIDEN_TRANS(vwmulsu_vv, opivv_widen_check) |
57 | +GEN_VEXT_VV(vsub_vv_d) | 73 | -GEN_OPIVX_WIDEN_TRANS(vwmul_vx) |
58 | 74 | -GEN_OPIVX_WIDEN_TRANS(vwmulu_vx) | |
59 | typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i); | 75 | -GEN_OPIVX_WIDEN_TRANS(vwmulsu_vx) |
60 | 76 | +GEN_OPIVX_WIDEN_TRANS(vwmul_vx, opivx_widen_check) | |
61 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB) | 77 | +GEN_OPIVX_WIDEN_TRANS(vwmulu_vx, opivx_widen_check) |
62 | 78 | +GEN_OPIVX_WIDEN_TRANS(vwmulsu_vx, opivx_widen_check) | |
63 | static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2, | ||
64 | CPURISCVState *env, uint32_t desc, | ||
65 | - uint32_t esz, uint32_t dsz, | ||
66 | opivx2_fn fn) | ||
67 | { | ||
68 | uint32_t vm = vext_vm(desc); | ||
69 | @@ -XXX,XX +XXX,XX @@ static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2, | ||
70 | } | ||
71 | |||
72 | /* generate the helpers for OPIVX */ | ||
73 | -#define GEN_VEXT_VX(NAME, ESZ, DSZ) \ | ||
74 | +#define GEN_VEXT_VX(NAME) \ | ||
75 | void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ | ||
76 | void *vs2, CPURISCVState *env, \ | ||
77 | uint32_t desc) \ | ||
78 | { \ | ||
79 | - do_vext_vx(vd, v0, s1, vs2, env, desc, ESZ, DSZ, \ | ||
80 | + do_vext_vx(vd, v0, s1, vs2, env, desc, \ | ||
81 | do_##NAME); \ | ||
82 | } | ||
83 | |||
84 | -GEN_VEXT_VX(vadd_vx_b, 1, 1) | ||
85 | -GEN_VEXT_VX(vadd_vx_h, 2, 2) | ||
86 | -GEN_VEXT_VX(vadd_vx_w, 4, 4) | ||
87 | -GEN_VEXT_VX(vadd_vx_d, 8, 8) | ||
88 | -GEN_VEXT_VX(vsub_vx_b, 1, 1) | ||
89 | -GEN_VEXT_VX(vsub_vx_h, 2, 2) | ||
90 | -GEN_VEXT_VX(vsub_vx_w, 4, 4) | ||
91 | -GEN_VEXT_VX(vsub_vx_d, 8, 8) | ||
92 | -GEN_VEXT_VX(vrsub_vx_b, 1, 1) | ||
93 | -GEN_VEXT_VX(vrsub_vx_h, 2, 2) | ||
94 | -GEN_VEXT_VX(vrsub_vx_w, 4, 4) | ||
95 | -GEN_VEXT_VX(vrsub_vx_d, 8, 8) | ||
96 | +GEN_VEXT_VX(vadd_vx_b) | ||
97 | +GEN_VEXT_VX(vadd_vx_h) | ||
98 | +GEN_VEXT_VX(vadd_vx_w) | ||
99 | +GEN_VEXT_VX(vadd_vx_d) | ||
100 | +GEN_VEXT_VX(vsub_vx_b) | ||
101 | +GEN_VEXT_VX(vsub_vx_h) | ||
102 | +GEN_VEXT_VX(vsub_vx_w) | ||
103 | +GEN_VEXT_VX(vsub_vx_d) | ||
104 | +GEN_VEXT_VX(vrsub_vx_b) | ||
105 | +GEN_VEXT_VX(vrsub_vx_h) | ||
106 | +GEN_VEXT_VX(vrsub_vx_w) | ||
107 | +GEN_VEXT_VX(vrsub_vx_d) | ||
108 | |||
109 | void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc) | ||
110 | { | ||
111 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD) | ||
112 | RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB) | ||
113 | RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB) | ||
114 | RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB) | ||
115 | -GEN_VEXT_VV(vwaddu_vv_b, 1, 2) | ||
116 | -GEN_VEXT_VV(vwaddu_vv_h, 2, 4) | ||
117 | -GEN_VEXT_VV(vwaddu_vv_w, 4, 8) | ||
118 | -GEN_VEXT_VV(vwsubu_vv_b, 1, 2) | ||
119 | -GEN_VEXT_VV(vwsubu_vv_h, 2, 4) | ||
120 | -GEN_VEXT_VV(vwsubu_vv_w, 4, 8) | ||
121 | -GEN_VEXT_VV(vwadd_vv_b, 1, 2) | ||
122 | -GEN_VEXT_VV(vwadd_vv_h, 2, 4) | ||
123 | -GEN_VEXT_VV(vwadd_vv_w, 4, 8) | ||
124 | -GEN_VEXT_VV(vwsub_vv_b, 1, 2) | ||
125 | -GEN_VEXT_VV(vwsub_vv_h, 2, 4) | ||
126 | -GEN_VEXT_VV(vwsub_vv_w, 4, 8) | ||
127 | -GEN_VEXT_VV(vwaddu_wv_b, 1, 2) | ||
128 | -GEN_VEXT_VV(vwaddu_wv_h, 2, 4) | ||
129 | -GEN_VEXT_VV(vwaddu_wv_w, 4, 8) | ||
130 | -GEN_VEXT_VV(vwsubu_wv_b, 1, 2) | ||
131 | -GEN_VEXT_VV(vwsubu_wv_h, 2, 4) | ||
132 | -GEN_VEXT_VV(vwsubu_wv_w, 4, 8) | ||
133 | -GEN_VEXT_VV(vwadd_wv_b, 1, 2) | ||
134 | -GEN_VEXT_VV(vwadd_wv_h, 2, 4) | ||
135 | -GEN_VEXT_VV(vwadd_wv_w, 4, 8) | ||
136 | -GEN_VEXT_VV(vwsub_wv_b, 1, 2) | ||
137 | -GEN_VEXT_VV(vwsub_wv_h, 2, 4) | ||
138 | -GEN_VEXT_VV(vwsub_wv_w, 4, 8) | ||
139 | +GEN_VEXT_VV(vwaddu_vv_b) | ||
140 | +GEN_VEXT_VV(vwaddu_vv_h) | ||
141 | +GEN_VEXT_VV(vwaddu_vv_w) | ||
142 | +GEN_VEXT_VV(vwsubu_vv_b) | ||
143 | +GEN_VEXT_VV(vwsubu_vv_h) | ||
144 | +GEN_VEXT_VV(vwsubu_vv_w) | ||
145 | +GEN_VEXT_VV(vwadd_vv_b) | ||
146 | +GEN_VEXT_VV(vwadd_vv_h) | ||
147 | +GEN_VEXT_VV(vwadd_vv_w) | ||
148 | +GEN_VEXT_VV(vwsub_vv_b) | ||
149 | +GEN_VEXT_VV(vwsub_vv_h) | ||
150 | +GEN_VEXT_VV(vwsub_vv_w) | ||
151 | +GEN_VEXT_VV(vwaddu_wv_b) | ||
152 | +GEN_VEXT_VV(vwaddu_wv_h) | ||
153 | +GEN_VEXT_VV(vwaddu_wv_w) | ||
154 | +GEN_VEXT_VV(vwsubu_wv_b) | ||
155 | +GEN_VEXT_VV(vwsubu_wv_h) | ||
156 | +GEN_VEXT_VV(vwsubu_wv_w) | ||
157 | +GEN_VEXT_VV(vwadd_wv_b) | ||
158 | +GEN_VEXT_VV(vwadd_wv_h) | ||
159 | +GEN_VEXT_VV(vwadd_wv_w) | ||
160 | +GEN_VEXT_VV(vwsub_wv_b) | ||
161 | +GEN_VEXT_VV(vwsub_wv_h) | ||
162 | +GEN_VEXT_VV(vwsub_wv_w) | ||
163 | |||
164 | RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD) | ||
165 | RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD) | ||
166 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD) | ||
167 | RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB) | ||
168 | RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB) | ||
169 | RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB) | ||
170 | -GEN_VEXT_VX(vwaddu_vx_b, 1, 2) | ||
171 | -GEN_VEXT_VX(vwaddu_vx_h, 2, 4) | ||
172 | -GEN_VEXT_VX(vwaddu_vx_w, 4, 8) | ||
173 | -GEN_VEXT_VX(vwsubu_vx_b, 1, 2) | ||
174 | -GEN_VEXT_VX(vwsubu_vx_h, 2, 4) | ||
175 | -GEN_VEXT_VX(vwsubu_vx_w, 4, 8) | ||
176 | -GEN_VEXT_VX(vwadd_vx_b, 1, 2) | ||
177 | -GEN_VEXT_VX(vwadd_vx_h, 2, 4) | ||
178 | -GEN_VEXT_VX(vwadd_vx_w, 4, 8) | ||
179 | -GEN_VEXT_VX(vwsub_vx_b, 1, 2) | ||
180 | -GEN_VEXT_VX(vwsub_vx_h, 2, 4) | ||
181 | -GEN_VEXT_VX(vwsub_vx_w, 4, 8) | ||
182 | -GEN_VEXT_VX(vwaddu_wx_b, 1, 2) | ||
183 | -GEN_VEXT_VX(vwaddu_wx_h, 2, 4) | ||
184 | -GEN_VEXT_VX(vwaddu_wx_w, 4, 8) | ||
185 | -GEN_VEXT_VX(vwsubu_wx_b, 1, 2) | ||
186 | -GEN_VEXT_VX(vwsubu_wx_h, 2, 4) | ||
187 | -GEN_VEXT_VX(vwsubu_wx_w, 4, 8) | ||
188 | -GEN_VEXT_VX(vwadd_wx_b, 1, 2) | ||
189 | -GEN_VEXT_VX(vwadd_wx_h, 2, 4) | ||
190 | -GEN_VEXT_VX(vwadd_wx_w, 4, 8) | ||
191 | -GEN_VEXT_VX(vwsub_wx_b, 1, 2) | ||
192 | -GEN_VEXT_VX(vwsub_wx_h, 2, 4) | ||
193 | -GEN_VEXT_VX(vwsub_wx_w, 4, 8) | ||
194 | +GEN_VEXT_VX(vwaddu_vx_b) | ||
195 | +GEN_VEXT_VX(vwaddu_vx_h) | ||
196 | +GEN_VEXT_VX(vwaddu_vx_w) | ||
197 | +GEN_VEXT_VX(vwsubu_vx_b) | ||
198 | +GEN_VEXT_VX(vwsubu_vx_h) | ||
199 | +GEN_VEXT_VX(vwsubu_vx_w) | ||
200 | +GEN_VEXT_VX(vwadd_vx_b) | ||
201 | +GEN_VEXT_VX(vwadd_vx_h) | ||
202 | +GEN_VEXT_VX(vwadd_vx_w) | ||
203 | +GEN_VEXT_VX(vwsub_vx_b) | ||
204 | +GEN_VEXT_VX(vwsub_vx_h) | ||
205 | +GEN_VEXT_VX(vwsub_vx_w) | ||
206 | +GEN_VEXT_VX(vwaddu_wx_b) | ||
207 | +GEN_VEXT_VX(vwaddu_wx_h) | ||
208 | +GEN_VEXT_VX(vwaddu_wx_w) | ||
209 | +GEN_VEXT_VX(vwsubu_wx_b) | ||
210 | +GEN_VEXT_VX(vwsubu_wx_h) | ||
211 | +GEN_VEXT_VX(vwsubu_wx_w) | ||
212 | +GEN_VEXT_VX(vwadd_wx_b) | ||
213 | +GEN_VEXT_VX(vwadd_wx_h) | ||
214 | +GEN_VEXT_VX(vwadd_wx_w) | ||
215 | +GEN_VEXT_VX(vwsub_wx_b) | ||
216 | +GEN_VEXT_VX(vwsub_wx_h) | ||
217 | +GEN_VEXT_VX(vwsub_wx_w) | ||
218 | |||
219 | /* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */ | ||
220 | #define DO_VADC(N, M, C) (N + M + C) | ||
221 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR) | ||
222 | RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR) | ||
223 | RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR) | ||
224 | RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR) | ||
225 | -GEN_VEXT_VV(vand_vv_b, 1, 1) | ||
226 | -GEN_VEXT_VV(vand_vv_h, 2, 2) | ||
227 | -GEN_VEXT_VV(vand_vv_w, 4, 4) | ||
228 | -GEN_VEXT_VV(vand_vv_d, 8, 8) | ||
229 | -GEN_VEXT_VV(vor_vv_b, 1, 1) | ||
230 | -GEN_VEXT_VV(vor_vv_h, 2, 2) | ||
231 | -GEN_VEXT_VV(vor_vv_w, 4, 4) | ||
232 | -GEN_VEXT_VV(vor_vv_d, 8, 8) | ||
233 | -GEN_VEXT_VV(vxor_vv_b, 1, 1) | ||
234 | -GEN_VEXT_VV(vxor_vv_h, 2, 2) | ||
235 | -GEN_VEXT_VV(vxor_vv_w, 4, 4) | ||
236 | -GEN_VEXT_VV(vxor_vv_d, 8, 8) | ||
237 | +GEN_VEXT_VV(vand_vv_b) | ||
238 | +GEN_VEXT_VV(vand_vv_h) | ||
239 | +GEN_VEXT_VV(vand_vv_w) | ||
240 | +GEN_VEXT_VV(vand_vv_d) | ||
241 | +GEN_VEXT_VV(vor_vv_b) | ||
242 | +GEN_VEXT_VV(vor_vv_h) | ||
243 | +GEN_VEXT_VV(vor_vv_w) | ||
244 | +GEN_VEXT_VV(vor_vv_d) | ||
245 | +GEN_VEXT_VV(vxor_vv_b) | ||
246 | +GEN_VEXT_VV(vxor_vv_h) | ||
247 | +GEN_VEXT_VV(vxor_vv_w) | ||
248 | +GEN_VEXT_VV(vxor_vv_d) | ||
249 | |||
250 | RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND) | ||
251 | RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND) | ||
252 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR) | ||
253 | RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR) | ||
254 | RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR) | ||
255 | RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR) | ||
256 | -GEN_VEXT_VX(vand_vx_b, 1, 1) | ||
257 | -GEN_VEXT_VX(vand_vx_h, 2, 2) | ||
258 | -GEN_VEXT_VX(vand_vx_w, 4, 4) | ||
259 | -GEN_VEXT_VX(vand_vx_d, 8, 8) | ||
260 | -GEN_VEXT_VX(vor_vx_b, 1, 1) | ||
261 | -GEN_VEXT_VX(vor_vx_h, 2, 2) | ||
262 | -GEN_VEXT_VX(vor_vx_w, 4, 4) | ||
263 | -GEN_VEXT_VX(vor_vx_d, 8, 8) | ||
264 | -GEN_VEXT_VX(vxor_vx_b, 1, 1) | ||
265 | -GEN_VEXT_VX(vxor_vx_h, 2, 2) | ||
266 | -GEN_VEXT_VX(vxor_vx_w, 4, 4) | ||
267 | -GEN_VEXT_VX(vxor_vx_d, 8, 8) | ||
268 | +GEN_VEXT_VX(vand_vx_b) | ||
269 | +GEN_VEXT_VX(vand_vx_h) | ||
270 | +GEN_VEXT_VX(vand_vx_w) | ||
271 | +GEN_VEXT_VX(vand_vx_d) | ||
272 | +GEN_VEXT_VX(vor_vx_b) | ||
273 | +GEN_VEXT_VX(vor_vx_h) | ||
274 | +GEN_VEXT_VX(vor_vx_w) | ||
275 | +GEN_VEXT_VX(vor_vx_d) | ||
276 | +GEN_VEXT_VX(vxor_vx_b) | ||
277 | +GEN_VEXT_VX(vxor_vx_h) | ||
278 | +GEN_VEXT_VX(vxor_vx_w) | ||
279 | +GEN_VEXT_VX(vxor_vx_d) | ||
280 | |||
281 | /* Vector Single-Width Bit Shift Instructions */ | ||
282 | #define DO_SLL(N, M) (N << (M)) | ||
283 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX) | ||
284 | RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX) | ||
285 | RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX) | ||
286 | RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX) | ||
287 | -GEN_VEXT_VV(vminu_vv_b, 1, 1) | ||
288 | -GEN_VEXT_VV(vminu_vv_h, 2, 2) | ||
289 | -GEN_VEXT_VV(vminu_vv_w, 4, 4) | ||
290 | -GEN_VEXT_VV(vminu_vv_d, 8, 8) | ||
291 | -GEN_VEXT_VV(vmin_vv_b, 1, 1) | ||
292 | -GEN_VEXT_VV(vmin_vv_h, 2, 2) | ||
293 | -GEN_VEXT_VV(vmin_vv_w, 4, 4) | ||
294 | -GEN_VEXT_VV(vmin_vv_d, 8, 8) | ||
295 | -GEN_VEXT_VV(vmaxu_vv_b, 1, 1) | ||
296 | -GEN_VEXT_VV(vmaxu_vv_h, 2, 2) | ||
297 | -GEN_VEXT_VV(vmaxu_vv_w, 4, 4) | ||
298 | -GEN_VEXT_VV(vmaxu_vv_d, 8, 8) | ||
299 | -GEN_VEXT_VV(vmax_vv_b, 1, 1) | ||
300 | -GEN_VEXT_VV(vmax_vv_h, 2, 2) | ||
301 | -GEN_VEXT_VV(vmax_vv_w, 4, 4) | ||
302 | -GEN_VEXT_VV(vmax_vv_d, 8, 8) | ||
303 | +GEN_VEXT_VV(vminu_vv_b) | ||
304 | +GEN_VEXT_VV(vminu_vv_h) | ||
305 | +GEN_VEXT_VV(vminu_vv_w) | ||
306 | +GEN_VEXT_VV(vminu_vv_d) | ||
307 | +GEN_VEXT_VV(vmin_vv_b) | ||
308 | +GEN_VEXT_VV(vmin_vv_h) | ||
309 | +GEN_VEXT_VV(vmin_vv_w) | ||
310 | +GEN_VEXT_VV(vmin_vv_d) | ||
311 | +GEN_VEXT_VV(vmaxu_vv_b) | ||
312 | +GEN_VEXT_VV(vmaxu_vv_h) | ||
313 | +GEN_VEXT_VV(vmaxu_vv_w) | ||
314 | +GEN_VEXT_VV(vmaxu_vv_d) | ||
315 | +GEN_VEXT_VV(vmax_vv_b) | ||
316 | +GEN_VEXT_VV(vmax_vv_h) | ||
317 | +GEN_VEXT_VV(vmax_vv_w) | ||
318 | +GEN_VEXT_VV(vmax_vv_d) | ||
319 | |||
320 | RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN) | ||
321 | RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN) | ||
322 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX) | ||
323 | RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX) | ||
324 | RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX) | ||
325 | RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX) | ||
326 | -GEN_VEXT_VX(vminu_vx_b, 1, 1) | ||
327 | -GEN_VEXT_VX(vminu_vx_h, 2, 2) | ||
328 | -GEN_VEXT_VX(vminu_vx_w, 4, 4) | ||
329 | -GEN_VEXT_VX(vminu_vx_d, 8, 8) | ||
330 | -GEN_VEXT_VX(vmin_vx_b, 1, 1) | ||
331 | -GEN_VEXT_VX(vmin_vx_h, 2, 2) | ||
332 | -GEN_VEXT_VX(vmin_vx_w, 4, 4) | ||
333 | -GEN_VEXT_VX(vmin_vx_d, 8, 8) | ||
334 | -GEN_VEXT_VX(vmaxu_vx_b, 1, 1) | ||
335 | -GEN_VEXT_VX(vmaxu_vx_h, 2, 2) | ||
336 | -GEN_VEXT_VX(vmaxu_vx_w, 4, 4) | ||
337 | -GEN_VEXT_VX(vmaxu_vx_d, 8, 8) | ||
338 | -GEN_VEXT_VX(vmax_vx_b, 1, 1) | ||
339 | -GEN_VEXT_VX(vmax_vx_h, 2, 2) | ||
340 | -GEN_VEXT_VX(vmax_vx_w, 4, 4) | ||
341 | -GEN_VEXT_VX(vmax_vx_d, 8, 8) | ||
342 | +GEN_VEXT_VX(vminu_vx_b) | ||
343 | +GEN_VEXT_VX(vminu_vx_h) | ||
344 | +GEN_VEXT_VX(vminu_vx_w) | ||
345 | +GEN_VEXT_VX(vminu_vx_d) | ||
346 | +GEN_VEXT_VX(vmin_vx_b) | ||
347 | +GEN_VEXT_VX(vmin_vx_h) | ||
348 | +GEN_VEXT_VX(vmin_vx_w) | ||
349 | +GEN_VEXT_VX(vmin_vx_d) | ||
350 | +GEN_VEXT_VX(vmaxu_vx_b) | ||
351 | +GEN_VEXT_VX(vmaxu_vx_h) | ||
352 | +GEN_VEXT_VX(vmaxu_vx_w) | ||
353 | +GEN_VEXT_VX(vmaxu_vx_d) | ||
354 | +GEN_VEXT_VX(vmax_vx_b) | ||
355 | +GEN_VEXT_VX(vmax_vx_h) | ||
356 | +GEN_VEXT_VX(vmax_vx_w) | ||
357 | +GEN_VEXT_VX(vmax_vx_d) | ||
358 | |||
359 | /* Vector Single-Width Integer Multiply Instructions */ | ||
360 | #define DO_MUL(N, M) (N * M) | ||
361 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL) | ||
362 | RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL) | ||
363 | RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL) | ||
364 | RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL) | ||
365 | -GEN_VEXT_VV(vmul_vv_b, 1, 1) | ||
366 | -GEN_VEXT_VV(vmul_vv_h, 2, 2) | ||
367 | -GEN_VEXT_VV(vmul_vv_w, 4, 4) | ||
368 | -GEN_VEXT_VV(vmul_vv_d, 8, 8) | ||
369 | +GEN_VEXT_VV(vmul_vv_b) | ||
370 | +GEN_VEXT_VV(vmul_vv_h) | ||
371 | +GEN_VEXT_VV(vmul_vv_w) | ||
372 | +GEN_VEXT_VV(vmul_vv_d) | ||
373 | |||
374 | static int8_t do_mulh_b(int8_t s2, int8_t s1) | ||
375 | { | ||
376 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b) | ||
377 | RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h) | ||
378 | RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w) | ||
379 | RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d) | ||
380 | -GEN_VEXT_VV(vmulh_vv_b, 1, 1) | ||
381 | -GEN_VEXT_VV(vmulh_vv_h, 2, 2) | ||
382 | -GEN_VEXT_VV(vmulh_vv_w, 4, 4) | ||
383 | -GEN_VEXT_VV(vmulh_vv_d, 8, 8) | ||
384 | -GEN_VEXT_VV(vmulhu_vv_b, 1, 1) | ||
385 | -GEN_VEXT_VV(vmulhu_vv_h, 2, 2) | ||
386 | -GEN_VEXT_VV(vmulhu_vv_w, 4, 4) | ||
387 | -GEN_VEXT_VV(vmulhu_vv_d, 8, 8) | ||
388 | -GEN_VEXT_VV(vmulhsu_vv_b, 1, 1) | ||
389 | -GEN_VEXT_VV(vmulhsu_vv_h, 2, 2) | ||
390 | -GEN_VEXT_VV(vmulhsu_vv_w, 4, 4) | ||
391 | -GEN_VEXT_VV(vmulhsu_vv_d, 8, 8) | ||
392 | +GEN_VEXT_VV(vmulh_vv_b) | ||
393 | +GEN_VEXT_VV(vmulh_vv_h) | ||
394 | +GEN_VEXT_VV(vmulh_vv_w) | ||
395 | +GEN_VEXT_VV(vmulh_vv_d) | ||
396 | +GEN_VEXT_VV(vmulhu_vv_b) | ||
397 | +GEN_VEXT_VV(vmulhu_vv_h) | ||
398 | +GEN_VEXT_VV(vmulhu_vv_w) | ||
399 | +GEN_VEXT_VV(vmulhu_vv_d) | ||
400 | +GEN_VEXT_VV(vmulhsu_vv_b) | ||
401 | +GEN_VEXT_VV(vmulhsu_vv_h) | ||
402 | +GEN_VEXT_VV(vmulhsu_vv_w) | ||
403 | +GEN_VEXT_VV(vmulhsu_vv_d) | ||
404 | |||
405 | RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL) | ||
406 | RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL) | ||
407 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b) | ||
408 | RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h) | ||
409 | RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w) | ||
410 | RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d) | ||
411 | -GEN_VEXT_VX(vmul_vx_b, 1, 1) | ||
412 | -GEN_VEXT_VX(vmul_vx_h, 2, 2) | ||
413 | -GEN_VEXT_VX(vmul_vx_w, 4, 4) | ||
414 | -GEN_VEXT_VX(vmul_vx_d, 8, 8) | ||
415 | -GEN_VEXT_VX(vmulh_vx_b, 1, 1) | ||
416 | -GEN_VEXT_VX(vmulh_vx_h, 2, 2) | ||
417 | -GEN_VEXT_VX(vmulh_vx_w, 4, 4) | ||
418 | -GEN_VEXT_VX(vmulh_vx_d, 8, 8) | ||
419 | -GEN_VEXT_VX(vmulhu_vx_b, 1, 1) | ||
420 | -GEN_VEXT_VX(vmulhu_vx_h, 2, 2) | ||
421 | -GEN_VEXT_VX(vmulhu_vx_w, 4, 4) | ||
422 | -GEN_VEXT_VX(vmulhu_vx_d, 8, 8) | ||
423 | -GEN_VEXT_VX(vmulhsu_vx_b, 1, 1) | ||
424 | -GEN_VEXT_VX(vmulhsu_vx_h, 2, 2) | ||
425 | -GEN_VEXT_VX(vmulhsu_vx_w, 4, 4) | ||
426 | -GEN_VEXT_VX(vmulhsu_vx_d, 8, 8) | ||
427 | +GEN_VEXT_VX(vmul_vx_b) | ||
428 | +GEN_VEXT_VX(vmul_vx_h) | ||
429 | +GEN_VEXT_VX(vmul_vx_w) | ||
430 | +GEN_VEXT_VX(vmul_vx_d) | ||
431 | +GEN_VEXT_VX(vmulh_vx_b) | ||
432 | +GEN_VEXT_VX(vmulh_vx_h) | ||
433 | +GEN_VEXT_VX(vmulh_vx_w) | ||
434 | +GEN_VEXT_VX(vmulh_vx_d) | ||
435 | +GEN_VEXT_VX(vmulhu_vx_b) | ||
436 | +GEN_VEXT_VX(vmulhu_vx_h) | ||
437 | +GEN_VEXT_VX(vmulhu_vx_w) | ||
438 | +GEN_VEXT_VX(vmulhu_vx_d) | ||
439 | +GEN_VEXT_VX(vmulhsu_vx_b) | ||
440 | +GEN_VEXT_VX(vmulhsu_vx_h) | ||
441 | +GEN_VEXT_VX(vmulhsu_vx_w) | ||
442 | +GEN_VEXT_VX(vmulhsu_vx_d) | ||
443 | |||
444 | /* Vector Integer Divide Instructions */ | ||
445 | #define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M) | ||
446 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM) | ||
447 | RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM) | ||
448 | RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM) | ||
449 | RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM) | ||
450 | -GEN_VEXT_VV(vdivu_vv_b, 1, 1) | ||
451 | -GEN_VEXT_VV(vdivu_vv_h, 2, 2) | ||
452 | -GEN_VEXT_VV(vdivu_vv_w, 4, 4) | ||
453 | -GEN_VEXT_VV(vdivu_vv_d, 8, 8) | ||
454 | -GEN_VEXT_VV(vdiv_vv_b, 1, 1) | ||
455 | -GEN_VEXT_VV(vdiv_vv_h, 2, 2) | ||
456 | -GEN_VEXT_VV(vdiv_vv_w, 4, 4) | ||
457 | -GEN_VEXT_VV(vdiv_vv_d, 8, 8) | ||
458 | -GEN_VEXT_VV(vremu_vv_b, 1, 1) | ||
459 | -GEN_VEXT_VV(vremu_vv_h, 2, 2) | ||
460 | -GEN_VEXT_VV(vremu_vv_w, 4, 4) | ||
461 | -GEN_VEXT_VV(vremu_vv_d, 8, 8) | ||
462 | -GEN_VEXT_VV(vrem_vv_b, 1, 1) | ||
463 | -GEN_VEXT_VV(vrem_vv_h, 2, 2) | ||
464 | -GEN_VEXT_VV(vrem_vv_w, 4, 4) | ||
465 | -GEN_VEXT_VV(vrem_vv_d, 8, 8) | ||
466 | +GEN_VEXT_VV(vdivu_vv_b) | ||
467 | +GEN_VEXT_VV(vdivu_vv_h) | ||
468 | +GEN_VEXT_VV(vdivu_vv_w) | ||
469 | +GEN_VEXT_VV(vdivu_vv_d) | ||
470 | +GEN_VEXT_VV(vdiv_vv_b) | ||
471 | +GEN_VEXT_VV(vdiv_vv_h) | ||
472 | +GEN_VEXT_VV(vdiv_vv_w) | ||
473 | +GEN_VEXT_VV(vdiv_vv_d) | ||
474 | +GEN_VEXT_VV(vremu_vv_b) | ||
475 | +GEN_VEXT_VV(vremu_vv_h) | ||
476 | +GEN_VEXT_VV(vremu_vv_w) | ||
477 | +GEN_VEXT_VV(vremu_vv_d) | ||
478 | +GEN_VEXT_VV(vrem_vv_b) | ||
479 | +GEN_VEXT_VV(vrem_vv_h) | ||
480 | +GEN_VEXT_VV(vrem_vv_w) | ||
481 | +GEN_VEXT_VV(vrem_vv_d) | ||
482 | |||
483 | RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU) | ||
484 | RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU) | ||
485 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM) | ||
486 | RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM) | ||
487 | RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM) | ||
488 | RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM) | ||
489 | -GEN_VEXT_VX(vdivu_vx_b, 1, 1) | ||
490 | -GEN_VEXT_VX(vdivu_vx_h, 2, 2) | ||
491 | -GEN_VEXT_VX(vdivu_vx_w, 4, 4) | ||
492 | -GEN_VEXT_VX(vdivu_vx_d, 8, 8) | ||
493 | -GEN_VEXT_VX(vdiv_vx_b, 1, 1) | ||
494 | -GEN_VEXT_VX(vdiv_vx_h, 2, 2) | ||
495 | -GEN_VEXT_VX(vdiv_vx_w, 4, 4) | ||
496 | -GEN_VEXT_VX(vdiv_vx_d, 8, 8) | ||
497 | -GEN_VEXT_VX(vremu_vx_b, 1, 1) | ||
498 | -GEN_VEXT_VX(vremu_vx_h, 2, 2) | ||
499 | -GEN_VEXT_VX(vremu_vx_w, 4, 4) | ||
500 | -GEN_VEXT_VX(vremu_vx_d, 8, 8) | ||
501 | -GEN_VEXT_VX(vrem_vx_b, 1, 1) | ||
502 | -GEN_VEXT_VX(vrem_vx_h, 2, 2) | ||
503 | -GEN_VEXT_VX(vrem_vx_w, 4, 4) | ||
504 | -GEN_VEXT_VX(vrem_vx_d, 8, 8) | ||
505 | +GEN_VEXT_VX(vdivu_vx_b) | ||
506 | +GEN_VEXT_VX(vdivu_vx_h) | ||
507 | +GEN_VEXT_VX(vdivu_vx_w) | ||
508 | +GEN_VEXT_VX(vdivu_vx_d) | ||
509 | +GEN_VEXT_VX(vdiv_vx_b) | ||
510 | +GEN_VEXT_VX(vdiv_vx_h) | ||
511 | +GEN_VEXT_VX(vdiv_vx_w) | ||
512 | +GEN_VEXT_VX(vdiv_vx_d) | ||
513 | +GEN_VEXT_VX(vremu_vx_b) | ||
514 | +GEN_VEXT_VX(vremu_vx_h) | ||
515 | +GEN_VEXT_VX(vremu_vx_w) | ||
516 | +GEN_VEXT_VX(vremu_vx_d) | ||
517 | +GEN_VEXT_VX(vrem_vx_b) | ||
518 | +GEN_VEXT_VX(vrem_vx_h) | ||
519 | +GEN_VEXT_VX(vrem_vx_w) | ||
520 | +GEN_VEXT_VX(vrem_vx_d) | ||
521 | |||
522 | /* Vector Widening Integer Multiply Instructions */ | ||
523 | RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL) | ||
524 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL) | ||
525 | RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL) | ||
526 | RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL) | ||
527 | RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL) | ||
528 | -GEN_VEXT_VV(vwmul_vv_b, 1, 2) | ||
529 | -GEN_VEXT_VV(vwmul_vv_h, 2, 4) | ||
530 | -GEN_VEXT_VV(vwmul_vv_w, 4, 8) | ||
531 | -GEN_VEXT_VV(vwmulu_vv_b, 1, 2) | ||
532 | -GEN_VEXT_VV(vwmulu_vv_h, 2, 4) | ||
533 | -GEN_VEXT_VV(vwmulu_vv_w, 4, 8) | ||
534 | -GEN_VEXT_VV(vwmulsu_vv_b, 1, 2) | ||
535 | -GEN_VEXT_VV(vwmulsu_vv_h, 2, 4) | ||
536 | -GEN_VEXT_VV(vwmulsu_vv_w, 4, 8) | ||
537 | +GEN_VEXT_VV(vwmul_vv_b) | ||
538 | +GEN_VEXT_VV(vwmul_vv_h) | ||
539 | +GEN_VEXT_VV(vwmul_vv_w) | ||
540 | +GEN_VEXT_VV(vwmulu_vv_b) | ||
541 | +GEN_VEXT_VV(vwmulu_vv_h) | ||
542 | +GEN_VEXT_VV(vwmulu_vv_w) | ||
543 | +GEN_VEXT_VV(vwmulsu_vv_b) | ||
544 | +GEN_VEXT_VV(vwmulsu_vv_h) | ||
545 | +GEN_VEXT_VV(vwmulsu_vv_w) | ||
546 | |||
547 | RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL) | ||
548 | RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL) | ||
549 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL) | ||
550 | RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL) | ||
551 | RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL) | ||
552 | RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL) | ||
553 | -GEN_VEXT_VX(vwmul_vx_b, 1, 2) | ||
554 | -GEN_VEXT_VX(vwmul_vx_h, 2, 4) | ||
555 | -GEN_VEXT_VX(vwmul_vx_w, 4, 8) | ||
556 | -GEN_VEXT_VX(vwmulu_vx_b, 1, 2) | ||
557 | -GEN_VEXT_VX(vwmulu_vx_h, 2, 4) | ||
558 | -GEN_VEXT_VX(vwmulu_vx_w, 4, 8) | ||
559 | -GEN_VEXT_VX(vwmulsu_vx_b, 1, 2) | ||
560 | -GEN_VEXT_VX(vwmulsu_vx_h, 2, 4) | ||
561 | -GEN_VEXT_VX(vwmulsu_vx_w, 4, 8) | ||
562 | +GEN_VEXT_VX(vwmul_vx_b) | ||
563 | +GEN_VEXT_VX(vwmul_vx_h) | ||
564 | +GEN_VEXT_VX(vwmul_vx_w) | ||
565 | +GEN_VEXT_VX(vwmulu_vx_b) | ||
566 | +GEN_VEXT_VX(vwmulu_vx_h) | ||
567 | +GEN_VEXT_VX(vwmulu_vx_w) | ||
568 | +GEN_VEXT_VX(vwmulsu_vx_b) | ||
569 | +GEN_VEXT_VX(vwmulsu_vx_h) | ||
570 | +GEN_VEXT_VX(vwmulsu_vx_w) | ||
571 | 79 | ||
572 | /* Vector Single-Width Integer Multiply-Add Instructions */ | 80 | /* Vector Single-Width Integer Multiply-Add Instructions */ |
573 | #define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ | 81 | GEN_OPIVV_TRANS(vmacc_vv, opivv_check) |
574 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB) | 82 | @@ -XXX,XX +XXX,XX @@ GEN_OPIVX_TRANS(vnmsub_vx, opivx_check) |
575 | RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB) | 83 | GEN_OPIVV_WIDEN_TRANS(vwmaccu_vv, opivv_widen_check) |
576 | RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB) | 84 | GEN_OPIVV_WIDEN_TRANS(vwmacc_vv, opivv_widen_check) |
577 | RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB) | 85 | GEN_OPIVV_WIDEN_TRANS(vwmaccsu_vv, opivv_widen_check) |
578 | -GEN_VEXT_VV(vmacc_vv_b, 1, 1) | 86 | -GEN_OPIVX_WIDEN_TRANS(vwmaccu_vx) |
579 | -GEN_VEXT_VV(vmacc_vv_h, 2, 2) | 87 | -GEN_OPIVX_WIDEN_TRANS(vwmacc_vx) |
580 | -GEN_VEXT_VV(vmacc_vv_w, 4, 4) | 88 | -GEN_OPIVX_WIDEN_TRANS(vwmaccsu_vx) |
581 | -GEN_VEXT_VV(vmacc_vv_d, 8, 8) | 89 | -GEN_OPIVX_WIDEN_TRANS(vwmaccus_vx) |
582 | -GEN_VEXT_VV(vnmsac_vv_b, 1, 1) | 90 | +GEN_OPIVX_WIDEN_TRANS(vwmaccu_vx, opivx_widen_check) |
583 | -GEN_VEXT_VV(vnmsac_vv_h, 2, 2) | 91 | +GEN_OPIVX_WIDEN_TRANS(vwmacc_vx, opivx_widen_check) |
584 | -GEN_VEXT_VV(vnmsac_vv_w, 4, 4) | 92 | +GEN_OPIVX_WIDEN_TRANS(vwmaccsu_vx, opivx_widen_check) |
585 | -GEN_VEXT_VV(vnmsac_vv_d, 8, 8) | 93 | +GEN_OPIVX_WIDEN_TRANS(vwmaccus_vx, opivx_widen_check) |
586 | -GEN_VEXT_VV(vmadd_vv_b, 1, 1) | ||
587 | -GEN_VEXT_VV(vmadd_vv_h, 2, 2) | ||
588 | -GEN_VEXT_VV(vmadd_vv_w, 4, 4) | ||
589 | -GEN_VEXT_VV(vmadd_vv_d, 8, 8) | ||
590 | -GEN_VEXT_VV(vnmsub_vv_b, 1, 1) | ||
591 | -GEN_VEXT_VV(vnmsub_vv_h, 2, 2) | ||
592 | -GEN_VEXT_VV(vnmsub_vv_w, 4, 4) | ||
593 | -GEN_VEXT_VV(vnmsub_vv_d, 8, 8) | ||
594 | +GEN_VEXT_VV(vmacc_vv_b) | ||
595 | +GEN_VEXT_VV(vmacc_vv_h) | ||
596 | +GEN_VEXT_VV(vmacc_vv_w) | ||
597 | +GEN_VEXT_VV(vmacc_vv_d) | ||
598 | +GEN_VEXT_VV(vnmsac_vv_b) | ||
599 | +GEN_VEXT_VV(vnmsac_vv_h) | ||
600 | +GEN_VEXT_VV(vnmsac_vv_w) | ||
601 | +GEN_VEXT_VV(vnmsac_vv_d) | ||
602 | +GEN_VEXT_VV(vmadd_vv_b) | ||
603 | +GEN_VEXT_VV(vmadd_vv_h) | ||
604 | +GEN_VEXT_VV(vmadd_vv_w) | ||
605 | +GEN_VEXT_VV(vmadd_vv_d) | ||
606 | +GEN_VEXT_VV(vnmsub_vv_b) | ||
607 | +GEN_VEXT_VV(vnmsub_vv_h) | ||
608 | +GEN_VEXT_VV(vnmsub_vv_w) | ||
609 | +GEN_VEXT_VV(vnmsub_vv_d) | ||
610 | |||
611 | #define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ | ||
612 | static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \ | ||
613 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB) | ||
614 | RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB) | ||
615 | RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB) | ||
616 | RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB) | ||
617 | -GEN_VEXT_VX(vmacc_vx_b, 1, 1) | ||
618 | -GEN_VEXT_VX(vmacc_vx_h, 2, 2) | ||
619 | -GEN_VEXT_VX(vmacc_vx_w, 4, 4) | ||
620 | -GEN_VEXT_VX(vmacc_vx_d, 8, 8) | ||
621 | -GEN_VEXT_VX(vnmsac_vx_b, 1, 1) | ||
622 | -GEN_VEXT_VX(vnmsac_vx_h, 2, 2) | ||
623 | -GEN_VEXT_VX(vnmsac_vx_w, 4, 4) | ||
624 | -GEN_VEXT_VX(vnmsac_vx_d, 8, 8) | ||
625 | -GEN_VEXT_VX(vmadd_vx_b, 1, 1) | ||
626 | -GEN_VEXT_VX(vmadd_vx_h, 2, 2) | ||
627 | -GEN_VEXT_VX(vmadd_vx_w, 4, 4) | ||
628 | -GEN_VEXT_VX(vmadd_vx_d, 8, 8) | ||
629 | -GEN_VEXT_VX(vnmsub_vx_b, 1, 1) | ||
630 | -GEN_VEXT_VX(vnmsub_vx_h, 2, 2) | ||
631 | -GEN_VEXT_VX(vnmsub_vx_w, 4, 4) | ||
632 | -GEN_VEXT_VX(vnmsub_vx_d, 8, 8) | ||
633 | +GEN_VEXT_VX(vmacc_vx_b) | ||
634 | +GEN_VEXT_VX(vmacc_vx_h) | ||
635 | +GEN_VEXT_VX(vmacc_vx_w) | ||
636 | +GEN_VEXT_VX(vmacc_vx_d) | ||
637 | +GEN_VEXT_VX(vnmsac_vx_b) | ||
638 | +GEN_VEXT_VX(vnmsac_vx_h) | ||
639 | +GEN_VEXT_VX(vnmsac_vx_w) | ||
640 | +GEN_VEXT_VX(vnmsac_vx_d) | ||
641 | +GEN_VEXT_VX(vmadd_vx_b) | ||
642 | +GEN_VEXT_VX(vmadd_vx_h) | ||
643 | +GEN_VEXT_VX(vmadd_vx_w) | ||
644 | +GEN_VEXT_VX(vmadd_vx_d) | ||
645 | +GEN_VEXT_VX(vnmsub_vx_b) | ||
646 | +GEN_VEXT_VX(vnmsub_vx_h) | ||
647 | +GEN_VEXT_VX(vnmsub_vx_w) | ||
648 | +GEN_VEXT_VX(vnmsub_vx_d) | ||
649 | |||
650 | /* Vector Widening Integer Multiply-Add Instructions */ | ||
651 | RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC) | ||
652 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC) | ||
653 | RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC) | ||
654 | RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC) | ||
655 | RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC) | ||
656 | -GEN_VEXT_VV(vwmaccu_vv_b, 1, 2) | ||
657 | -GEN_VEXT_VV(vwmaccu_vv_h, 2, 4) | ||
658 | -GEN_VEXT_VV(vwmaccu_vv_w, 4, 8) | ||
659 | -GEN_VEXT_VV(vwmacc_vv_b, 1, 2) | ||
660 | -GEN_VEXT_VV(vwmacc_vv_h, 2, 4) | ||
661 | -GEN_VEXT_VV(vwmacc_vv_w, 4, 8) | ||
662 | -GEN_VEXT_VV(vwmaccsu_vv_b, 1, 2) | ||
663 | -GEN_VEXT_VV(vwmaccsu_vv_h, 2, 4) | ||
664 | -GEN_VEXT_VV(vwmaccsu_vv_w, 4, 8) | ||
665 | +GEN_VEXT_VV(vwmaccu_vv_b) | ||
666 | +GEN_VEXT_VV(vwmaccu_vv_h) | ||
667 | +GEN_VEXT_VV(vwmaccu_vv_w) | ||
668 | +GEN_VEXT_VV(vwmacc_vv_b) | ||
669 | +GEN_VEXT_VV(vwmacc_vv_h) | ||
670 | +GEN_VEXT_VV(vwmacc_vv_w) | ||
671 | +GEN_VEXT_VV(vwmaccsu_vv_b) | ||
672 | +GEN_VEXT_VV(vwmaccsu_vv_h) | ||
673 | +GEN_VEXT_VV(vwmaccsu_vv_w) | ||
674 | |||
675 | RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC) | ||
676 | RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC) | ||
677 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC) | ||
678 | RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC) | ||
679 | RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC) | ||
680 | RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC) | ||
681 | -GEN_VEXT_VX(vwmaccu_vx_b, 1, 2) | ||
682 | -GEN_VEXT_VX(vwmaccu_vx_h, 2, 4) | ||
683 | -GEN_VEXT_VX(vwmaccu_vx_w, 4, 8) | ||
684 | -GEN_VEXT_VX(vwmacc_vx_b, 1, 2) | ||
685 | -GEN_VEXT_VX(vwmacc_vx_h, 2, 4) | ||
686 | -GEN_VEXT_VX(vwmacc_vx_w, 4, 8) | ||
687 | -GEN_VEXT_VX(vwmaccsu_vx_b, 1, 2) | ||
688 | -GEN_VEXT_VX(vwmaccsu_vx_h, 2, 4) | ||
689 | -GEN_VEXT_VX(vwmaccsu_vx_w, 4, 8) | ||
690 | -GEN_VEXT_VX(vwmaccus_vx_b, 1, 2) | ||
691 | -GEN_VEXT_VX(vwmaccus_vx_h, 2, 4) | ||
692 | -GEN_VEXT_VX(vwmaccus_vx_w, 4, 8) | ||
693 | +GEN_VEXT_VX(vwmaccu_vx_b) | ||
694 | +GEN_VEXT_VX(vwmaccu_vx_h) | ||
695 | +GEN_VEXT_VX(vwmaccu_vx_w) | ||
696 | +GEN_VEXT_VX(vwmacc_vx_b) | ||
697 | +GEN_VEXT_VX(vwmacc_vx_h) | ||
698 | +GEN_VEXT_VX(vwmacc_vx_w) | ||
699 | +GEN_VEXT_VX(vwmaccsu_vx_b) | ||
700 | +GEN_VEXT_VX(vwmaccsu_vx_h) | ||
701 | +GEN_VEXT_VX(vwmaccsu_vx_w) | ||
702 | +GEN_VEXT_VX(vwmaccus_vx_b) | ||
703 | +GEN_VEXT_VX(vwmaccus_vx_h) | ||
704 | +GEN_VEXT_VX(vwmaccus_vx_w) | ||
705 | 94 | ||
706 | /* Vector Integer Merge and Move Instructions */ | 95 | /* Vector Integer Merge and Move Instructions */ |
707 | #define GEN_VEXT_VMV_VV(NAME, ETYPE, H) \ | 96 | static bool trans_vmv_v_v(DisasContext *s, arg_vmv_v_v *a) |
708 | @@ -XXX,XX +XXX,XX @@ vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2, | ||
709 | static inline void | ||
710 | vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2, | ||
711 | CPURISCVState *env, | ||
712 | - uint32_t desc, uint32_t esz, uint32_t dsz, | ||
713 | + uint32_t desc, | ||
714 | opivv2_rm_fn *fn) | ||
715 | { | ||
716 | uint32_t vm = vext_vm(desc); | ||
717 | @@ -XXX,XX +XXX,XX @@ vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2, | ||
718 | } | ||
719 | |||
720 | /* generate helpers for fixed point instructions with OPIVV format */ | ||
721 | -#define GEN_VEXT_VV_RM(NAME, ESZ, DSZ) \ | ||
722 | +#define GEN_VEXT_VV_RM(NAME) \ | ||
723 | void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ | ||
724 | CPURISCVState *env, uint32_t desc) \ | ||
725 | { \ | ||
726 | - vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, ESZ, DSZ, \ | ||
727 | + vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, \ | ||
728 | do_##NAME); \ | ||
729 | } | ||
730 | |||
731 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8) | ||
732 | RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16) | ||
733 | RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32) | ||
734 | RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64) | ||
735 | -GEN_VEXT_VV_RM(vsaddu_vv_b, 1, 1) | ||
736 | -GEN_VEXT_VV_RM(vsaddu_vv_h, 2, 2) | ||
737 | -GEN_VEXT_VV_RM(vsaddu_vv_w, 4, 4) | ||
738 | -GEN_VEXT_VV_RM(vsaddu_vv_d, 8, 8) | ||
739 | +GEN_VEXT_VV_RM(vsaddu_vv_b) | ||
740 | +GEN_VEXT_VV_RM(vsaddu_vv_h) | ||
741 | +GEN_VEXT_VV_RM(vsaddu_vv_w) | ||
742 | +GEN_VEXT_VV_RM(vsaddu_vv_d) | ||
743 | |||
744 | typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i, | ||
745 | CPURISCVState *env, int vxrm); | ||
746 | @@ -XXX,XX +XXX,XX @@ vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2, | ||
747 | static inline void | ||
748 | vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2, | ||
749 | CPURISCVState *env, | ||
750 | - uint32_t desc, uint32_t esz, uint32_t dsz, | ||
751 | + uint32_t desc, | ||
752 | opivx2_rm_fn *fn) | ||
753 | { | ||
754 | uint32_t vm = vext_vm(desc); | ||
755 | @@ -XXX,XX +XXX,XX @@ vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2, | ||
756 | } | ||
757 | |||
758 | /* generate helpers for fixed point instructions with OPIVX format */ | ||
759 | -#define GEN_VEXT_VX_RM(NAME, ESZ, DSZ) \ | ||
760 | +#define GEN_VEXT_VX_RM(NAME) \ | ||
761 | void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ | ||
762 | void *vs2, CPURISCVState *env, uint32_t desc) \ | ||
763 | { \ | ||
764 | - vext_vx_rm_2(vd, v0, s1, vs2, env, desc, ESZ, DSZ, \ | ||
765 | + vext_vx_rm_2(vd, v0, s1, vs2, env, desc, \ | ||
766 | do_##NAME); \ | ||
767 | } | ||
768 | |||
769 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8) | ||
770 | RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16) | ||
771 | RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32) | ||
772 | RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64) | ||
773 | -GEN_VEXT_VX_RM(vsaddu_vx_b, 1, 1) | ||
774 | -GEN_VEXT_VX_RM(vsaddu_vx_h, 2, 2) | ||
775 | -GEN_VEXT_VX_RM(vsaddu_vx_w, 4, 4) | ||
776 | -GEN_VEXT_VX_RM(vsaddu_vx_d, 8, 8) | ||
777 | +GEN_VEXT_VX_RM(vsaddu_vx_b) | ||
778 | +GEN_VEXT_VX_RM(vsaddu_vx_h) | ||
779 | +GEN_VEXT_VX_RM(vsaddu_vx_w) | ||
780 | +GEN_VEXT_VX_RM(vsaddu_vx_d) | ||
781 | |||
782 | static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) | ||
783 | { | ||
784 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8) | ||
785 | RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16) | ||
786 | RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32) | ||
787 | RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64) | ||
788 | -GEN_VEXT_VV_RM(vsadd_vv_b, 1, 1) | ||
789 | -GEN_VEXT_VV_RM(vsadd_vv_h, 2, 2) | ||
790 | -GEN_VEXT_VV_RM(vsadd_vv_w, 4, 4) | ||
791 | -GEN_VEXT_VV_RM(vsadd_vv_d, 8, 8) | ||
792 | +GEN_VEXT_VV_RM(vsadd_vv_b) | ||
793 | +GEN_VEXT_VV_RM(vsadd_vv_h) | ||
794 | +GEN_VEXT_VV_RM(vsadd_vv_w) | ||
795 | +GEN_VEXT_VV_RM(vsadd_vv_d) | ||
796 | |||
797 | RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8) | ||
798 | RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16) | ||
799 | RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32) | ||
800 | RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64) | ||
801 | -GEN_VEXT_VX_RM(vsadd_vx_b, 1, 1) | ||
802 | -GEN_VEXT_VX_RM(vsadd_vx_h, 2, 2) | ||
803 | -GEN_VEXT_VX_RM(vsadd_vx_w, 4, 4) | ||
804 | -GEN_VEXT_VX_RM(vsadd_vx_d, 8, 8) | ||
805 | +GEN_VEXT_VX_RM(vsadd_vx_b) | ||
806 | +GEN_VEXT_VX_RM(vsadd_vx_h) | ||
807 | +GEN_VEXT_VX_RM(vsadd_vx_w) | ||
808 | +GEN_VEXT_VX_RM(vsadd_vx_d) | ||
809 | |||
810 | static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) | ||
811 | { | ||
812 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8) | ||
813 | RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16) | ||
814 | RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32) | ||
815 | RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64) | ||
816 | -GEN_VEXT_VV_RM(vssubu_vv_b, 1, 1) | ||
817 | -GEN_VEXT_VV_RM(vssubu_vv_h, 2, 2) | ||
818 | -GEN_VEXT_VV_RM(vssubu_vv_w, 4, 4) | ||
819 | -GEN_VEXT_VV_RM(vssubu_vv_d, 8, 8) | ||
820 | +GEN_VEXT_VV_RM(vssubu_vv_b) | ||
821 | +GEN_VEXT_VV_RM(vssubu_vv_h) | ||
822 | +GEN_VEXT_VV_RM(vssubu_vv_w) | ||
823 | +GEN_VEXT_VV_RM(vssubu_vv_d) | ||
824 | |||
825 | RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8) | ||
826 | RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16) | ||
827 | RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32) | ||
828 | RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64) | ||
829 | -GEN_VEXT_VX_RM(vssubu_vx_b, 1, 1) | ||
830 | -GEN_VEXT_VX_RM(vssubu_vx_h, 2, 2) | ||
831 | -GEN_VEXT_VX_RM(vssubu_vx_w, 4, 4) | ||
832 | -GEN_VEXT_VX_RM(vssubu_vx_d, 8, 8) | ||
833 | +GEN_VEXT_VX_RM(vssubu_vx_b) | ||
834 | +GEN_VEXT_VX_RM(vssubu_vx_h) | ||
835 | +GEN_VEXT_VX_RM(vssubu_vx_w) | ||
836 | +GEN_VEXT_VX_RM(vssubu_vx_d) | ||
837 | |||
838 | static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) | ||
839 | { | ||
840 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8) | ||
841 | RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16) | ||
842 | RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32) | ||
843 | RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64) | ||
844 | -GEN_VEXT_VV_RM(vssub_vv_b, 1, 1) | ||
845 | -GEN_VEXT_VV_RM(vssub_vv_h, 2, 2) | ||
846 | -GEN_VEXT_VV_RM(vssub_vv_w, 4, 4) | ||
847 | -GEN_VEXT_VV_RM(vssub_vv_d, 8, 8) | ||
848 | +GEN_VEXT_VV_RM(vssub_vv_b) | ||
849 | +GEN_VEXT_VV_RM(vssub_vv_h) | ||
850 | +GEN_VEXT_VV_RM(vssub_vv_w) | ||
851 | +GEN_VEXT_VV_RM(vssub_vv_d) | ||
852 | |||
853 | RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8) | ||
854 | RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16) | ||
855 | RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32) | ||
856 | RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64) | ||
857 | -GEN_VEXT_VX_RM(vssub_vx_b, 1, 1) | ||
858 | -GEN_VEXT_VX_RM(vssub_vx_h, 2, 2) | ||
859 | -GEN_VEXT_VX_RM(vssub_vx_w, 4, 4) | ||
860 | -GEN_VEXT_VX_RM(vssub_vx_d, 8, 8) | ||
861 | +GEN_VEXT_VX_RM(vssub_vx_b) | ||
862 | +GEN_VEXT_VX_RM(vssub_vx_h) | ||
863 | +GEN_VEXT_VX_RM(vssub_vx_w) | ||
864 | +GEN_VEXT_VX_RM(vssub_vx_d) | ||
865 | |||
866 | /* Vector Single-Width Averaging Add and Subtract */ | ||
867 | static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift) | ||
868 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32) | ||
869 | RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32) | ||
870 | RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32) | ||
871 | RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64) | ||
872 | -GEN_VEXT_VV_RM(vaadd_vv_b, 1, 1) | ||
873 | -GEN_VEXT_VV_RM(vaadd_vv_h, 2, 2) | ||
874 | -GEN_VEXT_VV_RM(vaadd_vv_w, 4, 4) | ||
875 | -GEN_VEXT_VV_RM(vaadd_vv_d, 8, 8) | ||
876 | +GEN_VEXT_VV_RM(vaadd_vv_b) | ||
877 | +GEN_VEXT_VV_RM(vaadd_vv_h) | ||
878 | +GEN_VEXT_VV_RM(vaadd_vv_w) | ||
879 | +GEN_VEXT_VV_RM(vaadd_vv_d) | ||
880 | |||
881 | RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32) | ||
882 | RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32) | ||
883 | RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32) | ||
884 | RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64) | ||
885 | -GEN_VEXT_VX_RM(vaadd_vx_b, 1, 1) | ||
886 | -GEN_VEXT_VX_RM(vaadd_vx_h, 2, 2) | ||
887 | -GEN_VEXT_VX_RM(vaadd_vx_w, 4, 4) | ||
888 | -GEN_VEXT_VX_RM(vaadd_vx_d, 8, 8) | ||
889 | +GEN_VEXT_VX_RM(vaadd_vx_b) | ||
890 | +GEN_VEXT_VX_RM(vaadd_vx_h) | ||
891 | +GEN_VEXT_VX_RM(vaadd_vx_w) | ||
892 | +GEN_VEXT_VX_RM(vaadd_vx_d) | ||
893 | |||
894 | static inline uint32_t aaddu32(CPURISCVState *env, int vxrm, | ||
895 | uint32_t a, uint32_t b) | ||
896 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2_RM, vaaddu_vv_b, OP_UUU_B, H1, H1, H1, aaddu32) | ||
897 | RVVCALL(OPIVV2_RM, vaaddu_vv_h, OP_UUU_H, H2, H2, H2, aaddu32) | ||
898 | RVVCALL(OPIVV2_RM, vaaddu_vv_w, OP_UUU_W, H4, H4, H4, aaddu32) | ||
899 | RVVCALL(OPIVV2_RM, vaaddu_vv_d, OP_UUU_D, H8, H8, H8, aaddu64) | ||
900 | -GEN_VEXT_VV_RM(vaaddu_vv_b, 1, 1) | ||
901 | -GEN_VEXT_VV_RM(vaaddu_vv_h, 2, 2) | ||
902 | -GEN_VEXT_VV_RM(vaaddu_vv_w, 4, 4) | ||
903 | -GEN_VEXT_VV_RM(vaaddu_vv_d, 8, 8) | ||
904 | +GEN_VEXT_VV_RM(vaaddu_vv_b) | ||
905 | +GEN_VEXT_VV_RM(vaaddu_vv_h) | ||
906 | +GEN_VEXT_VV_RM(vaaddu_vv_w) | ||
907 | +GEN_VEXT_VV_RM(vaaddu_vv_d) | ||
908 | |||
909 | RVVCALL(OPIVX2_RM, vaaddu_vx_b, OP_UUU_B, H1, H1, aaddu32) | ||
910 | RVVCALL(OPIVX2_RM, vaaddu_vx_h, OP_UUU_H, H2, H2, aaddu32) | ||
911 | RVVCALL(OPIVX2_RM, vaaddu_vx_w, OP_UUU_W, H4, H4, aaddu32) | ||
912 | RVVCALL(OPIVX2_RM, vaaddu_vx_d, OP_UUU_D, H8, H8, aaddu64) | ||
913 | -GEN_VEXT_VX_RM(vaaddu_vx_b, 1, 1) | ||
914 | -GEN_VEXT_VX_RM(vaaddu_vx_h, 2, 2) | ||
915 | -GEN_VEXT_VX_RM(vaaddu_vx_w, 4, 4) | ||
916 | -GEN_VEXT_VX_RM(vaaddu_vx_d, 8, 8) | ||
917 | +GEN_VEXT_VX_RM(vaaddu_vx_b) | ||
918 | +GEN_VEXT_VX_RM(vaaddu_vx_h) | ||
919 | +GEN_VEXT_VX_RM(vaaddu_vx_w) | ||
920 | +GEN_VEXT_VX_RM(vaaddu_vx_d) | ||
921 | |||
922 | static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) | ||
923 | { | ||
924 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32) | ||
925 | RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32) | ||
926 | RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32) | ||
927 | RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64) | ||
928 | -GEN_VEXT_VV_RM(vasub_vv_b, 1, 1) | ||
929 | -GEN_VEXT_VV_RM(vasub_vv_h, 2, 2) | ||
930 | -GEN_VEXT_VV_RM(vasub_vv_w, 4, 4) | ||
931 | -GEN_VEXT_VV_RM(vasub_vv_d, 8, 8) | ||
932 | +GEN_VEXT_VV_RM(vasub_vv_b) | ||
933 | +GEN_VEXT_VV_RM(vasub_vv_h) | ||
934 | +GEN_VEXT_VV_RM(vasub_vv_w) | ||
935 | +GEN_VEXT_VV_RM(vasub_vv_d) | ||
936 | |||
937 | RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32) | ||
938 | RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32) | ||
939 | RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32) | ||
940 | RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64) | ||
941 | -GEN_VEXT_VX_RM(vasub_vx_b, 1, 1) | ||
942 | -GEN_VEXT_VX_RM(vasub_vx_h, 2, 2) | ||
943 | -GEN_VEXT_VX_RM(vasub_vx_w, 4, 4) | ||
944 | -GEN_VEXT_VX_RM(vasub_vx_d, 8, 8) | ||
945 | +GEN_VEXT_VX_RM(vasub_vx_b) | ||
946 | +GEN_VEXT_VX_RM(vasub_vx_h) | ||
947 | +GEN_VEXT_VX_RM(vasub_vx_w) | ||
948 | +GEN_VEXT_VX_RM(vasub_vx_d) | ||
949 | |||
950 | static inline uint32_t asubu32(CPURISCVState *env, int vxrm, | ||
951 | uint32_t a, uint32_t b) | ||
952 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2_RM, vasubu_vv_b, OP_UUU_B, H1, H1, H1, asubu32) | ||
953 | RVVCALL(OPIVV2_RM, vasubu_vv_h, OP_UUU_H, H2, H2, H2, asubu32) | ||
954 | RVVCALL(OPIVV2_RM, vasubu_vv_w, OP_UUU_W, H4, H4, H4, asubu32) | ||
955 | RVVCALL(OPIVV2_RM, vasubu_vv_d, OP_UUU_D, H8, H8, H8, asubu64) | ||
956 | -GEN_VEXT_VV_RM(vasubu_vv_b, 1, 1) | ||
957 | -GEN_VEXT_VV_RM(vasubu_vv_h, 2, 2) | ||
958 | -GEN_VEXT_VV_RM(vasubu_vv_w, 4, 4) | ||
959 | -GEN_VEXT_VV_RM(vasubu_vv_d, 8, 8) | ||
960 | +GEN_VEXT_VV_RM(vasubu_vv_b) | ||
961 | +GEN_VEXT_VV_RM(vasubu_vv_h) | ||
962 | +GEN_VEXT_VV_RM(vasubu_vv_w) | ||
963 | +GEN_VEXT_VV_RM(vasubu_vv_d) | ||
964 | |||
965 | RVVCALL(OPIVX2_RM, vasubu_vx_b, OP_UUU_B, H1, H1, asubu32) | ||
966 | RVVCALL(OPIVX2_RM, vasubu_vx_h, OP_UUU_H, H2, H2, asubu32) | ||
967 | RVVCALL(OPIVX2_RM, vasubu_vx_w, OP_UUU_W, H4, H4, asubu32) | ||
968 | RVVCALL(OPIVX2_RM, vasubu_vx_d, OP_UUU_D, H8, H8, asubu64) | ||
969 | -GEN_VEXT_VX_RM(vasubu_vx_b, 1, 1) | ||
970 | -GEN_VEXT_VX_RM(vasubu_vx_h, 2, 2) | ||
971 | -GEN_VEXT_VX_RM(vasubu_vx_w, 4, 4) | ||
972 | -GEN_VEXT_VX_RM(vasubu_vx_d, 8, 8) | ||
973 | +GEN_VEXT_VX_RM(vasubu_vx_b) | ||
974 | +GEN_VEXT_VX_RM(vasubu_vx_h) | ||
975 | +GEN_VEXT_VX_RM(vasubu_vx_w) | ||
976 | +GEN_VEXT_VX_RM(vasubu_vx_d) | ||
977 | |||
978 | /* Vector Single-Width Fractional Multiply with Rounding and Saturation */ | ||
979 | static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) | ||
980 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8) | ||
981 | RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16) | ||
982 | RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32) | ||
983 | RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64) | ||
984 | -GEN_VEXT_VV_RM(vsmul_vv_b, 1, 1) | ||
985 | -GEN_VEXT_VV_RM(vsmul_vv_h, 2, 2) | ||
986 | -GEN_VEXT_VV_RM(vsmul_vv_w, 4, 4) | ||
987 | -GEN_VEXT_VV_RM(vsmul_vv_d, 8, 8) | ||
988 | +GEN_VEXT_VV_RM(vsmul_vv_b) | ||
989 | +GEN_VEXT_VV_RM(vsmul_vv_h) | ||
990 | +GEN_VEXT_VV_RM(vsmul_vv_w) | ||
991 | +GEN_VEXT_VV_RM(vsmul_vv_d) | ||
992 | |||
993 | RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8) | ||
994 | RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16) | ||
995 | RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32) | ||
996 | RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64) | ||
997 | -GEN_VEXT_VX_RM(vsmul_vx_b, 1, 1) | ||
998 | -GEN_VEXT_VX_RM(vsmul_vx_h, 2, 2) | ||
999 | -GEN_VEXT_VX_RM(vsmul_vx_w, 4, 4) | ||
1000 | -GEN_VEXT_VX_RM(vsmul_vx_d, 8, 8) | ||
1001 | +GEN_VEXT_VX_RM(vsmul_vx_b) | ||
1002 | +GEN_VEXT_VX_RM(vsmul_vx_h) | ||
1003 | +GEN_VEXT_VX_RM(vsmul_vx_w) | ||
1004 | +GEN_VEXT_VX_RM(vsmul_vx_d) | ||
1005 | |||
1006 | /* Vector Single-Width Scaling Shift Instructions */ | ||
1007 | static inline uint8_t | ||
1008 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8) | ||
1009 | RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16) | ||
1010 | RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32) | ||
1011 | RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64) | ||
1012 | -GEN_VEXT_VV_RM(vssrl_vv_b, 1, 1) | ||
1013 | -GEN_VEXT_VV_RM(vssrl_vv_h, 2, 2) | ||
1014 | -GEN_VEXT_VV_RM(vssrl_vv_w, 4, 4) | ||
1015 | -GEN_VEXT_VV_RM(vssrl_vv_d, 8, 8) | ||
1016 | +GEN_VEXT_VV_RM(vssrl_vv_b) | ||
1017 | +GEN_VEXT_VV_RM(vssrl_vv_h) | ||
1018 | +GEN_VEXT_VV_RM(vssrl_vv_w) | ||
1019 | +GEN_VEXT_VV_RM(vssrl_vv_d) | ||
1020 | |||
1021 | RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8) | ||
1022 | RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16) | ||
1023 | RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32) | ||
1024 | RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64) | ||
1025 | -GEN_VEXT_VX_RM(vssrl_vx_b, 1, 1) | ||
1026 | -GEN_VEXT_VX_RM(vssrl_vx_h, 2, 2) | ||
1027 | -GEN_VEXT_VX_RM(vssrl_vx_w, 4, 4) | ||
1028 | -GEN_VEXT_VX_RM(vssrl_vx_d, 8, 8) | ||
1029 | +GEN_VEXT_VX_RM(vssrl_vx_b) | ||
1030 | +GEN_VEXT_VX_RM(vssrl_vx_h) | ||
1031 | +GEN_VEXT_VX_RM(vssrl_vx_w) | ||
1032 | +GEN_VEXT_VX_RM(vssrl_vx_d) | ||
1033 | |||
1034 | static inline int8_t | ||
1035 | vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) | ||
1036 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8) | ||
1037 | RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16) | ||
1038 | RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32) | ||
1039 | RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64) | ||
1040 | -GEN_VEXT_VV_RM(vssra_vv_b, 1, 1) | ||
1041 | -GEN_VEXT_VV_RM(vssra_vv_h, 2, 2) | ||
1042 | -GEN_VEXT_VV_RM(vssra_vv_w, 4, 4) | ||
1043 | -GEN_VEXT_VV_RM(vssra_vv_d, 8, 8) | ||
1044 | +GEN_VEXT_VV_RM(vssra_vv_b) | ||
1045 | +GEN_VEXT_VV_RM(vssra_vv_h) | ||
1046 | +GEN_VEXT_VV_RM(vssra_vv_w) | ||
1047 | +GEN_VEXT_VV_RM(vssra_vv_d) | ||
1048 | |||
1049 | RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8) | ||
1050 | RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16) | ||
1051 | RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32) | ||
1052 | RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64) | ||
1053 | -GEN_VEXT_VX_RM(vssra_vx_b, 1, 1) | ||
1054 | -GEN_VEXT_VX_RM(vssra_vx_h, 2, 2) | ||
1055 | -GEN_VEXT_VX_RM(vssra_vx_w, 4, 4) | ||
1056 | -GEN_VEXT_VX_RM(vssra_vx_d, 8, 8) | ||
1057 | +GEN_VEXT_VX_RM(vssra_vx_b) | ||
1058 | +GEN_VEXT_VX_RM(vssra_vx_h) | ||
1059 | +GEN_VEXT_VX_RM(vssra_vx_w) | ||
1060 | +GEN_VEXT_VX_RM(vssra_vx_d) | ||
1061 | |||
1062 | /* Vector Narrowing Fixed-Point Clip Instructions */ | ||
1063 | static inline int8_t | ||
1064 | @@ -XXX,XX +XXX,XX @@ vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b) | ||
1065 | RVVCALL(OPIVV2_RM, vnclip_wv_b, NOP_SSS_B, H1, H2, H1, vnclip8) | ||
1066 | RVVCALL(OPIVV2_RM, vnclip_wv_h, NOP_SSS_H, H2, H4, H2, vnclip16) | ||
1067 | RVVCALL(OPIVV2_RM, vnclip_wv_w, NOP_SSS_W, H4, H8, H4, vnclip32) | ||
1068 | -GEN_VEXT_VV_RM(vnclip_wv_b, 1, 1) | ||
1069 | -GEN_VEXT_VV_RM(vnclip_wv_h, 2, 2) | ||
1070 | -GEN_VEXT_VV_RM(vnclip_wv_w, 4, 4) | ||
1071 | +GEN_VEXT_VV_RM(vnclip_wv_b) | ||
1072 | +GEN_VEXT_VV_RM(vnclip_wv_h) | ||
1073 | +GEN_VEXT_VV_RM(vnclip_wv_w) | ||
1074 | |||
1075 | RVVCALL(OPIVX2_RM, vnclip_wx_b, NOP_SSS_B, H1, H2, vnclip8) | ||
1076 | RVVCALL(OPIVX2_RM, vnclip_wx_h, NOP_SSS_H, H2, H4, vnclip16) | ||
1077 | RVVCALL(OPIVX2_RM, vnclip_wx_w, NOP_SSS_W, H4, H8, vnclip32) | ||
1078 | -GEN_VEXT_VX_RM(vnclip_wx_b, 1, 1) | ||
1079 | -GEN_VEXT_VX_RM(vnclip_wx_h, 2, 2) | ||
1080 | -GEN_VEXT_VX_RM(vnclip_wx_w, 4, 4) | ||
1081 | +GEN_VEXT_VX_RM(vnclip_wx_b) | ||
1082 | +GEN_VEXT_VX_RM(vnclip_wx_h) | ||
1083 | +GEN_VEXT_VX_RM(vnclip_wx_w) | ||
1084 | |||
1085 | static inline uint8_t | ||
1086 | vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b) | ||
1087 | @@ -XXX,XX +XXX,XX @@ vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b) | ||
1088 | RVVCALL(OPIVV2_RM, vnclipu_wv_b, NOP_UUU_B, H1, H2, H1, vnclipu8) | ||
1089 | RVVCALL(OPIVV2_RM, vnclipu_wv_h, NOP_UUU_H, H2, H4, H2, vnclipu16) | ||
1090 | RVVCALL(OPIVV2_RM, vnclipu_wv_w, NOP_UUU_W, H4, H8, H4, vnclipu32) | ||
1091 | -GEN_VEXT_VV_RM(vnclipu_wv_b, 1, 1) | ||
1092 | -GEN_VEXT_VV_RM(vnclipu_wv_h, 2, 2) | ||
1093 | -GEN_VEXT_VV_RM(vnclipu_wv_w, 4, 4) | ||
1094 | +GEN_VEXT_VV_RM(vnclipu_wv_b) | ||
1095 | +GEN_VEXT_VV_RM(vnclipu_wv_h) | ||
1096 | +GEN_VEXT_VV_RM(vnclipu_wv_w) | ||
1097 | |||
1098 | RVVCALL(OPIVX2_RM, vnclipu_wx_b, NOP_UUU_B, H1, H2, vnclipu8) | ||
1099 | RVVCALL(OPIVX2_RM, vnclipu_wx_h, NOP_UUU_H, H2, H4, vnclipu16) | ||
1100 | RVVCALL(OPIVX2_RM, vnclipu_wx_w, NOP_UUU_W, H4, H8, vnclipu32) | ||
1101 | -GEN_VEXT_VX_RM(vnclipu_wx_b, 1, 1) | ||
1102 | -GEN_VEXT_VX_RM(vnclipu_wx_h, 2, 2) | ||
1103 | -GEN_VEXT_VX_RM(vnclipu_wx_w, 4, 4) | ||
1104 | +GEN_VEXT_VX_RM(vnclipu_wx_b) | ||
1105 | +GEN_VEXT_VX_RM(vnclipu_wx_h) | ||
1106 | +GEN_VEXT_VX_RM(vnclipu_wx_w) | ||
1107 | |||
1108 | /* | ||
1109 | *** Vector Float Point Arithmetic Instructions | ||
1110 | @@ -XXX,XX +XXX,XX @@ static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ | ||
1111 | *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status); \ | ||
1112 | } | ||
1113 | |||
1114 | -#define GEN_VEXT_VV_ENV(NAME, ESZ, DSZ) \ | ||
1115 | +#define GEN_VEXT_VV_ENV(NAME) \ | ||
1116 | void HELPER(NAME)(void *vd, void *v0, void *vs1, \ | ||
1117 | void *vs2, CPURISCVState *env, \ | ||
1118 | uint32_t desc) \ | ||
1119 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, \ | ||
1120 | RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add) | ||
1121 | RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add) | ||
1122 | RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add) | ||
1123 | -GEN_VEXT_VV_ENV(vfadd_vv_h, 2, 2) | ||
1124 | -GEN_VEXT_VV_ENV(vfadd_vv_w, 4, 4) | ||
1125 | -GEN_VEXT_VV_ENV(vfadd_vv_d, 8, 8) | ||
1126 | +GEN_VEXT_VV_ENV(vfadd_vv_h) | ||
1127 | +GEN_VEXT_VV_ENV(vfadd_vv_w) | ||
1128 | +GEN_VEXT_VV_ENV(vfadd_vv_d) | ||
1129 | |||
1130 | #define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ | ||
1131 | static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ | ||
1132 | @@ -XXX,XX +XXX,XX @@ static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ | ||
1133 | *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, &env->fp_status);\ | ||
1134 | } | ||
1135 | |||
1136 | -#define GEN_VEXT_VF(NAME, ESZ, DSZ) \ | ||
1137 | +#define GEN_VEXT_VF(NAME) \ | ||
1138 | void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \ | ||
1139 | void *vs2, CPURISCVState *env, \ | ||
1140 | uint32_t desc) \ | ||
1141 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \ | ||
1142 | RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add) | ||
1143 | RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add) | ||
1144 | RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add) | ||
1145 | -GEN_VEXT_VF(vfadd_vf_h, 2, 2) | ||
1146 | -GEN_VEXT_VF(vfadd_vf_w, 4, 4) | ||
1147 | -GEN_VEXT_VF(vfadd_vf_d, 8, 8) | ||
1148 | +GEN_VEXT_VF(vfadd_vf_h) | ||
1149 | +GEN_VEXT_VF(vfadd_vf_w) | ||
1150 | +GEN_VEXT_VF(vfadd_vf_d) | ||
1151 | |||
1152 | RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub) | ||
1153 | RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub) | ||
1154 | RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub) | ||
1155 | -GEN_VEXT_VV_ENV(vfsub_vv_h, 2, 2) | ||
1156 | -GEN_VEXT_VV_ENV(vfsub_vv_w, 4, 4) | ||
1157 | -GEN_VEXT_VV_ENV(vfsub_vv_d, 8, 8) | ||
1158 | +GEN_VEXT_VV_ENV(vfsub_vv_h) | ||
1159 | +GEN_VEXT_VV_ENV(vfsub_vv_w) | ||
1160 | +GEN_VEXT_VV_ENV(vfsub_vv_d) | ||
1161 | RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub) | ||
1162 | RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub) | ||
1163 | RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub) | ||
1164 | -GEN_VEXT_VF(vfsub_vf_h, 2, 2) | ||
1165 | -GEN_VEXT_VF(vfsub_vf_w, 4, 4) | ||
1166 | -GEN_VEXT_VF(vfsub_vf_d, 8, 8) | ||
1167 | +GEN_VEXT_VF(vfsub_vf_h) | ||
1168 | +GEN_VEXT_VF(vfsub_vf_w) | ||
1169 | +GEN_VEXT_VF(vfsub_vf_d) | ||
1170 | |||
1171 | static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s) | ||
1172 | { | ||
1173 | @@ -XXX,XX +XXX,XX @@ static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s) | ||
1174 | RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub) | ||
1175 | RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub) | ||
1176 | RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub) | ||
1177 | -GEN_VEXT_VF(vfrsub_vf_h, 2, 2) | ||
1178 | -GEN_VEXT_VF(vfrsub_vf_w, 4, 4) | ||
1179 | -GEN_VEXT_VF(vfrsub_vf_d, 8, 8) | ||
1180 | +GEN_VEXT_VF(vfrsub_vf_h) | ||
1181 | +GEN_VEXT_VF(vfrsub_vf_w) | ||
1182 | +GEN_VEXT_VF(vfrsub_vf_d) | ||
1183 | |||
1184 | /* Vector Widening Floating-Point Add/Subtract Instructions */ | ||
1185 | static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s) | ||
1186 | @@ -XXX,XX +XXX,XX @@ static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s) | ||
1187 | |||
1188 | RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16) | ||
1189 | RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32) | ||
1190 | -GEN_VEXT_VV_ENV(vfwadd_vv_h, 2, 4) | ||
1191 | -GEN_VEXT_VV_ENV(vfwadd_vv_w, 4, 8) | ||
1192 | +GEN_VEXT_VV_ENV(vfwadd_vv_h) | ||
1193 | +GEN_VEXT_VV_ENV(vfwadd_vv_w) | ||
1194 | RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16) | ||
1195 | RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32) | ||
1196 | -GEN_VEXT_VF(vfwadd_vf_h, 2, 4) | ||
1197 | -GEN_VEXT_VF(vfwadd_vf_w, 4, 8) | ||
1198 | +GEN_VEXT_VF(vfwadd_vf_h) | ||
1199 | +GEN_VEXT_VF(vfwadd_vf_w) | ||
1200 | |||
1201 | static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s) | ||
1202 | { | ||
1203 | @@ -XXX,XX +XXX,XX @@ static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s) | ||
1204 | |||
1205 | RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16) | ||
1206 | RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32) | ||
1207 | -GEN_VEXT_VV_ENV(vfwsub_vv_h, 2, 4) | ||
1208 | -GEN_VEXT_VV_ENV(vfwsub_vv_w, 4, 8) | ||
1209 | +GEN_VEXT_VV_ENV(vfwsub_vv_h) | ||
1210 | +GEN_VEXT_VV_ENV(vfwsub_vv_w) | ||
1211 | RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16) | ||
1212 | RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32) | ||
1213 | -GEN_VEXT_VF(vfwsub_vf_h, 2, 4) | ||
1214 | -GEN_VEXT_VF(vfwsub_vf_w, 4, 8) | ||
1215 | +GEN_VEXT_VF(vfwsub_vf_h) | ||
1216 | +GEN_VEXT_VF(vfwsub_vf_w) | ||
1217 | |||
1218 | static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s) | ||
1219 | { | ||
1220 | @@ -XXX,XX +XXX,XX @@ static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s) | ||
1221 | |||
1222 | RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16) | ||
1223 | RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32) | ||
1224 | -GEN_VEXT_VV_ENV(vfwadd_wv_h, 2, 4) | ||
1225 | -GEN_VEXT_VV_ENV(vfwadd_wv_w, 4, 8) | ||
1226 | +GEN_VEXT_VV_ENV(vfwadd_wv_h) | ||
1227 | +GEN_VEXT_VV_ENV(vfwadd_wv_w) | ||
1228 | RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16) | ||
1229 | RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32) | ||
1230 | -GEN_VEXT_VF(vfwadd_wf_h, 2, 4) | ||
1231 | -GEN_VEXT_VF(vfwadd_wf_w, 4, 8) | ||
1232 | +GEN_VEXT_VF(vfwadd_wf_h) | ||
1233 | +GEN_VEXT_VF(vfwadd_wf_w) | ||
1234 | |||
1235 | static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s) | ||
1236 | { | ||
1237 | @@ -XXX,XX +XXX,XX @@ static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s) | ||
1238 | |||
1239 | RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16) | ||
1240 | RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32) | ||
1241 | -GEN_VEXT_VV_ENV(vfwsub_wv_h, 2, 4) | ||
1242 | -GEN_VEXT_VV_ENV(vfwsub_wv_w, 4, 8) | ||
1243 | +GEN_VEXT_VV_ENV(vfwsub_wv_h) | ||
1244 | +GEN_VEXT_VV_ENV(vfwsub_wv_w) | ||
1245 | RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16) | ||
1246 | RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32) | ||
1247 | -GEN_VEXT_VF(vfwsub_wf_h, 2, 4) | ||
1248 | -GEN_VEXT_VF(vfwsub_wf_w, 4, 8) | ||
1249 | +GEN_VEXT_VF(vfwsub_wf_h) | ||
1250 | +GEN_VEXT_VF(vfwsub_wf_w) | ||
1251 | |||
1252 | /* Vector Single-Width Floating-Point Multiply/Divide Instructions */ | ||
1253 | RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul) | ||
1254 | RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul) | ||
1255 | RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul) | ||
1256 | -GEN_VEXT_VV_ENV(vfmul_vv_h, 2, 2) | ||
1257 | -GEN_VEXT_VV_ENV(vfmul_vv_w, 4, 4) | ||
1258 | -GEN_VEXT_VV_ENV(vfmul_vv_d, 8, 8) | ||
1259 | +GEN_VEXT_VV_ENV(vfmul_vv_h) | ||
1260 | +GEN_VEXT_VV_ENV(vfmul_vv_w) | ||
1261 | +GEN_VEXT_VV_ENV(vfmul_vv_d) | ||
1262 | RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul) | ||
1263 | RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul) | ||
1264 | RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul) | ||
1265 | -GEN_VEXT_VF(vfmul_vf_h, 2, 2) | ||
1266 | -GEN_VEXT_VF(vfmul_vf_w, 4, 4) | ||
1267 | -GEN_VEXT_VF(vfmul_vf_d, 8, 8) | ||
1268 | +GEN_VEXT_VF(vfmul_vf_h) | ||
1269 | +GEN_VEXT_VF(vfmul_vf_w) | ||
1270 | +GEN_VEXT_VF(vfmul_vf_d) | ||
1271 | |||
1272 | RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div) | ||
1273 | RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div) | ||
1274 | RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div) | ||
1275 | -GEN_VEXT_VV_ENV(vfdiv_vv_h, 2, 2) | ||
1276 | -GEN_VEXT_VV_ENV(vfdiv_vv_w, 4, 4) | ||
1277 | -GEN_VEXT_VV_ENV(vfdiv_vv_d, 8, 8) | ||
1278 | +GEN_VEXT_VV_ENV(vfdiv_vv_h) | ||
1279 | +GEN_VEXT_VV_ENV(vfdiv_vv_w) | ||
1280 | +GEN_VEXT_VV_ENV(vfdiv_vv_d) | ||
1281 | RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div) | ||
1282 | RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div) | ||
1283 | RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div) | ||
1284 | -GEN_VEXT_VF(vfdiv_vf_h, 2, 2) | ||
1285 | -GEN_VEXT_VF(vfdiv_vf_w, 4, 4) | ||
1286 | -GEN_VEXT_VF(vfdiv_vf_d, 8, 8) | ||
1287 | +GEN_VEXT_VF(vfdiv_vf_h) | ||
1288 | +GEN_VEXT_VF(vfdiv_vf_w) | ||
1289 | +GEN_VEXT_VF(vfdiv_vf_d) | ||
1290 | |||
1291 | static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s) | ||
1292 | { | ||
1293 | @@ -XXX,XX +XXX,XX @@ static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s) | ||
1294 | RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv) | ||
1295 | RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv) | ||
1296 | RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv) | ||
1297 | -GEN_VEXT_VF(vfrdiv_vf_h, 2, 2) | ||
1298 | -GEN_VEXT_VF(vfrdiv_vf_w, 4, 4) | ||
1299 | -GEN_VEXT_VF(vfrdiv_vf_d, 8, 8) | ||
1300 | +GEN_VEXT_VF(vfrdiv_vf_h) | ||
1301 | +GEN_VEXT_VF(vfrdiv_vf_w) | ||
1302 | +GEN_VEXT_VF(vfrdiv_vf_d) | ||
1303 | |||
1304 | /* Vector Widening Floating-Point Multiply */ | ||
1305 | static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s) | ||
1306 | @@ -XXX,XX +XXX,XX @@ static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s) | ||
1307 | } | ||
1308 | RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16) | ||
1309 | RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32) | ||
1310 | -GEN_VEXT_VV_ENV(vfwmul_vv_h, 2, 4) | ||
1311 | -GEN_VEXT_VV_ENV(vfwmul_vv_w, 4, 8) | ||
1312 | +GEN_VEXT_VV_ENV(vfwmul_vv_h) | ||
1313 | +GEN_VEXT_VV_ENV(vfwmul_vv_w) | ||
1314 | RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16) | ||
1315 | RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32) | ||
1316 | -GEN_VEXT_VF(vfwmul_vf_h, 2, 4) | ||
1317 | -GEN_VEXT_VF(vfwmul_vf_w, 4, 8) | ||
1318 | +GEN_VEXT_VF(vfwmul_vf_h) | ||
1319 | +GEN_VEXT_VF(vfwmul_vf_w) | ||
1320 | |||
1321 | /* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */ | ||
1322 | #define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ | ||
1323 | @@ -XXX,XX +XXX,XX @@ static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) | ||
1324 | RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16) | ||
1325 | RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32) | ||
1326 | RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64) | ||
1327 | -GEN_VEXT_VV_ENV(vfmacc_vv_h, 2, 2) | ||
1328 | -GEN_VEXT_VV_ENV(vfmacc_vv_w, 4, 4) | ||
1329 | -GEN_VEXT_VV_ENV(vfmacc_vv_d, 8, 8) | ||
1330 | +GEN_VEXT_VV_ENV(vfmacc_vv_h) | ||
1331 | +GEN_VEXT_VV_ENV(vfmacc_vv_w) | ||
1332 | +GEN_VEXT_VV_ENV(vfmacc_vv_d) | ||
1333 | |||
1334 | #define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ | ||
1335 | static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ | ||
1336 | @@ -XXX,XX +XXX,XX @@ static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ | ||
1337 | RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16) | ||
1338 | RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32) | ||
1339 | RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64) | ||
1340 | -GEN_VEXT_VF(vfmacc_vf_h, 2, 2) | ||
1341 | -GEN_VEXT_VF(vfmacc_vf_w, 4, 4) | ||
1342 | -GEN_VEXT_VF(vfmacc_vf_d, 8, 8) | ||
1343 | +GEN_VEXT_VF(vfmacc_vf_h) | ||
1344 | +GEN_VEXT_VF(vfmacc_vf_w) | ||
1345 | +GEN_VEXT_VF(vfmacc_vf_d) | ||
1346 | |||
1347 | static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) | ||
1348 | { | ||
1349 | @@ -XXX,XX +XXX,XX @@ static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) | ||
1350 | RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16) | ||
1351 | RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32) | ||
1352 | RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64) | ||
1353 | -GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2, 2) | ||
1354 | -GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4, 4) | ||
1355 | -GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8, 8) | ||
1356 | +GEN_VEXT_VV_ENV(vfnmacc_vv_h) | ||
1357 | +GEN_VEXT_VV_ENV(vfnmacc_vv_w) | ||
1358 | +GEN_VEXT_VV_ENV(vfnmacc_vv_d) | ||
1359 | RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16) | ||
1360 | RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32) | ||
1361 | RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64) | ||
1362 | -GEN_VEXT_VF(vfnmacc_vf_h, 2, 2) | ||
1363 | -GEN_VEXT_VF(vfnmacc_vf_w, 4, 4) | ||
1364 | -GEN_VEXT_VF(vfnmacc_vf_d, 8, 8) | ||
1365 | +GEN_VEXT_VF(vfnmacc_vf_h) | ||
1366 | +GEN_VEXT_VF(vfnmacc_vf_w) | ||
1367 | +GEN_VEXT_VF(vfnmacc_vf_d) | ||
1368 | |||
1369 | static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) | ||
1370 | { | ||
1371 | @@ -XXX,XX +XXX,XX @@ static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) | ||
1372 | RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16) | ||
1373 | RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32) | ||
1374 | RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64) | ||
1375 | -GEN_VEXT_VV_ENV(vfmsac_vv_h, 2, 2) | ||
1376 | -GEN_VEXT_VV_ENV(vfmsac_vv_w, 4, 4) | ||
1377 | -GEN_VEXT_VV_ENV(vfmsac_vv_d, 8, 8) | ||
1378 | +GEN_VEXT_VV_ENV(vfmsac_vv_h) | ||
1379 | +GEN_VEXT_VV_ENV(vfmsac_vv_w) | ||
1380 | +GEN_VEXT_VV_ENV(vfmsac_vv_d) | ||
1381 | RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16) | ||
1382 | RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32) | ||
1383 | RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64) | ||
1384 | -GEN_VEXT_VF(vfmsac_vf_h, 2, 2) | ||
1385 | -GEN_VEXT_VF(vfmsac_vf_w, 4, 4) | ||
1386 | -GEN_VEXT_VF(vfmsac_vf_d, 8, 8) | ||
1387 | +GEN_VEXT_VF(vfmsac_vf_h) | ||
1388 | +GEN_VEXT_VF(vfmsac_vf_w) | ||
1389 | +GEN_VEXT_VF(vfmsac_vf_d) | ||
1390 | |||
1391 | static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) | ||
1392 | { | ||
1393 | @@ -XXX,XX +XXX,XX @@ static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) | ||
1394 | RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16) | ||
1395 | RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32) | ||
1396 | RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64) | ||
1397 | -GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2, 2) | ||
1398 | -GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4, 4) | ||
1399 | -GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8, 8) | ||
1400 | +GEN_VEXT_VV_ENV(vfnmsac_vv_h) | ||
1401 | +GEN_VEXT_VV_ENV(vfnmsac_vv_w) | ||
1402 | +GEN_VEXT_VV_ENV(vfnmsac_vv_d) | ||
1403 | RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16) | ||
1404 | RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32) | ||
1405 | RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64) | ||
1406 | -GEN_VEXT_VF(vfnmsac_vf_h, 2, 2) | ||
1407 | -GEN_VEXT_VF(vfnmsac_vf_w, 4, 4) | ||
1408 | -GEN_VEXT_VF(vfnmsac_vf_d, 8, 8) | ||
1409 | +GEN_VEXT_VF(vfnmsac_vf_h) | ||
1410 | +GEN_VEXT_VF(vfnmsac_vf_w) | ||
1411 | +GEN_VEXT_VF(vfnmsac_vf_d) | ||
1412 | |||
1413 | static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) | ||
1414 | { | ||
1415 | @@ -XXX,XX +XXX,XX @@ static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) | ||
1416 | RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16) | ||
1417 | RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32) | ||
1418 | RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64) | ||
1419 | -GEN_VEXT_VV_ENV(vfmadd_vv_h, 2, 2) | ||
1420 | -GEN_VEXT_VV_ENV(vfmadd_vv_w, 4, 4) | ||
1421 | -GEN_VEXT_VV_ENV(vfmadd_vv_d, 8, 8) | ||
1422 | +GEN_VEXT_VV_ENV(vfmadd_vv_h) | ||
1423 | +GEN_VEXT_VV_ENV(vfmadd_vv_w) | ||
1424 | +GEN_VEXT_VV_ENV(vfmadd_vv_d) | ||
1425 | RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16) | ||
1426 | RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32) | ||
1427 | RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64) | ||
1428 | -GEN_VEXT_VF(vfmadd_vf_h, 2, 2) | ||
1429 | -GEN_VEXT_VF(vfmadd_vf_w, 4, 4) | ||
1430 | -GEN_VEXT_VF(vfmadd_vf_d, 8, 8) | ||
1431 | +GEN_VEXT_VF(vfmadd_vf_h) | ||
1432 | +GEN_VEXT_VF(vfmadd_vf_w) | ||
1433 | +GEN_VEXT_VF(vfmadd_vf_d) | ||
1434 | |||
1435 | static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) | ||
1436 | { | ||
1437 | @@ -XXX,XX +XXX,XX @@ static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) | ||
1438 | RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16) | ||
1439 | RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32) | ||
1440 | RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64) | ||
1441 | -GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2, 2) | ||
1442 | -GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4, 4) | ||
1443 | -GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8, 8) | ||
1444 | +GEN_VEXT_VV_ENV(vfnmadd_vv_h) | ||
1445 | +GEN_VEXT_VV_ENV(vfnmadd_vv_w) | ||
1446 | +GEN_VEXT_VV_ENV(vfnmadd_vv_d) | ||
1447 | RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16) | ||
1448 | RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32) | ||
1449 | RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64) | ||
1450 | -GEN_VEXT_VF(vfnmadd_vf_h, 2, 2) | ||
1451 | -GEN_VEXT_VF(vfnmadd_vf_w, 4, 4) | ||
1452 | -GEN_VEXT_VF(vfnmadd_vf_d, 8, 8) | ||
1453 | +GEN_VEXT_VF(vfnmadd_vf_h) | ||
1454 | +GEN_VEXT_VF(vfnmadd_vf_w) | ||
1455 | +GEN_VEXT_VF(vfnmadd_vf_d) | ||
1456 | |||
1457 | static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) | ||
1458 | { | ||
1459 | @@ -XXX,XX +XXX,XX @@ static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) | ||
1460 | RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16) | ||
1461 | RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32) | ||
1462 | RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64) | ||
1463 | -GEN_VEXT_VV_ENV(vfmsub_vv_h, 2, 2) | ||
1464 | -GEN_VEXT_VV_ENV(vfmsub_vv_w, 4, 4) | ||
1465 | -GEN_VEXT_VV_ENV(vfmsub_vv_d, 8, 8) | ||
1466 | +GEN_VEXT_VV_ENV(vfmsub_vv_h) | ||
1467 | +GEN_VEXT_VV_ENV(vfmsub_vv_w) | ||
1468 | +GEN_VEXT_VV_ENV(vfmsub_vv_d) | ||
1469 | RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16) | ||
1470 | RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32) | ||
1471 | RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64) | ||
1472 | -GEN_VEXT_VF(vfmsub_vf_h, 2, 2) | ||
1473 | -GEN_VEXT_VF(vfmsub_vf_w, 4, 4) | ||
1474 | -GEN_VEXT_VF(vfmsub_vf_d, 8, 8) | ||
1475 | +GEN_VEXT_VF(vfmsub_vf_h) | ||
1476 | +GEN_VEXT_VF(vfmsub_vf_w) | ||
1477 | +GEN_VEXT_VF(vfmsub_vf_d) | ||
1478 | |||
1479 | static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) | ||
1480 | { | ||
1481 | @@ -XXX,XX +XXX,XX @@ static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) | ||
1482 | RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16) | ||
1483 | RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32) | ||
1484 | RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64) | ||
1485 | -GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2, 2) | ||
1486 | -GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4, 4) | ||
1487 | -GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8, 8) | ||
1488 | +GEN_VEXT_VV_ENV(vfnmsub_vv_h) | ||
1489 | +GEN_VEXT_VV_ENV(vfnmsub_vv_w) | ||
1490 | +GEN_VEXT_VV_ENV(vfnmsub_vv_d) | ||
1491 | RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16) | ||
1492 | RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32) | ||
1493 | RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64) | ||
1494 | -GEN_VEXT_VF(vfnmsub_vf_h, 2, 2) | ||
1495 | -GEN_VEXT_VF(vfnmsub_vf_w, 4, 4) | ||
1496 | -GEN_VEXT_VF(vfnmsub_vf_d, 8, 8) | ||
1497 | +GEN_VEXT_VF(vfnmsub_vf_h) | ||
1498 | +GEN_VEXT_VF(vfnmsub_vf_w) | ||
1499 | +GEN_VEXT_VF(vfnmsub_vf_d) | ||
1500 | |||
1501 | /* Vector Widening Floating-Point Fused Multiply-Add Instructions */ | ||
1502 | static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) | ||
1503 | @@ -XXX,XX +XXX,XX @@ static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) | ||
1504 | |||
1505 | RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16) | ||
1506 | RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32) | ||
1507 | -GEN_VEXT_VV_ENV(vfwmacc_vv_h, 2, 4) | ||
1508 | -GEN_VEXT_VV_ENV(vfwmacc_vv_w, 4, 8) | ||
1509 | +GEN_VEXT_VV_ENV(vfwmacc_vv_h) | ||
1510 | +GEN_VEXT_VV_ENV(vfwmacc_vv_w) | ||
1511 | RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16) | ||
1512 | RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32) | ||
1513 | -GEN_VEXT_VF(vfwmacc_vf_h, 2, 4) | ||
1514 | -GEN_VEXT_VF(vfwmacc_vf_w, 4, 8) | ||
1515 | +GEN_VEXT_VF(vfwmacc_vf_h) | ||
1516 | +GEN_VEXT_VF(vfwmacc_vf_w) | ||
1517 | |||
1518 | static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) | ||
1519 | { | ||
1520 | @@ -XXX,XX +XXX,XX @@ static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) | ||
1521 | |||
1522 | RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16) | ||
1523 | RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32) | ||
1524 | -GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 2, 4) | ||
1525 | -GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 4, 8) | ||
1526 | +GEN_VEXT_VV_ENV(vfwnmacc_vv_h) | ||
1527 | +GEN_VEXT_VV_ENV(vfwnmacc_vv_w) | ||
1528 | RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16) | ||
1529 | RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32) | ||
1530 | -GEN_VEXT_VF(vfwnmacc_vf_h, 2, 4) | ||
1531 | -GEN_VEXT_VF(vfwnmacc_vf_w, 4, 8) | ||
1532 | +GEN_VEXT_VF(vfwnmacc_vf_h) | ||
1533 | +GEN_VEXT_VF(vfwnmacc_vf_w) | ||
1534 | |||
1535 | static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) | ||
1536 | { | ||
1537 | @@ -XXX,XX +XXX,XX @@ static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) | ||
1538 | |||
1539 | RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16) | ||
1540 | RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32) | ||
1541 | -GEN_VEXT_VV_ENV(vfwmsac_vv_h, 2, 4) | ||
1542 | -GEN_VEXT_VV_ENV(vfwmsac_vv_w, 4, 8) | ||
1543 | +GEN_VEXT_VV_ENV(vfwmsac_vv_h) | ||
1544 | +GEN_VEXT_VV_ENV(vfwmsac_vv_w) | ||
1545 | RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16) | ||
1546 | RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32) | ||
1547 | -GEN_VEXT_VF(vfwmsac_vf_h, 2, 4) | ||
1548 | -GEN_VEXT_VF(vfwmsac_vf_w, 4, 8) | ||
1549 | +GEN_VEXT_VF(vfwmsac_vf_h) | ||
1550 | +GEN_VEXT_VF(vfwmsac_vf_w) | ||
1551 | |||
1552 | static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) | ||
1553 | { | ||
1554 | @@ -XXX,XX +XXX,XX @@ static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) | ||
1555 | |||
1556 | RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16) | ||
1557 | RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32) | ||
1558 | -GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 2, 4) | ||
1559 | -GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 4, 8) | ||
1560 | +GEN_VEXT_VV_ENV(vfwnmsac_vv_h) | ||
1561 | +GEN_VEXT_VV_ENV(vfwnmsac_vv_w) | ||
1562 | RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16) | ||
1563 | RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32) | ||
1564 | -GEN_VEXT_VF(vfwnmsac_vf_h, 2, 4) | ||
1565 | -GEN_VEXT_VF(vfwnmsac_vf_w, 4, 8) | ||
1566 | +GEN_VEXT_VF(vfwnmsac_vf_h) | ||
1567 | +GEN_VEXT_VF(vfwnmsac_vf_w) | ||
1568 | |||
1569 | /* Vector Floating-Point Square-Root Instruction */ | ||
1570 | /* (TD, T2, TX2) */ | ||
1571 | @@ -XXX,XX +XXX,XX @@ static void do_##NAME(void *vd, void *vs2, int i, \ | ||
1572 | *((TD *)vd + HD(i)) = OP(s2, &env->fp_status); \ | ||
1573 | } | ||
1574 | |||
1575 | -#define GEN_VEXT_V_ENV(NAME, ESZ, DSZ) \ | ||
1576 | +#define GEN_VEXT_V_ENV(NAME) \ | ||
1577 | void HELPER(NAME)(void *vd, void *v0, void *vs2, \ | ||
1578 | CPURISCVState *env, uint32_t desc) \ | ||
1579 | { \ | ||
1580 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs2, \ | ||
1581 | RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt) | ||
1582 | RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt) | ||
1583 | RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt) | ||
1584 | -GEN_VEXT_V_ENV(vfsqrt_v_h, 2, 2) | ||
1585 | -GEN_VEXT_V_ENV(vfsqrt_v_w, 4, 4) | ||
1586 | -GEN_VEXT_V_ENV(vfsqrt_v_d, 8, 8) | ||
1587 | +GEN_VEXT_V_ENV(vfsqrt_v_h) | ||
1588 | +GEN_VEXT_V_ENV(vfsqrt_v_w) | ||
1589 | +GEN_VEXT_V_ENV(vfsqrt_v_d) | ||
1590 | |||
1591 | /* | ||
1592 | * Vector Floating-Point Reciprocal Square-Root Estimate Instruction | ||
1593 | @@ -XXX,XX +XXX,XX @@ static float64 frsqrt7_d(float64 f, float_status *s) | ||
1594 | RVVCALL(OPFVV1, vfrsqrt7_v_h, OP_UU_H, H2, H2, frsqrt7_h) | ||
1595 | RVVCALL(OPFVV1, vfrsqrt7_v_w, OP_UU_W, H4, H4, frsqrt7_s) | ||
1596 | RVVCALL(OPFVV1, vfrsqrt7_v_d, OP_UU_D, H8, H8, frsqrt7_d) | ||
1597 | -GEN_VEXT_V_ENV(vfrsqrt7_v_h, 2, 2) | ||
1598 | -GEN_VEXT_V_ENV(vfrsqrt7_v_w, 4, 4) | ||
1599 | -GEN_VEXT_V_ENV(vfrsqrt7_v_d, 8, 8) | ||
1600 | +GEN_VEXT_V_ENV(vfrsqrt7_v_h) | ||
1601 | +GEN_VEXT_V_ENV(vfrsqrt7_v_w) | ||
1602 | +GEN_VEXT_V_ENV(vfrsqrt7_v_d) | ||
1603 | |||
1604 | /* | ||
1605 | * Vector Floating-Point Reciprocal Estimate Instruction | ||
1606 | @@ -XXX,XX +XXX,XX @@ static float64 frec7_d(float64 f, float_status *s) | ||
1607 | RVVCALL(OPFVV1, vfrec7_v_h, OP_UU_H, H2, H2, frec7_h) | ||
1608 | RVVCALL(OPFVV1, vfrec7_v_w, OP_UU_W, H4, H4, frec7_s) | ||
1609 | RVVCALL(OPFVV1, vfrec7_v_d, OP_UU_D, H8, H8, frec7_d) | ||
1610 | -GEN_VEXT_V_ENV(vfrec7_v_h, 2, 2) | ||
1611 | -GEN_VEXT_V_ENV(vfrec7_v_w, 4, 4) | ||
1612 | -GEN_VEXT_V_ENV(vfrec7_v_d, 8, 8) | ||
1613 | +GEN_VEXT_V_ENV(vfrec7_v_h) | ||
1614 | +GEN_VEXT_V_ENV(vfrec7_v_w) | ||
1615 | +GEN_VEXT_V_ENV(vfrec7_v_d) | ||
1616 | |||
1617 | /* Vector Floating-Point MIN/MAX Instructions */ | ||
1618 | RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minimum_number) | ||
1619 | RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, float32_minimum_number) | ||
1620 | RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minimum_number) | ||
1621 | -GEN_VEXT_VV_ENV(vfmin_vv_h, 2, 2) | ||
1622 | -GEN_VEXT_VV_ENV(vfmin_vv_w, 4, 4) | ||
1623 | -GEN_VEXT_VV_ENV(vfmin_vv_d, 8, 8) | ||
1624 | +GEN_VEXT_VV_ENV(vfmin_vv_h) | ||
1625 | +GEN_VEXT_VV_ENV(vfmin_vv_w) | ||
1626 | +GEN_VEXT_VV_ENV(vfmin_vv_d) | ||
1627 | RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minimum_number) | ||
1628 | RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minimum_number) | ||
1629 | RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minimum_number) | ||
1630 | -GEN_VEXT_VF(vfmin_vf_h, 2, 2) | ||
1631 | -GEN_VEXT_VF(vfmin_vf_w, 4, 4) | ||
1632 | -GEN_VEXT_VF(vfmin_vf_d, 8, 8) | ||
1633 | +GEN_VEXT_VF(vfmin_vf_h) | ||
1634 | +GEN_VEXT_VF(vfmin_vf_w) | ||
1635 | +GEN_VEXT_VF(vfmin_vf_d) | ||
1636 | |||
1637 | RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maximum_number) | ||
1638 | RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maximum_number) | ||
1639 | RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maximum_number) | ||
1640 | -GEN_VEXT_VV_ENV(vfmax_vv_h, 2, 2) | ||
1641 | -GEN_VEXT_VV_ENV(vfmax_vv_w, 4, 4) | ||
1642 | -GEN_VEXT_VV_ENV(vfmax_vv_d, 8, 8) | ||
1643 | +GEN_VEXT_VV_ENV(vfmax_vv_h) | ||
1644 | +GEN_VEXT_VV_ENV(vfmax_vv_w) | ||
1645 | +GEN_VEXT_VV_ENV(vfmax_vv_d) | ||
1646 | RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maximum_number) | ||
1647 | RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maximum_number) | ||
1648 | RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maximum_number) | ||
1649 | -GEN_VEXT_VF(vfmax_vf_h, 2, 2) | ||
1650 | -GEN_VEXT_VF(vfmax_vf_w, 4, 4) | ||
1651 | -GEN_VEXT_VF(vfmax_vf_d, 8, 8) | ||
1652 | +GEN_VEXT_VF(vfmax_vf_h) | ||
1653 | +GEN_VEXT_VF(vfmax_vf_w) | ||
1654 | +GEN_VEXT_VF(vfmax_vf_d) | ||
1655 | |||
1656 | /* Vector Floating-Point Sign-Injection Instructions */ | ||
1657 | static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s) | ||
1658 | @@ -XXX,XX +XXX,XX @@ static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s) | ||
1659 | RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16) | ||
1660 | RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32) | ||
1661 | RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64) | ||
1662 | -GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2, 2) | ||
1663 | -GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4, 4) | ||
1664 | -GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8, 8) | ||
1665 | +GEN_VEXT_VV_ENV(vfsgnj_vv_h) | ||
1666 | +GEN_VEXT_VV_ENV(vfsgnj_vv_w) | ||
1667 | +GEN_VEXT_VV_ENV(vfsgnj_vv_d) | ||
1668 | RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16) | ||
1669 | RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32) | ||
1670 | RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64) | ||
1671 | -GEN_VEXT_VF(vfsgnj_vf_h, 2, 2) | ||
1672 | -GEN_VEXT_VF(vfsgnj_vf_w, 4, 4) | ||
1673 | -GEN_VEXT_VF(vfsgnj_vf_d, 8, 8) | ||
1674 | +GEN_VEXT_VF(vfsgnj_vf_h) | ||
1675 | +GEN_VEXT_VF(vfsgnj_vf_w) | ||
1676 | +GEN_VEXT_VF(vfsgnj_vf_d) | ||
1677 | |||
1678 | static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s) | ||
1679 | { | ||
1680 | @@ -XXX,XX +XXX,XX @@ static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s) | ||
1681 | RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16) | ||
1682 | RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32) | ||
1683 | RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64) | ||
1684 | -GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2, 2) | ||
1685 | -GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4, 4) | ||
1686 | -GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8, 8) | ||
1687 | +GEN_VEXT_VV_ENV(vfsgnjn_vv_h) | ||
1688 | +GEN_VEXT_VV_ENV(vfsgnjn_vv_w) | ||
1689 | +GEN_VEXT_VV_ENV(vfsgnjn_vv_d) | ||
1690 | RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16) | ||
1691 | RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32) | ||
1692 | RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64) | ||
1693 | -GEN_VEXT_VF(vfsgnjn_vf_h, 2, 2) | ||
1694 | -GEN_VEXT_VF(vfsgnjn_vf_w, 4, 4) | ||
1695 | -GEN_VEXT_VF(vfsgnjn_vf_d, 8, 8) | ||
1696 | +GEN_VEXT_VF(vfsgnjn_vf_h) | ||
1697 | +GEN_VEXT_VF(vfsgnjn_vf_w) | ||
1698 | +GEN_VEXT_VF(vfsgnjn_vf_d) | ||
1699 | |||
1700 | static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s) | ||
1701 | { | ||
1702 | @@ -XXX,XX +XXX,XX @@ static uint64_t fsgnjx64(uint64_t a, uint64_t b, float_status *s) | ||
1703 | RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16) | ||
1704 | RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32) | ||
1705 | RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64) | ||
1706 | -GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2, 2) | ||
1707 | -GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4, 4) | ||
1708 | -GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8, 8) | ||
1709 | +GEN_VEXT_VV_ENV(vfsgnjx_vv_h) | ||
1710 | +GEN_VEXT_VV_ENV(vfsgnjx_vv_w) | ||
1711 | +GEN_VEXT_VV_ENV(vfsgnjx_vv_d) | ||
1712 | RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16) | ||
1713 | RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32) | ||
1714 | RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64) | ||
1715 | -GEN_VEXT_VF(vfsgnjx_vf_h, 2, 2) | ||
1716 | -GEN_VEXT_VF(vfsgnjx_vf_w, 4, 4) | ||
1717 | -GEN_VEXT_VF(vfsgnjx_vf_d, 8, 8) | ||
1718 | +GEN_VEXT_VF(vfsgnjx_vf_h) | ||
1719 | +GEN_VEXT_VF(vfsgnjx_vf_w) | ||
1720 | +GEN_VEXT_VF(vfsgnjx_vf_d) | ||
1721 | |||
1722 | /* Vector Floating-Point Compare Instructions */ | ||
1723 | #define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP) \ | ||
1724 | @@ -XXX,XX +XXX,XX @@ static void do_##NAME(void *vd, void *vs2, int i) \ | ||
1725 | *((TD *)vd + HD(i)) = OP(s2); \ | ||
1726 | } | ||
1727 | |||
1728 | -#define GEN_VEXT_V(NAME, ESZ, DSZ) \ | ||
1729 | +#define GEN_VEXT_V(NAME) \ | ||
1730 | void HELPER(NAME)(void *vd, void *v0, void *vs2, \ | ||
1731 | CPURISCVState *env, uint32_t desc) \ | ||
1732 | { \ | ||
1733 | @@ -XXX,XX +XXX,XX @@ target_ulong fclass_d(uint64_t frs1) | ||
1734 | RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h) | ||
1735 | RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s) | ||
1736 | RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d) | ||
1737 | -GEN_VEXT_V(vfclass_v_h, 2, 2) | ||
1738 | -GEN_VEXT_V(vfclass_v_w, 4, 4) | ||
1739 | -GEN_VEXT_V(vfclass_v_d, 8, 8) | ||
1740 | +GEN_VEXT_V(vfclass_v_h) | ||
1741 | +GEN_VEXT_V(vfclass_v_w) | ||
1742 | +GEN_VEXT_V(vfclass_v_d) | ||
1743 | |||
1744 | /* Vector Floating-Point Merge Instruction */ | ||
1745 | #define GEN_VFMERGE_VF(NAME, ETYPE, H) \ | ||
1746 | @@ -XXX,XX +XXX,XX @@ GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8) | ||
1747 | RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16) | ||
1748 | RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32) | ||
1749 | RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64) | ||
1750 | -GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2, 2) | ||
1751 | -GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4, 4) | ||
1752 | -GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8, 8) | ||
1753 | +GEN_VEXT_V_ENV(vfcvt_xu_f_v_h) | ||
1754 | +GEN_VEXT_V_ENV(vfcvt_xu_f_v_w) | ||
1755 | +GEN_VEXT_V_ENV(vfcvt_xu_f_v_d) | ||
1756 | |||
1757 | /* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */ | ||
1758 | RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16) | ||
1759 | RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32) | ||
1760 | RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64) | ||
1761 | -GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2, 2) | ||
1762 | -GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4, 4) | ||
1763 | -GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8, 8) | ||
1764 | +GEN_VEXT_V_ENV(vfcvt_x_f_v_h) | ||
1765 | +GEN_VEXT_V_ENV(vfcvt_x_f_v_w) | ||
1766 | +GEN_VEXT_V_ENV(vfcvt_x_f_v_d) | ||
1767 | |||
1768 | /* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */ | ||
1769 | RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16) | ||
1770 | RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32) | ||
1771 | RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64) | ||
1772 | -GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2, 2) | ||
1773 | -GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4, 4) | ||
1774 | -GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8, 8) | ||
1775 | +GEN_VEXT_V_ENV(vfcvt_f_xu_v_h) | ||
1776 | +GEN_VEXT_V_ENV(vfcvt_f_xu_v_w) | ||
1777 | +GEN_VEXT_V_ENV(vfcvt_f_xu_v_d) | ||
1778 | |||
1779 | /* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */ | ||
1780 | RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16) | ||
1781 | RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32) | ||
1782 | RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64) | ||
1783 | -GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2, 2) | ||
1784 | -GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4, 4) | ||
1785 | -GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8, 8) | ||
1786 | +GEN_VEXT_V_ENV(vfcvt_f_x_v_h) | ||
1787 | +GEN_VEXT_V_ENV(vfcvt_f_x_v_w) | ||
1788 | +GEN_VEXT_V_ENV(vfcvt_f_x_v_d) | ||
1789 | |||
1790 | /* Widening Floating-Point/Integer Type-Convert Instructions */ | ||
1791 | /* (TD, T2, TX2) */ | ||
1792 | @@ -XXX,XX +XXX,XX @@ GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8, 8) | ||
1793 | /* vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer.*/ | ||
1794 | RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32) | ||
1795 | RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64) | ||
1796 | -GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 2, 4) | ||
1797 | -GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 4, 8) | ||
1798 | +GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h) | ||
1799 | +GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w) | ||
1800 | |||
1801 | /* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */ | ||
1802 | RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32) | ||
1803 | RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64) | ||
1804 | -GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 2, 4) | ||
1805 | -GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 4, 8) | ||
1806 | +GEN_VEXT_V_ENV(vfwcvt_x_f_v_h) | ||
1807 | +GEN_VEXT_V_ENV(vfwcvt_x_f_v_w) | ||
1808 | |||
1809 | /* vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float */ | ||
1810 | RVVCALL(OPFVV1, vfwcvt_f_xu_v_b, WOP_UU_B, H2, H1, uint8_to_float16) | ||
1811 | RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32) | ||
1812 | RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64) | ||
1813 | -GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b, 1, 2) | ||
1814 | -GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 2, 4) | ||
1815 | -GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 4, 8) | ||
1816 | +GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b) | ||
1817 | +GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h) | ||
1818 | +GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w) | ||
1819 | |||
1820 | /* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */ | ||
1821 | RVVCALL(OPFVV1, vfwcvt_f_x_v_b, WOP_UU_B, H2, H1, int8_to_float16) | ||
1822 | RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32) | ||
1823 | RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64) | ||
1824 | -GEN_VEXT_V_ENV(vfwcvt_f_x_v_b, 1, 2) | ||
1825 | -GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 2, 4) | ||
1826 | -GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 4, 8) | ||
1827 | +GEN_VEXT_V_ENV(vfwcvt_f_x_v_b) | ||
1828 | +GEN_VEXT_V_ENV(vfwcvt_f_x_v_h) | ||
1829 | +GEN_VEXT_V_ENV(vfwcvt_f_x_v_w) | ||
1830 | |||
1831 | /* | ||
1832 | * vfwcvt.f.f.v vd, vs2, vm | ||
1833 | @@ -XXX,XX +XXX,XX @@ static uint32_t vfwcvtffv16(uint16_t a, float_status *s) | ||
1834 | |||
1835 | RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16) | ||
1836 | RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64) | ||
1837 | -GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 2, 4) | ||
1838 | -GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 4, 8) | ||
1839 | +GEN_VEXT_V_ENV(vfwcvt_f_f_v_h) | ||
1840 | +GEN_VEXT_V_ENV(vfwcvt_f_f_v_w) | ||
1841 | |||
1842 | /* Narrowing Floating-Point/Integer Type-Convert Instructions */ | ||
1843 | /* (TD, T2, TX2) */ | ||
1844 | @@ -XXX,XX +XXX,XX @@ GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 4, 8) | ||
1845 | RVVCALL(OPFVV1, vfncvt_xu_f_w_b, NOP_UU_B, H1, H2, float16_to_uint8) | ||
1846 | RVVCALL(OPFVV1, vfncvt_xu_f_w_h, NOP_UU_H, H2, H4, float32_to_uint16) | ||
1847 | RVVCALL(OPFVV1, vfncvt_xu_f_w_w, NOP_UU_W, H4, H8, float64_to_uint32) | ||
1848 | -GEN_VEXT_V_ENV(vfncvt_xu_f_w_b, 1, 1) | ||
1849 | -GEN_VEXT_V_ENV(vfncvt_xu_f_w_h, 2, 2) | ||
1850 | -GEN_VEXT_V_ENV(vfncvt_xu_f_w_w, 4, 4) | ||
1851 | +GEN_VEXT_V_ENV(vfncvt_xu_f_w_b) | ||
1852 | +GEN_VEXT_V_ENV(vfncvt_xu_f_w_h) | ||
1853 | +GEN_VEXT_V_ENV(vfncvt_xu_f_w_w) | ||
1854 | |||
1855 | /* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */ | ||
1856 | RVVCALL(OPFVV1, vfncvt_x_f_w_b, NOP_UU_B, H1, H2, float16_to_int8) | ||
1857 | RVVCALL(OPFVV1, vfncvt_x_f_w_h, NOP_UU_H, H2, H4, float32_to_int16) | ||
1858 | RVVCALL(OPFVV1, vfncvt_x_f_w_w, NOP_UU_W, H4, H8, float64_to_int32) | ||
1859 | -GEN_VEXT_V_ENV(vfncvt_x_f_w_b, 1, 1) | ||
1860 | -GEN_VEXT_V_ENV(vfncvt_x_f_w_h, 2, 2) | ||
1861 | -GEN_VEXT_V_ENV(vfncvt_x_f_w_w, 4, 4) | ||
1862 | +GEN_VEXT_V_ENV(vfncvt_x_f_w_b) | ||
1863 | +GEN_VEXT_V_ENV(vfncvt_x_f_w_h) | ||
1864 | +GEN_VEXT_V_ENV(vfncvt_x_f_w_w) | ||
1865 | |||
1866 | /* vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float */ | ||
1867 | RVVCALL(OPFVV1, vfncvt_f_xu_w_h, NOP_UU_H, H2, H4, uint32_to_float16) | ||
1868 | RVVCALL(OPFVV1, vfncvt_f_xu_w_w, NOP_UU_W, H4, H8, uint64_to_float32) | ||
1869 | -GEN_VEXT_V_ENV(vfncvt_f_xu_w_h, 2, 2) | ||
1870 | -GEN_VEXT_V_ENV(vfncvt_f_xu_w_w, 4, 4) | ||
1871 | +GEN_VEXT_V_ENV(vfncvt_f_xu_w_h) | ||
1872 | +GEN_VEXT_V_ENV(vfncvt_f_xu_w_w) | ||
1873 | |||
1874 | /* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */ | ||
1875 | RVVCALL(OPFVV1, vfncvt_f_x_w_h, NOP_UU_H, H2, H4, int32_to_float16) | ||
1876 | RVVCALL(OPFVV1, vfncvt_f_x_w_w, NOP_UU_W, H4, H8, int64_to_float32) | ||
1877 | -GEN_VEXT_V_ENV(vfncvt_f_x_w_h, 2, 2) | ||
1878 | -GEN_VEXT_V_ENV(vfncvt_f_x_w_w, 4, 4) | ||
1879 | +GEN_VEXT_V_ENV(vfncvt_f_x_w_h) | ||
1880 | +GEN_VEXT_V_ENV(vfncvt_f_x_w_w) | ||
1881 | |||
1882 | /* vfncvt.f.f.v vd, vs2, vm # Convert double float to single-width float. */ | ||
1883 | static uint16_t vfncvtffv16(uint32_t a, float_status *s) | ||
1884 | @@ -XXX,XX +XXX,XX @@ static uint16_t vfncvtffv16(uint32_t a, float_status *s) | ||
1885 | |||
1886 | RVVCALL(OPFVV1, vfncvt_f_f_w_h, NOP_UU_H, H2, H4, vfncvtffv16) | ||
1887 | RVVCALL(OPFVV1, vfncvt_f_f_w_w, NOP_UU_W, H4, H8, float64_to_float32) | ||
1888 | -GEN_VEXT_V_ENV(vfncvt_f_f_w_h, 2, 2) | ||
1889 | -GEN_VEXT_V_ENV(vfncvt_f_f_w_w, 4, 4) | ||
1890 | +GEN_VEXT_V_ENV(vfncvt_f_f_w_h) | ||
1891 | +GEN_VEXT_V_ENV(vfncvt_f_f_w_w) | ||
1892 | |||
1893 | /* | ||
1894 | *** Vector Reduction Operations | ||
1895 | -- | 97 | -- |
1896 | 2.36.1 | 98 | 2.41.0 | diff view generated by jsdifflib |
1 | From: eopXD <yueh.ting.chen@gmail.com> | 1 | From: Kiran Ostrolenk <kiran.ostrolenk@codethink.co.uk> |
---|---|---|---|
2 | 2 | ||
3 | Signed-off-by: eop Chen <eop.chen@sifive.com> | 3 | Move some macros out of `vector_helper` and into `vector_internals`. |
4 | Reviewed-by: Frank Chang <frank.chang@sifive.com> | 4 | This ensures they can be used by both vector and vector-crypto helpers |
5 | (latter implemented in proceeding commits). | ||
6 | |||
7 | Signed-off-by: Kiran Ostrolenk <kiran.ostrolenk@codethink.co.uk> | ||
5 | Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn> | 8 | Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn> |
6 | Acked-by: Alistair Francis <alistair.francis@wdc.com> | 9 | Signed-off-by: Max Chou <max.chou@sifive.com> |
7 | Message-Id: <165449614532.19704.7000832880482980398-13@git.sr.ht> | 10 | Message-ID: <20230711165917.2629866-8-max.chou@sifive.com> |
8 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | 11 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> |
9 | --- | 12 | --- |
10 | target/riscv/vector_helper.c | 20 ++++++++++++++++++++ | 13 | target/riscv/vector_internals.h | 46 +++++++++++++++++++++++++++++++++ |
11 | 1 file changed, 20 insertions(+) | 14 | target/riscv/vector_helper.c | 42 ------------------------------ |
15 | 2 files changed, 46 insertions(+), 42 deletions(-) | ||
12 | 16 | ||
17 | diff --git a/target/riscv/vector_internals.h b/target/riscv/vector_internals.h | ||
18 | index XXXXXXX..XXXXXXX 100644 | ||
19 | --- a/target/riscv/vector_internals.h | ||
20 | +++ b/target/riscv/vector_internals.h | ||
21 | @@ -XXX,XX +XXX,XX @@ void vext_set_elems_1s(void *base, uint32_t is_agnostic, uint32_t cnt, | ||
22 | /* expand macro args before macro */ | ||
23 | #define RVVCALL(macro, ...) macro(__VA_ARGS__) | ||
24 | |||
25 | +/* (TD, T2, TX2) */ | ||
26 | +#define OP_UU_B uint8_t, uint8_t, uint8_t | ||
27 | +#define OP_UU_H uint16_t, uint16_t, uint16_t | ||
28 | +#define OP_UU_W uint32_t, uint32_t, uint32_t | ||
29 | +#define OP_UU_D uint64_t, uint64_t, uint64_t | ||
30 | + | ||
31 | /* (TD, T1, T2, TX1, TX2) */ | ||
32 | #define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t | ||
33 | #define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t | ||
34 | #define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t | ||
35 | #define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t | ||
36 | |||
37 | +#define OPIVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ | ||
38 | +static void do_##NAME(void *vd, void *vs2, int i) \ | ||
39 | +{ \ | ||
40 | + TX2 s2 = *((T2 *)vs2 + HS2(i)); \ | ||
41 | + *((TD *)vd + HD(i)) = OP(s2); \ | ||
42 | +} | ||
43 | + | ||
44 | +#define GEN_VEXT_V(NAME, ESZ) \ | ||
45 | +void HELPER(NAME)(void *vd, void *v0, void *vs2, \ | ||
46 | + CPURISCVState *env, uint32_t desc) \ | ||
47 | +{ \ | ||
48 | + uint32_t vm = vext_vm(desc); \ | ||
49 | + uint32_t vl = env->vl; \ | ||
50 | + uint32_t total_elems = \ | ||
51 | + vext_get_total_elems(env, desc, ESZ); \ | ||
52 | + uint32_t vta = vext_vta(desc); \ | ||
53 | + uint32_t vma = vext_vma(desc); \ | ||
54 | + uint32_t i; \ | ||
55 | + \ | ||
56 | + for (i = env->vstart; i < vl; i++) { \ | ||
57 | + if (!vm && !vext_elem_mask(v0, i)) { \ | ||
58 | + /* set masked-off elements to 1s */ \ | ||
59 | + vext_set_elems_1s(vd, vma, i * ESZ, \ | ||
60 | + (i + 1) * ESZ); \ | ||
61 | + continue; \ | ||
62 | + } \ | ||
63 | + do_##NAME(vd, vs2, i); \ | ||
64 | + } \ | ||
65 | + env->vstart = 0; \ | ||
66 | + /* set tail elements to 1s */ \ | ||
67 | + vext_set_elems_1s(vd, vta, vl * ESZ, \ | ||
68 | + total_elems * ESZ); \ | ||
69 | +} | ||
70 | + | ||
71 | /* operation of two vector elements */ | ||
72 | typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i); | ||
73 | |||
74 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ | ||
75 | do_##NAME, ESZ); \ | ||
76 | } | ||
77 | |||
78 | +/* Three of the widening shortening macros: */ | ||
79 | +/* (TD, T1, T2, TX1, TX2) */ | ||
80 | +#define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t | ||
81 | +#define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t | ||
82 | +#define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t | ||
83 | + | ||
84 | #endif /* TARGET_RISCV_VECTOR_INTERNALS_H */ | ||
13 | diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c | 85 | diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c |
14 | index XXXXXXX..XXXXXXX 100644 | 86 | index XXXXXXX..XXXXXXX 100644 |
15 | --- a/target/riscv/vector_helper.c | 87 | --- a/target/riscv/vector_helper.c |
16 | +++ b/target/riscv/vector_helper.c | 88 | +++ b/target/riscv/vector_helper.c |
17 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, \ | 89 | @@ -XXX,XX +XXX,XX @@ GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b) |
18 | { \ | 90 | #define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t |
19 | uint32_t vm = vext_vm(desc); \ | 91 | #define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t |
20 | uint32_t vl = env->vl; \ | 92 | #define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t |
21 | + uint32_t esz = sizeof(TD); \ | 93 | -#define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t |
22 | + uint32_t vlenb = simd_maxsz(desc); \ | 94 | -#define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t |
23 | + uint32_t vta = vext_vta(desc); \ | 95 | -#define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t |
24 | uint32_t i; \ | 96 | #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t |
25 | TD s1 = *((TD *)vs1 + HD(0)); \ | 97 | #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t |
26 | \ | 98 | #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t |
27 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, \ | 99 | @@ -XXX,XX +XXX,XX @@ GEN_VEXT_VF(vfwnmsac_vf_h, 4) |
28 | } \ | 100 | GEN_VEXT_VF(vfwnmsac_vf_w, 8) |
29 | *((TD *)vd + HD(0)) = s1; \ | 101 | |
30 | env->vstart = 0; \ | 102 | /* Vector Floating-Point Square-Root Instruction */ |
31 | + /* set tail elements to 1s */ \ | 103 | -/* (TD, T2, TX2) */ |
32 | + vext_set_elems_1s(vd, vta, esz, vlenb); \ | 104 | -#define OP_UU_H uint16_t, uint16_t, uint16_t |
33 | } | 105 | -#define OP_UU_W uint32_t, uint32_t, uint32_t |
34 | 106 | -#define OP_UU_D uint64_t, uint64_t, uint64_t | |
35 | /* vd[0] = sum(vs1[0], vs2[*]) */ | 107 | - |
36 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, \ | 108 | #define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ |
37 | { \ | 109 | static void do_##NAME(void *vd, void *vs2, int i, \ |
38 | uint32_t vm = vext_vm(desc); \ | 110 | CPURISCVState *env) \ |
39 | uint32_t vl = env->vl; \ | 111 | @@ -XXX,XX +XXX,XX @@ GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32) |
40 | + uint32_t esz = sizeof(TD); \ | 112 | GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64) |
41 | + uint32_t vlenb = simd_maxsz(desc); \ | 113 | |
42 | + uint32_t vta = vext_vta(desc); \ | 114 | /* Vector Floating-Point Classify Instruction */ |
43 | uint32_t i; \ | 115 | -#define OPIVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ |
44 | TD s1 = *((TD *)vs1 + HD(0)); \ | 116 | -static void do_##NAME(void *vd, void *vs2, int i) \ |
45 | \ | 117 | -{ \ |
46 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, \ | 118 | - TX2 s2 = *((T2 *)vs2 + HS2(i)); \ |
47 | } \ | 119 | - *((TD *)vd + HD(i)) = OP(s2); \ |
48 | *((TD *)vd + HD(0)) = s1; \ | 120 | -} |
49 | env->vstart = 0; \ | 121 | - |
50 | + /* set tail elements to 1s */ \ | 122 | -#define GEN_VEXT_V(NAME, ESZ) \ |
51 | + vext_set_elems_1s(vd, vta, esz, vlenb); \ | 123 | -void HELPER(NAME)(void *vd, void *v0, void *vs2, \ |
52 | } | 124 | - CPURISCVState *env, uint32_t desc) \ |
53 | 125 | -{ \ | |
54 | /* Unordered sum */ | 126 | - uint32_t vm = vext_vm(desc); \ |
55 | @@ -XXX,XX +XXX,XX @@ void HELPER(vfwredsum_vs_h)(void *vd, void *v0, void *vs1, | 127 | - uint32_t vl = env->vl; \ |
128 | - uint32_t total_elems = \ | ||
129 | - vext_get_total_elems(env, desc, ESZ); \ | ||
130 | - uint32_t vta = vext_vta(desc); \ | ||
131 | - uint32_t vma = vext_vma(desc); \ | ||
132 | - uint32_t i; \ | ||
133 | - \ | ||
134 | - for (i = env->vstart; i < vl; i++) { \ | ||
135 | - if (!vm && !vext_elem_mask(v0, i)) { \ | ||
136 | - /* set masked-off elements to 1s */ \ | ||
137 | - vext_set_elems_1s(vd, vma, i * ESZ, \ | ||
138 | - (i + 1) * ESZ); \ | ||
139 | - continue; \ | ||
140 | - } \ | ||
141 | - do_##NAME(vd, vs2, i); \ | ||
142 | - } \ | ||
143 | - env->vstart = 0; \ | ||
144 | - /* set tail elements to 1s */ \ | ||
145 | - vext_set_elems_1s(vd, vta, vl * ESZ, \ | ||
146 | - total_elems * ESZ); \ | ||
147 | -} | ||
148 | - | ||
149 | target_ulong fclass_h(uint64_t frs1) | ||
56 | { | 150 | { |
57 | uint32_t vm = vext_vm(desc); | 151 | float16 f = frs1; |
58 | uint32_t vl = env->vl; | ||
59 | + uint32_t esz = sizeof(uint32_t); | ||
60 | + uint32_t vlenb = simd_maxsz(desc); | ||
61 | + uint32_t vta = vext_vta(desc); | ||
62 | uint32_t i; | ||
63 | uint32_t s1 = *((uint32_t *)vs1 + H4(0)); | ||
64 | |||
65 | @@ -XXX,XX +XXX,XX @@ void HELPER(vfwredsum_vs_h)(void *vd, void *v0, void *vs1, | ||
66 | } | ||
67 | *((uint32_t *)vd + H4(0)) = s1; | ||
68 | env->vstart = 0; | ||
69 | + /* set tail elements to 1s */ | ||
70 | + vext_set_elems_1s(vd, vta, esz, vlenb); | ||
71 | } | ||
72 | |||
73 | void HELPER(vfwredsum_vs_w)(void *vd, void *v0, void *vs1, | ||
74 | @@ -XXX,XX +XXX,XX @@ void HELPER(vfwredsum_vs_w)(void *vd, void *v0, void *vs1, | ||
75 | { | ||
76 | uint32_t vm = vext_vm(desc); | ||
77 | uint32_t vl = env->vl; | ||
78 | + uint32_t esz = sizeof(uint64_t); | ||
79 | + uint32_t vlenb = simd_maxsz(desc); | ||
80 | + uint32_t vta = vext_vta(desc); | ||
81 | uint32_t i; | ||
82 | uint64_t s1 = *((uint64_t *)vs1); | ||
83 | |||
84 | @@ -XXX,XX +XXX,XX @@ void HELPER(vfwredsum_vs_w)(void *vd, void *v0, void *vs1, | ||
85 | } | ||
86 | *((uint64_t *)vd) = s1; | ||
87 | env->vstart = 0; | ||
88 | + /* set tail elements to 1s */ | ||
89 | + vext_set_elems_1s(vd, vta, esz, vlenb); | ||
90 | } | ||
91 | |||
92 | /* | ||
93 | -- | 152 | -- |
94 | 2.36.1 | 153 | 2.41.0 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Dickon Hood <dickon.hood@codethink.co.uk> | ||
1 | 2 | ||
3 | This commit adds support for the Zvbb vector-crypto extension, which | ||
4 | consists of the following instructions: | ||
5 | |||
6 | * vrol.[vv,vx] | ||
7 | * vror.[vv,vx,vi] | ||
8 | * vbrev8.v | ||
9 | * vrev8.v | ||
10 | * vandn.[vv,vx] | ||
11 | * vbrev.v | ||
12 | * vclz.v | ||
13 | * vctz.v | ||
14 | * vcpop.v | ||
15 | * vwsll.[vv,vx,vi] | ||
16 | |||
17 | Translation functions are defined in | ||
18 | `target/riscv/insn_trans/trans_rvvk.c.inc` and helpers are defined in | ||
19 | `target/riscv/vcrypto_helper.c`. | ||
20 | |||
21 | Co-authored-by: Nazar Kazakov <nazar.kazakov@codethink.co.uk> | ||
22 | Co-authored-by: William Salmon <will.salmon@codethink.co.uk> | ||
23 | Co-authored-by: Kiran Ostrolenk <kiran.ostrolenk@codethink.co.uk> | ||
24 | [max.chou@sifive.com: Fix imm mode of vror.vi] | ||
25 | Signed-off-by: Nazar Kazakov <nazar.kazakov@codethink.co.uk> | ||
26 | Signed-off-by: William Salmon <will.salmon@codethink.co.uk> | ||
27 | Signed-off-by: Kiran Ostrolenk <kiran.ostrolenk@codethink.co.uk> | ||
28 | Signed-off-by: Dickon Hood <dickon.hood@codethink.co.uk> | ||
29 | Signed-off-by: Max Chou <max.chou@sifive.com> | ||
30 | Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> | ||
31 | [max.chou@sifive.com: Exposed x-zvbb property] | ||
32 | Message-ID: <20230711165917.2629866-9-max.chou@sifive.com> | ||
33 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | ||
34 | --- | ||
35 | target/riscv/cpu_cfg.h | 1 + | ||
36 | target/riscv/helper.h | 62 +++++++++ | ||
37 | target/riscv/insn32.decode | 20 +++ | ||
38 | target/riscv/cpu.c | 12 ++ | ||
39 | target/riscv/vcrypto_helper.c | 138 +++++++++++++++++++ | ||
40 | target/riscv/insn_trans/trans_rvvk.c.inc | 164 +++++++++++++++++++++++ | ||
41 | 6 files changed, 397 insertions(+) | ||
42 | |||
43 | diff --git a/target/riscv/cpu_cfg.h b/target/riscv/cpu_cfg.h | ||
44 | index XXXXXXX..XXXXXXX 100644 | ||
45 | --- a/target/riscv/cpu_cfg.h | ||
46 | +++ b/target/riscv/cpu_cfg.h | ||
47 | @@ -XXX,XX +XXX,XX @@ struct RISCVCPUConfig { | ||
48 | bool ext_zve32f; | ||
49 | bool ext_zve64f; | ||
50 | bool ext_zve64d; | ||
51 | + bool ext_zvbb; | ||
52 | bool ext_zvbc; | ||
53 | bool ext_zmmul; | ||
54 | bool ext_zvfbfmin; | ||
55 | diff --git a/target/riscv/helper.h b/target/riscv/helper.h | ||
56 | index XXXXXXX..XXXXXXX 100644 | ||
57 | --- a/target/riscv/helper.h | ||
58 | +++ b/target/riscv/helper.h | ||
59 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vclmul_vv, void, ptr, ptr, ptr, ptr, env, i32) | ||
60 | DEF_HELPER_6(vclmul_vx, void, ptr, ptr, tl, ptr, env, i32) | ||
61 | DEF_HELPER_6(vclmulh_vv, void, ptr, ptr, ptr, ptr, env, i32) | ||
62 | DEF_HELPER_6(vclmulh_vx, void, ptr, ptr, tl, ptr, env, i32) | ||
63 | + | ||
64 | +DEF_HELPER_6(vror_vv_b, void, ptr, ptr, ptr, ptr, env, i32) | ||
65 | +DEF_HELPER_6(vror_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | ||
66 | +DEF_HELPER_6(vror_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
67 | +DEF_HELPER_6(vror_vv_d, void, ptr, ptr, ptr, ptr, env, i32) | ||
68 | + | ||
69 | +DEF_HELPER_6(vror_vx_b, void, ptr, ptr, tl, ptr, env, i32) | ||
70 | +DEF_HELPER_6(vror_vx_h, void, ptr, ptr, tl, ptr, env, i32) | ||
71 | +DEF_HELPER_6(vror_vx_w, void, ptr, ptr, tl, ptr, env, i32) | ||
72 | +DEF_HELPER_6(vror_vx_d, void, ptr, ptr, tl, ptr, env, i32) | ||
73 | + | ||
74 | +DEF_HELPER_6(vrol_vv_b, void, ptr, ptr, ptr, ptr, env, i32) | ||
75 | +DEF_HELPER_6(vrol_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | ||
76 | +DEF_HELPER_6(vrol_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
77 | +DEF_HELPER_6(vrol_vv_d, void, ptr, ptr, ptr, ptr, env, i32) | ||
78 | + | ||
79 | +DEF_HELPER_6(vrol_vx_b, void, ptr, ptr, tl, ptr, env, i32) | ||
80 | +DEF_HELPER_6(vrol_vx_h, void, ptr, ptr, tl, ptr, env, i32) | ||
81 | +DEF_HELPER_6(vrol_vx_w, void, ptr, ptr, tl, ptr, env, i32) | ||
82 | +DEF_HELPER_6(vrol_vx_d, void, ptr, ptr, tl, ptr, env, i32) | ||
83 | + | ||
84 | +DEF_HELPER_5(vrev8_v_b, void, ptr, ptr, ptr, env, i32) | ||
85 | +DEF_HELPER_5(vrev8_v_h, void, ptr, ptr, ptr, env, i32) | ||
86 | +DEF_HELPER_5(vrev8_v_w, void, ptr, ptr, ptr, env, i32) | ||
87 | +DEF_HELPER_5(vrev8_v_d, void, ptr, ptr, ptr, env, i32) | ||
88 | +DEF_HELPER_5(vbrev8_v_b, void, ptr, ptr, ptr, env, i32) | ||
89 | +DEF_HELPER_5(vbrev8_v_h, void, ptr, ptr, ptr, env, i32) | ||
90 | +DEF_HELPER_5(vbrev8_v_w, void, ptr, ptr, ptr, env, i32) | ||
91 | +DEF_HELPER_5(vbrev8_v_d, void, ptr, ptr, ptr, env, i32) | ||
92 | +DEF_HELPER_5(vbrev_v_b, void, ptr, ptr, ptr, env, i32) | ||
93 | +DEF_HELPER_5(vbrev_v_h, void, ptr, ptr, ptr, env, i32) | ||
94 | +DEF_HELPER_5(vbrev_v_w, void, ptr, ptr, ptr, env, i32) | ||
95 | +DEF_HELPER_5(vbrev_v_d, void, ptr, ptr, ptr, env, i32) | ||
96 | + | ||
97 | +DEF_HELPER_5(vclz_v_b, void, ptr, ptr, ptr, env, i32) | ||
98 | +DEF_HELPER_5(vclz_v_h, void, ptr, ptr, ptr, env, i32) | ||
99 | +DEF_HELPER_5(vclz_v_w, void, ptr, ptr, ptr, env, i32) | ||
100 | +DEF_HELPER_5(vclz_v_d, void, ptr, ptr, ptr, env, i32) | ||
101 | +DEF_HELPER_5(vctz_v_b, void, ptr, ptr, ptr, env, i32) | ||
102 | +DEF_HELPER_5(vctz_v_h, void, ptr, ptr, ptr, env, i32) | ||
103 | +DEF_HELPER_5(vctz_v_w, void, ptr, ptr, ptr, env, i32) | ||
104 | +DEF_HELPER_5(vctz_v_d, void, ptr, ptr, ptr, env, i32) | ||
105 | +DEF_HELPER_5(vcpop_v_b, void, ptr, ptr, ptr, env, i32) | ||
106 | +DEF_HELPER_5(vcpop_v_h, void, ptr, ptr, ptr, env, i32) | ||
107 | +DEF_HELPER_5(vcpop_v_w, void, ptr, ptr, ptr, env, i32) | ||
108 | +DEF_HELPER_5(vcpop_v_d, void, ptr, ptr, ptr, env, i32) | ||
109 | + | ||
110 | +DEF_HELPER_6(vwsll_vv_b, void, ptr, ptr, ptr, ptr, env, i32) | ||
111 | +DEF_HELPER_6(vwsll_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | ||
112 | +DEF_HELPER_6(vwsll_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
113 | +DEF_HELPER_6(vwsll_vx_b, void, ptr, ptr, tl, ptr, env, i32) | ||
114 | +DEF_HELPER_6(vwsll_vx_h, void, ptr, ptr, tl, ptr, env, i32) | ||
115 | +DEF_HELPER_6(vwsll_vx_w, void, ptr, ptr, tl, ptr, env, i32) | ||
116 | + | ||
117 | +DEF_HELPER_6(vandn_vv_b, void, ptr, ptr, ptr, ptr, env, i32) | ||
118 | +DEF_HELPER_6(vandn_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | ||
119 | +DEF_HELPER_6(vandn_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
120 | +DEF_HELPER_6(vandn_vv_d, void, ptr, ptr, ptr, ptr, env, i32) | ||
121 | +DEF_HELPER_6(vandn_vx_b, void, ptr, ptr, tl, ptr, env, i32) | ||
122 | +DEF_HELPER_6(vandn_vx_h, void, ptr, ptr, tl, ptr, env, i32) | ||
123 | +DEF_HELPER_6(vandn_vx_w, void, ptr, ptr, tl, ptr, env, i32) | ||
124 | +DEF_HELPER_6(vandn_vx_d, void, ptr, ptr, tl, ptr, env, i32) | ||
125 | diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode | ||
126 | index XXXXXXX..XXXXXXX 100644 | ||
127 | --- a/target/riscv/insn32.decode | ||
128 | +++ b/target/riscv/insn32.decode | ||
129 | @@ -XXX,XX +XXX,XX @@ | ||
130 | %imm_u 12:s20 !function=ex_shift_12 | ||
131 | %imm_bs 30:2 !function=ex_shift_3 | ||
132 | %imm_rnum 20:4 | ||
133 | +%imm_z6 26:1 15:5 | ||
134 | |||
135 | # Argument sets: | ||
136 | &empty | ||
137 | @@ -XXX,XX +XXX,XX @@ | ||
138 | @r_vm ...... vm:1 ..... ..... ... ..... ....... &rmrr %rs2 %rs1 %rd | ||
139 | @r_vm_1 ...... . ..... ..... ... ..... ....... &rmrr vm=1 %rs2 %rs1 %rd | ||
140 | @r_vm_0 ...... . ..... ..... ... ..... ....... &rmrr vm=0 %rs2 %rs1 %rd | ||
141 | +@r2_zimm6 ..... . vm:1 ..... ..... ... ..... ....... &rmrr %rs2 rs1=%imm_z6 %rd | ||
142 | @r2_zimm11 . zimm:11 ..... ... ..... ....... %rs1 %rd | ||
143 | @r2_zimm10 .. zimm:10 ..... ... ..... ....... %rs1 %rd | ||
144 | @r2_s ....... ..... ..... ... ..... ....... %rs2 %rs1 | ||
145 | @@ -XXX,XX +XXX,XX @@ vclmul_vv 001100 . ..... ..... 010 ..... 1010111 @r_vm | ||
146 | vclmul_vx 001100 . ..... ..... 110 ..... 1010111 @r_vm | ||
147 | vclmulh_vv 001101 . ..... ..... 010 ..... 1010111 @r_vm | ||
148 | vclmulh_vx 001101 . ..... ..... 110 ..... 1010111 @r_vm | ||
149 | + | ||
150 | +# *** Zvbb vector crypto extension *** | ||
151 | +vrol_vv 010101 . ..... ..... 000 ..... 1010111 @r_vm | ||
152 | +vrol_vx 010101 . ..... ..... 100 ..... 1010111 @r_vm | ||
153 | +vror_vv 010100 . ..... ..... 000 ..... 1010111 @r_vm | ||
154 | +vror_vx 010100 . ..... ..... 100 ..... 1010111 @r_vm | ||
155 | +vror_vi 01010. . ..... ..... 011 ..... 1010111 @r2_zimm6 | ||
156 | +vbrev8_v 010010 . ..... 01000 010 ..... 1010111 @r2_vm | ||
157 | +vrev8_v 010010 . ..... 01001 010 ..... 1010111 @r2_vm | ||
158 | +vandn_vv 000001 . ..... ..... 000 ..... 1010111 @r_vm | ||
159 | +vandn_vx 000001 . ..... ..... 100 ..... 1010111 @r_vm | ||
160 | +vbrev_v 010010 . ..... 01010 010 ..... 1010111 @r2_vm | ||
161 | +vclz_v 010010 . ..... 01100 010 ..... 1010111 @r2_vm | ||
162 | +vctz_v 010010 . ..... 01101 010 ..... 1010111 @r2_vm | ||
163 | +vcpop_v 010010 . ..... 01110 010 ..... 1010111 @r2_vm | ||
164 | +vwsll_vv 110101 . ..... ..... 000 ..... 1010111 @r_vm | ||
165 | +vwsll_vx 110101 . ..... ..... 100 ..... 1010111 @r_vm | ||
166 | +vwsll_vi 110101 . ..... ..... 011 ..... 1010111 @r_vm | ||
167 | diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c | ||
168 | index XXXXXXX..XXXXXXX 100644 | ||
169 | --- a/target/riscv/cpu.c | ||
170 | +++ b/target/riscv/cpu.c | ||
171 | @@ -XXX,XX +XXX,XX @@ static const struct isa_ext_data isa_edata_arr[] = { | ||
172 | ISA_EXT_DATA_ENTRY(zksed, PRIV_VERSION_1_12_0, ext_zksed), | ||
173 | ISA_EXT_DATA_ENTRY(zksh, PRIV_VERSION_1_12_0, ext_zksh), | ||
174 | ISA_EXT_DATA_ENTRY(zkt, PRIV_VERSION_1_12_0, ext_zkt), | ||
175 | + ISA_EXT_DATA_ENTRY(zvbb, PRIV_VERSION_1_12_0, ext_zvbb), | ||
176 | ISA_EXT_DATA_ENTRY(zvbc, PRIV_VERSION_1_12_0, ext_zvbc), | ||
177 | ISA_EXT_DATA_ENTRY(zve32f, PRIV_VERSION_1_10_0, ext_zve32f), | ||
178 | ISA_EXT_DATA_ENTRY(zve64f, PRIV_VERSION_1_10_0, ext_zve64f), | ||
179 | @@ -XXX,XX +XXX,XX @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp) | ||
180 | return; | ||
181 | } | ||
182 | |||
183 | + /* | ||
184 | + * In principle Zve*x would also suffice here, were they supported | ||
185 | + * in qemu | ||
186 | + */ | ||
187 | + if (cpu->cfg.ext_zvbb && !cpu->cfg.ext_zve32f) { | ||
188 | + error_setg(errp, | ||
189 | + "Vector crypto extensions require V or Zve* extensions"); | ||
190 | + return; | ||
191 | + } | ||
192 | + | ||
193 | if (cpu->cfg.ext_zvbc && !cpu->cfg.ext_zve64f) { | ||
194 | error_setg(errp, "Zvbc extension requires V or Zve64{f,d} extensions"); | ||
195 | return; | ||
196 | @@ -XXX,XX +XXX,XX @@ static Property riscv_cpu_extensions[] = { | ||
197 | DEFINE_PROP_BOOL("x-zvfbfwma", RISCVCPU, cfg.ext_zvfbfwma, false), | ||
198 | |||
199 | /* Vector cryptography extensions */ | ||
200 | + DEFINE_PROP_BOOL("x-zvbb", RISCVCPU, cfg.ext_zvbb, false), | ||
201 | DEFINE_PROP_BOOL("x-zvbc", RISCVCPU, cfg.ext_zvbc, false), | ||
202 | |||
203 | DEFINE_PROP_END_OF_LIST(), | ||
204 | diff --git a/target/riscv/vcrypto_helper.c b/target/riscv/vcrypto_helper.c | ||
205 | index XXXXXXX..XXXXXXX 100644 | ||
206 | --- a/target/riscv/vcrypto_helper.c | ||
207 | +++ b/target/riscv/vcrypto_helper.c | ||
208 | @@ -XXX,XX +XXX,XX @@ | ||
209 | #include "qemu/osdep.h" | ||
210 | #include "qemu/host-utils.h" | ||
211 | #include "qemu/bitops.h" | ||
212 | +#include "qemu/bswap.h" | ||
213 | #include "cpu.h" | ||
214 | #include "exec/memop.h" | ||
215 | #include "exec/exec-all.h" | ||
216 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2, vclmulh_vv, OP_UUU_D, H8, H8, H8, clmulh64) | ||
217 | GEN_VEXT_VV(vclmulh_vv, 8) | ||
218 | RVVCALL(OPIVX2, vclmulh_vx, OP_UUU_D, H8, H8, clmulh64) | ||
219 | GEN_VEXT_VX(vclmulh_vx, 8) | ||
220 | + | ||
221 | +RVVCALL(OPIVV2, vror_vv_b, OP_UUU_B, H1, H1, H1, ror8) | ||
222 | +RVVCALL(OPIVV2, vror_vv_h, OP_UUU_H, H2, H2, H2, ror16) | ||
223 | +RVVCALL(OPIVV2, vror_vv_w, OP_UUU_W, H4, H4, H4, ror32) | ||
224 | +RVVCALL(OPIVV2, vror_vv_d, OP_UUU_D, H8, H8, H8, ror64) | ||
225 | +GEN_VEXT_VV(vror_vv_b, 1) | ||
226 | +GEN_VEXT_VV(vror_vv_h, 2) | ||
227 | +GEN_VEXT_VV(vror_vv_w, 4) | ||
228 | +GEN_VEXT_VV(vror_vv_d, 8) | ||
229 | + | ||
230 | +RVVCALL(OPIVX2, vror_vx_b, OP_UUU_B, H1, H1, ror8) | ||
231 | +RVVCALL(OPIVX2, vror_vx_h, OP_UUU_H, H2, H2, ror16) | ||
232 | +RVVCALL(OPIVX2, vror_vx_w, OP_UUU_W, H4, H4, ror32) | ||
233 | +RVVCALL(OPIVX2, vror_vx_d, OP_UUU_D, H8, H8, ror64) | ||
234 | +GEN_VEXT_VX(vror_vx_b, 1) | ||
235 | +GEN_VEXT_VX(vror_vx_h, 2) | ||
236 | +GEN_VEXT_VX(vror_vx_w, 4) | ||
237 | +GEN_VEXT_VX(vror_vx_d, 8) | ||
238 | + | ||
239 | +RVVCALL(OPIVV2, vrol_vv_b, OP_UUU_B, H1, H1, H1, rol8) | ||
240 | +RVVCALL(OPIVV2, vrol_vv_h, OP_UUU_H, H2, H2, H2, rol16) | ||
241 | +RVVCALL(OPIVV2, vrol_vv_w, OP_UUU_W, H4, H4, H4, rol32) | ||
242 | +RVVCALL(OPIVV2, vrol_vv_d, OP_UUU_D, H8, H8, H8, rol64) | ||
243 | +GEN_VEXT_VV(vrol_vv_b, 1) | ||
244 | +GEN_VEXT_VV(vrol_vv_h, 2) | ||
245 | +GEN_VEXT_VV(vrol_vv_w, 4) | ||
246 | +GEN_VEXT_VV(vrol_vv_d, 8) | ||
247 | + | ||
248 | +RVVCALL(OPIVX2, vrol_vx_b, OP_UUU_B, H1, H1, rol8) | ||
249 | +RVVCALL(OPIVX2, vrol_vx_h, OP_UUU_H, H2, H2, rol16) | ||
250 | +RVVCALL(OPIVX2, vrol_vx_w, OP_UUU_W, H4, H4, rol32) | ||
251 | +RVVCALL(OPIVX2, vrol_vx_d, OP_UUU_D, H8, H8, rol64) | ||
252 | +GEN_VEXT_VX(vrol_vx_b, 1) | ||
253 | +GEN_VEXT_VX(vrol_vx_h, 2) | ||
254 | +GEN_VEXT_VX(vrol_vx_w, 4) | ||
255 | +GEN_VEXT_VX(vrol_vx_d, 8) | ||
256 | + | ||
257 | +static uint64_t brev8(uint64_t val) | ||
258 | +{ | ||
259 | + val = ((val & 0x5555555555555555ull) << 1) | | ||
260 | + ((val & 0xAAAAAAAAAAAAAAAAull) >> 1); | ||
261 | + val = ((val & 0x3333333333333333ull) << 2) | | ||
262 | + ((val & 0xCCCCCCCCCCCCCCCCull) >> 2); | ||
263 | + val = ((val & 0x0F0F0F0F0F0F0F0Full) << 4) | | ||
264 | + ((val & 0xF0F0F0F0F0F0F0F0ull) >> 4); | ||
265 | + | ||
266 | + return val; | ||
267 | +} | ||
268 | + | ||
269 | +RVVCALL(OPIVV1, vbrev8_v_b, OP_UU_B, H1, H1, brev8) | ||
270 | +RVVCALL(OPIVV1, vbrev8_v_h, OP_UU_H, H2, H2, brev8) | ||
271 | +RVVCALL(OPIVV1, vbrev8_v_w, OP_UU_W, H4, H4, brev8) | ||
272 | +RVVCALL(OPIVV1, vbrev8_v_d, OP_UU_D, H8, H8, brev8) | ||
273 | +GEN_VEXT_V(vbrev8_v_b, 1) | ||
274 | +GEN_VEXT_V(vbrev8_v_h, 2) | ||
275 | +GEN_VEXT_V(vbrev8_v_w, 4) | ||
276 | +GEN_VEXT_V(vbrev8_v_d, 8) | ||
277 | + | ||
278 | +#define DO_IDENTITY(a) (a) | ||
279 | +RVVCALL(OPIVV1, vrev8_v_b, OP_UU_B, H1, H1, DO_IDENTITY) | ||
280 | +RVVCALL(OPIVV1, vrev8_v_h, OP_UU_H, H2, H2, bswap16) | ||
281 | +RVVCALL(OPIVV1, vrev8_v_w, OP_UU_W, H4, H4, bswap32) | ||
282 | +RVVCALL(OPIVV1, vrev8_v_d, OP_UU_D, H8, H8, bswap64) | ||
283 | +GEN_VEXT_V(vrev8_v_b, 1) | ||
284 | +GEN_VEXT_V(vrev8_v_h, 2) | ||
285 | +GEN_VEXT_V(vrev8_v_w, 4) | ||
286 | +GEN_VEXT_V(vrev8_v_d, 8) | ||
287 | + | ||
288 | +#define DO_ANDN(a, b) ((a) & ~(b)) | ||
289 | +RVVCALL(OPIVV2, vandn_vv_b, OP_UUU_B, H1, H1, H1, DO_ANDN) | ||
290 | +RVVCALL(OPIVV2, vandn_vv_h, OP_UUU_H, H2, H2, H2, DO_ANDN) | ||
291 | +RVVCALL(OPIVV2, vandn_vv_w, OP_UUU_W, H4, H4, H4, DO_ANDN) | ||
292 | +RVVCALL(OPIVV2, vandn_vv_d, OP_UUU_D, H8, H8, H8, DO_ANDN) | ||
293 | +GEN_VEXT_VV(vandn_vv_b, 1) | ||
294 | +GEN_VEXT_VV(vandn_vv_h, 2) | ||
295 | +GEN_VEXT_VV(vandn_vv_w, 4) | ||
296 | +GEN_VEXT_VV(vandn_vv_d, 8) | ||
297 | + | ||
298 | +RVVCALL(OPIVX2, vandn_vx_b, OP_UUU_B, H1, H1, DO_ANDN) | ||
299 | +RVVCALL(OPIVX2, vandn_vx_h, OP_UUU_H, H2, H2, DO_ANDN) | ||
300 | +RVVCALL(OPIVX2, vandn_vx_w, OP_UUU_W, H4, H4, DO_ANDN) | ||
301 | +RVVCALL(OPIVX2, vandn_vx_d, OP_UUU_D, H8, H8, DO_ANDN) | ||
302 | +GEN_VEXT_VX(vandn_vx_b, 1) | ||
303 | +GEN_VEXT_VX(vandn_vx_h, 2) | ||
304 | +GEN_VEXT_VX(vandn_vx_w, 4) | ||
305 | +GEN_VEXT_VX(vandn_vx_d, 8) | ||
306 | + | ||
307 | +RVVCALL(OPIVV1, vbrev_v_b, OP_UU_B, H1, H1, revbit8) | ||
308 | +RVVCALL(OPIVV1, vbrev_v_h, OP_UU_H, H2, H2, revbit16) | ||
309 | +RVVCALL(OPIVV1, vbrev_v_w, OP_UU_W, H4, H4, revbit32) | ||
310 | +RVVCALL(OPIVV1, vbrev_v_d, OP_UU_D, H8, H8, revbit64) | ||
311 | +GEN_VEXT_V(vbrev_v_b, 1) | ||
312 | +GEN_VEXT_V(vbrev_v_h, 2) | ||
313 | +GEN_VEXT_V(vbrev_v_w, 4) | ||
314 | +GEN_VEXT_V(vbrev_v_d, 8) | ||
315 | + | ||
316 | +RVVCALL(OPIVV1, vclz_v_b, OP_UU_B, H1, H1, clz8) | ||
317 | +RVVCALL(OPIVV1, vclz_v_h, OP_UU_H, H2, H2, clz16) | ||
318 | +RVVCALL(OPIVV1, vclz_v_w, OP_UU_W, H4, H4, clz32) | ||
319 | +RVVCALL(OPIVV1, vclz_v_d, OP_UU_D, H8, H8, clz64) | ||
320 | +GEN_VEXT_V(vclz_v_b, 1) | ||
321 | +GEN_VEXT_V(vclz_v_h, 2) | ||
322 | +GEN_VEXT_V(vclz_v_w, 4) | ||
323 | +GEN_VEXT_V(vclz_v_d, 8) | ||
324 | + | ||
325 | +RVVCALL(OPIVV1, vctz_v_b, OP_UU_B, H1, H1, ctz8) | ||
326 | +RVVCALL(OPIVV1, vctz_v_h, OP_UU_H, H2, H2, ctz16) | ||
327 | +RVVCALL(OPIVV1, vctz_v_w, OP_UU_W, H4, H4, ctz32) | ||
328 | +RVVCALL(OPIVV1, vctz_v_d, OP_UU_D, H8, H8, ctz64) | ||
329 | +GEN_VEXT_V(vctz_v_b, 1) | ||
330 | +GEN_VEXT_V(vctz_v_h, 2) | ||
331 | +GEN_VEXT_V(vctz_v_w, 4) | ||
332 | +GEN_VEXT_V(vctz_v_d, 8) | ||
333 | + | ||
334 | +RVVCALL(OPIVV1, vcpop_v_b, OP_UU_B, H1, H1, ctpop8) | ||
335 | +RVVCALL(OPIVV1, vcpop_v_h, OP_UU_H, H2, H2, ctpop16) | ||
336 | +RVVCALL(OPIVV1, vcpop_v_w, OP_UU_W, H4, H4, ctpop32) | ||
337 | +RVVCALL(OPIVV1, vcpop_v_d, OP_UU_D, H8, H8, ctpop64) | ||
338 | +GEN_VEXT_V(vcpop_v_b, 1) | ||
339 | +GEN_VEXT_V(vcpop_v_h, 2) | ||
340 | +GEN_VEXT_V(vcpop_v_w, 4) | ||
341 | +GEN_VEXT_V(vcpop_v_d, 8) | ||
342 | + | ||
343 | +#define DO_SLL(N, M) (N << (M & (sizeof(N) * 8 - 1))) | ||
344 | +RVVCALL(OPIVV2, vwsll_vv_b, WOP_UUU_B, H2, H1, H1, DO_SLL) | ||
345 | +RVVCALL(OPIVV2, vwsll_vv_h, WOP_UUU_H, H4, H2, H2, DO_SLL) | ||
346 | +RVVCALL(OPIVV2, vwsll_vv_w, WOP_UUU_W, H8, H4, H4, DO_SLL) | ||
347 | +GEN_VEXT_VV(vwsll_vv_b, 2) | ||
348 | +GEN_VEXT_VV(vwsll_vv_h, 4) | ||
349 | +GEN_VEXT_VV(vwsll_vv_w, 8) | ||
350 | + | ||
351 | +RVVCALL(OPIVX2, vwsll_vx_b, WOP_UUU_B, H2, H1, DO_SLL) | ||
352 | +RVVCALL(OPIVX2, vwsll_vx_h, WOP_UUU_H, H4, H2, DO_SLL) | ||
353 | +RVVCALL(OPIVX2, vwsll_vx_w, WOP_UUU_W, H8, H4, DO_SLL) | ||
354 | +GEN_VEXT_VX(vwsll_vx_b, 2) | ||
355 | +GEN_VEXT_VX(vwsll_vx_h, 4) | ||
356 | +GEN_VEXT_VX(vwsll_vx_w, 8) | ||
357 | diff --git a/target/riscv/insn_trans/trans_rvvk.c.inc b/target/riscv/insn_trans/trans_rvvk.c.inc | ||
358 | index XXXXXXX..XXXXXXX 100644 | ||
359 | --- a/target/riscv/insn_trans/trans_rvvk.c.inc | ||
360 | +++ b/target/riscv/insn_trans/trans_rvvk.c.inc | ||
361 | @@ -XXX,XX +XXX,XX @@ static bool vclmul_vx_check(DisasContext *s, arg_rmrr *a) | ||
362 | |||
363 | GEN_VX_MASKED_TRANS(vclmul_vx, vclmul_vx_check) | ||
364 | GEN_VX_MASKED_TRANS(vclmulh_vx, vclmul_vx_check) | ||
365 | + | ||
366 | +/* | ||
367 | + * Zvbb | ||
368 | + */ | ||
369 | + | ||
370 | +#define GEN_OPIVI_GVEC_TRANS_CHECK(NAME, IMM_MODE, OPIVX, SUF, CHECK) \ | ||
371 | + static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ | ||
372 | + { \ | ||
373 | + if (CHECK(s, a)) { \ | ||
374 | + static gen_helper_opivx *const fns[4] = { \ | ||
375 | + gen_helper_##OPIVX##_b, \ | ||
376 | + gen_helper_##OPIVX##_h, \ | ||
377 | + gen_helper_##OPIVX##_w, \ | ||
378 | + gen_helper_##OPIVX##_d, \ | ||
379 | + }; \ | ||
380 | + return do_opivi_gvec(s, a, tcg_gen_gvec_##SUF, fns[s->sew], \ | ||
381 | + IMM_MODE); \ | ||
382 | + } \ | ||
383 | + return false; \ | ||
384 | + } | ||
385 | + | ||
386 | +#define GEN_OPIVV_GVEC_TRANS_CHECK(NAME, SUF, CHECK) \ | ||
387 | + static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ | ||
388 | + { \ | ||
389 | + if (CHECK(s, a)) { \ | ||
390 | + static gen_helper_gvec_4_ptr *const fns[4] = { \ | ||
391 | + gen_helper_##NAME##_b, \ | ||
392 | + gen_helper_##NAME##_h, \ | ||
393 | + gen_helper_##NAME##_w, \ | ||
394 | + gen_helper_##NAME##_d, \ | ||
395 | + }; \ | ||
396 | + return do_opivv_gvec(s, a, tcg_gen_gvec_##SUF, fns[s->sew]); \ | ||
397 | + } \ | ||
398 | + return false; \ | ||
399 | + } | ||
400 | + | ||
401 | +#define GEN_OPIVX_GVEC_SHIFT_TRANS_CHECK(NAME, SUF, CHECK) \ | ||
402 | + static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ | ||
403 | + { \ | ||
404 | + if (CHECK(s, a)) { \ | ||
405 | + static gen_helper_opivx *const fns[4] = { \ | ||
406 | + gen_helper_##NAME##_b, \ | ||
407 | + gen_helper_##NAME##_h, \ | ||
408 | + gen_helper_##NAME##_w, \ | ||
409 | + gen_helper_##NAME##_d, \ | ||
410 | + }; \ | ||
411 | + return do_opivx_gvec_shift(s, a, tcg_gen_gvec_##SUF, \ | ||
412 | + fns[s->sew]); \ | ||
413 | + } \ | ||
414 | + return false; \ | ||
415 | + } | ||
416 | + | ||
417 | +static bool zvbb_vv_check(DisasContext *s, arg_rmrr *a) | ||
418 | +{ | ||
419 | + return opivv_check(s, a) && s->cfg_ptr->ext_zvbb == true; | ||
420 | +} | ||
421 | + | ||
422 | +static bool zvbb_vx_check(DisasContext *s, arg_rmrr *a) | ||
423 | +{ | ||
424 | + return opivx_check(s, a) && s->cfg_ptr->ext_zvbb == true; | ||
425 | +} | ||
426 | + | ||
427 | +/* vrol.v[vx] */ | ||
428 | +GEN_OPIVV_GVEC_TRANS_CHECK(vrol_vv, rotlv, zvbb_vv_check) | ||
429 | +GEN_OPIVX_GVEC_SHIFT_TRANS_CHECK(vrol_vx, rotls, zvbb_vx_check) | ||
430 | + | ||
431 | +/* vror.v[vxi] */ | ||
432 | +GEN_OPIVV_GVEC_TRANS_CHECK(vror_vv, rotrv, zvbb_vv_check) | ||
433 | +GEN_OPIVX_GVEC_SHIFT_TRANS_CHECK(vror_vx, rotrs, zvbb_vx_check) | ||
434 | +GEN_OPIVI_GVEC_TRANS_CHECK(vror_vi, IMM_TRUNC_SEW, vror_vx, rotri, zvbb_vx_check) | ||
435 | + | ||
436 | +#define GEN_OPIVX_GVEC_TRANS_CHECK(NAME, SUF, CHECK) \ | ||
437 | + static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ | ||
438 | + { \ | ||
439 | + if (CHECK(s, a)) { \ | ||
440 | + static gen_helper_opivx *const fns[4] = { \ | ||
441 | + gen_helper_##NAME##_b, \ | ||
442 | + gen_helper_##NAME##_h, \ | ||
443 | + gen_helper_##NAME##_w, \ | ||
444 | + gen_helper_##NAME##_d, \ | ||
445 | + }; \ | ||
446 | + return do_opivx_gvec(s, a, tcg_gen_gvec_##SUF, fns[s->sew]); \ | ||
447 | + } \ | ||
448 | + return false; \ | ||
449 | + } | ||
450 | + | ||
451 | +/* vandn.v[vx] */ | ||
452 | +GEN_OPIVV_GVEC_TRANS_CHECK(vandn_vv, andc, zvbb_vv_check) | ||
453 | +GEN_OPIVX_GVEC_TRANS_CHECK(vandn_vx, andcs, zvbb_vx_check) | ||
454 | + | ||
455 | +#define GEN_OPIV_TRANS(NAME, CHECK) \ | ||
456 | + static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ | ||
457 | + { \ | ||
458 | + if (CHECK(s, a)) { \ | ||
459 | + uint32_t data = 0; \ | ||
460 | + static gen_helper_gvec_3_ptr *const fns[4] = { \ | ||
461 | + gen_helper_##NAME##_b, \ | ||
462 | + gen_helper_##NAME##_h, \ | ||
463 | + gen_helper_##NAME##_w, \ | ||
464 | + gen_helper_##NAME##_d, \ | ||
465 | + }; \ | ||
466 | + TCGLabel *over = gen_new_label(); \ | ||
467 | + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ | ||
468 | + \ | ||
469 | + data = FIELD_DP32(data, VDATA, VM, a->vm); \ | ||
470 | + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ | ||
471 | + data = FIELD_DP32(data, VDATA, VTA, s->vta); \ | ||
472 | + data = FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s); \ | ||
473 | + data = FIELD_DP32(data, VDATA, VMA, s->vma); \ | ||
474 | + tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ | ||
475 | + vreg_ofs(s, a->rs2), cpu_env, \ | ||
476 | + s->cfg_ptr->vlen / 8, s->cfg_ptr->vlen / 8, \ | ||
477 | + data, fns[s->sew]); \ | ||
478 | + mark_vs_dirty(s); \ | ||
479 | + gen_set_label(over); \ | ||
480 | + return true; \ | ||
481 | + } \ | ||
482 | + return false; \ | ||
483 | + } | ||
484 | + | ||
485 | +static bool zvbb_opiv_check(DisasContext *s, arg_rmr *a) | ||
486 | +{ | ||
487 | + return s->cfg_ptr->ext_zvbb == true && | ||
488 | + require_rvv(s) && | ||
489 | + vext_check_isa_ill(s) && | ||
490 | + vext_check_ss(s, a->rd, a->rs2, a->vm); | ||
491 | +} | ||
492 | + | ||
493 | +GEN_OPIV_TRANS(vbrev8_v, zvbb_opiv_check) | ||
494 | +GEN_OPIV_TRANS(vrev8_v, zvbb_opiv_check) | ||
495 | +GEN_OPIV_TRANS(vbrev_v, zvbb_opiv_check) | ||
496 | +GEN_OPIV_TRANS(vclz_v, zvbb_opiv_check) | ||
497 | +GEN_OPIV_TRANS(vctz_v, zvbb_opiv_check) | ||
498 | +GEN_OPIV_TRANS(vcpop_v, zvbb_opiv_check) | ||
499 | + | ||
500 | +static bool vwsll_vv_check(DisasContext *s, arg_rmrr *a) | ||
501 | +{ | ||
502 | + return s->cfg_ptr->ext_zvbb && opivv_widen_check(s, a); | ||
503 | +} | ||
504 | + | ||
505 | +static bool vwsll_vx_check(DisasContext *s, arg_rmrr *a) | ||
506 | +{ | ||
507 | + return s->cfg_ptr->ext_zvbb && opivx_widen_check(s, a); | ||
508 | +} | ||
509 | + | ||
510 | +/* OPIVI without GVEC IR */ | ||
511 | +#define GEN_OPIVI_WIDEN_TRANS(NAME, IMM_MODE, OPIVX, CHECK) \ | ||
512 | + static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ | ||
513 | + { \ | ||
514 | + if (CHECK(s, a)) { \ | ||
515 | + static gen_helper_opivx *const fns[3] = { \ | ||
516 | + gen_helper_##OPIVX##_b, \ | ||
517 | + gen_helper_##OPIVX##_h, \ | ||
518 | + gen_helper_##OPIVX##_w, \ | ||
519 | + }; \ | ||
520 | + return opivi_trans(a->rd, a->rs1, a->rs2, a->vm, fns[s->sew], s, \ | ||
521 | + IMM_MODE); \ | ||
522 | + } \ | ||
523 | + return false; \ | ||
524 | + } | ||
525 | + | ||
526 | +GEN_OPIVV_WIDEN_TRANS(vwsll_vv, vwsll_vv_check) | ||
527 | +GEN_OPIVX_WIDEN_TRANS(vwsll_vx, vwsll_vx_check) | ||
528 | +GEN_OPIVI_WIDEN_TRANS(vwsll_vi, IMM_ZX, vwsll_vx, vwsll_vx_check) | ||
529 | -- | ||
530 | 2.41.0 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Nazar Kazakov <nazar.kazakov@codethink.co.uk> | ||
1 | 2 | ||
3 | This commit adds support for the Zvkned vector-crypto extension, which | ||
4 | consists of the following instructions: | ||
5 | |||
6 | * vaesef.[vv,vs] | ||
7 | * vaesdf.[vv,vs] | ||
8 | * vaesdm.[vv,vs] | ||
9 | * vaesz.vs | ||
10 | * vaesem.[vv,vs] | ||
11 | * vaeskf1.vi | ||
12 | * vaeskf2.vi | ||
13 | |||
14 | Translation functions are defined in | ||
15 | `target/riscv/insn_trans/trans_rvvk.c.inc` and helpers are defined in | ||
16 | `target/riscv/vcrypto_helper.c`. | ||
17 | |||
18 | Co-authored-by: Lawrence Hunter <lawrence.hunter@codethink.co.uk> | ||
19 | Co-authored-by: William Salmon <will.salmon@codethink.co.uk> | ||
20 | [max.chou@sifive.com: Replaced vstart checking by TCG op] | ||
21 | Signed-off-by: Lawrence Hunter <lawrence.hunter@codethink.co.uk> | ||
22 | Signed-off-by: William Salmon <will.salmon@codethink.co.uk> | ||
23 | Signed-off-by: Nazar Kazakov <nazar.kazakov@codethink.co.uk> | ||
24 | Signed-off-by: Max Chou <max.chou@sifive.com> | ||
25 | Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> | ||
26 | [max.chou@sifive.com: Imported aes-round.h and exposed x-zvkned | ||
27 | property] | ||
28 | [max.chou@sifive.com: Fixed endian issues and replaced the vstart & vl | ||
29 | egs checking by helper function] | ||
30 | [max.chou@sifive.com: Replaced bswap32 calls in aes key expanding] | ||
31 | Message-ID: <20230711165917.2629866-10-max.chou@sifive.com> | ||
32 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | ||
33 | --- | ||
34 | target/riscv/cpu_cfg.h | 1 + | ||
35 | target/riscv/helper.h | 14 ++ | ||
36 | target/riscv/insn32.decode | 14 ++ | ||
37 | target/riscv/cpu.c | 4 +- | ||
38 | target/riscv/vcrypto_helper.c | 202 +++++++++++++++++++++++ | ||
39 | target/riscv/insn_trans/trans_rvvk.c.inc | 147 +++++++++++++++++ | ||
40 | 6 files changed, 381 insertions(+), 1 deletion(-) | ||
41 | |||
42 | diff --git a/target/riscv/cpu_cfg.h b/target/riscv/cpu_cfg.h | ||
43 | index XXXXXXX..XXXXXXX 100644 | ||
44 | --- a/target/riscv/cpu_cfg.h | ||
45 | +++ b/target/riscv/cpu_cfg.h | ||
46 | @@ -XXX,XX +XXX,XX @@ struct RISCVCPUConfig { | ||
47 | bool ext_zve64d; | ||
48 | bool ext_zvbb; | ||
49 | bool ext_zvbc; | ||
50 | + bool ext_zvkned; | ||
51 | bool ext_zmmul; | ||
52 | bool ext_zvfbfmin; | ||
53 | bool ext_zvfbfwma; | ||
54 | diff --git a/target/riscv/helper.h b/target/riscv/helper.h | ||
55 | index XXXXXXX..XXXXXXX 100644 | ||
56 | --- a/target/riscv/helper.h | ||
57 | +++ b/target/riscv/helper.h | ||
58 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vandn_vx_b, void, ptr, ptr, tl, ptr, env, i32) | ||
59 | DEF_HELPER_6(vandn_vx_h, void, ptr, ptr, tl, ptr, env, i32) | ||
60 | DEF_HELPER_6(vandn_vx_w, void, ptr, ptr, tl, ptr, env, i32) | ||
61 | DEF_HELPER_6(vandn_vx_d, void, ptr, ptr, tl, ptr, env, i32) | ||
62 | + | ||
63 | +DEF_HELPER_2(egs_check, void, i32, env) | ||
64 | + | ||
65 | +DEF_HELPER_4(vaesef_vv, void, ptr, ptr, env, i32) | ||
66 | +DEF_HELPER_4(vaesef_vs, void, ptr, ptr, env, i32) | ||
67 | +DEF_HELPER_4(vaesdf_vv, void, ptr, ptr, env, i32) | ||
68 | +DEF_HELPER_4(vaesdf_vs, void, ptr, ptr, env, i32) | ||
69 | +DEF_HELPER_4(vaesem_vv, void, ptr, ptr, env, i32) | ||
70 | +DEF_HELPER_4(vaesem_vs, void, ptr, ptr, env, i32) | ||
71 | +DEF_HELPER_4(vaesdm_vv, void, ptr, ptr, env, i32) | ||
72 | +DEF_HELPER_4(vaesdm_vs, void, ptr, ptr, env, i32) | ||
73 | +DEF_HELPER_4(vaesz_vs, void, ptr, ptr, env, i32) | ||
74 | +DEF_HELPER_5(vaeskf1_vi, void, ptr, ptr, i32, env, i32) | ||
75 | +DEF_HELPER_5(vaeskf2_vi, void, ptr, ptr, i32, env, i32) | ||
76 | diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode | ||
77 | index XXXXXXX..XXXXXXX 100644 | ||
78 | --- a/target/riscv/insn32.decode | ||
79 | +++ b/target/riscv/insn32.decode | ||
80 | @@ -XXX,XX +XXX,XX @@ | ||
81 | @r_rm ....... ..... ..... ... ..... ....... %rs2 %rs1 %rm %rd | ||
82 | @r2_rm ....... ..... ..... ... ..... ....... %rs1 %rm %rd | ||
83 | @r2 ....... ..... ..... ... ..... ....... &r2 %rs1 %rd | ||
84 | +@r2_vm_1 ...... . ..... ..... ... ..... ....... &rmr vm=1 %rs2 %rd | ||
85 | @r2_nfvm ... ... vm:1 ..... ..... ... ..... ....... &r2nfvm %nf %rs1 %rd | ||
86 | @r2_vm ...... vm:1 ..... ..... ... ..... ....... &rmr %rs2 %rd | ||
87 | @r1_vm ...... vm:1 ..... ..... ... ..... ....... %rd | ||
88 | @@ -XXX,XX +XXX,XX @@ vcpop_v 010010 . ..... 01110 010 ..... 1010111 @r2_vm | ||
89 | vwsll_vv 110101 . ..... ..... 000 ..... 1010111 @r_vm | ||
90 | vwsll_vx 110101 . ..... ..... 100 ..... 1010111 @r_vm | ||
91 | vwsll_vi 110101 . ..... ..... 011 ..... 1010111 @r_vm | ||
92 | + | ||
93 | +# *** Zvkned vector crypto extension *** | ||
94 | +vaesef_vv 101000 1 ..... 00011 010 ..... 1110111 @r2_vm_1 | ||
95 | +vaesef_vs 101001 1 ..... 00011 010 ..... 1110111 @r2_vm_1 | ||
96 | +vaesdf_vv 101000 1 ..... 00001 010 ..... 1110111 @r2_vm_1 | ||
97 | +vaesdf_vs 101001 1 ..... 00001 010 ..... 1110111 @r2_vm_1 | ||
98 | +vaesem_vv 101000 1 ..... 00010 010 ..... 1110111 @r2_vm_1 | ||
99 | +vaesem_vs 101001 1 ..... 00010 010 ..... 1110111 @r2_vm_1 | ||
100 | +vaesdm_vv 101000 1 ..... 00000 010 ..... 1110111 @r2_vm_1 | ||
101 | +vaesdm_vs 101001 1 ..... 00000 010 ..... 1110111 @r2_vm_1 | ||
102 | +vaesz_vs 101001 1 ..... 00111 010 ..... 1110111 @r2_vm_1 | ||
103 | +vaeskf1_vi 100010 1 ..... ..... 010 ..... 1110111 @r_vm_1 | ||
104 | +vaeskf2_vi 101010 1 ..... ..... 010 ..... 1110111 @r_vm_1 | ||
105 | diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c | ||
106 | index XXXXXXX..XXXXXXX 100644 | ||
107 | --- a/target/riscv/cpu.c | ||
108 | +++ b/target/riscv/cpu.c | ||
109 | @@ -XXX,XX +XXX,XX @@ static const struct isa_ext_data isa_edata_arr[] = { | ||
110 | ISA_EXT_DATA_ENTRY(zvfbfwma, PRIV_VERSION_1_12_0, ext_zvfbfwma), | ||
111 | ISA_EXT_DATA_ENTRY(zvfh, PRIV_VERSION_1_12_0, ext_zvfh), | ||
112 | ISA_EXT_DATA_ENTRY(zvfhmin, PRIV_VERSION_1_12_0, ext_zvfhmin), | ||
113 | + ISA_EXT_DATA_ENTRY(zvkned, PRIV_VERSION_1_12_0, ext_zvkned), | ||
114 | ISA_EXT_DATA_ENTRY(zhinx, PRIV_VERSION_1_12_0, ext_zhinx), | ||
115 | ISA_EXT_DATA_ENTRY(zhinxmin, PRIV_VERSION_1_12_0, ext_zhinxmin), | ||
116 | ISA_EXT_DATA_ENTRY(smaia, PRIV_VERSION_1_12_0, ext_smaia), | ||
117 | @@ -XXX,XX +XXX,XX @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp) | ||
118 | * In principle Zve*x would also suffice here, were they supported | ||
119 | * in qemu | ||
120 | */ | ||
121 | - if (cpu->cfg.ext_zvbb && !cpu->cfg.ext_zve32f) { | ||
122 | + if ((cpu->cfg.ext_zvbb || cpu->cfg.ext_zvkned) && !cpu->cfg.ext_zve32f) { | ||
123 | error_setg(errp, | ||
124 | "Vector crypto extensions require V or Zve* extensions"); | ||
125 | return; | ||
126 | @@ -XXX,XX +XXX,XX @@ static Property riscv_cpu_extensions[] = { | ||
127 | /* Vector cryptography extensions */ | ||
128 | DEFINE_PROP_BOOL("x-zvbb", RISCVCPU, cfg.ext_zvbb, false), | ||
129 | DEFINE_PROP_BOOL("x-zvbc", RISCVCPU, cfg.ext_zvbc, false), | ||
130 | + DEFINE_PROP_BOOL("x-zvkned", RISCVCPU, cfg.ext_zvkned, false), | ||
131 | |||
132 | DEFINE_PROP_END_OF_LIST(), | ||
133 | }; | ||
134 | diff --git a/target/riscv/vcrypto_helper.c b/target/riscv/vcrypto_helper.c | ||
135 | index XXXXXXX..XXXXXXX 100644 | ||
136 | --- a/target/riscv/vcrypto_helper.c | ||
137 | +++ b/target/riscv/vcrypto_helper.c | ||
138 | @@ -XXX,XX +XXX,XX @@ | ||
139 | #include "qemu/bitops.h" | ||
140 | #include "qemu/bswap.h" | ||
141 | #include "cpu.h" | ||
142 | +#include "crypto/aes.h" | ||
143 | +#include "crypto/aes-round.h" | ||
144 | #include "exec/memop.h" | ||
145 | #include "exec/exec-all.h" | ||
146 | #include "exec/helper-proto.h" | ||
147 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVX2, vwsll_vx_w, WOP_UUU_W, H8, H4, DO_SLL) | ||
148 | GEN_VEXT_VX(vwsll_vx_b, 2) | ||
149 | GEN_VEXT_VX(vwsll_vx_h, 4) | ||
150 | GEN_VEXT_VX(vwsll_vx_w, 8) | ||
151 | + | ||
152 | +void HELPER(egs_check)(uint32_t egs, CPURISCVState *env) | ||
153 | +{ | ||
154 | + uint32_t vl = env->vl; | ||
155 | + uint32_t vstart = env->vstart; | ||
156 | + | ||
157 | + if (vl % egs != 0 || vstart % egs != 0) { | ||
158 | + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); | ||
159 | + } | ||
160 | +} | ||
161 | + | ||
162 | +static inline void xor_round_key(AESState *round_state, AESState *round_key) | ||
163 | +{ | ||
164 | + round_state->v = round_state->v ^ round_key->v; | ||
165 | +} | ||
166 | + | ||
167 | +#define GEN_ZVKNED_HELPER_VV(NAME, ...) \ | ||
168 | + void HELPER(NAME)(void *vd, void *vs2, CPURISCVState *env, \ | ||
169 | + uint32_t desc) \ | ||
170 | + { \ | ||
171 | + uint32_t vl = env->vl; \ | ||
172 | + uint32_t total_elems = vext_get_total_elems(env, desc, 4); \ | ||
173 | + uint32_t vta = vext_vta(desc); \ | ||
174 | + \ | ||
175 | + for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) { \ | ||
176 | + AESState round_key; \ | ||
177 | + round_key.d[0] = *((uint64_t *)vs2 + H8(i * 2 + 0)); \ | ||
178 | + round_key.d[1] = *((uint64_t *)vs2 + H8(i * 2 + 1)); \ | ||
179 | + AESState round_state; \ | ||
180 | + round_state.d[0] = *((uint64_t *)vd + H8(i * 2 + 0)); \ | ||
181 | + round_state.d[1] = *((uint64_t *)vd + H8(i * 2 + 1)); \ | ||
182 | + __VA_ARGS__; \ | ||
183 | + *((uint64_t *)vd + H8(i * 2 + 0)) = round_state.d[0]; \ | ||
184 | + *((uint64_t *)vd + H8(i * 2 + 1)) = round_state.d[1]; \ | ||
185 | + } \ | ||
186 | + env->vstart = 0; \ | ||
187 | + /* set tail elements to 1s */ \ | ||
188 | + vext_set_elems_1s(vd, vta, vl * 4, total_elems * 4); \ | ||
189 | + } | ||
190 | + | ||
191 | +#define GEN_ZVKNED_HELPER_VS(NAME, ...) \ | ||
192 | + void HELPER(NAME)(void *vd, void *vs2, CPURISCVState *env, \ | ||
193 | + uint32_t desc) \ | ||
194 | + { \ | ||
195 | + uint32_t vl = env->vl; \ | ||
196 | + uint32_t total_elems = vext_get_total_elems(env, desc, 4); \ | ||
197 | + uint32_t vta = vext_vta(desc); \ | ||
198 | + \ | ||
199 | + for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) { \ | ||
200 | + AESState round_key; \ | ||
201 | + round_key.d[0] = *((uint64_t *)vs2 + H8(0)); \ | ||
202 | + round_key.d[1] = *((uint64_t *)vs2 + H8(1)); \ | ||
203 | + AESState round_state; \ | ||
204 | + round_state.d[0] = *((uint64_t *)vd + H8(i * 2 + 0)); \ | ||
205 | + round_state.d[1] = *((uint64_t *)vd + H8(i * 2 + 1)); \ | ||
206 | + __VA_ARGS__; \ | ||
207 | + *((uint64_t *)vd + H8(i * 2 + 0)) = round_state.d[0]; \ | ||
208 | + *((uint64_t *)vd + H8(i * 2 + 1)) = round_state.d[1]; \ | ||
209 | + } \ | ||
210 | + env->vstart = 0; \ | ||
211 | + /* set tail elements to 1s */ \ | ||
212 | + vext_set_elems_1s(vd, vta, vl * 4, total_elems * 4); \ | ||
213 | + } | ||
214 | + | ||
215 | +GEN_ZVKNED_HELPER_VV(vaesef_vv, aesenc_SB_SR_AK(&round_state, | ||
216 | + &round_state, | ||
217 | + &round_key, | ||
218 | + false);) | ||
219 | +GEN_ZVKNED_HELPER_VS(vaesef_vs, aesenc_SB_SR_AK(&round_state, | ||
220 | + &round_state, | ||
221 | + &round_key, | ||
222 | + false);) | ||
223 | +GEN_ZVKNED_HELPER_VV(vaesdf_vv, aesdec_ISB_ISR_AK(&round_state, | ||
224 | + &round_state, | ||
225 | + &round_key, | ||
226 | + false);) | ||
227 | +GEN_ZVKNED_HELPER_VS(vaesdf_vs, aesdec_ISB_ISR_AK(&round_state, | ||
228 | + &round_state, | ||
229 | + &round_key, | ||
230 | + false);) | ||
231 | +GEN_ZVKNED_HELPER_VV(vaesem_vv, aesenc_SB_SR_MC_AK(&round_state, | ||
232 | + &round_state, | ||
233 | + &round_key, | ||
234 | + false);) | ||
235 | +GEN_ZVKNED_HELPER_VS(vaesem_vs, aesenc_SB_SR_MC_AK(&round_state, | ||
236 | + &round_state, | ||
237 | + &round_key, | ||
238 | + false);) | ||
239 | +GEN_ZVKNED_HELPER_VV(vaesdm_vv, aesdec_ISB_ISR_AK_IMC(&round_state, | ||
240 | + &round_state, | ||
241 | + &round_key, | ||
242 | + false);) | ||
243 | +GEN_ZVKNED_HELPER_VS(vaesdm_vs, aesdec_ISB_ISR_AK_IMC(&round_state, | ||
244 | + &round_state, | ||
245 | + &round_key, | ||
246 | + false);) | ||
247 | +GEN_ZVKNED_HELPER_VS(vaesz_vs, xor_round_key(&round_state, &round_key);) | ||
248 | + | ||
249 | +void HELPER(vaeskf1_vi)(void *vd_vptr, void *vs2_vptr, uint32_t uimm, | ||
250 | + CPURISCVState *env, uint32_t desc) | ||
251 | +{ | ||
252 | + uint32_t *vd = vd_vptr; | ||
253 | + uint32_t *vs2 = vs2_vptr; | ||
254 | + uint32_t vl = env->vl; | ||
255 | + uint32_t total_elems = vext_get_total_elems(env, desc, 4); | ||
256 | + uint32_t vta = vext_vta(desc); | ||
257 | + | ||
258 | + uimm &= 0b1111; | ||
259 | + if (uimm > 10 || uimm == 0) { | ||
260 | + uimm ^= 0b1000; | ||
261 | + } | ||
262 | + | ||
263 | + for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) { | ||
264 | + uint32_t rk[8], tmp; | ||
265 | + static const uint32_t rcon[] = { | ||
266 | + 0x00000001, 0x00000002, 0x00000004, 0x00000008, 0x00000010, | ||
267 | + 0x00000020, 0x00000040, 0x00000080, 0x0000001B, 0x00000036, | ||
268 | + }; | ||
269 | + | ||
270 | + rk[0] = vs2[i * 4 + H4(0)]; | ||
271 | + rk[1] = vs2[i * 4 + H4(1)]; | ||
272 | + rk[2] = vs2[i * 4 + H4(2)]; | ||
273 | + rk[3] = vs2[i * 4 + H4(3)]; | ||
274 | + tmp = ror32(rk[3], 8); | ||
275 | + | ||
276 | + rk[4] = rk[0] ^ (((uint32_t)AES_sbox[(tmp >> 24) & 0xff] << 24) | | ||
277 | + ((uint32_t)AES_sbox[(tmp >> 16) & 0xff] << 16) | | ||
278 | + ((uint32_t)AES_sbox[(tmp >> 8) & 0xff] << 8) | | ||
279 | + ((uint32_t)AES_sbox[(tmp >> 0) & 0xff] << 0)) | ||
280 | + ^ rcon[uimm - 1]; | ||
281 | + rk[5] = rk[1] ^ rk[4]; | ||
282 | + rk[6] = rk[2] ^ rk[5]; | ||
283 | + rk[7] = rk[3] ^ rk[6]; | ||
284 | + | ||
285 | + vd[i * 4 + H4(0)] = rk[4]; | ||
286 | + vd[i * 4 + H4(1)] = rk[5]; | ||
287 | + vd[i * 4 + H4(2)] = rk[6]; | ||
288 | + vd[i * 4 + H4(3)] = rk[7]; | ||
289 | + } | ||
290 | + env->vstart = 0; | ||
291 | + /* set tail elements to 1s */ | ||
292 | + vext_set_elems_1s(vd, vta, vl * 4, total_elems * 4); | ||
293 | +} | ||
294 | + | ||
295 | +void HELPER(vaeskf2_vi)(void *vd_vptr, void *vs2_vptr, uint32_t uimm, | ||
296 | + CPURISCVState *env, uint32_t desc) | ||
297 | +{ | ||
298 | + uint32_t *vd = vd_vptr; | ||
299 | + uint32_t *vs2 = vs2_vptr; | ||
300 | + uint32_t vl = env->vl; | ||
301 | + uint32_t total_elems = vext_get_total_elems(env, desc, 4); | ||
302 | + uint32_t vta = vext_vta(desc); | ||
303 | + | ||
304 | + uimm &= 0b1111; | ||
305 | + if (uimm > 14 || uimm < 2) { | ||
306 | + uimm ^= 0b1000; | ||
307 | + } | ||
308 | + | ||
309 | + for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) { | ||
310 | + uint32_t rk[12], tmp; | ||
311 | + static const uint32_t rcon[] = { | ||
312 | + 0x00000001, 0x00000002, 0x00000004, 0x00000008, 0x00000010, | ||
313 | + 0x00000020, 0x00000040, 0x00000080, 0x0000001B, 0x00000036, | ||
314 | + }; | ||
315 | + | ||
316 | + rk[0] = vd[i * 4 + H4(0)]; | ||
317 | + rk[1] = vd[i * 4 + H4(1)]; | ||
318 | + rk[2] = vd[i * 4 + H4(2)]; | ||
319 | + rk[3] = vd[i * 4 + H4(3)]; | ||
320 | + rk[4] = vs2[i * 4 + H4(0)]; | ||
321 | + rk[5] = vs2[i * 4 + H4(1)]; | ||
322 | + rk[6] = vs2[i * 4 + H4(2)]; | ||
323 | + rk[7] = vs2[i * 4 + H4(3)]; | ||
324 | + | ||
325 | + if (uimm % 2 == 0) { | ||
326 | + tmp = ror32(rk[7], 8); | ||
327 | + rk[8] = rk[0] ^ (((uint32_t)AES_sbox[(tmp >> 24) & 0xff] << 24) | | ||
328 | + ((uint32_t)AES_sbox[(tmp >> 16) & 0xff] << 16) | | ||
329 | + ((uint32_t)AES_sbox[(tmp >> 8) & 0xff] << 8) | | ||
330 | + ((uint32_t)AES_sbox[(tmp >> 0) & 0xff] << 0)) | ||
331 | + ^ rcon[(uimm - 1) / 2]; | ||
332 | + } else { | ||
333 | + rk[8] = rk[0] ^ (((uint32_t)AES_sbox[(rk[7] >> 24) & 0xff] << 24) | | ||
334 | + ((uint32_t)AES_sbox[(rk[7] >> 16) & 0xff] << 16) | | ||
335 | + ((uint32_t)AES_sbox[(rk[7] >> 8) & 0xff] << 8) | | ||
336 | + ((uint32_t)AES_sbox[(rk[7] >> 0) & 0xff] << 0)); | ||
337 | + } | ||
338 | + rk[9] = rk[1] ^ rk[8]; | ||
339 | + rk[10] = rk[2] ^ rk[9]; | ||
340 | + rk[11] = rk[3] ^ rk[10]; | ||
341 | + | ||
342 | + vd[i * 4 + H4(0)] = rk[8]; | ||
343 | + vd[i * 4 + H4(1)] = rk[9]; | ||
344 | + vd[i * 4 + H4(2)] = rk[10]; | ||
345 | + vd[i * 4 + H4(3)] = rk[11]; | ||
346 | + } | ||
347 | + env->vstart = 0; | ||
348 | + /* set tail elements to 1s */ | ||
349 | + vext_set_elems_1s(vd, vta, vl * 4, total_elems * 4); | ||
350 | +} | ||
351 | diff --git a/target/riscv/insn_trans/trans_rvvk.c.inc b/target/riscv/insn_trans/trans_rvvk.c.inc | ||
352 | index XXXXXXX..XXXXXXX 100644 | ||
353 | --- a/target/riscv/insn_trans/trans_rvvk.c.inc | ||
354 | +++ b/target/riscv/insn_trans/trans_rvvk.c.inc | ||
355 | @@ -XXX,XX +XXX,XX @@ static bool vwsll_vx_check(DisasContext *s, arg_rmrr *a) | ||
356 | GEN_OPIVV_WIDEN_TRANS(vwsll_vv, vwsll_vv_check) | ||
357 | GEN_OPIVX_WIDEN_TRANS(vwsll_vx, vwsll_vx_check) | ||
358 | GEN_OPIVI_WIDEN_TRANS(vwsll_vi, IMM_ZX, vwsll_vx, vwsll_vx_check) | ||
359 | + | ||
360 | +/* | ||
361 | + * Zvkned | ||
362 | + */ | ||
363 | + | ||
364 | +#define ZVKNED_EGS 4 | ||
365 | + | ||
366 | +#define GEN_V_UNMASKED_TRANS(NAME, CHECK, EGS) \ | ||
367 | + static bool trans_##NAME(DisasContext *s, arg_##NAME *a) \ | ||
368 | + { \ | ||
369 | + if (CHECK(s, a)) { \ | ||
370 | + TCGv_ptr rd_v, rs2_v; \ | ||
371 | + TCGv_i32 desc, egs; \ | ||
372 | + uint32_t data = 0; \ | ||
373 | + TCGLabel *over = gen_new_label(); \ | ||
374 | + \ | ||
375 | + if (!s->vstart_eq_zero || !s->vl_eq_vlmax) { \ | ||
376 | + /* save opcode for unwinding in case we throw an exception */ \ | ||
377 | + decode_save_opc(s); \ | ||
378 | + egs = tcg_constant_i32(EGS); \ | ||
379 | + gen_helper_egs_check(egs, cpu_env); \ | ||
380 | + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ | ||
381 | + } \ | ||
382 | + \ | ||
383 | + data = FIELD_DP32(data, VDATA, VM, a->vm); \ | ||
384 | + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ | ||
385 | + data = FIELD_DP32(data, VDATA, VTA, s->vta); \ | ||
386 | + data = FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s); \ | ||
387 | + data = FIELD_DP32(data, VDATA, VMA, s->vma); \ | ||
388 | + rd_v = tcg_temp_new_ptr(); \ | ||
389 | + rs2_v = tcg_temp_new_ptr(); \ | ||
390 | + desc = tcg_constant_i32( \ | ||
391 | + simd_desc(s->cfg_ptr->vlen / 8, s->cfg_ptr->vlen / 8, data)); \ | ||
392 | + tcg_gen_addi_ptr(rd_v, cpu_env, vreg_ofs(s, a->rd)); \ | ||
393 | + tcg_gen_addi_ptr(rs2_v, cpu_env, vreg_ofs(s, a->rs2)); \ | ||
394 | + gen_helper_##NAME(rd_v, rs2_v, cpu_env, desc); \ | ||
395 | + mark_vs_dirty(s); \ | ||
396 | + gen_set_label(over); \ | ||
397 | + return true; \ | ||
398 | + } \ | ||
399 | + return false; \ | ||
400 | + } | ||
401 | + | ||
402 | +static bool vaes_check_vv(DisasContext *s, arg_rmr *a) | ||
403 | +{ | ||
404 | + int egw_bytes = ZVKNED_EGS << s->sew; | ||
405 | + return s->cfg_ptr->ext_zvkned == true && | ||
406 | + require_rvv(s) && | ||
407 | + vext_check_isa_ill(s) && | ||
408 | + MAXSZ(s) >= egw_bytes && | ||
409 | + require_align(a->rd, s->lmul) && | ||
410 | + require_align(a->rs2, s->lmul) && | ||
411 | + s->sew == MO_32; | ||
412 | +} | ||
413 | + | ||
414 | +static bool vaes_check_overlap(DisasContext *s, int vd, int vs2) | ||
415 | +{ | ||
416 | + int8_t op_size = s->lmul <= 0 ? 1 : 1 << s->lmul; | ||
417 | + return !is_overlapped(vd, op_size, vs2, 1); | ||
418 | +} | ||
419 | + | ||
420 | +static bool vaes_check_vs(DisasContext *s, arg_rmr *a) | ||
421 | +{ | ||
422 | + int egw_bytes = ZVKNED_EGS << s->sew; | ||
423 | + return vaes_check_overlap(s, a->rd, a->rs2) && | ||
424 | + MAXSZ(s) >= egw_bytes && | ||
425 | + s->cfg_ptr->ext_zvkned == true && | ||
426 | + require_rvv(s) && | ||
427 | + vext_check_isa_ill(s) && | ||
428 | + require_align(a->rd, s->lmul) && | ||
429 | + s->sew == MO_32; | ||
430 | +} | ||
431 | + | ||
432 | +GEN_V_UNMASKED_TRANS(vaesef_vv, vaes_check_vv, ZVKNED_EGS) | ||
433 | +GEN_V_UNMASKED_TRANS(vaesef_vs, vaes_check_vs, ZVKNED_EGS) | ||
434 | +GEN_V_UNMASKED_TRANS(vaesdf_vv, vaes_check_vv, ZVKNED_EGS) | ||
435 | +GEN_V_UNMASKED_TRANS(vaesdf_vs, vaes_check_vs, ZVKNED_EGS) | ||
436 | +GEN_V_UNMASKED_TRANS(vaesdm_vv, vaes_check_vv, ZVKNED_EGS) | ||
437 | +GEN_V_UNMASKED_TRANS(vaesdm_vs, vaes_check_vs, ZVKNED_EGS) | ||
438 | +GEN_V_UNMASKED_TRANS(vaesz_vs, vaes_check_vs, ZVKNED_EGS) | ||
439 | +GEN_V_UNMASKED_TRANS(vaesem_vv, vaes_check_vv, ZVKNED_EGS) | ||
440 | +GEN_V_UNMASKED_TRANS(vaesem_vs, vaes_check_vs, ZVKNED_EGS) | ||
441 | + | ||
442 | +#define GEN_VI_UNMASKED_TRANS(NAME, CHECK, EGS) \ | ||
443 | + static bool trans_##NAME(DisasContext *s, arg_##NAME *a) \ | ||
444 | + { \ | ||
445 | + if (CHECK(s, a)) { \ | ||
446 | + TCGv_ptr rd_v, rs2_v; \ | ||
447 | + TCGv_i32 uimm_v, desc, egs; \ | ||
448 | + uint32_t data = 0; \ | ||
449 | + TCGLabel *over = gen_new_label(); \ | ||
450 | + \ | ||
451 | + if (!s->vstart_eq_zero || !s->vl_eq_vlmax) { \ | ||
452 | + /* save opcode for unwinding in case we throw an exception */ \ | ||
453 | + decode_save_opc(s); \ | ||
454 | + egs = tcg_constant_i32(EGS); \ | ||
455 | + gen_helper_egs_check(egs, cpu_env); \ | ||
456 | + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ | ||
457 | + } \ | ||
458 | + \ | ||
459 | + data = FIELD_DP32(data, VDATA, VM, a->vm); \ | ||
460 | + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ | ||
461 | + data = FIELD_DP32(data, VDATA, VTA, s->vta); \ | ||
462 | + data = FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s); \ | ||
463 | + data = FIELD_DP32(data, VDATA, VMA, s->vma); \ | ||
464 | + \ | ||
465 | + rd_v = tcg_temp_new_ptr(); \ | ||
466 | + rs2_v = tcg_temp_new_ptr(); \ | ||
467 | + uimm_v = tcg_constant_i32(a->rs1); \ | ||
468 | + desc = tcg_constant_i32( \ | ||
469 | + simd_desc(s->cfg_ptr->vlen / 8, s->cfg_ptr->vlen / 8, data)); \ | ||
470 | + tcg_gen_addi_ptr(rd_v, cpu_env, vreg_ofs(s, a->rd)); \ | ||
471 | + tcg_gen_addi_ptr(rs2_v, cpu_env, vreg_ofs(s, a->rs2)); \ | ||
472 | + gen_helper_##NAME(rd_v, rs2_v, uimm_v, cpu_env, desc); \ | ||
473 | + mark_vs_dirty(s); \ | ||
474 | + gen_set_label(over); \ | ||
475 | + return true; \ | ||
476 | + } \ | ||
477 | + return false; \ | ||
478 | + } | ||
479 | + | ||
480 | +static bool vaeskf1_check(DisasContext *s, arg_vaeskf1_vi *a) | ||
481 | +{ | ||
482 | + int egw_bytes = ZVKNED_EGS << s->sew; | ||
483 | + return s->cfg_ptr->ext_zvkned == true && | ||
484 | + require_rvv(s) && | ||
485 | + vext_check_isa_ill(s) && | ||
486 | + MAXSZ(s) >= egw_bytes && | ||
487 | + s->sew == MO_32 && | ||
488 | + require_align(a->rd, s->lmul) && | ||
489 | + require_align(a->rs2, s->lmul); | ||
490 | +} | ||
491 | + | ||
492 | +static bool vaeskf2_check(DisasContext *s, arg_vaeskf2_vi *a) | ||
493 | +{ | ||
494 | + int egw_bytes = ZVKNED_EGS << s->sew; | ||
495 | + return s->cfg_ptr->ext_zvkned == true && | ||
496 | + require_rvv(s) && | ||
497 | + vext_check_isa_ill(s) && | ||
498 | + MAXSZ(s) >= egw_bytes && | ||
499 | + s->sew == MO_32 && | ||
500 | + require_align(a->rd, s->lmul) && | ||
501 | + require_align(a->rs2, s->lmul); | ||
502 | +} | ||
503 | + | ||
504 | +GEN_VI_UNMASKED_TRANS(vaeskf1_vi, vaeskf1_check, ZVKNED_EGS) | ||
505 | +GEN_VI_UNMASKED_TRANS(vaeskf2_vi, vaeskf2_check, ZVKNED_EGS) | ||
506 | -- | ||
507 | 2.41.0 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Kiran Ostrolenk <kiran.ostrolenk@codethink.co.uk> | ||
1 | 2 | ||
3 | This commit adds support for the Zvknh vector-crypto extension, which | ||
4 | consists of the following instructions: | ||
5 | |||
6 | * vsha2ms.vv | ||
7 | * vsha2c[hl].vv | ||
8 | |||
9 | Translation functions are defined in | ||
10 | `target/riscv/insn_trans/trans_rvvk.c.inc` and helpers are defined in | ||
11 | `target/riscv/vcrypto_helper.c`. | ||
12 | |||
13 | Co-authored-by: Nazar Kazakov <nazar.kazakov@codethink.co.uk> | ||
14 | Co-authored-by: Lawrence Hunter <lawrence.hunter@codethink.co.uk> | ||
15 | [max.chou@sifive.com: Replaced vstart checking by TCG op] | ||
16 | Signed-off-by: Nazar Kazakov <nazar.kazakov@codethink.co.uk> | ||
17 | Signed-off-by: Lawrence Hunter <lawrence.hunter@codethink.co.uk> | ||
18 | Signed-off-by: Kiran Ostrolenk <kiran.ostrolenk@codethink.co.uk> | ||
19 | Signed-off-by: Max Chou <max.chou@sifive.com> | ||
20 | Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> | ||
21 | [max.chou@sifive.com: Exposed x-zvknha & x-zvknhb properties] | ||
22 | [max.chou@sifive.com: Replaced SEW selection to happened during | ||
23 | translation] | ||
24 | Message-ID: <20230711165917.2629866-11-max.chou@sifive.com> | ||
25 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | ||
26 | --- | ||
27 | target/riscv/cpu_cfg.h | 2 + | ||
28 | target/riscv/helper.h | 6 + | ||
29 | target/riscv/insn32.decode | 5 + | ||
30 | target/riscv/cpu.c | 13 +- | ||
31 | target/riscv/vcrypto_helper.c | 238 +++++++++++++++++++++++ | ||
32 | target/riscv/insn_trans/trans_rvvk.c.inc | 129 ++++++++++++ | ||
33 | 6 files changed, 390 insertions(+), 3 deletions(-) | ||
34 | |||
35 | diff --git a/target/riscv/cpu_cfg.h b/target/riscv/cpu_cfg.h | ||
36 | index XXXXXXX..XXXXXXX 100644 | ||
37 | --- a/target/riscv/cpu_cfg.h | ||
38 | +++ b/target/riscv/cpu_cfg.h | ||
39 | @@ -XXX,XX +XXX,XX @@ struct RISCVCPUConfig { | ||
40 | bool ext_zvbb; | ||
41 | bool ext_zvbc; | ||
42 | bool ext_zvkned; | ||
43 | + bool ext_zvknha; | ||
44 | + bool ext_zvknhb; | ||
45 | bool ext_zmmul; | ||
46 | bool ext_zvfbfmin; | ||
47 | bool ext_zvfbfwma; | ||
48 | diff --git a/target/riscv/helper.h b/target/riscv/helper.h | ||
49 | index XXXXXXX..XXXXXXX 100644 | ||
50 | --- a/target/riscv/helper.h | ||
51 | +++ b/target/riscv/helper.h | ||
52 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_4(vaesdm_vs, void, ptr, ptr, env, i32) | ||
53 | DEF_HELPER_4(vaesz_vs, void, ptr, ptr, env, i32) | ||
54 | DEF_HELPER_5(vaeskf1_vi, void, ptr, ptr, i32, env, i32) | ||
55 | DEF_HELPER_5(vaeskf2_vi, void, ptr, ptr, i32, env, i32) | ||
56 | + | ||
57 | +DEF_HELPER_5(vsha2ms_vv, void, ptr, ptr, ptr, env, i32) | ||
58 | +DEF_HELPER_5(vsha2ch32_vv, void, ptr, ptr, ptr, env, i32) | ||
59 | +DEF_HELPER_5(vsha2ch64_vv, void, ptr, ptr, ptr, env, i32) | ||
60 | +DEF_HELPER_5(vsha2cl32_vv, void, ptr, ptr, ptr, env, i32) | ||
61 | +DEF_HELPER_5(vsha2cl64_vv, void, ptr, ptr, ptr, env, i32) | ||
62 | diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode | ||
63 | index XXXXXXX..XXXXXXX 100644 | ||
64 | --- a/target/riscv/insn32.decode | ||
65 | +++ b/target/riscv/insn32.decode | ||
66 | @@ -XXX,XX +XXX,XX @@ vaesdm_vs 101001 1 ..... 00000 010 ..... 1110111 @r2_vm_1 | ||
67 | vaesz_vs 101001 1 ..... 00111 010 ..... 1110111 @r2_vm_1 | ||
68 | vaeskf1_vi 100010 1 ..... ..... 010 ..... 1110111 @r_vm_1 | ||
69 | vaeskf2_vi 101010 1 ..... ..... 010 ..... 1110111 @r_vm_1 | ||
70 | + | ||
71 | +# *** Zvknh vector crypto extension *** | ||
72 | +vsha2ms_vv 101101 1 ..... ..... 010 ..... 1110111 @r_vm_1 | ||
73 | +vsha2ch_vv 101110 1 ..... ..... 010 ..... 1110111 @r_vm_1 | ||
74 | +vsha2cl_vv 101111 1 ..... ..... 010 ..... 1110111 @r_vm_1 | ||
75 | diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c | ||
76 | index XXXXXXX..XXXXXXX 100644 | ||
77 | --- a/target/riscv/cpu.c | ||
78 | +++ b/target/riscv/cpu.c | ||
79 | @@ -XXX,XX +XXX,XX @@ static const struct isa_ext_data isa_edata_arr[] = { | ||
80 | ISA_EXT_DATA_ENTRY(zvfh, PRIV_VERSION_1_12_0, ext_zvfh), | ||
81 | ISA_EXT_DATA_ENTRY(zvfhmin, PRIV_VERSION_1_12_0, ext_zvfhmin), | ||
82 | ISA_EXT_DATA_ENTRY(zvkned, PRIV_VERSION_1_12_0, ext_zvkned), | ||
83 | + ISA_EXT_DATA_ENTRY(zvknha, PRIV_VERSION_1_12_0, ext_zvknha), | ||
84 | + ISA_EXT_DATA_ENTRY(zvknhb, PRIV_VERSION_1_12_0, ext_zvknhb), | ||
85 | ISA_EXT_DATA_ENTRY(zhinx, PRIV_VERSION_1_12_0, ext_zhinx), | ||
86 | ISA_EXT_DATA_ENTRY(zhinxmin, PRIV_VERSION_1_12_0, ext_zhinxmin), | ||
87 | ISA_EXT_DATA_ENTRY(smaia, PRIV_VERSION_1_12_0, ext_smaia), | ||
88 | @@ -XXX,XX +XXX,XX @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp) | ||
89 | * In principle Zve*x would also suffice here, were they supported | ||
90 | * in qemu | ||
91 | */ | ||
92 | - if ((cpu->cfg.ext_zvbb || cpu->cfg.ext_zvkned) && !cpu->cfg.ext_zve32f) { | ||
93 | + if ((cpu->cfg.ext_zvbb || cpu->cfg.ext_zvkned || cpu->cfg.ext_zvknha) && | ||
94 | + !cpu->cfg.ext_zve32f) { | ||
95 | error_setg(errp, | ||
96 | "Vector crypto extensions require V or Zve* extensions"); | ||
97 | return; | ||
98 | } | ||
99 | |||
100 | - if (cpu->cfg.ext_zvbc && !cpu->cfg.ext_zve64f) { | ||
101 | - error_setg(errp, "Zvbc extension requires V or Zve64{f,d} extensions"); | ||
102 | + if ((cpu->cfg.ext_zvbc || cpu->cfg.ext_zvknhb) && !cpu->cfg.ext_zve64f) { | ||
103 | + error_setg( | ||
104 | + errp, | ||
105 | + "Zvbc and Zvknhb extensions require V or Zve64{f,d} extensions"); | ||
106 | return; | ||
107 | } | ||
108 | |||
109 | @@ -XXX,XX +XXX,XX @@ static Property riscv_cpu_extensions[] = { | ||
110 | DEFINE_PROP_BOOL("x-zvbb", RISCVCPU, cfg.ext_zvbb, false), | ||
111 | DEFINE_PROP_BOOL("x-zvbc", RISCVCPU, cfg.ext_zvbc, false), | ||
112 | DEFINE_PROP_BOOL("x-zvkned", RISCVCPU, cfg.ext_zvkned, false), | ||
113 | + DEFINE_PROP_BOOL("x-zvknha", RISCVCPU, cfg.ext_zvknha, false), | ||
114 | + DEFINE_PROP_BOOL("x-zvknhb", RISCVCPU, cfg.ext_zvknhb, false), | ||
115 | |||
116 | DEFINE_PROP_END_OF_LIST(), | ||
117 | }; | ||
118 | diff --git a/target/riscv/vcrypto_helper.c b/target/riscv/vcrypto_helper.c | ||
119 | index XXXXXXX..XXXXXXX 100644 | ||
120 | --- a/target/riscv/vcrypto_helper.c | ||
121 | +++ b/target/riscv/vcrypto_helper.c | ||
122 | @@ -XXX,XX +XXX,XX @@ void HELPER(vaeskf2_vi)(void *vd_vptr, void *vs2_vptr, uint32_t uimm, | ||
123 | /* set tail elements to 1s */ | ||
124 | vext_set_elems_1s(vd, vta, vl * 4, total_elems * 4); | ||
125 | } | ||
126 | + | ||
127 | +static inline uint32_t sig0_sha256(uint32_t x) | ||
128 | +{ | ||
129 | + return ror32(x, 7) ^ ror32(x, 18) ^ (x >> 3); | ||
130 | +} | ||
131 | + | ||
132 | +static inline uint32_t sig1_sha256(uint32_t x) | ||
133 | +{ | ||
134 | + return ror32(x, 17) ^ ror32(x, 19) ^ (x >> 10); | ||
135 | +} | ||
136 | + | ||
137 | +static inline uint64_t sig0_sha512(uint64_t x) | ||
138 | +{ | ||
139 | + return ror64(x, 1) ^ ror64(x, 8) ^ (x >> 7); | ||
140 | +} | ||
141 | + | ||
142 | +static inline uint64_t sig1_sha512(uint64_t x) | ||
143 | +{ | ||
144 | + return ror64(x, 19) ^ ror64(x, 61) ^ (x >> 6); | ||
145 | +} | ||
146 | + | ||
147 | +static inline void vsha2ms_e32(uint32_t *vd, uint32_t *vs1, uint32_t *vs2) | ||
148 | +{ | ||
149 | + uint32_t res[4]; | ||
150 | + res[0] = sig1_sha256(vs1[H4(2)]) + vs2[H4(1)] + sig0_sha256(vd[H4(1)]) + | ||
151 | + vd[H4(0)]; | ||
152 | + res[1] = sig1_sha256(vs1[H4(3)]) + vs2[H4(2)] + sig0_sha256(vd[H4(2)]) + | ||
153 | + vd[H4(1)]; | ||
154 | + res[2] = | ||
155 | + sig1_sha256(res[0]) + vs2[H4(3)] + sig0_sha256(vd[H4(3)]) + vd[H4(2)]; | ||
156 | + res[3] = | ||
157 | + sig1_sha256(res[1]) + vs1[H4(0)] + sig0_sha256(vs2[H4(0)]) + vd[H4(3)]; | ||
158 | + vd[H4(3)] = res[3]; | ||
159 | + vd[H4(2)] = res[2]; | ||
160 | + vd[H4(1)] = res[1]; | ||
161 | + vd[H4(0)] = res[0]; | ||
162 | +} | ||
163 | + | ||
164 | +static inline void vsha2ms_e64(uint64_t *vd, uint64_t *vs1, uint64_t *vs2) | ||
165 | +{ | ||
166 | + uint64_t res[4]; | ||
167 | + res[0] = sig1_sha512(vs1[2]) + vs2[1] + sig0_sha512(vd[1]) + vd[0]; | ||
168 | + res[1] = sig1_sha512(vs1[3]) + vs2[2] + sig0_sha512(vd[2]) + vd[1]; | ||
169 | + res[2] = sig1_sha512(res[0]) + vs2[3] + sig0_sha512(vd[3]) + vd[2]; | ||
170 | + res[3] = sig1_sha512(res[1]) + vs1[0] + sig0_sha512(vs2[0]) + vd[3]; | ||
171 | + vd[3] = res[3]; | ||
172 | + vd[2] = res[2]; | ||
173 | + vd[1] = res[1]; | ||
174 | + vd[0] = res[0]; | ||
175 | +} | ||
176 | + | ||
177 | +void HELPER(vsha2ms_vv)(void *vd, void *vs1, void *vs2, CPURISCVState *env, | ||
178 | + uint32_t desc) | ||
179 | +{ | ||
180 | + uint32_t sew = FIELD_EX64(env->vtype, VTYPE, VSEW); | ||
181 | + uint32_t esz = sew == MO_32 ? 4 : 8; | ||
182 | + uint32_t total_elems; | ||
183 | + uint32_t vta = vext_vta(desc); | ||
184 | + | ||
185 | + for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) { | ||
186 | + if (sew == MO_32) { | ||
187 | + vsha2ms_e32(((uint32_t *)vd) + i * 4, ((uint32_t *)vs1) + i * 4, | ||
188 | + ((uint32_t *)vs2) + i * 4); | ||
189 | + } else { | ||
190 | + /* If not 32 then SEW should be 64 */ | ||
191 | + vsha2ms_e64(((uint64_t *)vd) + i * 4, ((uint64_t *)vs1) + i * 4, | ||
192 | + ((uint64_t *)vs2) + i * 4); | ||
193 | + } | ||
194 | + } | ||
195 | + /* set tail elements to 1s */ | ||
196 | + total_elems = vext_get_total_elems(env, desc, esz); | ||
197 | + vext_set_elems_1s(vd, vta, env->vl * esz, total_elems * esz); | ||
198 | + env->vstart = 0; | ||
199 | +} | ||
200 | + | ||
201 | +static inline uint64_t sum0_64(uint64_t x) | ||
202 | +{ | ||
203 | + return ror64(x, 28) ^ ror64(x, 34) ^ ror64(x, 39); | ||
204 | +} | ||
205 | + | ||
206 | +static inline uint32_t sum0_32(uint32_t x) | ||
207 | +{ | ||
208 | + return ror32(x, 2) ^ ror32(x, 13) ^ ror32(x, 22); | ||
209 | +} | ||
210 | + | ||
211 | +static inline uint64_t sum1_64(uint64_t x) | ||
212 | +{ | ||
213 | + return ror64(x, 14) ^ ror64(x, 18) ^ ror64(x, 41); | ||
214 | +} | ||
215 | + | ||
216 | +static inline uint32_t sum1_32(uint32_t x) | ||
217 | +{ | ||
218 | + return ror32(x, 6) ^ ror32(x, 11) ^ ror32(x, 25); | ||
219 | +} | ||
220 | + | ||
221 | +#define ch(x, y, z) ((x & y) ^ ((~x) & z)) | ||
222 | + | ||
223 | +#define maj(x, y, z) ((x & y) ^ (x & z) ^ (y & z)) | ||
224 | + | ||
225 | +static void vsha2c_64(uint64_t *vs2, uint64_t *vd, uint64_t *vs1) | ||
226 | +{ | ||
227 | + uint64_t a = vs2[3], b = vs2[2], e = vs2[1], f = vs2[0]; | ||
228 | + uint64_t c = vd[3], d = vd[2], g = vd[1], h = vd[0]; | ||
229 | + uint64_t W0 = vs1[0], W1 = vs1[1]; | ||
230 | + uint64_t T1 = h + sum1_64(e) + ch(e, f, g) + W0; | ||
231 | + uint64_t T2 = sum0_64(a) + maj(a, b, c); | ||
232 | + | ||
233 | + h = g; | ||
234 | + g = f; | ||
235 | + f = e; | ||
236 | + e = d + T1; | ||
237 | + d = c; | ||
238 | + c = b; | ||
239 | + b = a; | ||
240 | + a = T1 + T2; | ||
241 | + | ||
242 | + T1 = h + sum1_64(e) + ch(e, f, g) + W1; | ||
243 | + T2 = sum0_64(a) + maj(a, b, c); | ||
244 | + h = g; | ||
245 | + g = f; | ||
246 | + f = e; | ||
247 | + e = d + T1; | ||
248 | + d = c; | ||
249 | + c = b; | ||
250 | + b = a; | ||
251 | + a = T1 + T2; | ||
252 | + | ||
253 | + vd[0] = f; | ||
254 | + vd[1] = e; | ||
255 | + vd[2] = b; | ||
256 | + vd[3] = a; | ||
257 | +} | ||
258 | + | ||
259 | +static void vsha2c_32(uint32_t *vs2, uint32_t *vd, uint32_t *vs1) | ||
260 | +{ | ||
261 | + uint32_t a = vs2[H4(3)], b = vs2[H4(2)], e = vs2[H4(1)], f = vs2[H4(0)]; | ||
262 | + uint32_t c = vd[H4(3)], d = vd[H4(2)], g = vd[H4(1)], h = vd[H4(0)]; | ||
263 | + uint32_t W0 = vs1[H4(0)], W1 = vs1[H4(1)]; | ||
264 | + uint32_t T1 = h + sum1_32(e) + ch(e, f, g) + W0; | ||
265 | + uint32_t T2 = sum0_32(a) + maj(a, b, c); | ||
266 | + | ||
267 | + h = g; | ||
268 | + g = f; | ||
269 | + f = e; | ||
270 | + e = d + T1; | ||
271 | + d = c; | ||
272 | + c = b; | ||
273 | + b = a; | ||
274 | + a = T1 + T2; | ||
275 | + | ||
276 | + T1 = h + sum1_32(e) + ch(e, f, g) + W1; | ||
277 | + T2 = sum0_32(a) + maj(a, b, c); | ||
278 | + h = g; | ||
279 | + g = f; | ||
280 | + f = e; | ||
281 | + e = d + T1; | ||
282 | + d = c; | ||
283 | + c = b; | ||
284 | + b = a; | ||
285 | + a = T1 + T2; | ||
286 | + | ||
287 | + vd[H4(0)] = f; | ||
288 | + vd[H4(1)] = e; | ||
289 | + vd[H4(2)] = b; | ||
290 | + vd[H4(3)] = a; | ||
291 | +} | ||
292 | + | ||
293 | +void HELPER(vsha2ch32_vv)(void *vd, void *vs1, void *vs2, CPURISCVState *env, | ||
294 | + uint32_t desc) | ||
295 | +{ | ||
296 | + const uint32_t esz = 4; | ||
297 | + uint32_t total_elems; | ||
298 | + uint32_t vta = vext_vta(desc); | ||
299 | + | ||
300 | + for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) { | ||
301 | + vsha2c_32(((uint32_t *)vs2) + 4 * i, ((uint32_t *)vd) + 4 * i, | ||
302 | + ((uint32_t *)vs1) + 4 * i + 2); | ||
303 | + } | ||
304 | + | ||
305 | + /* set tail elements to 1s */ | ||
306 | + total_elems = vext_get_total_elems(env, desc, esz); | ||
307 | + vext_set_elems_1s(vd, vta, env->vl * esz, total_elems * esz); | ||
308 | + env->vstart = 0; | ||
309 | +} | ||
310 | + | ||
311 | +void HELPER(vsha2ch64_vv)(void *vd, void *vs1, void *vs2, CPURISCVState *env, | ||
312 | + uint32_t desc) | ||
313 | +{ | ||
314 | + const uint32_t esz = 8; | ||
315 | + uint32_t total_elems; | ||
316 | + uint32_t vta = vext_vta(desc); | ||
317 | + | ||
318 | + for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) { | ||
319 | + vsha2c_64(((uint64_t *)vs2) + 4 * i, ((uint64_t *)vd) + 4 * i, | ||
320 | + ((uint64_t *)vs1) + 4 * i + 2); | ||
321 | + } | ||
322 | + | ||
323 | + /* set tail elements to 1s */ | ||
324 | + total_elems = vext_get_total_elems(env, desc, esz); | ||
325 | + vext_set_elems_1s(vd, vta, env->vl * esz, total_elems * esz); | ||
326 | + env->vstart = 0; | ||
327 | +} | ||
328 | + | ||
329 | +void HELPER(vsha2cl32_vv)(void *vd, void *vs1, void *vs2, CPURISCVState *env, | ||
330 | + uint32_t desc) | ||
331 | +{ | ||
332 | + const uint32_t esz = 4; | ||
333 | + uint32_t total_elems; | ||
334 | + uint32_t vta = vext_vta(desc); | ||
335 | + | ||
336 | + for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) { | ||
337 | + vsha2c_32(((uint32_t *)vs2) + 4 * i, ((uint32_t *)vd) + 4 * i, | ||
338 | + (((uint32_t *)vs1) + 4 * i)); | ||
339 | + } | ||
340 | + | ||
341 | + /* set tail elements to 1s */ | ||
342 | + total_elems = vext_get_total_elems(env, desc, esz); | ||
343 | + vext_set_elems_1s(vd, vta, env->vl * esz, total_elems * esz); | ||
344 | + env->vstart = 0; | ||
345 | +} | ||
346 | + | ||
347 | +void HELPER(vsha2cl64_vv)(void *vd, void *vs1, void *vs2, CPURISCVState *env, | ||
348 | + uint32_t desc) | ||
349 | +{ | ||
350 | + uint32_t esz = 8; | ||
351 | + uint32_t total_elems; | ||
352 | + uint32_t vta = vext_vta(desc); | ||
353 | + | ||
354 | + for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) { | ||
355 | + vsha2c_64(((uint64_t *)vs2) + 4 * i, ((uint64_t *)vd) + 4 * i, | ||
356 | + (((uint64_t *)vs1) + 4 * i)); | ||
357 | + } | ||
358 | + | ||
359 | + /* set tail elements to 1s */ | ||
360 | + total_elems = vext_get_total_elems(env, desc, esz); | ||
361 | + vext_set_elems_1s(vd, vta, env->vl * esz, total_elems * esz); | ||
362 | + env->vstart = 0; | ||
363 | +} | ||
364 | diff --git a/target/riscv/insn_trans/trans_rvvk.c.inc b/target/riscv/insn_trans/trans_rvvk.c.inc | ||
365 | index XXXXXXX..XXXXXXX 100644 | ||
366 | --- a/target/riscv/insn_trans/trans_rvvk.c.inc | ||
367 | +++ b/target/riscv/insn_trans/trans_rvvk.c.inc | ||
368 | @@ -XXX,XX +XXX,XX @@ static bool vaeskf2_check(DisasContext *s, arg_vaeskf2_vi *a) | ||
369 | |||
370 | GEN_VI_UNMASKED_TRANS(vaeskf1_vi, vaeskf1_check, ZVKNED_EGS) | ||
371 | GEN_VI_UNMASKED_TRANS(vaeskf2_vi, vaeskf2_check, ZVKNED_EGS) | ||
372 | + | ||
373 | +/* | ||
374 | + * Zvknh | ||
375 | + */ | ||
376 | + | ||
377 | +#define ZVKNH_EGS 4 | ||
378 | + | ||
379 | +#define GEN_VV_UNMASKED_TRANS(NAME, CHECK, EGS) \ | ||
380 | + static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ | ||
381 | + { \ | ||
382 | + if (CHECK(s, a)) { \ | ||
383 | + uint32_t data = 0; \ | ||
384 | + TCGLabel *over = gen_new_label(); \ | ||
385 | + TCGv_i32 egs; \ | ||
386 | + \ | ||
387 | + if (!s->vstart_eq_zero || !s->vl_eq_vlmax) { \ | ||
388 | + /* save opcode for unwinding in case we throw an exception */ \ | ||
389 | + decode_save_opc(s); \ | ||
390 | + egs = tcg_constant_i32(EGS); \ | ||
391 | + gen_helper_egs_check(egs, cpu_env); \ | ||
392 | + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ | ||
393 | + } \ | ||
394 | + \ | ||
395 | + data = FIELD_DP32(data, VDATA, VM, a->vm); \ | ||
396 | + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ | ||
397 | + data = FIELD_DP32(data, VDATA, VTA, s->vta); \ | ||
398 | + data = FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s); \ | ||
399 | + data = FIELD_DP32(data, VDATA, VMA, s->vma); \ | ||
400 | + \ | ||
401 | + tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, a->rs1), \ | ||
402 | + vreg_ofs(s, a->rs2), cpu_env, \ | ||
403 | + s->cfg_ptr->vlen / 8, s->cfg_ptr->vlen / 8, \ | ||
404 | + data, gen_helper_##NAME); \ | ||
405 | + \ | ||
406 | + mark_vs_dirty(s); \ | ||
407 | + gen_set_label(over); \ | ||
408 | + return true; \ | ||
409 | + } \ | ||
410 | + return false; \ | ||
411 | + } | ||
412 | + | ||
413 | +static bool vsha_check_sew(DisasContext *s) | ||
414 | +{ | ||
415 | + return (s->cfg_ptr->ext_zvknha == true && s->sew == MO_32) || | ||
416 | + (s->cfg_ptr->ext_zvknhb == true && | ||
417 | + (s->sew == MO_32 || s->sew == MO_64)); | ||
418 | +} | ||
419 | + | ||
420 | +static bool vsha_check(DisasContext *s, arg_rmrr *a) | ||
421 | +{ | ||
422 | + int egw_bytes = ZVKNH_EGS << s->sew; | ||
423 | + int mult = 1 << MAX(s->lmul, 0); | ||
424 | + return opivv_check(s, a) && | ||
425 | + vsha_check_sew(s) && | ||
426 | + MAXSZ(s) >= egw_bytes && | ||
427 | + !is_overlapped(a->rd, mult, a->rs1, mult) && | ||
428 | + !is_overlapped(a->rd, mult, a->rs2, mult) && | ||
429 | + s->lmul >= 0; | ||
430 | +} | ||
431 | + | ||
432 | +GEN_VV_UNMASKED_TRANS(vsha2ms_vv, vsha_check, ZVKNH_EGS) | ||
433 | + | ||
434 | +static bool trans_vsha2cl_vv(DisasContext *s, arg_rmrr *a) | ||
435 | +{ | ||
436 | + if (vsha_check(s, a)) { | ||
437 | + uint32_t data = 0; | ||
438 | + TCGLabel *over = gen_new_label(); | ||
439 | + TCGv_i32 egs; | ||
440 | + | ||
441 | + if (!s->vstart_eq_zero || !s->vl_eq_vlmax) { | ||
442 | + /* save opcode for unwinding in case we throw an exception */ | ||
443 | + decode_save_opc(s); | ||
444 | + egs = tcg_constant_i32(ZVKNH_EGS); | ||
445 | + gen_helper_egs_check(egs, cpu_env); | ||
446 | + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); | ||
447 | + } | ||
448 | + | ||
449 | + data = FIELD_DP32(data, VDATA, VM, a->vm); | ||
450 | + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); | ||
451 | + data = FIELD_DP32(data, VDATA, VTA, s->vta); | ||
452 | + data = FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s); | ||
453 | + data = FIELD_DP32(data, VDATA, VMA, s->vma); | ||
454 | + | ||
455 | + tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, a->rs1), | ||
456 | + vreg_ofs(s, a->rs2), cpu_env, s->cfg_ptr->vlen / 8, | ||
457 | + s->cfg_ptr->vlen / 8, data, | ||
458 | + s->sew == MO_32 ? | ||
459 | + gen_helper_vsha2cl32_vv : gen_helper_vsha2cl64_vv); | ||
460 | + | ||
461 | + mark_vs_dirty(s); | ||
462 | + gen_set_label(over); | ||
463 | + return true; | ||
464 | + } | ||
465 | + return false; | ||
466 | +} | ||
467 | + | ||
468 | +static bool trans_vsha2ch_vv(DisasContext *s, arg_rmrr *a) | ||
469 | +{ | ||
470 | + if (vsha_check(s, a)) { | ||
471 | + uint32_t data = 0; | ||
472 | + TCGLabel *over = gen_new_label(); | ||
473 | + TCGv_i32 egs; | ||
474 | + | ||
475 | + if (!s->vstart_eq_zero || !s->vl_eq_vlmax) { | ||
476 | + /* save opcode for unwinding in case we throw an exception */ | ||
477 | + decode_save_opc(s); | ||
478 | + egs = tcg_constant_i32(ZVKNH_EGS); | ||
479 | + gen_helper_egs_check(egs, cpu_env); | ||
480 | + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); | ||
481 | + } | ||
482 | + | ||
483 | + data = FIELD_DP32(data, VDATA, VM, a->vm); | ||
484 | + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); | ||
485 | + data = FIELD_DP32(data, VDATA, VTA, s->vta); | ||
486 | + data = FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s); | ||
487 | + data = FIELD_DP32(data, VDATA, VMA, s->vma); | ||
488 | + | ||
489 | + tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, a->rs1), | ||
490 | + vreg_ofs(s, a->rs2), cpu_env, s->cfg_ptr->vlen / 8, | ||
491 | + s->cfg_ptr->vlen / 8, data, | ||
492 | + s->sew == MO_32 ? | ||
493 | + gen_helper_vsha2ch32_vv : gen_helper_vsha2ch64_vv); | ||
494 | + | ||
495 | + mark_vs_dirty(s); | ||
496 | + gen_set_label(over); | ||
497 | + return true; | ||
498 | + } | ||
499 | + return false; | ||
500 | +} | ||
501 | -- | ||
502 | 2.41.0 | diff view generated by jsdifflib |
1 | From: eopXD <yueh.ting.chen@gmail.com> | 1 | From: Lawrence Hunter <lawrence.hunter@codethink.co.uk> |
---|---|---|---|
2 | 2 | ||
3 | Signed-off-by: eop Chen <eop.chen@sifive.com> | 3 | This commit adds support for the Zvksh vector-crypto extension, which |
4 | Reviewed-by: Frank Chang <frank.chang@sifive.com> | 4 | consists of the following instructions: |
5 | Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn> | 5 | |
6 | Acked-by: Alistair Francis <alistair.francis@wdc.com> | 6 | * vsm3me.vv |
7 | Message-Id: <165449614532.19704.7000832880482980398-11@git.sr.ht> | 7 | * vsm3c.vi |
8 | |||
9 | Translation functions are defined in | ||
10 | `target/riscv/insn_trans/trans_rvvk.c.inc` and helpers are defined in | ||
11 | `target/riscv/vcrypto_helper.c`. | ||
12 | |||
13 | Co-authored-by: Kiran Ostrolenk <kiran.ostrolenk@codethink.co.uk> | ||
14 | [max.chou@sifive.com: Replaced vstart checking by TCG op] | ||
15 | Signed-off-by: Kiran Ostrolenk <kiran.ostrolenk@codethink.co.uk> | ||
16 | Signed-off-by: Lawrence Hunter <lawrence.hunter@codethink.co.uk> | ||
17 | Signed-off-by: Max Chou <max.chou@sifive.com> | ||
18 | Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> | ||
19 | [max.chou@sifive.com: Exposed x-zvksh property] | ||
20 | Message-ID: <20230711165917.2629866-12-max.chou@sifive.com> | ||
8 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | 21 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> |
9 | --- | 22 | --- |
10 | target/riscv/vector_helper.c | 220 ++++++++++++++++++----------------- | 23 | target/riscv/cpu_cfg.h | 1 + |
11 | 1 file changed, 114 insertions(+), 106 deletions(-) | 24 | target/riscv/helper.h | 3 + |
12 | 25 | target/riscv/insn32.decode | 4 + | |
13 | diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c | 26 | target/riscv/cpu.c | 6 +- |
14 | index XXXXXXX..XXXXXXX 100644 | 27 | target/riscv/vcrypto_helper.c | 134 +++++++++++++++++++++++ |
15 | --- a/target/riscv/vector_helper.c | 28 | target/riscv/insn_trans/trans_rvvk.c.inc | 31 ++++++ |
16 | +++ b/target/riscv/vector_helper.c | 29 | 6 files changed, 177 insertions(+), 2 deletions(-) |
17 | @@ -XXX,XX +XXX,XX @@ static inline void | 30 | |
18 | vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2, | 31 | diff --git a/target/riscv/cpu_cfg.h b/target/riscv/cpu_cfg.h |
19 | CPURISCVState *env, | 32 | index XXXXXXX..XXXXXXX 100644 |
20 | uint32_t desc, | 33 | --- a/target/riscv/cpu_cfg.h |
21 | - opivv2_rm_fn *fn) | 34 | +++ b/target/riscv/cpu_cfg.h |
22 | + opivv2_rm_fn *fn, uint32_t esz) | 35 | @@ -XXX,XX +XXX,XX @@ struct RISCVCPUConfig { |
23 | { | 36 | bool ext_zvkned; |
24 | uint32_t vm = vext_vm(desc); | 37 | bool ext_zvknha; |
25 | uint32_t vl = env->vl; | 38 | bool ext_zvknhb; |
39 | + bool ext_zvksh; | ||
40 | bool ext_zmmul; | ||
41 | bool ext_zvfbfmin; | ||
42 | bool ext_zvfbfwma; | ||
43 | diff --git a/target/riscv/helper.h b/target/riscv/helper.h | ||
44 | index XXXXXXX..XXXXXXX 100644 | ||
45 | --- a/target/riscv/helper.h | ||
46 | +++ b/target/riscv/helper.h | ||
47 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_5(vsha2ch32_vv, void, ptr, ptr, ptr, env, i32) | ||
48 | DEF_HELPER_5(vsha2ch64_vv, void, ptr, ptr, ptr, env, i32) | ||
49 | DEF_HELPER_5(vsha2cl32_vv, void, ptr, ptr, ptr, env, i32) | ||
50 | DEF_HELPER_5(vsha2cl64_vv, void, ptr, ptr, ptr, env, i32) | ||
51 | + | ||
52 | +DEF_HELPER_5(vsm3me_vv, void, ptr, ptr, ptr, env, i32) | ||
53 | +DEF_HELPER_5(vsm3c_vi, void, ptr, ptr, i32, env, i32) | ||
54 | diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode | ||
55 | index XXXXXXX..XXXXXXX 100644 | ||
56 | --- a/target/riscv/insn32.decode | ||
57 | +++ b/target/riscv/insn32.decode | ||
58 | @@ -XXX,XX +XXX,XX @@ vaeskf2_vi 101010 1 ..... ..... 010 ..... 1110111 @r_vm_1 | ||
59 | vsha2ms_vv 101101 1 ..... ..... 010 ..... 1110111 @r_vm_1 | ||
60 | vsha2ch_vv 101110 1 ..... ..... 010 ..... 1110111 @r_vm_1 | ||
61 | vsha2cl_vv 101111 1 ..... ..... 010 ..... 1110111 @r_vm_1 | ||
62 | + | ||
63 | +# *** Zvksh vector crypto extension *** | ||
64 | +vsm3me_vv 100000 1 ..... ..... 010 ..... 1110111 @r_vm_1 | ||
65 | +vsm3c_vi 101011 1 ..... ..... 010 ..... 1110111 @r_vm_1 | ||
66 | diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c | ||
67 | index XXXXXXX..XXXXXXX 100644 | ||
68 | --- a/target/riscv/cpu.c | ||
69 | +++ b/target/riscv/cpu.c | ||
70 | @@ -XXX,XX +XXX,XX @@ static const struct isa_ext_data isa_edata_arr[] = { | ||
71 | ISA_EXT_DATA_ENTRY(zvkned, PRIV_VERSION_1_12_0, ext_zvkned), | ||
72 | ISA_EXT_DATA_ENTRY(zvknha, PRIV_VERSION_1_12_0, ext_zvknha), | ||
73 | ISA_EXT_DATA_ENTRY(zvknhb, PRIV_VERSION_1_12_0, ext_zvknhb), | ||
74 | + ISA_EXT_DATA_ENTRY(zvksh, PRIV_VERSION_1_12_0, ext_zvksh), | ||
75 | ISA_EXT_DATA_ENTRY(zhinx, PRIV_VERSION_1_12_0, ext_zhinx), | ||
76 | ISA_EXT_DATA_ENTRY(zhinxmin, PRIV_VERSION_1_12_0, ext_zhinxmin), | ||
77 | ISA_EXT_DATA_ENTRY(smaia, PRIV_VERSION_1_12_0, ext_smaia), | ||
78 | @@ -XXX,XX +XXX,XX @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp) | ||
79 | * In principle Zve*x would also suffice here, were they supported | ||
80 | * in qemu | ||
81 | */ | ||
82 | - if ((cpu->cfg.ext_zvbb || cpu->cfg.ext_zvkned || cpu->cfg.ext_zvknha) && | ||
83 | - !cpu->cfg.ext_zve32f) { | ||
84 | + if ((cpu->cfg.ext_zvbb || cpu->cfg.ext_zvkned || cpu->cfg.ext_zvknha || | ||
85 | + cpu->cfg.ext_zvksh) && !cpu->cfg.ext_zve32f) { | ||
86 | error_setg(errp, | ||
87 | "Vector crypto extensions require V or Zve* extensions"); | ||
88 | return; | ||
89 | @@ -XXX,XX +XXX,XX @@ static Property riscv_cpu_extensions[] = { | ||
90 | DEFINE_PROP_BOOL("x-zvkned", RISCVCPU, cfg.ext_zvkned, false), | ||
91 | DEFINE_PROP_BOOL("x-zvknha", RISCVCPU, cfg.ext_zvknha, false), | ||
92 | DEFINE_PROP_BOOL("x-zvknhb", RISCVCPU, cfg.ext_zvknhb, false), | ||
93 | + DEFINE_PROP_BOOL("x-zvksh", RISCVCPU, cfg.ext_zvksh, false), | ||
94 | |||
95 | DEFINE_PROP_END_OF_LIST(), | ||
96 | }; | ||
97 | diff --git a/target/riscv/vcrypto_helper.c b/target/riscv/vcrypto_helper.c | ||
98 | index XXXXXXX..XXXXXXX 100644 | ||
99 | --- a/target/riscv/vcrypto_helper.c | ||
100 | +++ b/target/riscv/vcrypto_helper.c | ||
101 | @@ -XXX,XX +XXX,XX @@ void HELPER(vsha2cl64_vv)(void *vd, void *vs1, void *vs2, CPURISCVState *env, | ||
102 | vext_set_elems_1s(vd, vta, env->vl * esz, total_elems * esz); | ||
103 | env->vstart = 0; | ||
104 | } | ||
105 | + | ||
106 | +static inline uint32_t p1(uint32_t x) | ||
107 | +{ | ||
108 | + return x ^ rol32(x, 15) ^ rol32(x, 23); | ||
109 | +} | ||
110 | + | ||
111 | +static inline uint32_t zvksh_w(uint32_t m16, uint32_t m9, uint32_t m3, | ||
112 | + uint32_t m13, uint32_t m6) | ||
113 | +{ | ||
114 | + return p1(m16 ^ m9 ^ rol32(m3, 15)) ^ rol32(m13, 7) ^ m6; | ||
115 | +} | ||
116 | + | ||
117 | +void HELPER(vsm3me_vv)(void *vd_vptr, void *vs1_vptr, void *vs2_vptr, | ||
118 | + CPURISCVState *env, uint32_t desc) | ||
119 | +{ | ||
120 | + uint32_t esz = memop_size(FIELD_EX64(env->vtype, VTYPE, VSEW)); | ||
26 | + uint32_t total_elems = vext_get_total_elems(env, desc, esz); | 121 | + uint32_t total_elems = vext_get_total_elems(env, desc, esz); |
27 | + uint32_t vta = vext_vta(desc); | 122 | + uint32_t vta = vext_vta(desc); |
28 | 123 | + uint32_t *vd = vd_vptr; | |
29 | switch (env->vxrm) { | 124 | + uint32_t *vs1 = vs1_vptr; |
30 | case 0: /* rnu */ | 125 | + uint32_t *vs2 = vs2_vptr; |
31 | @@ -XXX,XX +XXX,XX @@ vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2, | 126 | + |
32 | env, vl, vm, 3, fn); | 127 | + for (int i = env->vstart / 8; i < env->vl / 8; i++) { |
33 | break; | 128 | + uint32_t w[24]; |
34 | } | 129 | + for (int j = 0; j < 8; j++) { |
35 | + /* set tail elements to 1s */ | 130 | + w[j] = bswap32(vs1[H4((i * 8) + j)]); |
36 | + vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); | 131 | + w[j + 8] = bswap32(vs2[H4((i * 8) + j)]); |
37 | } | 132 | + } |
38 | 133 | + for (int j = 0; j < 8; j++) { | |
39 | /* generate helpers for fixed point instructions with OPIVV format */ | 134 | + w[j + 16] = |
40 | -#define GEN_VEXT_VV_RM(NAME) \ | 135 | + zvksh_w(w[j], w[j + 7], w[j + 13], w[j + 3], w[j + 10]); |
41 | +#define GEN_VEXT_VV_RM(NAME, ESZ) \ | 136 | + } |
42 | void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ | 137 | + for (int j = 0; j < 8; j++) { |
43 | CPURISCVState *env, uint32_t desc) \ | 138 | + vd[(i * 8) + j] = bswap32(w[H4(j + 16)]); |
44 | { \ | 139 | + } |
45 | vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, \ | 140 | + } |
46 | - do_##NAME); \ | 141 | + vext_set_elems_1s(vd_vptr, vta, env->vl * esz, total_elems * esz); |
47 | + do_##NAME, ESZ); \ | 142 | + env->vstart = 0; |
48 | } | 143 | +} |
49 | 144 | + | |
50 | static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) | 145 | +static inline uint32_t ff1(uint32_t x, uint32_t y, uint32_t z) |
51 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8) | 146 | +{ |
52 | RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16) | 147 | + return x ^ y ^ z; |
53 | RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32) | 148 | +} |
54 | RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64) | 149 | + |
55 | -GEN_VEXT_VV_RM(vsaddu_vv_b) | 150 | +static inline uint32_t ff2(uint32_t x, uint32_t y, uint32_t z) |
56 | -GEN_VEXT_VV_RM(vsaddu_vv_h) | 151 | +{ |
57 | -GEN_VEXT_VV_RM(vsaddu_vv_w) | 152 | + return (x & y) | (x & z) | (y & z); |
58 | -GEN_VEXT_VV_RM(vsaddu_vv_d) | 153 | +} |
59 | +GEN_VEXT_VV_RM(vsaddu_vv_b, 1) | 154 | + |
60 | +GEN_VEXT_VV_RM(vsaddu_vv_h, 2) | 155 | +static inline uint32_t ff_j(uint32_t x, uint32_t y, uint32_t z, uint32_t j) |
61 | +GEN_VEXT_VV_RM(vsaddu_vv_w, 4) | 156 | +{ |
62 | +GEN_VEXT_VV_RM(vsaddu_vv_d, 8) | 157 | + return (j <= 15) ? ff1(x, y, z) : ff2(x, y, z); |
63 | 158 | +} | |
64 | typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i, | 159 | + |
65 | CPURISCVState *env, int vxrm); | 160 | +static inline uint32_t gg1(uint32_t x, uint32_t y, uint32_t z) |
66 | @@ -XXX,XX +XXX,XX @@ static inline void | 161 | +{ |
67 | vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2, | 162 | + return x ^ y ^ z; |
68 | CPURISCVState *env, | 163 | +} |
69 | uint32_t desc, | 164 | + |
70 | - opivx2_rm_fn *fn) | 165 | +static inline uint32_t gg2(uint32_t x, uint32_t y, uint32_t z) |
71 | + opivx2_rm_fn *fn, uint32_t esz) | 166 | +{ |
72 | { | 167 | + return (x & y) | (~x & z); |
73 | uint32_t vm = vext_vm(desc); | 168 | +} |
74 | uint32_t vl = env->vl; | 169 | + |
170 | +static inline uint32_t gg_j(uint32_t x, uint32_t y, uint32_t z, uint32_t j) | ||
171 | +{ | ||
172 | + return (j <= 15) ? gg1(x, y, z) : gg2(x, y, z); | ||
173 | +} | ||
174 | + | ||
175 | +static inline uint32_t t_j(uint32_t j) | ||
176 | +{ | ||
177 | + return (j <= 15) ? 0x79cc4519 : 0x7a879d8a; | ||
178 | +} | ||
179 | + | ||
180 | +static inline uint32_t p_0(uint32_t x) | ||
181 | +{ | ||
182 | + return x ^ rol32(x, 9) ^ rol32(x, 17); | ||
183 | +} | ||
184 | + | ||
185 | +static void sm3c(uint32_t *vd, uint32_t *vs1, uint32_t *vs2, uint32_t uimm) | ||
186 | +{ | ||
187 | + uint32_t x0, x1; | ||
188 | + uint32_t j; | ||
189 | + uint32_t ss1, ss2, tt1, tt2; | ||
190 | + x0 = vs2[0] ^ vs2[4]; | ||
191 | + x1 = vs2[1] ^ vs2[5]; | ||
192 | + j = 2 * uimm; | ||
193 | + ss1 = rol32(rol32(vs1[0], 12) + vs1[4] + rol32(t_j(j), j % 32), 7); | ||
194 | + ss2 = ss1 ^ rol32(vs1[0], 12); | ||
195 | + tt1 = ff_j(vs1[0], vs1[1], vs1[2], j) + vs1[3] + ss2 + x0; | ||
196 | + tt2 = gg_j(vs1[4], vs1[5], vs1[6], j) + vs1[7] + ss1 + vs2[0]; | ||
197 | + vs1[3] = vs1[2]; | ||
198 | + vd[3] = rol32(vs1[1], 9); | ||
199 | + vs1[1] = vs1[0]; | ||
200 | + vd[1] = tt1; | ||
201 | + vs1[7] = vs1[6]; | ||
202 | + vd[7] = rol32(vs1[5], 19); | ||
203 | + vs1[5] = vs1[4]; | ||
204 | + vd[5] = p_0(tt2); | ||
205 | + j = 2 * uimm + 1; | ||
206 | + ss1 = rol32(rol32(vd[1], 12) + vd[5] + rol32(t_j(j), j % 32), 7); | ||
207 | + ss2 = ss1 ^ rol32(vd[1], 12); | ||
208 | + tt1 = ff_j(vd[1], vs1[1], vd[3], j) + vs1[3] + ss2 + x1; | ||
209 | + tt2 = gg_j(vd[5], vs1[5], vd[7], j) + vs1[7] + ss1 + vs2[1]; | ||
210 | + vd[2] = rol32(vs1[1], 9); | ||
211 | + vd[0] = tt1; | ||
212 | + vd[6] = rol32(vs1[5], 19); | ||
213 | + vd[4] = p_0(tt2); | ||
214 | +} | ||
215 | + | ||
216 | +void HELPER(vsm3c_vi)(void *vd_vptr, void *vs2_vptr, uint32_t uimm, | ||
217 | + CPURISCVState *env, uint32_t desc) | ||
218 | +{ | ||
219 | + uint32_t esz = memop_size(FIELD_EX64(env->vtype, VTYPE, VSEW)); | ||
75 | + uint32_t total_elems = vext_get_total_elems(env, desc, esz); | 220 | + uint32_t total_elems = vext_get_total_elems(env, desc, esz); |
76 | + uint32_t vta = vext_vta(desc); | 221 | + uint32_t vta = vext_vta(desc); |
77 | 222 | + uint32_t *vd = vd_vptr; | |
78 | switch (env->vxrm) { | 223 | + uint32_t *vs2 = vs2_vptr; |
79 | case 0: /* rnu */ | 224 | + uint32_t v1[8], v2[8], v3[8]; |
80 | @@ -XXX,XX +XXX,XX @@ vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2, | 225 | + |
81 | env, vl, vm, 3, fn); | 226 | + for (int i = env->vstart / 8; i < env->vl / 8; i++) { |
82 | break; | 227 | + for (int k = 0; k < 8; k++) { |
228 | + v2[k] = bswap32(vd[H4(i * 8 + k)]); | ||
229 | + v3[k] = bswap32(vs2[H4(i * 8 + k)]); | ||
230 | + } | ||
231 | + sm3c(v1, v2, v3, uimm); | ||
232 | + for (int k = 0; k < 8; k++) { | ||
233 | + vd[i * 8 + k] = bswap32(v1[H4(k)]); | ||
234 | + } | ||
235 | + } | ||
236 | + vext_set_elems_1s(vd_vptr, vta, env->vl * esz, total_elems * esz); | ||
237 | + env->vstart = 0; | ||
238 | +} | ||
239 | diff --git a/target/riscv/insn_trans/trans_rvvk.c.inc b/target/riscv/insn_trans/trans_rvvk.c.inc | ||
240 | index XXXXXXX..XXXXXXX 100644 | ||
241 | --- a/target/riscv/insn_trans/trans_rvvk.c.inc | ||
242 | +++ b/target/riscv/insn_trans/trans_rvvk.c.inc | ||
243 | @@ -XXX,XX +XXX,XX @@ static bool trans_vsha2ch_vv(DisasContext *s, arg_rmrr *a) | ||
83 | } | 244 | } |
84 | + /* set tail elements to 1s */ | 245 | return false; |
85 | + vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); | ||
86 | } | 246 | } |
87 | 247 | + | |
88 | /* generate helpers for fixed point instructions with OPIVX format */ | 248 | +/* |
89 | -#define GEN_VEXT_VX_RM(NAME) \ | 249 | + * Zvksh |
90 | +#define GEN_VEXT_VX_RM(NAME, ESZ) \ | 250 | + */ |
91 | void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ | 251 | + |
92 | void *vs2, CPURISCVState *env, uint32_t desc) \ | 252 | +#define ZVKSH_EGS 8 |
93 | { \ | 253 | + |
94 | vext_vx_rm_2(vd, v0, s1, vs2, env, desc, \ | 254 | +static inline bool vsm3_check(DisasContext *s, arg_rmrr *a) |
95 | - do_##NAME); \ | 255 | +{ |
96 | + do_##NAME, ESZ); \ | 256 | + int egw_bytes = ZVKSH_EGS << s->sew; |
97 | } | 257 | + int mult = 1 << MAX(s->lmul, 0); |
98 | 258 | + return s->cfg_ptr->ext_zvksh == true && | |
99 | RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8) | 259 | + require_rvv(s) && |
100 | RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16) | 260 | + vext_check_isa_ill(s) && |
101 | RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32) | 261 | + !is_overlapped(a->rd, mult, a->rs2, mult) && |
102 | RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64) | 262 | + MAXSZ(s) >= egw_bytes && |
103 | -GEN_VEXT_VX_RM(vsaddu_vx_b) | 263 | + s->sew == MO_32; |
104 | -GEN_VEXT_VX_RM(vsaddu_vx_h) | 264 | +} |
105 | -GEN_VEXT_VX_RM(vsaddu_vx_w) | 265 | + |
106 | -GEN_VEXT_VX_RM(vsaddu_vx_d) | 266 | +static inline bool vsm3me_check(DisasContext *s, arg_rmrr *a) |
107 | +GEN_VEXT_VX_RM(vsaddu_vx_b, 1) | 267 | +{ |
108 | +GEN_VEXT_VX_RM(vsaddu_vx_h, 2) | 268 | + return vsm3_check(s, a) && vext_check_sss(s, a->rd, a->rs1, a->rs2, a->vm); |
109 | +GEN_VEXT_VX_RM(vsaddu_vx_w, 4) | 269 | +} |
110 | +GEN_VEXT_VX_RM(vsaddu_vx_d, 8) | 270 | + |
111 | 271 | +static inline bool vsm3c_check(DisasContext *s, arg_rmrr *a) | |
112 | static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) | 272 | +{ |
113 | { | 273 | + return vsm3_check(s, a) && vext_check_ss(s, a->rd, a->rs2, a->vm); |
114 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8) | 274 | +} |
115 | RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16) | 275 | + |
116 | RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32) | 276 | +GEN_VV_UNMASKED_TRANS(vsm3me_vv, vsm3me_check, ZVKSH_EGS) |
117 | RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64) | 277 | +GEN_VI_UNMASKED_TRANS(vsm3c_vi, vsm3c_check, ZVKSH_EGS) |
118 | -GEN_VEXT_VV_RM(vsadd_vv_b) | ||
119 | -GEN_VEXT_VV_RM(vsadd_vv_h) | ||
120 | -GEN_VEXT_VV_RM(vsadd_vv_w) | ||
121 | -GEN_VEXT_VV_RM(vsadd_vv_d) | ||
122 | +GEN_VEXT_VV_RM(vsadd_vv_b, 1) | ||
123 | +GEN_VEXT_VV_RM(vsadd_vv_h, 2) | ||
124 | +GEN_VEXT_VV_RM(vsadd_vv_w, 4) | ||
125 | +GEN_VEXT_VV_RM(vsadd_vv_d, 8) | ||
126 | |||
127 | RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8) | ||
128 | RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16) | ||
129 | RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32) | ||
130 | RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64) | ||
131 | -GEN_VEXT_VX_RM(vsadd_vx_b) | ||
132 | -GEN_VEXT_VX_RM(vsadd_vx_h) | ||
133 | -GEN_VEXT_VX_RM(vsadd_vx_w) | ||
134 | -GEN_VEXT_VX_RM(vsadd_vx_d) | ||
135 | +GEN_VEXT_VX_RM(vsadd_vx_b, 1) | ||
136 | +GEN_VEXT_VX_RM(vsadd_vx_h, 2) | ||
137 | +GEN_VEXT_VX_RM(vsadd_vx_w, 4) | ||
138 | +GEN_VEXT_VX_RM(vsadd_vx_d, 8) | ||
139 | |||
140 | static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) | ||
141 | { | ||
142 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8) | ||
143 | RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16) | ||
144 | RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32) | ||
145 | RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64) | ||
146 | -GEN_VEXT_VV_RM(vssubu_vv_b) | ||
147 | -GEN_VEXT_VV_RM(vssubu_vv_h) | ||
148 | -GEN_VEXT_VV_RM(vssubu_vv_w) | ||
149 | -GEN_VEXT_VV_RM(vssubu_vv_d) | ||
150 | +GEN_VEXT_VV_RM(vssubu_vv_b, 1) | ||
151 | +GEN_VEXT_VV_RM(vssubu_vv_h, 2) | ||
152 | +GEN_VEXT_VV_RM(vssubu_vv_w, 4) | ||
153 | +GEN_VEXT_VV_RM(vssubu_vv_d, 8) | ||
154 | |||
155 | RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8) | ||
156 | RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16) | ||
157 | RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32) | ||
158 | RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64) | ||
159 | -GEN_VEXT_VX_RM(vssubu_vx_b) | ||
160 | -GEN_VEXT_VX_RM(vssubu_vx_h) | ||
161 | -GEN_VEXT_VX_RM(vssubu_vx_w) | ||
162 | -GEN_VEXT_VX_RM(vssubu_vx_d) | ||
163 | +GEN_VEXT_VX_RM(vssubu_vx_b, 1) | ||
164 | +GEN_VEXT_VX_RM(vssubu_vx_h, 2) | ||
165 | +GEN_VEXT_VX_RM(vssubu_vx_w, 4) | ||
166 | +GEN_VEXT_VX_RM(vssubu_vx_d, 8) | ||
167 | |||
168 | static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) | ||
169 | { | ||
170 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8) | ||
171 | RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16) | ||
172 | RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32) | ||
173 | RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64) | ||
174 | -GEN_VEXT_VV_RM(vssub_vv_b) | ||
175 | -GEN_VEXT_VV_RM(vssub_vv_h) | ||
176 | -GEN_VEXT_VV_RM(vssub_vv_w) | ||
177 | -GEN_VEXT_VV_RM(vssub_vv_d) | ||
178 | +GEN_VEXT_VV_RM(vssub_vv_b, 1) | ||
179 | +GEN_VEXT_VV_RM(vssub_vv_h, 2) | ||
180 | +GEN_VEXT_VV_RM(vssub_vv_w, 4) | ||
181 | +GEN_VEXT_VV_RM(vssub_vv_d, 8) | ||
182 | |||
183 | RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8) | ||
184 | RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16) | ||
185 | RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32) | ||
186 | RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64) | ||
187 | -GEN_VEXT_VX_RM(vssub_vx_b) | ||
188 | -GEN_VEXT_VX_RM(vssub_vx_h) | ||
189 | -GEN_VEXT_VX_RM(vssub_vx_w) | ||
190 | -GEN_VEXT_VX_RM(vssub_vx_d) | ||
191 | +GEN_VEXT_VX_RM(vssub_vx_b, 1) | ||
192 | +GEN_VEXT_VX_RM(vssub_vx_h, 2) | ||
193 | +GEN_VEXT_VX_RM(vssub_vx_w, 4) | ||
194 | +GEN_VEXT_VX_RM(vssub_vx_d, 8) | ||
195 | |||
196 | /* Vector Single-Width Averaging Add and Subtract */ | ||
197 | static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift) | ||
198 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32) | ||
199 | RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32) | ||
200 | RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32) | ||
201 | RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64) | ||
202 | -GEN_VEXT_VV_RM(vaadd_vv_b) | ||
203 | -GEN_VEXT_VV_RM(vaadd_vv_h) | ||
204 | -GEN_VEXT_VV_RM(vaadd_vv_w) | ||
205 | -GEN_VEXT_VV_RM(vaadd_vv_d) | ||
206 | +GEN_VEXT_VV_RM(vaadd_vv_b, 1) | ||
207 | +GEN_VEXT_VV_RM(vaadd_vv_h, 2) | ||
208 | +GEN_VEXT_VV_RM(vaadd_vv_w, 4) | ||
209 | +GEN_VEXT_VV_RM(vaadd_vv_d, 8) | ||
210 | |||
211 | RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32) | ||
212 | RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32) | ||
213 | RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32) | ||
214 | RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64) | ||
215 | -GEN_VEXT_VX_RM(vaadd_vx_b) | ||
216 | -GEN_VEXT_VX_RM(vaadd_vx_h) | ||
217 | -GEN_VEXT_VX_RM(vaadd_vx_w) | ||
218 | -GEN_VEXT_VX_RM(vaadd_vx_d) | ||
219 | +GEN_VEXT_VX_RM(vaadd_vx_b, 1) | ||
220 | +GEN_VEXT_VX_RM(vaadd_vx_h, 2) | ||
221 | +GEN_VEXT_VX_RM(vaadd_vx_w, 4) | ||
222 | +GEN_VEXT_VX_RM(vaadd_vx_d, 8) | ||
223 | |||
224 | static inline uint32_t aaddu32(CPURISCVState *env, int vxrm, | ||
225 | uint32_t a, uint32_t b) | ||
226 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2_RM, vaaddu_vv_b, OP_UUU_B, H1, H1, H1, aaddu32) | ||
227 | RVVCALL(OPIVV2_RM, vaaddu_vv_h, OP_UUU_H, H2, H2, H2, aaddu32) | ||
228 | RVVCALL(OPIVV2_RM, vaaddu_vv_w, OP_UUU_W, H4, H4, H4, aaddu32) | ||
229 | RVVCALL(OPIVV2_RM, vaaddu_vv_d, OP_UUU_D, H8, H8, H8, aaddu64) | ||
230 | -GEN_VEXT_VV_RM(vaaddu_vv_b) | ||
231 | -GEN_VEXT_VV_RM(vaaddu_vv_h) | ||
232 | -GEN_VEXT_VV_RM(vaaddu_vv_w) | ||
233 | -GEN_VEXT_VV_RM(vaaddu_vv_d) | ||
234 | +GEN_VEXT_VV_RM(vaaddu_vv_b, 1) | ||
235 | +GEN_VEXT_VV_RM(vaaddu_vv_h, 2) | ||
236 | +GEN_VEXT_VV_RM(vaaddu_vv_w, 4) | ||
237 | +GEN_VEXT_VV_RM(vaaddu_vv_d, 8) | ||
238 | |||
239 | RVVCALL(OPIVX2_RM, vaaddu_vx_b, OP_UUU_B, H1, H1, aaddu32) | ||
240 | RVVCALL(OPIVX2_RM, vaaddu_vx_h, OP_UUU_H, H2, H2, aaddu32) | ||
241 | RVVCALL(OPIVX2_RM, vaaddu_vx_w, OP_UUU_W, H4, H4, aaddu32) | ||
242 | RVVCALL(OPIVX2_RM, vaaddu_vx_d, OP_UUU_D, H8, H8, aaddu64) | ||
243 | -GEN_VEXT_VX_RM(vaaddu_vx_b) | ||
244 | -GEN_VEXT_VX_RM(vaaddu_vx_h) | ||
245 | -GEN_VEXT_VX_RM(vaaddu_vx_w) | ||
246 | -GEN_VEXT_VX_RM(vaaddu_vx_d) | ||
247 | +GEN_VEXT_VX_RM(vaaddu_vx_b, 1) | ||
248 | +GEN_VEXT_VX_RM(vaaddu_vx_h, 2) | ||
249 | +GEN_VEXT_VX_RM(vaaddu_vx_w, 4) | ||
250 | +GEN_VEXT_VX_RM(vaaddu_vx_d, 8) | ||
251 | |||
252 | static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) | ||
253 | { | ||
254 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32) | ||
255 | RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32) | ||
256 | RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32) | ||
257 | RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64) | ||
258 | -GEN_VEXT_VV_RM(vasub_vv_b) | ||
259 | -GEN_VEXT_VV_RM(vasub_vv_h) | ||
260 | -GEN_VEXT_VV_RM(vasub_vv_w) | ||
261 | -GEN_VEXT_VV_RM(vasub_vv_d) | ||
262 | +GEN_VEXT_VV_RM(vasub_vv_b, 1) | ||
263 | +GEN_VEXT_VV_RM(vasub_vv_h, 2) | ||
264 | +GEN_VEXT_VV_RM(vasub_vv_w, 4) | ||
265 | +GEN_VEXT_VV_RM(vasub_vv_d, 8) | ||
266 | |||
267 | RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32) | ||
268 | RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32) | ||
269 | RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32) | ||
270 | RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64) | ||
271 | -GEN_VEXT_VX_RM(vasub_vx_b) | ||
272 | -GEN_VEXT_VX_RM(vasub_vx_h) | ||
273 | -GEN_VEXT_VX_RM(vasub_vx_w) | ||
274 | -GEN_VEXT_VX_RM(vasub_vx_d) | ||
275 | +GEN_VEXT_VX_RM(vasub_vx_b, 1) | ||
276 | +GEN_VEXT_VX_RM(vasub_vx_h, 2) | ||
277 | +GEN_VEXT_VX_RM(vasub_vx_w, 4) | ||
278 | +GEN_VEXT_VX_RM(vasub_vx_d, 8) | ||
279 | |||
280 | static inline uint32_t asubu32(CPURISCVState *env, int vxrm, | ||
281 | uint32_t a, uint32_t b) | ||
282 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2_RM, vasubu_vv_b, OP_UUU_B, H1, H1, H1, asubu32) | ||
283 | RVVCALL(OPIVV2_RM, vasubu_vv_h, OP_UUU_H, H2, H2, H2, asubu32) | ||
284 | RVVCALL(OPIVV2_RM, vasubu_vv_w, OP_UUU_W, H4, H4, H4, asubu32) | ||
285 | RVVCALL(OPIVV2_RM, vasubu_vv_d, OP_UUU_D, H8, H8, H8, asubu64) | ||
286 | -GEN_VEXT_VV_RM(vasubu_vv_b) | ||
287 | -GEN_VEXT_VV_RM(vasubu_vv_h) | ||
288 | -GEN_VEXT_VV_RM(vasubu_vv_w) | ||
289 | -GEN_VEXT_VV_RM(vasubu_vv_d) | ||
290 | +GEN_VEXT_VV_RM(vasubu_vv_b, 1) | ||
291 | +GEN_VEXT_VV_RM(vasubu_vv_h, 2) | ||
292 | +GEN_VEXT_VV_RM(vasubu_vv_w, 4) | ||
293 | +GEN_VEXT_VV_RM(vasubu_vv_d, 8) | ||
294 | |||
295 | RVVCALL(OPIVX2_RM, vasubu_vx_b, OP_UUU_B, H1, H1, asubu32) | ||
296 | RVVCALL(OPIVX2_RM, vasubu_vx_h, OP_UUU_H, H2, H2, asubu32) | ||
297 | RVVCALL(OPIVX2_RM, vasubu_vx_w, OP_UUU_W, H4, H4, asubu32) | ||
298 | RVVCALL(OPIVX2_RM, vasubu_vx_d, OP_UUU_D, H8, H8, asubu64) | ||
299 | -GEN_VEXT_VX_RM(vasubu_vx_b) | ||
300 | -GEN_VEXT_VX_RM(vasubu_vx_h) | ||
301 | -GEN_VEXT_VX_RM(vasubu_vx_w) | ||
302 | -GEN_VEXT_VX_RM(vasubu_vx_d) | ||
303 | +GEN_VEXT_VX_RM(vasubu_vx_b, 1) | ||
304 | +GEN_VEXT_VX_RM(vasubu_vx_h, 2) | ||
305 | +GEN_VEXT_VX_RM(vasubu_vx_w, 4) | ||
306 | +GEN_VEXT_VX_RM(vasubu_vx_d, 8) | ||
307 | |||
308 | /* Vector Single-Width Fractional Multiply with Rounding and Saturation */ | ||
309 | static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) | ||
310 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8) | ||
311 | RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16) | ||
312 | RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32) | ||
313 | RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64) | ||
314 | -GEN_VEXT_VV_RM(vsmul_vv_b) | ||
315 | -GEN_VEXT_VV_RM(vsmul_vv_h) | ||
316 | -GEN_VEXT_VV_RM(vsmul_vv_w) | ||
317 | -GEN_VEXT_VV_RM(vsmul_vv_d) | ||
318 | +GEN_VEXT_VV_RM(vsmul_vv_b, 1) | ||
319 | +GEN_VEXT_VV_RM(vsmul_vv_h, 2) | ||
320 | +GEN_VEXT_VV_RM(vsmul_vv_w, 4) | ||
321 | +GEN_VEXT_VV_RM(vsmul_vv_d, 8) | ||
322 | |||
323 | RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8) | ||
324 | RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16) | ||
325 | RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32) | ||
326 | RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64) | ||
327 | -GEN_VEXT_VX_RM(vsmul_vx_b) | ||
328 | -GEN_VEXT_VX_RM(vsmul_vx_h) | ||
329 | -GEN_VEXT_VX_RM(vsmul_vx_w) | ||
330 | -GEN_VEXT_VX_RM(vsmul_vx_d) | ||
331 | +GEN_VEXT_VX_RM(vsmul_vx_b, 1) | ||
332 | +GEN_VEXT_VX_RM(vsmul_vx_h, 2) | ||
333 | +GEN_VEXT_VX_RM(vsmul_vx_w, 4) | ||
334 | +GEN_VEXT_VX_RM(vsmul_vx_d, 8) | ||
335 | |||
336 | /* Vector Single-Width Scaling Shift Instructions */ | ||
337 | static inline uint8_t | ||
338 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8) | ||
339 | RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16) | ||
340 | RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32) | ||
341 | RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64) | ||
342 | -GEN_VEXT_VV_RM(vssrl_vv_b) | ||
343 | -GEN_VEXT_VV_RM(vssrl_vv_h) | ||
344 | -GEN_VEXT_VV_RM(vssrl_vv_w) | ||
345 | -GEN_VEXT_VV_RM(vssrl_vv_d) | ||
346 | +GEN_VEXT_VV_RM(vssrl_vv_b, 1) | ||
347 | +GEN_VEXT_VV_RM(vssrl_vv_h, 2) | ||
348 | +GEN_VEXT_VV_RM(vssrl_vv_w, 4) | ||
349 | +GEN_VEXT_VV_RM(vssrl_vv_d, 8) | ||
350 | |||
351 | RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8) | ||
352 | RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16) | ||
353 | RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32) | ||
354 | RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64) | ||
355 | -GEN_VEXT_VX_RM(vssrl_vx_b) | ||
356 | -GEN_VEXT_VX_RM(vssrl_vx_h) | ||
357 | -GEN_VEXT_VX_RM(vssrl_vx_w) | ||
358 | -GEN_VEXT_VX_RM(vssrl_vx_d) | ||
359 | +GEN_VEXT_VX_RM(vssrl_vx_b, 1) | ||
360 | +GEN_VEXT_VX_RM(vssrl_vx_h, 2) | ||
361 | +GEN_VEXT_VX_RM(vssrl_vx_w, 4) | ||
362 | +GEN_VEXT_VX_RM(vssrl_vx_d, 8) | ||
363 | |||
364 | static inline int8_t | ||
365 | vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) | ||
366 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8) | ||
367 | RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16) | ||
368 | RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32) | ||
369 | RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64) | ||
370 | -GEN_VEXT_VV_RM(vssra_vv_b) | ||
371 | -GEN_VEXT_VV_RM(vssra_vv_h) | ||
372 | -GEN_VEXT_VV_RM(vssra_vv_w) | ||
373 | -GEN_VEXT_VV_RM(vssra_vv_d) | ||
374 | +GEN_VEXT_VV_RM(vssra_vv_b, 1) | ||
375 | +GEN_VEXT_VV_RM(vssra_vv_h, 2) | ||
376 | +GEN_VEXT_VV_RM(vssra_vv_w, 4) | ||
377 | +GEN_VEXT_VV_RM(vssra_vv_d, 8) | ||
378 | |||
379 | RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8) | ||
380 | RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16) | ||
381 | RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32) | ||
382 | RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64) | ||
383 | -GEN_VEXT_VX_RM(vssra_vx_b) | ||
384 | -GEN_VEXT_VX_RM(vssra_vx_h) | ||
385 | -GEN_VEXT_VX_RM(vssra_vx_w) | ||
386 | -GEN_VEXT_VX_RM(vssra_vx_d) | ||
387 | +GEN_VEXT_VX_RM(vssra_vx_b, 1) | ||
388 | +GEN_VEXT_VX_RM(vssra_vx_h, 2) | ||
389 | +GEN_VEXT_VX_RM(vssra_vx_w, 4) | ||
390 | +GEN_VEXT_VX_RM(vssra_vx_d, 8) | ||
391 | |||
392 | /* Vector Narrowing Fixed-Point Clip Instructions */ | ||
393 | static inline int8_t | ||
394 | @@ -XXX,XX +XXX,XX @@ vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b) | ||
395 | RVVCALL(OPIVV2_RM, vnclip_wv_b, NOP_SSS_B, H1, H2, H1, vnclip8) | ||
396 | RVVCALL(OPIVV2_RM, vnclip_wv_h, NOP_SSS_H, H2, H4, H2, vnclip16) | ||
397 | RVVCALL(OPIVV2_RM, vnclip_wv_w, NOP_SSS_W, H4, H8, H4, vnclip32) | ||
398 | -GEN_VEXT_VV_RM(vnclip_wv_b) | ||
399 | -GEN_VEXT_VV_RM(vnclip_wv_h) | ||
400 | -GEN_VEXT_VV_RM(vnclip_wv_w) | ||
401 | +GEN_VEXT_VV_RM(vnclip_wv_b, 1) | ||
402 | +GEN_VEXT_VV_RM(vnclip_wv_h, 2) | ||
403 | +GEN_VEXT_VV_RM(vnclip_wv_w, 4) | ||
404 | |||
405 | RVVCALL(OPIVX2_RM, vnclip_wx_b, NOP_SSS_B, H1, H2, vnclip8) | ||
406 | RVVCALL(OPIVX2_RM, vnclip_wx_h, NOP_SSS_H, H2, H4, vnclip16) | ||
407 | RVVCALL(OPIVX2_RM, vnclip_wx_w, NOP_SSS_W, H4, H8, vnclip32) | ||
408 | -GEN_VEXT_VX_RM(vnclip_wx_b) | ||
409 | -GEN_VEXT_VX_RM(vnclip_wx_h) | ||
410 | -GEN_VEXT_VX_RM(vnclip_wx_w) | ||
411 | +GEN_VEXT_VX_RM(vnclip_wx_b, 1) | ||
412 | +GEN_VEXT_VX_RM(vnclip_wx_h, 2) | ||
413 | +GEN_VEXT_VX_RM(vnclip_wx_w, 4) | ||
414 | |||
415 | static inline uint8_t | ||
416 | vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b) | ||
417 | @@ -XXX,XX +XXX,XX @@ vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b) | ||
418 | RVVCALL(OPIVV2_RM, vnclipu_wv_b, NOP_UUU_B, H1, H2, H1, vnclipu8) | ||
419 | RVVCALL(OPIVV2_RM, vnclipu_wv_h, NOP_UUU_H, H2, H4, H2, vnclipu16) | ||
420 | RVVCALL(OPIVV2_RM, vnclipu_wv_w, NOP_UUU_W, H4, H8, H4, vnclipu32) | ||
421 | -GEN_VEXT_VV_RM(vnclipu_wv_b) | ||
422 | -GEN_VEXT_VV_RM(vnclipu_wv_h) | ||
423 | -GEN_VEXT_VV_RM(vnclipu_wv_w) | ||
424 | +GEN_VEXT_VV_RM(vnclipu_wv_b, 1) | ||
425 | +GEN_VEXT_VV_RM(vnclipu_wv_h, 2) | ||
426 | +GEN_VEXT_VV_RM(vnclipu_wv_w, 4) | ||
427 | |||
428 | RVVCALL(OPIVX2_RM, vnclipu_wx_b, NOP_UUU_B, H1, H2, vnclipu8) | ||
429 | RVVCALL(OPIVX2_RM, vnclipu_wx_h, NOP_UUU_H, H2, H4, vnclipu16) | ||
430 | RVVCALL(OPIVX2_RM, vnclipu_wx_w, NOP_UUU_W, H4, H8, vnclipu32) | ||
431 | -GEN_VEXT_VX_RM(vnclipu_wx_b) | ||
432 | -GEN_VEXT_VX_RM(vnclipu_wx_h) | ||
433 | -GEN_VEXT_VX_RM(vnclipu_wx_w) | ||
434 | +GEN_VEXT_VX_RM(vnclipu_wx_b, 1) | ||
435 | +GEN_VEXT_VX_RM(vnclipu_wx_h, 2) | ||
436 | +GEN_VEXT_VX_RM(vnclipu_wx_w, 4) | ||
437 | |||
438 | /* | ||
439 | *** Vector Float Point Arithmetic Instructions | ||
440 | -- | 278 | -- |
441 | 2.36.1 | 279 | 2.41.0 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | 1 | From: Nazar Kazakov <nazar.kazakov@codethink.co.uk> | |
2 | |||
3 | This commit adds support for the Zvkg vector-crypto extension, which | ||
4 | consists of the following instructions: | ||
5 | |||
6 | * vgmul.vv | ||
7 | * vghsh.vv | ||
8 | |||
9 | Translation functions are defined in | ||
10 | `target/riscv/insn_trans/trans_rvvk.c.inc` and helpers are defined in | ||
11 | `target/riscv/vcrypto_helper.c`. | ||
12 | |||
13 | Co-authored-by: Lawrence Hunter <lawrence.hunter@codethink.co.uk> | ||
14 | [max.chou@sifive.com: Replaced vstart checking by TCG op] | ||
15 | Signed-off-by: Lawrence Hunter <lawrence.hunter@codethink.co.uk> | ||
16 | Signed-off-by: Nazar Kazakov <nazar.kazakov@codethink.co.uk> | ||
17 | Signed-off-by: Max Chou <max.chou@sifive.com> | ||
18 | Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> | ||
19 | [max.chou@sifive.com: Exposed x-zvkg property] | ||
20 | [max.chou@sifive.com: Replaced uint by int for cross win32 build] | ||
21 | Message-ID: <20230711165917.2629866-13-max.chou@sifive.com> | ||
22 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | ||
23 | --- | ||
24 | target/riscv/cpu_cfg.h | 1 + | ||
25 | target/riscv/helper.h | 3 + | ||
26 | target/riscv/insn32.decode | 4 ++ | ||
27 | target/riscv/cpu.c | 6 +- | ||
28 | target/riscv/vcrypto_helper.c | 72 ++++++++++++++++++++++++ | ||
29 | target/riscv/insn_trans/trans_rvvk.c.inc | 30 ++++++++++ | ||
30 | 6 files changed, 114 insertions(+), 2 deletions(-) | ||
31 | |||
32 | diff --git a/target/riscv/cpu_cfg.h b/target/riscv/cpu_cfg.h | ||
33 | index XXXXXXX..XXXXXXX 100644 | ||
34 | --- a/target/riscv/cpu_cfg.h | ||
35 | +++ b/target/riscv/cpu_cfg.h | ||
36 | @@ -XXX,XX +XXX,XX @@ struct RISCVCPUConfig { | ||
37 | bool ext_zve64d; | ||
38 | bool ext_zvbb; | ||
39 | bool ext_zvbc; | ||
40 | + bool ext_zvkg; | ||
41 | bool ext_zvkned; | ||
42 | bool ext_zvknha; | ||
43 | bool ext_zvknhb; | ||
44 | diff --git a/target/riscv/helper.h b/target/riscv/helper.h | ||
45 | index XXXXXXX..XXXXXXX 100644 | ||
46 | --- a/target/riscv/helper.h | ||
47 | +++ b/target/riscv/helper.h | ||
48 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_5(vsha2cl64_vv, void, ptr, ptr, ptr, env, i32) | ||
49 | |||
50 | DEF_HELPER_5(vsm3me_vv, void, ptr, ptr, ptr, env, i32) | ||
51 | DEF_HELPER_5(vsm3c_vi, void, ptr, ptr, i32, env, i32) | ||
52 | + | ||
53 | +DEF_HELPER_5(vghsh_vv, void, ptr, ptr, ptr, env, i32) | ||
54 | +DEF_HELPER_4(vgmul_vv, void, ptr, ptr, env, i32) | ||
55 | diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode | ||
56 | index XXXXXXX..XXXXXXX 100644 | ||
57 | --- a/target/riscv/insn32.decode | ||
58 | +++ b/target/riscv/insn32.decode | ||
59 | @@ -XXX,XX +XXX,XX @@ vsha2cl_vv 101111 1 ..... ..... 010 ..... 1110111 @r_vm_1 | ||
60 | # *** Zvksh vector crypto extension *** | ||
61 | vsm3me_vv 100000 1 ..... ..... 010 ..... 1110111 @r_vm_1 | ||
62 | vsm3c_vi 101011 1 ..... ..... 010 ..... 1110111 @r_vm_1 | ||
63 | + | ||
64 | +# *** Zvkg vector crypto extension *** | ||
65 | +vghsh_vv 101100 1 ..... ..... 010 ..... 1110111 @r_vm_1 | ||
66 | +vgmul_vv 101000 1 ..... 10001 010 ..... 1110111 @r2_vm_1 | ||
67 | diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c | ||
68 | index XXXXXXX..XXXXXXX 100644 | ||
69 | --- a/target/riscv/cpu.c | ||
70 | +++ b/target/riscv/cpu.c | ||
71 | @@ -XXX,XX +XXX,XX @@ static const struct isa_ext_data isa_edata_arr[] = { | ||
72 | ISA_EXT_DATA_ENTRY(zvfbfwma, PRIV_VERSION_1_12_0, ext_zvfbfwma), | ||
73 | ISA_EXT_DATA_ENTRY(zvfh, PRIV_VERSION_1_12_0, ext_zvfh), | ||
74 | ISA_EXT_DATA_ENTRY(zvfhmin, PRIV_VERSION_1_12_0, ext_zvfhmin), | ||
75 | + ISA_EXT_DATA_ENTRY(zvkg, PRIV_VERSION_1_12_0, ext_zvkg), | ||
76 | ISA_EXT_DATA_ENTRY(zvkned, PRIV_VERSION_1_12_0, ext_zvkned), | ||
77 | ISA_EXT_DATA_ENTRY(zvknha, PRIV_VERSION_1_12_0, ext_zvknha), | ||
78 | ISA_EXT_DATA_ENTRY(zvknhb, PRIV_VERSION_1_12_0, ext_zvknhb), | ||
79 | @@ -XXX,XX +XXX,XX @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp) | ||
80 | * In principle Zve*x would also suffice here, were they supported | ||
81 | * in qemu | ||
82 | */ | ||
83 | - if ((cpu->cfg.ext_zvbb || cpu->cfg.ext_zvkned || cpu->cfg.ext_zvknha || | ||
84 | - cpu->cfg.ext_zvksh) && !cpu->cfg.ext_zve32f) { | ||
85 | + if ((cpu->cfg.ext_zvbb || cpu->cfg.ext_zvkg || cpu->cfg.ext_zvkned || | ||
86 | + cpu->cfg.ext_zvknha || cpu->cfg.ext_zvksh) && !cpu->cfg.ext_zve32f) { | ||
87 | error_setg(errp, | ||
88 | "Vector crypto extensions require V or Zve* extensions"); | ||
89 | return; | ||
90 | @@ -XXX,XX +XXX,XX @@ static Property riscv_cpu_extensions[] = { | ||
91 | /* Vector cryptography extensions */ | ||
92 | DEFINE_PROP_BOOL("x-zvbb", RISCVCPU, cfg.ext_zvbb, false), | ||
93 | DEFINE_PROP_BOOL("x-zvbc", RISCVCPU, cfg.ext_zvbc, false), | ||
94 | + DEFINE_PROP_BOOL("x-zvkg", RISCVCPU, cfg.ext_zvkg, false), | ||
95 | DEFINE_PROP_BOOL("x-zvkned", RISCVCPU, cfg.ext_zvkned, false), | ||
96 | DEFINE_PROP_BOOL("x-zvknha", RISCVCPU, cfg.ext_zvknha, false), | ||
97 | DEFINE_PROP_BOOL("x-zvknhb", RISCVCPU, cfg.ext_zvknhb, false), | ||
98 | diff --git a/target/riscv/vcrypto_helper.c b/target/riscv/vcrypto_helper.c | ||
99 | index XXXXXXX..XXXXXXX 100644 | ||
100 | --- a/target/riscv/vcrypto_helper.c | ||
101 | +++ b/target/riscv/vcrypto_helper.c | ||
102 | @@ -XXX,XX +XXX,XX @@ void HELPER(vsm3c_vi)(void *vd_vptr, void *vs2_vptr, uint32_t uimm, | ||
103 | vext_set_elems_1s(vd_vptr, vta, env->vl * esz, total_elems * esz); | ||
104 | env->vstart = 0; | ||
105 | } | ||
106 | + | ||
107 | +void HELPER(vghsh_vv)(void *vd_vptr, void *vs1_vptr, void *vs2_vptr, | ||
108 | + CPURISCVState *env, uint32_t desc) | ||
109 | +{ | ||
110 | + uint64_t *vd = vd_vptr; | ||
111 | + uint64_t *vs1 = vs1_vptr; | ||
112 | + uint64_t *vs2 = vs2_vptr; | ||
113 | + uint32_t vta = vext_vta(desc); | ||
114 | + uint32_t total_elems = vext_get_total_elems(env, desc, 4); | ||
115 | + | ||
116 | + for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) { | ||
117 | + uint64_t Y[2] = {vd[i * 2 + 0], vd[i * 2 + 1]}; | ||
118 | + uint64_t H[2] = {brev8(vs2[i * 2 + 0]), brev8(vs2[i * 2 + 1])}; | ||
119 | + uint64_t X[2] = {vs1[i * 2 + 0], vs1[i * 2 + 1]}; | ||
120 | + uint64_t Z[2] = {0, 0}; | ||
121 | + | ||
122 | + uint64_t S[2] = {brev8(Y[0] ^ X[0]), brev8(Y[1] ^ X[1])}; | ||
123 | + | ||
124 | + for (int j = 0; j < 128; j++) { | ||
125 | + if ((S[j / 64] >> (j % 64)) & 1) { | ||
126 | + Z[0] ^= H[0]; | ||
127 | + Z[1] ^= H[1]; | ||
128 | + } | ||
129 | + bool reduce = ((H[1] >> 63) & 1); | ||
130 | + H[1] = H[1] << 1 | H[0] >> 63; | ||
131 | + H[0] = H[0] << 1; | ||
132 | + if (reduce) { | ||
133 | + H[0] ^= 0x87; | ||
134 | + } | ||
135 | + } | ||
136 | + | ||
137 | + vd[i * 2 + 0] = brev8(Z[0]); | ||
138 | + vd[i * 2 + 1] = brev8(Z[1]); | ||
139 | + } | ||
140 | + /* set tail elements to 1s */ | ||
141 | + vext_set_elems_1s(vd, vta, env->vl * 4, total_elems * 4); | ||
142 | + env->vstart = 0; | ||
143 | +} | ||
144 | + | ||
145 | +void HELPER(vgmul_vv)(void *vd_vptr, void *vs2_vptr, CPURISCVState *env, | ||
146 | + uint32_t desc) | ||
147 | +{ | ||
148 | + uint64_t *vd = vd_vptr; | ||
149 | + uint64_t *vs2 = vs2_vptr; | ||
150 | + uint32_t vta = vext_vta(desc); | ||
151 | + uint32_t total_elems = vext_get_total_elems(env, desc, 4); | ||
152 | + | ||
153 | + for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) { | ||
154 | + uint64_t Y[2] = {brev8(vd[i * 2 + 0]), brev8(vd[i * 2 + 1])}; | ||
155 | + uint64_t H[2] = {brev8(vs2[i * 2 + 0]), brev8(vs2[i * 2 + 1])}; | ||
156 | + uint64_t Z[2] = {0, 0}; | ||
157 | + | ||
158 | + for (int j = 0; j < 128; j++) { | ||
159 | + if ((Y[j / 64] >> (j % 64)) & 1) { | ||
160 | + Z[0] ^= H[0]; | ||
161 | + Z[1] ^= H[1]; | ||
162 | + } | ||
163 | + bool reduce = ((H[1] >> 63) & 1); | ||
164 | + H[1] = H[1] << 1 | H[0] >> 63; | ||
165 | + H[0] = H[0] << 1; | ||
166 | + if (reduce) { | ||
167 | + H[0] ^= 0x87; | ||
168 | + } | ||
169 | + } | ||
170 | + | ||
171 | + vd[i * 2 + 0] = brev8(Z[0]); | ||
172 | + vd[i * 2 + 1] = brev8(Z[1]); | ||
173 | + } | ||
174 | + /* set tail elements to 1s */ | ||
175 | + vext_set_elems_1s(vd, vta, env->vl * 4, total_elems * 4); | ||
176 | + env->vstart = 0; | ||
177 | +} | ||
178 | diff --git a/target/riscv/insn_trans/trans_rvvk.c.inc b/target/riscv/insn_trans/trans_rvvk.c.inc | ||
179 | index XXXXXXX..XXXXXXX 100644 | ||
180 | --- a/target/riscv/insn_trans/trans_rvvk.c.inc | ||
181 | +++ b/target/riscv/insn_trans/trans_rvvk.c.inc | ||
182 | @@ -XXX,XX +XXX,XX @@ static inline bool vsm3c_check(DisasContext *s, arg_rmrr *a) | ||
183 | |||
184 | GEN_VV_UNMASKED_TRANS(vsm3me_vv, vsm3me_check, ZVKSH_EGS) | ||
185 | GEN_VI_UNMASKED_TRANS(vsm3c_vi, vsm3c_check, ZVKSH_EGS) | ||
186 | + | ||
187 | +/* | ||
188 | + * Zvkg | ||
189 | + */ | ||
190 | + | ||
191 | +#define ZVKG_EGS 4 | ||
192 | + | ||
193 | +static bool vgmul_check(DisasContext *s, arg_rmr *a) | ||
194 | +{ | ||
195 | + int egw_bytes = ZVKG_EGS << s->sew; | ||
196 | + return s->cfg_ptr->ext_zvkg == true && | ||
197 | + vext_check_isa_ill(s) && | ||
198 | + require_rvv(s) && | ||
199 | + MAXSZ(s) >= egw_bytes && | ||
200 | + vext_check_ss(s, a->rd, a->rs2, a->vm) && | ||
201 | + s->sew == MO_32; | ||
202 | +} | ||
203 | + | ||
204 | +GEN_V_UNMASKED_TRANS(vgmul_vv, vgmul_check, ZVKG_EGS) | ||
205 | + | ||
206 | +static bool vghsh_check(DisasContext *s, arg_rmrr *a) | ||
207 | +{ | ||
208 | + int egw_bytes = ZVKG_EGS << s->sew; | ||
209 | + return s->cfg_ptr->ext_zvkg == true && | ||
210 | + opivv_check(s, a) && | ||
211 | + MAXSZ(s) >= egw_bytes && | ||
212 | + s->sew == MO_32; | ||
213 | +} | ||
214 | + | ||
215 | +GEN_VV_UNMASKED_TRANS(vghsh_vv, vghsh_check, ZVKG_EGS) | ||
216 | -- | ||
217 | 2.41.0 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Max Chou <max.chou@sifive.com> | ||
1 | 2 | ||
3 | Allows sharing of sm4_subword between different targets. | ||
4 | |||
5 | Signed-off-by: Max Chou <max.chou@sifive.com> | ||
6 | Reviewed-by: Frank Chang <frank.chang@sifive.com> | ||
7 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
8 | Signed-off-by: Max Chou <max.chou@sifive.com> | ||
9 | Message-ID: <20230711165917.2629866-14-max.chou@sifive.com> | ||
10 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | ||
11 | --- | ||
12 | include/crypto/sm4.h | 8 ++++++++ | ||
13 | target/arm/tcg/crypto_helper.c | 10 ++-------- | ||
14 | 2 files changed, 10 insertions(+), 8 deletions(-) | ||
15 | |||
16 | diff --git a/include/crypto/sm4.h b/include/crypto/sm4.h | ||
17 | index XXXXXXX..XXXXXXX 100644 | ||
18 | --- a/include/crypto/sm4.h | ||
19 | +++ b/include/crypto/sm4.h | ||
20 | @@ -XXX,XX +XXX,XX @@ | ||
21 | |||
22 | extern const uint8_t sm4_sbox[256]; | ||
23 | |||
24 | +static inline uint32_t sm4_subword(uint32_t word) | ||
25 | +{ | ||
26 | + return sm4_sbox[word & 0xff] | | ||
27 | + sm4_sbox[(word >> 8) & 0xff] << 8 | | ||
28 | + sm4_sbox[(word >> 16) & 0xff] << 16 | | ||
29 | + sm4_sbox[(word >> 24) & 0xff] << 24; | ||
30 | +} | ||
31 | + | ||
32 | #endif | ||
33 | diff --git a/target/arm/tcg/crypto_helper.c b/target/arm/tcg/crypto_helper.c | ||
34 | index XXXXXXX..XXXXXXX 100644 | ||
35 | --- a/target/arm/tcg/crypto_helper.c | ||
36 | +++ b/target/arm/tcg/crypto_helper.c | ||
37 | @@ -XXX,XX +XXX,XX @@ static void do_crypto_sm4e(uint64_t *rd, uint64_t *rn, uint64_t *rm) | ||
38 | CR_ST_WORD(d, (i + 3) % 4) ^ | ||
39 | CR_ST_WORD(n, i); | ||
40 | |||
41 | - t = sm4_sbox[t & 0xff] | | ||
42 | - sm4_sbox[(t >> 8) & 0xff] << 8 | | ||
43 | - sm4_sbox[(t >> 16) & 0xff] << 16 | | ||
44 | - sm4_sbox[(t >> 24) & 0xff] << 24; | ||
45 | + t = sm4_subword(t); | ||
46 | |||
47 | CR_ST_WORD(d, i) ^= t ^ rol32(t, 2) ^ rol32(t, 10) ^ rol32(t, 18) ^ | ||
48 | rol32(t, 24); | ||
49 | @@ -XXX,XX +XXX,XX @@ static void do_crypto_sm4ekey(uint64_t *rd, uint64_t *rn, uint64_t *rm) | ||
50 | CR_ST_WORD(d, (i + 3) % 4) ^ | ||
51 | CR_ST_WORD(m, i); | ||
52 | |||
53 | - t = sm4_sbox[t & 0xff] | | ||
54 | - sm4_sbox[(t >> 8) & 0xff] << 8 | | ||
55 | - sm4_sbox[(t >> 16) & 0xff] << 16 | | ||
56 | - sm4_sbox[(t >> 24) & 0xff] << 24; | ||
57 | + t = sm4_subword(t); | ||
58 | |||
59 | CR_ST_WORD(d, i) ^= t ^ rol32(t, 13) ^ rol32(t, 23); | ||
60 | } | ||
61 | -- | ||
62 | 2.41.0 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Max Chou <max.chou@sifive.com> | ||
1 | 2 | ||
3 | Adds sm4_ck constant for use in sm4 cryptography across different targets. | ||
4 | |||
5 | Signed-off-by: Max Chou <max.chou@sifive.com> | ||
6 | Reviewed-by: Frank Chang <frank.chang@sifive.com> | ||
7 | Signed-off-by: Max Chou <max.chou@sifive.com> | ||
8 | Message-ID: <20230711165917.2629866-15-max.chou@sifive.com> | ||
9 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | ||
10 | --- | ||
11 | include/crypto/sm4.h | 1 + | ||
12 | crypto/sm4.c | 10 ++++++++++ | ||
13 | 2 files changed, 11 insertions(+) | ||
14 | |||
15 | diff --git a/include/crypto/sm4.h b/include/crypto/sm4.h | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/include/crypto/sm4.h | ||
18 | +++ b/include/crypto/sm4.h | ||
19 | @@ -XXX,XX +XXX,XX @@ | ||
20 | #define QEMU_SM4_H | ||
21 | |||
22 | extern const uint8_t sm4_sbox[256]; | ||
23 | +extern const uint32_t sm4_ck[32]; | ||
24 | |||
25 | static inline uint32_t sm4_subword(uint32_t word) | ||
26 | { | ||
27 | diff --git a/crypto/sm4.c b/crypto/sm4.c | ||
28 | index XXXXXXX..XXXXXXX 100644 | ||
29 | --- a/crypto/sm4.c | ||
30 | +++ b/crypto/sm4.c | ||
31 | @@ -XXX,XX +XXX,XX @@ uint8_t const sm4_sbox[] = { | ||
32 | 0x79, 0xee, 0x5f, 0x3e, 0xd7, 0xcb, 0x39, 0x48, | ||
33 | }; | ||
34 | |||
35 | +uint32_t const sm4_ck[] = { | ||
36 | + 0x00070e15, 0x1c232a31, 0x383f464d, 0x545b6269, | ||
37 | + 0x70777e85, 0x8c939aa1, 0xa8afb6bd, 0xc4cbd2d9, | ||
38 | + 0xe0e7eef5, 0xfc030a11, 0x181f262d, 0x343b4249, | ||
39 | + 0x50575e65, 0x6c737a81, 0x888f969d, 0xa4abb2b9, | ||
40 | + 0xc0c7ced5, 0xdce3eaf1, 0xf8ff060d, 0x141b2229, | ||
41 | + 0x30373e45, 0x4c535a61, 0x686f767d, 0x848b9299, | ||
42 | + 0xa0a7aeb5, 0xbcc3cad1, 0xd8dfe6ed, 0xf4fb0209, | ||
43 | + 0x10171e25, 0x2c333a41, 0x484f565d, 0x646b7279 | ||
44 | +}; | ||
45 | -- | ||
46 | 2.41.0 | diff view generated by jsdifflib |
1 | From: eopXD <yueh.ting.chen@gmail.com> | 1 | From: Max Chou <max.chou@sifive.com> |
---|---|---|---|
2 | 2 | ||
3 | Compares write mask registers, and so always operate under a tail- | 3 | This commit adds support for the Zvksed vector-crypto extension, which |
4 | agnostic policy. | 4 | consists of the following instructions: |
5 | 5 | ||
6 | Signed-off-by: eop Chen <eop.chen@sifive.com> | 6 | * vsm4k.vi |
7 | * vsm4r.[vv,vs] | ||
8 | |||
9 | Translation functions are defined in | ||
10 | `target/riscv/insn_trans/trans_rvvk.c.inc` and helpers are defined in | ||
11 | `target/riscv/vcrypto_helper.c`. | ||
12 | |||
13 | Signed-off-by: Max Chou <max.chou@sifive.com> | ||
7 | Reviewed-by: Frank Chang <frank.chang@sifive.com> | 14 | Reviewed-by: Frank Chang <frank.chang@sifive.com> |
8 | Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn> | 15 | [lawrence.hunter@codethink.co.uk: Moved SM4 functions from |
9 | Acked-by: Alistair Francis <alistair.francis@wdc.com> | 16 | crypto_helper.c to vcrypto_helper.c] |
10 | Message-Id: <165449614532.19704.7000832880482980398-12@git.sr.ht> | 17 | [nazar.kazakov@codethink.co.uk: Added alignment checks, refactored code to |
18 | use macros, and minor style changes] | ||
19 | Signed-off-by: Max Chou <max.chou@sifive.com> | ||
20 | Message-ID: <20230711165917.2629866-16-max.chou@sifive.com> | ||
11 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | 21 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> |
12 | --- | 22 | --- |
13 | target/riscv/vector_helper.c | 440 +++++++++++++----------- | 23 | target/riscv/cpu_cfg.h | 1 + |
14 | target/riscv/insn_trans/trans_rvv.c.inc | 17 + | 24 | target/riscv/helper.h | 4 + |
15 | 2 files changed, 261 insertions(+), 196 deletions(-) | 25 | target/riscv/insn32.decode | 5 + |
16 | 26 | target/riscv/cpu.c | 5 +- | |
17 | diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c | 27 | target/riscv/vcrypto_helper.c | 127 +++++++++++++++++++++++ |
18 | index XXXXXXX..XXXXXXX 100644 | 28 | target/riscv/insn_trans/trans_rvvk.c.inc | 43 ++++++++ |
19 | --- a/target/riscv/vector_helper.c | 29 | 6 files changed, 184 insertions(+), 1 deletion(-) |
20 | +++ b/target/riscv/vector_helper.c | 30 | |
21 | @@ -XXX,XX +XXX,XX @@ static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ | 31 | diff --git a/target/riscv/cpu_cfg.h b/target/riscv/cpu_cfg.h |
22 | *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status); \ | 32 | index XXXXXXX..XXXXXXX 100644 |
33 | --- a/target/riscv/cpu_cfg.h | ||
34 | +++ b/target/riscv/cpu_cfg.h | ||
35 | @@ -XXX,XX +XXX,XX @@ struct RISCVCPUConfig { | ||
36 | bool ext_zvkned; | ||
37 | bool ext_zvknha; | ||
38 | bool ext_zvknhb; | ||
39 | + bool ext_zvksed; | ||
40 | bool ext_zvksh; | ||
41 | bool ext_zmmul; | ||
42 | bool ext_zvfbfmin; | ||
43 | diff --git a/target/riscv/helper.h b/target/riscv/helper.h | ||
44 | index XXXXXXX..XXXXXXX 100644 | ||
45 | --- a/target/riscv/helper.h | ||
46 | +++ b/target/riscv/helper.h | ||
47 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_5(vsm3c_vi, void, ptr, ptr, i32, env, i32) | ||
48 | |||
49 | DEF_HELPER_5(vghsh_vv, void, ptr, ptr, ptr, env, i32) | ||
50 | DEF_HELPER_4(vgmul_vv, void, ptr, ptr, env, i32) | ||
51 | + | ||
52 | +DEF_HELPER_5(vsm4k_vi, void, ptr, ptr, i32, env, i32) | ||
53 | +DEF_HELPER_4(vsm4r_vv, void, ptr, ptr, env, i32) | ||
54 | +DEF_HELPER_4(vsm4r_vs, void, ptr, ptr, env, i32) | ||
55 | diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode | ||
56 | index XXXXXXX..XXXXXXX 100644 | ||
57 | --- a/target/riscv/insn32.decode | ||
58 | +++ b/target/riscv/insn32.decode | ||
59 | @@ -XXX,XX +XXX,XX @@ vsm3c_vi 101011 1 ..... ..... 010 ..... 1110111 @r_vm_1 | ||
60 | # *** Zvkg vector crypto extension *** | ||
61 | vghsh_vv 101100 1 ..... ..... 010 ..... 1110111 @r_vm_1 | ||
62 | vgmul_vv 101000 1 ..... 10001 010 ..... 1110111 @r2_vm_1 | ||
63 | + | ||
64 | +# *** Zvksed vector crypto extension *** | ||
65 | +vsm4k_vi 100001 1 ..... ..... 010 ..... 1110111 @r_vm_1 | ||
66 | +vsm4r_vv 101000 1 ..... 10000 010 ..... 1110111 @r2_vm_1 | ||
67 | +vsm4r_vs 101001 1 ..... 10000 010 ..... 1110111 @r2_vm_1 | ||
68 | diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c | ||
69 | index XXXXXXX..XXXXXXX 100644 | ||
70 | --- a/target/riscv/cpu.c | ||
71 | +++ b/target/riscv/cpu.c | ||
72 | @@ -XXX,XX +XXX,XX @@ static const struct isa_ext_data isa_edata_arr[] = { | ||
73 | ISA_EXT_DATA_ENTRY(zvkned, PRIV_VERSION_1_12_0, ext_zvkned), | ||
74 | ISA_EXT_DATA_ENTRY(zvknha, PRIV_VERSION_1_12_0, ext_zvknha), | ||
75 | ISA_EXT_DATA_ENTRY(zvknhb, PRIV_VERSION_1_12_0, ext_zvknhb), | ||
76 | + ISA_EXT_DATA_ENTRY(zvksed, PRIV_VERSION_1_12_0, ext_zvksed), | ||
77 | ISA_EXT_DATA_ENTRY(zvksh, PRIV_VERSION_1_12_0, ext_zvksh), | ||
78 | ISA_EXT_DATA_ENTRY(zhinx, PRIV_VERSION_1_12_0, ext_zhinx), | ||
79 | ISA_EXT_DATA_ENTRY(zhinxmin, PRIV_VERSION_1_12_0, ext_zhinxmin), | ||
80 | @@ -XXX,XX +XXX,XX @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp) | ||
81 | * in qemu | ||
82 | */ | ||
83 | if ((cpu->cfg.ext_zvbb || cpu->cfg.ext_zvkg || cpu->cfg.ext_zvkned || | ||
84 | - cpu->cfg.ext_zvknha || cpu->cfg.ext_zvksh) && !cpu->cfg.ext_zve32f) { | ||
85 | + cpu->cfg.ext_zvknha || cpu->cfg.ext_zvksed || cpu->cfg.ext_zvksh) && | ||
86 | + !cpu->cfg.ext_zve32f) { | ||
87 | error_setg(errp, | ||
88 | "Vector crypto extensions require V or Zve* extensions"); | ||
89 | return; | ||
90 | @@ -XXX,XX +XXX,XX @@ static Property riscv_cpu_extensions[] = { | ||
91 | DEFINE_PROP_BOOL("x-zvkned", RISCVCPU, cfg.ext_zvkned, false), | ||
92 | DEFINE_PROP_BOOL("x-zvknha", RISCVCPU, cfg.ext_zvknha, false), | ||
93 | DEFINE_PROP_BOOL("x-zvknhb", RISCVCPU, cfg.ext_zvknhb, false), | ||
94 | + DEFINE_PROP_BOOL("x-zvksed", RISCVCPU, cfg.ext_zvksed, false), | ||
95 | DEFINE_PROP_BOOL("x-zvksh", RISCVCPU, cfg.ext_zvksh, false), | ||
96 | |||
97 | DEFINE_PROP_END_OF_LIST(), | ||
98 | diff --git a/target/riscv/vcrypto_helper.c b/target/riscv/vcrypto_helper.c | ||
99 | index XXXXXXX..XXXXXXX 100644 | ||
100 | --- a/target/riscv/vcrypto_helper.c | ||
101 | +++ b/target/riscv/vcrypto_helper.c | ||
102 | @@ -XXX,XX +XXX,XX @@ | ||
103 | #include "cpu.h" | ||
104 | #include "crypto/aes.h" | ||
105 | #include "crypto/aes-round.h" | ||
106 | +#include "crypto/sm4.h" | ||
107 | #include "exec/memop.h" | ||
108 | #include "exec/exec-all.h" | ||
109 | #include "exec/helper-proto.h" | ||
110 | @@ -XXX,XX +XXX,XX @@ void HELPER(vgmul_vv)(void *vd_vptr, void *vs2_vptr, CPURISCVState *env, | ||
111 | vext_set_elems_1s(vd, vta, env->vl * 4, total_elems * 4); | ||
112 | env->vstart = 0; | ||
23 | } | 113 | } |
24 | 114 | + | |
25 | -#define GEN_VEXT_VV_ENV(NAME) \ | 115 | +void HELPER(vsm4k_vi)(void *vd, void *vs2, uint32_t uimm5, CPURISCVState *env, |
26 | +#define GEN_VEXT_VV_ENV(NAME, ESZ) \ | 116 | + uint32_t desc) |
27 | void HELPER(NAME)(void *vd, void *v0, void *vs1, \ | 117 | +{ |
28 | void *vs2, CPURISCVState *env, \ | 118 | + const uint32_t egs = 4; |
29 | uint32_t desc) \ | 119 | + uint32_t rnd = uimm5 & 0x7; |
30 | { \ | 120 | + uint32_t group_start = env->vstart / egs; |
31 | uint32_t vm = vext_vm(desc); \ | 121 | + uint32_t group_end = env->vl / egs; |
32 | uint32_t vl = env->vl; \ | 122 | + uint32_t esz = sizeof(uint32_t); |
33 | + uint32_t total_elems = \ | 123 | + uint32_t total_elems = vext_get_total_elems(env, desc, esz); |
34 | + vext_get_total_elems(env, desc, ESZ); \ | 124 | + |
35 | + uint32_t vta = vext_vta(desc); \ | 125 | + for (uint32_t i = group_start; i < group_end; ++i) { |
36 | uint32_t i; \ | 126 | + uint32_t vstart = i * egs; |
37 | \ | 127 | + uint32_t vend = (i + 1) * egs; |
38 | for (i = env->vstart; i < vl; i++) { \ | 128 | + uint32_t rk[4] = {0}; |
39 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, \ | 129 | + uint32_t tmp[8] = {0}; |
40 | do_##NAME(vd, vs1, vs2, i, env); \ | 130 | + |
41 | } \ | 131 | + for (uint32_t j = vstart; j < vend; ++j) { |
42 | env->vstart = 0; \ | 132 | + rk[j - vstart] = *((uint32_t *)vs2 + H4(j)); |
43 | + /* set tail elements to 1s */ \ | 133 | + } |
44 | + vext_set_elems_1s(vd, vta, vl * ESZ, \ | 134 | + |
45 | + total_elems * ESZ); \ | 135 | + for (uint32_t j = 0; j < egs; ++j) { |
136 | + tmp[j] = rk[j]; | ||
137 | + } | ||
138 | + | ||
139 | + for (uint32_t j = 0; j < egs; ++j) { | ||
140 | + uint32_t b, s; | ||
141 | + b = tmp[j + 1] ^ tmp[j + 2] ^ tmp[j + 3] ^ sm4_ck[rnd * 4 + j]; | ||
142 | + | ||
143 | + s = sm4_subword(b); | ||
144 | + | ||
145 | + tmp[j + 4] = tmp[j] ^ (s ^ rol32(s, 13) ^ rol32(s, 23)); | ||
146 | + } | ||
147 | + | ||
148 | + for (uint32_t j = vstart; j < vend; ++j) { | ||
149 | + *((uint32_t *)vd + H4(j)) = tmp[egs + (j - vstart)]; | ||
150 | + } | ||
151 | + } | ||
152 | + | ||
153 | + env->vstart = 0; | ||
154 | + /* set tail elements to 1s */ | ||
155 | + vext_set_elems_1s(vd, vext_vta(desc), env->vl * esz, total_elems * esz); | ||
156 | +} | ||
157 | + | ||
158 | +static void do_sm4_round(uint32_t *rk, uint32_t *buf) | ||
159 | +{ | ||
160 | + const uint32_t egs = 4; | ||
161 | + uint32_t s, b; | ||
162 | + | ||
163 | + for (uint32_t j = egs; j < egs * 2; ++j) { | ||
164 | + b = buf[j - 3] ^ buf[j - 2] ^ buf[j - 1] ^ rk[j - 4]; | ||
165 | + | ||
166 | + s = sm4_subword(b); | ||
167 | + | ||
168 | + buf[j] = buf[j - 4] ^ (s ^ rol32(s, 2) ^ rol32(s, 10) ^ rol32(s, 18) ^ | ||
169 | + rol32(s, 24)); | ||
170 | + } | ||
171 | +} | ||
172 | + | ||
173 | +void HELPER(vsm4r_vv)(void *vd, void *vs2, CPURISCVState *env, uint32_t desc) | ||
174 | +{ | ||
175 | + const uint32_t egs = 4; | ||
176 | + uint32_t group_start = env->vstart / egs; | ||
177 | + uint32_t group_end = env->vl / egs; | ||
178 | + uint32_t esz = sizeof(uint32_t); | ||
179 | + uint32_t total_elems = vext_get_total_elems(env, desc, esz); | ||
180 | + | ||
181 | + for (uint32_t i = group_start; i < group_end; ++i) { | ||
182 | + uint32_t vstart = i * egs; | ||
183 | + uint32_t vend = (i + 1) * egs; | ||
184 | + uint32_t rk[4] = {0}; | ||
185 | + uint32_t tmp[8] = {0}; | ||
186 | + | ||
187 | + for (uint32_t j = vstart; j < vend; ++j) { | ||
188 | + rk[j - vstart] = *((uint32_t *)vs2 + H4(j)); | ||
189 | + } | ||
190 | + | ||
191 | + for (uint32_t j = vstart; j < vend; ++j) { | ||
192 | + tmp[j - vstart] = *((uint32_t *)vd + H4(j)); | ||
193 | + } | ||
194 | + | ||
195 | + do_sm4_round(rk, tmp); | ||
196 | + | ||
197 | + for (uint32_t j = vstart; j < vend; ++j) { | ||
198 | + *((uint32_t *)vd + H4(j)) = tmp[egs + (j - vstart)]; | ||
199 | + } | ||
200 | + } | ||
201 | + | ||
202 | + env->vstart = 0; | ||
203 | + /* set tail elements to 1s */ | ||
204 | + vext_set_elems_1s(vd, vext_vta(desc), env->vl * esz, total_elems * esz); | ||
205 | +} | ||
206 | + | ||
207 | +void HELPER(vsm4r_vs)(void *vd, void *vs2, CPURISCVState *env, uint32_t desc) | ||
208 | +{ | ||
209 | + const uint32_t egs = 4; | ||
210 | + uint32_t group_start = env->vstart / egs; | ||
211 | + uint32_t group_end = env->vl / egs; | ||
212 | + uint32_t esz = sizeof(uint32_t); | ||
213 | + uint32_t total_elems = vext_get_total_elems(env, desc, esz); | ||
214 | + | ||
215 | + for (uint32_t i = group_start; i < group_end; ++i) { | ||
216 | + uint32_t vstart = i * egs; | ||
217 | + uint32_t vend = (i + 1) * egs; | ||
218 | + uint32_t rk[4] = {0}; | ||
219 | + uint32_t tmp[8] = {0}; | ||
220 | + | ||
221 | + for (uint32_t j = 0; j < egs; ++j) { | ||
222 | + rk[j] = *((uint32_t *)vs2 + H4(j)); | ||
223 | + } | ||
224 | + | ||
225 | + for (uint32_t j = vstart; j < vend; ++j) { | ||
226 | + tmp[j - vstart] = *((uint32_t *)vd + H4(j)); | ||
227 | + } | ||
228 | + | ||
229 | + do_sm4_round(rk, tmp); | ||
230 | + | ||
231 | + for (uint32_t j = vstart; j < vend; ++j) { | ||
232 | + *((uint32_t *)vd + H4(j)) = tmp[egs + (j - vstart)]; | ||
233 | + } | ||
234 | + } | ||
235 | + | ||
236 | + env->vstart = 0; | ||
237 | + /* set tail elements to 1s */ | ||
238 | + vext_set_elems_1s(vd, vext_vta(desc), env->vl * esz, total_elems * esz); | ||
239 | +} | ||
240 | diff --git a/target/riscv/insn_trans/trans_rvvk.c.inc b/target/riscv/insn_trans/trans_rvvk.c.inc | ||
241 | index XXXXXXX..XXXXXXX 100644 | ||
242 | --- a/target/riscv/insn_trans/trans_rvvk.c.inc | ||
243 | +++ b/target/riscv/insn_trans/trans_rvvk.c.inc | ||
244 | @@ -XXX,XX +XXX,XX @@ static bool vghsh_check(DisasContext *s, arg_rmrr *a) | ||
46 | } | 245 | } |
47 | 246 | ||
48 | RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add) | 247 | GEN_VV_UNMASKED_TRANS(vghsh_vv, vghsh_check, ZVKG_EGS) |
49 | RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add) | 248 | + |
50 | RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add) | 249 | +/* |
51 | -GEN_VEXT_VV_ENV(vfadd_vv_h) | 250 | + * Zvksed |
52 | -GEN_VEXT_VV_ENV(vfadd_vv_w) | 251 | + */ |
53 | -GEN_VEXT_VV_ENV(vfadd_vv_d) | 252 | + |
54 | +GEN_VEXT_VV_ENV(vfadd_vv_h, 2) | 253 | +#define ZVKSED_EGS 4 |
55 | +GEN_VEXT_VV_ENV(vfadd_vv_w, 4) | 254 | + |
56 | +GEN_VEXT_VV_ENV(vfadd_vv_d, 8) | 255 | +static bool zvksed_check(DisasContext *s) |
57 | 256 | +{ | |
58 | #define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ | 257 | + int egw_bytes = ZVKSED_EGS << s->sew; |
59 | static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ | 258 | + return s->cfg_ptr->ext_zvksed == true && |
60 | @@ -XXX,XX +XXX,XX @@ static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ | 259 | + require_rvv(s) && |
61 | *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, &env->fp_status);\ | 260 | + vext_check_isa_ill(s) && |
62 | } | 261 | + MAXSZ(s) >= egw_bytes && |
63 | 262 | + s->sew == MO_32; | |
64 | -#define GEN_VEXT_VF(NAME) \ | 263 | +} |
65 | +#define GEN_VEXT_VF(NAME, ESZ) \ | 264 | + |
66 | void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \ | 265 | +static bool vsm4k_vi_check(DisasContext *s, arg_rmrr *a) |
67 | void *vs2, CPURISCVState *env, \ | 266 | +{ |
68 | uint32_t desc) \ | 267 | + return zvksed_check(s) && |
69 | { \ | 268 | + require_align(a->rd, s->lmul) && |
70 | uint32_t vm = vext_vm(desc); \ | 269 | + require_align(a->rs2, s->lmul); |
71 | uint32_t vl = env->vl; \ | 270 | +} |
72 | + uint32_t total_elems = \ | 271 | + |
73 | + vext_get_total_elems(env, desc, ESZ); \ | 272 | +GEN_VI_UNMASKED_TRANS(vsm4k_vi, vsm4k_vi_check, ZVKSED_EGS) |
74 | + uint32_t vta = vext_vta(desc); \ | 273 | + |
75 | uint32_t i; \ | 274 | +static bool vsm4r_vv_check(DisasContext *s, arg_rmr *a) |
76 | \ | 275 | +{ |
77 | for (i = env->vstart; i < vl; i++) { \ | 276 | + return zvksed_check(s) && |
78 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \ | 277 | + require_align(a->rd, s->lmul) && |
79 | do_##NAME(vd, s1, vs2, i, env); \ | 278 | + require_align(a->rs2, s->lmul); |
80 | } \ | 279 | +} |
81 | env->vstart = 0; \ | 280 | + |
82 | + /* set tail elements to 1s */ \ | 281 | +GEN_V_UNMASKED_TRANS(vsm4r_vv, vsm4r_vv_check, ZVKSED_EGS) |
83 | + vext_set_elems_1s(vd, vta, vl * ESZ, \ | 282 | + |
84 | + total_elems * ESZ); \ | 283 | +static bool vsm4r_vs_check(DisasContext *s, arg_rmr *a) |
85 | } | 284 | +{ |
86 | 285 | + return zvksed_check(s) && | |
87 | RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add) | 286 | + !is_overlapped(a->rd, 1 << MAX(s->lmul, 0), a->rs2, 1) && |
88 | RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add) | 287 | + require_align(a->rd, s->lmul); |
89 | RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add) | 288 | +} |
90 | -GEN_VEXT_VF(vfadd_vf_h) | 289 | + |
91 | -GEN_VEXT_VF(vfadd_vf_w) | 290 | +GEN_V_UNMASKED_TRANS(vsm4r_vs, vsm4r_vs_check, ZVKSED_EGS) |
92 | -GEN_VEXT_VF(vfadd_vf_d) | ||
93 | +GEN_VEXT_VF(vfadd_vf_h, 2) | ||
94 | +GEN_VEXT_VF(vfadd_vf_w, 4) | ||
95 | +GEN_VEXT_VF(vfadd_vf_d, 8) | ||
96 | |||
97 | RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub) | ||
98 | RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub) | ||
99 | RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub) | ||
100 | -GEN_VEXT_VV_ENV(vfsub_vv_h) | ||
101 | -GEN_VEXT_VV_ENV(vfsub_vv_w) | ||
102 | -GEN_VEXT_VV_ENV(vfsub_vv_d) | ||
103 | +GEN_VEXT_VV_ENV(vfsub_vv_h, 2) | ||
104 | +GEN_VEXT_VV_ENV(vfsub_vv_w, 4) | ||
105 | +GEN_VEXT_VV_ENV(vfsub_vv_d, 8) | ||
106 | RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub) | ||
107 | RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub) | ||
108 | RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub) | ||
109 | -GEN_VEXT_VF(vfsub_vf_h) | ||
110 | -GEN_VEXT_VF(vfsub_vf_w) | ||
111 | -GEN_VEXT_VF(vfsub_vf_d) | ||
112 | +GEN_VEXT_VF(vfsub_vf_h, 2) | ||
113 | +GEN_VEXT_VF(vfsub_vf_w, 4) | ||
114 | +GEN_VEXT_VF(vfsub_vf_d, 8) | ||
115 | |||
116 | static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s) | ||
117 | { | ||
118 | @@ -XXX,XX +XXX,XX @@ static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s) | ||
119 | RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub) | ||
120 | RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub) | ||
121 | RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub) | ||
122 | -GEN_VEXT_VF(vfrsub_vf_h) | ||
123 | -GEN_VEXT_VF(vfrsub_vf_w) | ||
124 | -GEN_VEXT_VF(vfrsub_vf_d) | ||
125 | +GEN_VEXT_VF(vfrsub_vf_h, 2) | ||
126 | +GEN_VEXT_VF(vfrsub_vf_w, 4) | ||
127 | +GEN_VEXT_VF(vfrsub_vf_d, 8) | ||
128 | |||
129 | /* Vector Widening Floating-Point Add/Subtract Instructions */ | ||
130 | static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s) | ||
131 | @@ -XXX,XX +XXX,XX @@ static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s) | ||
132 | |||
133 | RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16) | ||
134 | RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32) | ||
135 | -GEN_VEXT_VV_ENV(vfwadd_vv_h) | ||
136 | -GEN_VEXT_VV_ENV(vfwadd_vv_w) | ||
137 | +GEN_VEXT_VV_ENV(vfwadd_vv_h, 4) | ||
138 | +GEN_VEXT_VV_ENV(vfwadd_vv_w, 8) | ||
139 | RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16) | ||
140 | RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32) | ||
141 | -GEN_VEXT_VF(vfwadd_vf_h) | ||
142 | -GEN_VEXT_VF(vfwadd_vf_w) | ||
143 | +GEN_VEXT_VF(vfwadd_vf_h, 4) | ||
144 | +GEN_VEXT_VF(vfwadd_vf_w, 8) | ||
145 | |||
146 | static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s) | ||
147 | { | ||
148 | @@ -XXX,XX +XXX,XX @@ static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s) | ||
149 | |||
150 | RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16) | ||
151 | RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32) | ||
152 | -GEN_VEXT_VV_ENV(vfwsub_vv_h) | ||
153 | -GEN_VEXT_VV_ENV(vfwsub_vv_w) | ||
154 | +GEN_VEXT_VV_ENV(vfwsub_vv_h, 4) | ||
155 | +GEN_VEXT_VV_ENV(vfwsub_vv_w, 8) | ||
156 | RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16) | ||
157 | RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32) | ||
158 | -GEN_VEXT_VF(vfwsub_vf_h) | ||
159 | -GEN_VEXT_VF(vfwsub_vf_w) | ||
160 | +GEN_VEXT_VF(vfwsub_vf_h, 4) | ||
161 | +GEN_VEXT_VF(vfwsub_vf_w, 8) | ||
162 | |||
163 | static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s) | ||
164 | { | ||
165 | @@ -XXX,XX +XXX,XX @@ static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s) | ||
166 | |||
167 | RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16) | ||
168 | RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32) | ||
169 | -GEN_VEXT_VV_ENV(vfwadd_wv_h) | ||
170 | -GEN_VEXT_VV_ENV(vfwadd_wv_w) | ||
171 | +GEN_VEXT_VV_ENV(vfwadd_wv_h, 4) | ||
172 | +GEN_VEXT_VV_ENV(vfwadd_wv_w, 8) | ||
173 | RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16) | ||
174 | RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32) | ||
175 | -GEN_VEXT_VF(vfwadd_wf_h) | ||
176 | -GEN_VEXT_VF(vfwadd_wf_w) | ||
177 | +GEN_VEXT_VF(vfwadd_wf_h, 4) | ||
178 | +GEN_VEXT_VF(vfwadd_wf_w, 8) | ||
179 | |||
180 | static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s) | ||
181 | { | ||
182 | @@ -XXX,XX +XXX,XX @@ static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s) | ||
183 | |||
184 | RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16) | ||
185 | RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32) | ||
186 | -GEN_VEXT_VV_ENV(vfwsub_wv_h) | ||
187 | -GEN_VEXT_VV_ENV(vfwsub_wv_w) | ||
188 | +GEN_VEXT_VV_ENV(vfwsub_wv_h, 4) | ||
189 | +GEN_VEXT_VV_ENV(vfwsub_wv_w, 8) | ||
190 | RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16) | ||
191 | RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32) | ||
192 | -GEN_VEXT_VF(vfwsub_wf_h) | ||
193 | -GEN_VEXT_VF(vfwsub_wf_w) | ||
194 | +GEN_VEXT_VF(vfwsub_wf_h, 4) | ||
195 | +GEN_VEXT_VF(vfwsub_wf_w, 8) | ||
196 | |||
197 | /* Vector Single-Width Floating-Point Multiply/Divide Instructions */ | ||
198 | RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul) | ||
199 | RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul) | ||
200 | RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul) | ||
201 | -GEN_VEXT_VV_ENV(vfmul_vv_h) | ||
202 | -GEN_VEXT_VV_ENV(vfmul_vv_w) | ||
203 | -GEN_VEXT_VV_ENV(vfmul_vv_d) | ||
204 | +GEN_VEXT_VV_ENV(vfmul_vv_h, 2) | ||
205 | +GEN_VEXT_VV_ENV(vfmul_vv_w, 4) | ||
206 | +GEN_VEXT_VV_ENV(vfmul_vv_d, 8) | ||
207 | RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul) | ||
208 | RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul) | ||
209 | RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul) | ||
210 | -GEN_VEXT_VF(vfmul_vf_h) | ||
211 | -GEN_VEXT_VF(vfmul_vf_w) | ||
212 | -GEN_VEXT_VF(vfmul_vf_d) | ||
213 | +GEN_VEXT_VF(vfmul_vf_h, 2) | ||
214 | +GEN_VEXT_VF(vfmul_vf_w, 4) | ||
215 | +GEN_VEXT_VF(vfmul_vf_d, 8) | ||
216 | |||
217 | RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div) | ||
218 | RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div) | ||
219 | RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div) | ||
220 | -GEN_VEXT_VV_ENV(vfdiv_vv_h) | ||
221 | -GEN_VEXT_VV_ENV(vfdiv_vv_w) | ||
222 | -GEN_VEXT_VV_ENV(vfdiv_vv_d) | ||
223 | +GEN_VEXT_VV_ENV(vfdiv_vv_h, 2) | ||
224 | +GEN_VEXT_VV_ENV(vfdiv_vv_w, 4) | ||
225 | +GEN_VEXT_VV_ENV(vfdiv_vv_d, 8) | ||
226 | RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div) | ||
227 | RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div) | ||
228 | RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div) | ||
229 | -GEN_VEXT_VF(vfdiv_vf_h) | ||
230 | -GEN_VEXT_VF(vfdiv_vf_w) | ||
231 | -GEN_VEXT_VF(vfdiv_vf_d) | ||
232 | +GEN_VEXT_VF(vfdiv_vf_h, 2) | ||
233 | +GEN_VEXT_VF(vfdiv_vf_w, 4) | ||
234 | +GEN_VEXT_VF(vfdiv_vf_d, 8) | ||
235 | |||
236 | static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s) | ||
237 | { | ||
238 | @@ -XXX,XX +XXX,XX @@ static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s) | ||
239 | RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv) | ||
240 | RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv) | ||
241 | RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv) | ||
242 | -GEN_VEXT_VF(vfrdiv_vf_h) | ||
243 | -GEN_VEXT_VF(vfrdiv_vf_w) | ||
244 | -GEN_VEXT_VF(vfrdiv_vf_d) | ||
245 | +GEN_VEXT_VF(vfrdiv_vf_h, 2) | ||
246 | +GEN_VEXT_VF(vfrdiv_vf_w, 4) | ||
247 | +GEN_VEXT_VF(vfrdiv_vf_d, 8) | ||
248 | |||
249 | /* Vector Widening Floating-Point Multiply */ | ||
250 | static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s) | ||
251 | @@ -XXX,XX +XXX,XX @@ static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s) | ||
252 | } | ||
253 | RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16) | ||
254 | RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32) | ||
255 | -GEN_VEXT_VV_ENV(vfwmul_vv_h) | ||
256 | -GEN_VEXT_VV_ENV(vfwmul_vv_w) | ||
257 | +GEN_VEXT_VV_ENV(vfwmul_vv_h, 4) | ||
258 | +GEN_VEXT_VV_ENV(vfwmul_vv_w, 8) | ||
259 | RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16) | ||
260 | RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32) | ||
261 | -GEN_VEXT_VF(vfwmul_vf_h) | ||
262 | -GEN_VEXT_VF(vfwmul_vf_w) | ||
263 | +GEN_VEXT_VF(vfwmul_vf_h, 4) | ||
264 | +GEN_VEXT_VF(vfwmul_vf_w, 8) | ||
265 | |||
266 | /* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */ | ||
267 | #define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ | ||
268 | @@ -XXX,XX +XXX,XX @@ static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) | ||
269 | RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16) | ||
270 | RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32) | ||
271 | RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64) | ||
272 | -GEN_VEXT_VV_ENV(vfmacc_vv_h) | ||
273 | -GEN_VEXT_VV_ENV(vfmacc_vv_w) | ||
274 | -GEN_VEXT_VV_ENV(vfmacc_vv_d) | ||
275 | +GEN_VEXT_VV_ENV(vfmacc_vv_h, 2) | ||
276 | +GEN_VEXT_VV_ENV(vfmacc_vv_w, 4) | ||
277 | +GEN_VEXT_VV_ENV(vfmacc_vv_d, 8) | ||
278 | |||
279 | #define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ | ||
280 | static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ | ||
281 | @@ -XXX,XX +XXX,XX @@ static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ | ||
282 | RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16) | ||
283 | RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32) | ||
284 | RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64) | ||
285 | -GEN_VEXT_VF(vfmacc_vf_h) | ||
286 | -GEN_VEXT_VF(vfmacc_vf_w) | ||
287 | -GEN_VEXT_VF(vfmacc_vf_d) | ||
288 | +GEN_VEXT_VF(vfmacc_vf_h, 2) | ||
289 | +GEN_VEXT_VF(vfmacc_vf_w, 4) | ||
290 | +GEN_VEXT_VF(vfmacc_vf_d, 8) | ||
291 | |||
292 | static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) | ||
293 | { | ||
294 | @@ -XXX,XX +XXX,XX @@ static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) | ||
295 | RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16) | ||
296 | RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32) | ||
297 | RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64) | ||
298 | -GEN_VEXT_VV_ENV(vfnmacc_vv_h) | ||
299 | -GEN_VEXT_VV_ENV(vfnmacc_vv_w) | ||
300 | -GEN_VEXT_VV_ENV(vfnmacc_vv_d) | ||
301 | +GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2) | ||
302 | +GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4) | ||
303 | +GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8) | ||
304 | RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16) | ||
305 | RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32) | ||
306 | RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64) | ||
307 | -GEN_VEXT_VF(vfnmacc_vf_h) | ||
308 | -GEN_VEXT_VF(vfnmacc_vf_w) | ||
309 | -GEN_VEXT_VF(vfnmacc_vf_d) | ||
310 | +GEN_VEXT_VF(vfnmacc_vf_h, 2) | ||
311 | +GEN_VEXT_VF(vfnmacc_vf_w, 4) | ||
312 | +GEN_VEXT_VF(vfnmacc_vf_d, 8) | ||
313 | |||
314 | static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) | ||
315 | { | ||
316 | @@ -XXX,XX +XXX,XX @@ static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) | ||
317 | RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16) | ||
318 | RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32) | ||
319 | RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64) | ||
320 | -GEN_VEXT_VV_ENV(vfmsac_vv_h) | ||
321 | -GEN_VEXT_VV_ENV(vfmsac_vv_w) | ||
322 | -GEN_VEXT_VV_ENV(vfmsac_vv_d) | ||
323 | +GEN_VEXT_VV_ENV(vfmsac_vv_h, 2) | ||
324 | +GEN_VEXT_VV_ENV(vfmsac_vv_w, 4) | ||
325 | +GEN_VEXT_VV_ENV(vfmsac_vv_d, 8) | ||
326 | RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16) | ||
327 | RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32) | ||
328 | RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64) | ||
329 | -GEN_VEXT_VF(vfmsac_vf_h) | ||
330 | -GEN_VEXT_VF(vfmsac_vf_w) | ||
331 | -GEN_VEXT_VF(vfmsac_vf_d) | ||
332 | +GEN_VEXT_VF(vfmsac_vf_h, 2) | ||
333 | +GEN_VEXT_VF(vfmsac_vf_w, 4) | ||
334 | +GEN_VEXT_VF(vfmsac_vf_d, 8) | ||
335 | |||
336 | static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) | ||
337 | { | ||
338 | @@ -XXX,XX +XXX,XX @@ static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) | ||
339 | RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16) | ||
340 | RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32) | ||
341 | RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64) | ||
342 | -GEN_VEXT_VV_ENV(vfnmsac_vv_h) | ||
343 | -GEN_VEXT_VV_ENV(vfnmsac_vv_w) | ||
344 | -GEN_VEXT_VV_ENV(vfnmsac_vv_d) | ||
345 | +GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2) | ||
346 | +GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4) | ||
347 | +GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8) | ||
348 | RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16) | ||
349 | RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32) | ||
350 | RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64) | ||
351 | -GEN_VEXT_VF(vfnmsac_vf_h) | ||
352 | -GEN_VEXT_VF(vfnmsac_vf_w) | ||
353 | -GEN_VEXT_VF(vfnmsac_vf_d) | ||
354 | +GEN_VEXT_VF(vfnmsac_vf_h, 2) | ||
355 | +GEN_VEXT_VF(vfnmsac_vf_w, 4) | ||
356 | +GEN_VEXT_VF(vfnmsac_vf_d, 8) | ||
357 | |||
358 | static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) | ||
359 | { | ||
360 | @@ -XXX,XX +XXX,XX @@ static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) | ||
361 | RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16) | ||
362 | RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32) | ||
363 | RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64) | ||
364 | -GEN_VEXT_VV_ENV(vfmadd_vv_h) | ||
365 | -GEN_VEXT_VV_ENV(vfmadd_vv_w) | ||
366 | -GEN_VEXT_VV_ENV(vfmadd_vv_d) | ||
367 | +GEN_VEXT_VV_ENV(vfmadd_vv_h, 2) | ||
368 | +GEN_VEXT_VV_ENV(vfmadd_vv_w, 4) | ||
369 | +GEN_VEXT_VV_ENV(vfmadd_vv_d, 8) | ||
370 | RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16) | ||
371 | RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32) | ||
372 | RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64) | ||
373 | -GEN_VEXT_VF(vfmadd_vf_h) | ||
374 | -GEN_VEXT_VF(vfmadd_vf_w) | ||
375 | -GEN_VEXT_VF(vfmadd_vf_d) | ||
376 | +GEN_VEXT_VF(vfmadd_vf_h, 2) | ||
377 | +GEN_VEXT_VF(vfmadd_vf_w, 4) | ||
378 | +GEN_VEXT_VF(vfmadd_vf_d, 8) | ||
379 | |||
380 | static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) | ||
381 | { | ||
382 | @@ -XXX,XX +XXX,XX @@ static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) | ||
383 | RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16) | ||
384 | RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32) | ||
385 | RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64) | ||
386 | -GEN_VEXT_VV_ENV(vfnmadd_vv_h) | ||
387 | -GEN_VEXT_VV_ENV(vfnmadd_vv_w) | ||
388 | -GEN_VEXT_VV_ENV(vfnmadd_vv_d) | ||
389 | +GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2) | ||
390 | +GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4) | ||
391 | +GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8) | ||
392 | RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16) | ||
393 | RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32) | ||
394 | RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64) | ||
395 | -GEN_VEXT_VF(vfnmadd_vf_h) | ||
396 | -GEN_VEXT_VF(vfnmadd_vf_w) | ||
397 | -GEN_VEXT_VF(vfnmadd_vf_d) | ||
398 | +GEN_VEXT_VF(vfnmadd_vf_h, 2) | ||
399 | +GEN_VEXT_VF(vfnmadd_vf_w, 4) | ||
400 | +GEN_VEXT_VF(vfnmadd_vf_d, 8) | ||
401 | |||
402 | static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) | ||
403 | { | ||
404 | @@ -XXX,XX +XXX,XX @@ static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) | ||
405 | RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16) | ||
406 | RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32) | ||
407 | RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64) | ||
408 | -GEN_VEXT_VV_ENV(vfmsub_vv_h) | ||
409 | -GEN_VEXT_VV_ENV(vfmsub_vv_w) | ||
410 | -GEN_VEXT_VV_ENV(vfmsub_vv_d) | ||
411 | +GEN_VEXT_VV_ENV(vfmsub_vv_h, 2) | ||
412 | +GEN_VEXT_VV_ENV(vfmsub_vv_w, 4) | ||
413 | +GEN_VEXT_VV_ENV(vfmsub_vv_d, 8) | ||
414 | RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16) | ||
415 | RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32) | ||
416 | RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64) | ||
417 | -GEN_VEXT_VF(vfmsub_vf_h) | ||
418 | -GEN_VEXT_VF(vfmsub_vf_w) | ||
419 | -GEN_VEXT_VF(vfmsub_vf_d) | ||
420 | +GEN_VEXT_VF(vfmsub_vf_h, 2) | ||
421 | +GEN_VEXT_VF(vfmsub_vf_w, 4) | ||
422 | +GEN_VEXT_VF(vfmsub_vf_d, 8) | ||
423 | |||
424 | static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) | ||
425 | { | ||
426 | @@ -XXX,XX +XXX,XX @@ static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) | ||
427 | RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16) | ||
428 | RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32) | ||
429 | RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64) | ||
430 | -GEN_VEXT_VV_ENV(vfnmsub_vv_h) | ||
431 | -GEN_VEXT_VV_ENV(vfnmsub_vv_w) | ||
432 | -GEN_VEXT_VV_ENV(vfnmsub_vv_d) | ||
433 | +GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2) | ||
434 | +GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4) | ||
435 | +GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8) | ||
436 | RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16) | ||
437 | RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32) | ||
438 | RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64) | ||
439 | -GEN_VEXT_VF(vfnmsub_vf_h) | ||
440 | -GEN_VEXT_VF(vfnmsub_vf_w) | ||
441 | -GEN_VEXT_VF(vfnmsub_vf_d) | ||
442 | +GEN_VEXT_VF(vfnmsub_vf_h, 2) | ||
443 | +GEN_VEXT_VF(vfnmsub_vf_w, 4) | ||
444 | +GEN_VEXT_VF(vfnmsub_vf_d, 8) | ||
445 | |||
446 | /* Vector Widening Floating-Point Fused Multiply-Add Instructions */ | ||
447 | static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) | ||
448 | @@ -XXX,XX +XXX,XX @@ static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) | ||
449 | |||
450 | RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16) | ||
451 | RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32) | ||
452 | -GEN_VEXT_VV_ENV(vfwmacc_vv_h) | ||
453 | -GEN_VEXT_VV_ENV(vfwmacc_vv_w) | ||
454 | +GEN_VEXT_VV_ENV(vfwmacc_vv_h, 4) | ||
455 | +GEN_VEXT_VV_ENV(vfwmacc_vv_w, 8) | ||
456 | RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16) | ||
457 | RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32) | ||
458 | -GEN_VEXT_VF(vfwmacc_vf_h) | ||
459 | -GEN_VEXT_VF(vfwmacc_vf_w) | ||
460 | +GEN_VEXT_VF(vfwmacc_vf_h, 4) | ||
461 | +GEN_VEXT_VF(vfwmacc_vf_w, 8) | ||
462 | |||
463 | static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) | ||
464 | { | ||
465 | @@ -XXX,XX +XXX,XX @@ static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) | ||
466 | |||
467 | RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16) | ||
468 | RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32) | ||
469 | -GEN_VEXT_VV_ENV(vfwnmacc_vv_h) | ||
470 | -GEN_VEXT_VV_ENV(vfwnmacc_vv_w) | ||
471 | +GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 4) | ||
472 | +GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 8) | ||
473 | RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16) | ||
474 | RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32) | ||
475 | -GEN_VEXT_VF(vfwnmacc_vf_h) | ||
476 | -GEN_VEXT_VF(vfwnmacc_vf_w) | ||
477 | +GEN_VEXT_VF(vfwnmacc_vf_h, 4) | ||
478 | +GEN_VEXT_VF(vfwnmacc_vf_w, 8) | ||
479 | |||
480 | static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) | ||
481 | { | ||
482 | @@ -XXX,XX +XXX,XX @@ static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) | ||
483 | |||
484 | RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16) | ||
485 | RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32) | ||
486 | -GEN_VEXT_VV_ENV(vfwmsac_vv_h) | ||
487 | -GEN_VEXT_VV_ENV(vfwmsac_vv_w) | ||
488 | +GEN_VEXT_VV_ENV(vfwmsac_vv_h, 4) | ||
489 | +GEN_VEXT_VV_ENV(vfwmsac_vv_w, 8) | ||
490 | RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16) | ||
491 | RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32) | ||
492 | -GEN_VEXT_VF(vfwmsac_vf_h) | ||
493 | -GEN_VEXT_VF(vfwmsac_vf_w) | ||
494 | +GEN_VEXT_VF(vfwmsac_vf_h, 4) | ||
495 | +GEN_VEXT_VF(vfwmsac_vf_w, 8) | ||
496 | |||
497 | static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) | ||
498 | { | ||
499 | @@ -XXX,XX +XXX,XX @@ static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) | ||
500 | |||
501 | RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16) | ||
502 | RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32) | ||
503 | -GEN_VEXT_VV_ENV(vfwnmsac_vv_h) | ||
504 | -GEN_VEXT_VV_ENV(vfwnmsac_vv_w) | ||
505 | +GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 4) | ||
506 | +GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 8) | ||
507 | RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16) | ||
508 | RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32) | ||
509 | -GEN_VEXT_VF(vfwnmsac_vf_h) | ||
510 | -GEN_VEXT_VF(vfwnmsac_vf_w) | ||
511 | +GEN_VEXT_VF(vfwnmsac_vf_h, 4) | ||
512 | +GEN_VEXT_VF(vfwnmsac_vf_w, 8) | ||
513 | |||
514 | /* Vector Floating-Point Square-Root Instruction */ | ||
515 | /* (TD, T2, TX2) */ | ||
516 | @@ -XXX,XX +XXX,XX @@ static void do_##NAME(void *vd, void *vs2, int i, \ | ||
517 | *((TD *)vd + HD(i)) = OP(s2, &env->fp_status); \ | ||
518 | } | ||
519 | |||
520 | -#define GEN_VEXT_V_ENV(NAME) \ | ||
521 | +#define GEN_VEXT_V_ENV(NAME, ESZ) \ | ||
522 | void HELPER(NAME)(void *vd, void *v0, void *vs2, \ | ||
523 | CPURISCVState *env, uint32_t desc) \ | ||
524 | { \ | ||
525 | uint32_t vm = vext_vm(desc); \ | ||
526 | uint32_t vl = env->vl; \ | ||
527 | + uint32_t total_elems = \ | ||
528 | + vext_get_total_elems(env, desc, ESZ); \ | ||
529 | + uint32_t vta = vext_vta(desc); \ | ||
530 | uint32_t i; \ | ||
531 | \ | ||
532 | if (vl == 0) { \ | ||
533 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs2, \ | ||
534 | do_##NAME(vd, vs2, i, env); \ | ||
535 | } \ | ||
536 | env->vstart = 0; \ | ||
537 | + vext_set_elems_1s(vd, vta, vl * ESZ, \ | ||
538 | + total_elems * ESZ); \ | ||
539 | } | ||
540 | |||
541 | RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt) | ||
542 | RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt) | ||
543 | RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt) | ||
544 | -GEN_VEXT_V_ENV(vfsqrt_v_h) | ||
545 | -GEN_VEXT_V_ENV(vfsqrt_v_w) | ||
546 | -GEN_VEXT_V_ENV(vfsqrt_v_d) | ||
547 | +GEN_VEXT_V_ENV(vfsqrt_v_h, 2) | ||
548 | +GEN_VEXT_V_ENV(vfsqrt_v_w, 4) | ||
549 | +GEN_VEXT_V_ENV(vfsqrt_v_d, 8) | ||
550 | |||
551 | /* | ||
552 | * Vector Floating-Point Reciprocal Square-Root Estimate Instruction | ||
553 | @@ -XXX,XX +XXX,XX @@ static float64 frsqrt7_d(float64 f, float_status *s) | ||
554 | RVVCALL(OPFVV1, vfrsqrt7_v_h, OP_UU_H, H2, H2, frsqrt7_h) | ||
555 | RVVCALL(OPFVV1, vfrsqrt7_v_w, OP_UU_W, H4, H4, frsqrt7_s) | ||
556 | RVVCALL(OPFVV1, vfrsqrt7_v_d, OP_UU_D, H8, H8, frsqrt7_d) | ||
557 | -GEN_VEXT_V_ENV(vfrsqrt7_v_h) | ||
558 | -GEN_VEXT_V_ENV(vfrsqrt7_v_w) | ||
559 | -GEN_VEXT_V_ENV(vfrsqrt7_v_d) | ||
560 | +GEN_VEXT_V_ENV(vfrsqrt7_v_h, 2) | ||
561 | +GEN_VEXT_V_ENV(vfrsqrt7_v_w, 4) | ||
562 | +GEN_VEXT_V_ENV(vfrsqrt7_v_d, 8) | ||
563 | |||
564 | /* | ||
565 | * Vector Floating-Point Reciprocal Estimate Instruction | ||
566 | @@ -XXX,XX +XXX,XX @@ static float64 frec7_d(float64 f, float_status *s) | ||
567 | RVVCALL(OPFVV1, vfrec7_v_h, OP_UU_H, H2, H2, frec7_h) | ||
568 | RVVCALL(OPFVV1, vfrec7_v_w, OP_UU_W, H4, H4, frec7_s) | ||
569 | RVVCALL(OPFVV1, vfrec7_v_d, OP_UU_D, H8, H8, frec7_d) | ||
570 | -GEN_VEXT_V_ENV(vfrec7_v_h) | ||
571 | -GEN_VEXT_V_ENV(vfrec7_v_w) | ||
572 | -GEN_VEXT_V_ENV(vfrec7_v_d) | ||
573 | +GEN_VEXT_V_ENV(vfrec7_v_h, 2) | ||
574 | +GEN_VEXT_V_ENV(vfrec7_v_w, 4) | ||
575 | +GEN_VEXT_V_ENV(vfrec7_v_d, 8) | ||
576 | |||
577 | /* Vector Floating-Point MIN/MAX Instructions */ | ||
578 | RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minimum_number) | ||
579 | RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, float32_minimum_number) | ||
580 | RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minimum_number) | ||
581 | -GEN_VEXT_VV_ENV(vfmin_vv_h) | ||
582 | -GEN_VEXT_VV_ENV(vfmin_vv_w) | ||
583 | -GEN_VEXT_VV_ENV(vfmin_vv_d) | ||
584 | +GEN_VEXT_VV_ENV(vfmin_vv_h, 2) | ||
585 | +GEN_VEXT_VV_ENV(vfmin_vv_w, 4) | ||
586 | +GEN_VEXT_VV_ENV(vfmin_vv_d, 8) | ||
587 | RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minimum_number) | ||
588 | RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minimum_number) | ||
589 | RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minimum_number) | ||
590 | -GEN_VEXT_VF(vfmin_vf_h) | ||
591 | -GEN_VEXT_VF(vfmin_vf_w) | ||
592 | -GEN_VEXT_VF(vfmin_vf_d) | ||
593 | +GEN_VEXT_VF(vfmin_vf_h, 2) | ||
594 | +GEN_VEXT_VF(vfmin_vf_w, 4) | ||
595 | +GEN_VEXT_VF(vfmin_vf_d, 8) | ||
596 | |||
597 | RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maximum_number) | ||
598 | RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maximum_number) | ||
599 | RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maximum_number) | ||
600 | -GEN_VEXT_VV_ENV(vfmax_vv_h) | ||
601 | -GEN_VEXT_VV_ENV(vfmax_vv_w) | ||
602 | -GEN_VEXT_VV_ENV(vfmax_vv_d) | ||
603 | +GEN_VEXT_VV_ENV(vfmax_vv_h, 2) | ||
604 | +GEN_VEXT_VV_ENV(vfmax_vv_w, 4) | ||
605 | +GEN_VEXT_VV_ENV(vfmax_vv_d, 8) | ||
606 | RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maximum_number) | ||
607 | RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maximum_number) | ||
608 | RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maximum_number) | ||
609 | -GEN_VEXT_VF(vfmax_vf_h) | ||
610 | -GEN_VEXT_VF(vfmax_vf_w) | ||
611 | -GEN_VEXT_VF(vfmax_vf_d) | ||
612 | +GEN_VEXT_VF(vfmax_vf_h, 2) | ||
613 | +GEN_VEXT_VF(vfmax_vf_w, 4) | ||
614 | +GEN_VEXT_VF(vfmax_vf_d, 8) | ||
615 | |||
616 | /* Vector Floating-Point Sign-Injection Instructions */ | ||
617 | static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s) | ||
618 | @@ -XXX,XX +XXX,XX @@ static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s) | ||
619 | RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16) | ||
620 | RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32) | ||
621 | RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64) | ||
622 | -GEN_VEXT_VV_ENV(vfsgnj_vv_h) | ||
623 | -GEN_VEXT_VV_ENV(vfsgnj_vv_w) | ||
624 | -GEN_VEXT_VV_ENV(vfsgnj_vv_d) | ||
625 | +GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2) | ||
626 | +GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4) | ||
627 | +GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8) | ||
628 | RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16) | ||
629 | RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32) | ||
630 | RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64) | ||
631 | -GEN_VEXT_VF(vfsgnj_vf_h) | ||
632 | -GEN_VEXT_VF(vfsgnj_vf_w) | ||
633 | -GEN_VEXT_VF(vfsgnj_vf_d) | ||
634 | +GEN_VEXT_VF(vfsgnj_vf_h, 2) | ||
635 | +GEN_VEXT_VF(vfsgnj_vf_w, 4) | ||
636 | +GEN_VEXT_VF(vfsgnj_vf_d, 8) | ||
637 | |||
638 | static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s) | ||
639 | { | ||
640 | @@ -XXX,XX +XXX,XX @@ static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s) | ||
641 | RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16) | ||
642 | RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32) | ||
643 | RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64) | ||
644 | -GEN_VEXT_VV_ENV(vfsgnjn_vv_h) | ||
645 | -GEN_VEXT_VV_ENV(vfsgnjn_vv_w) | ||
646 | -GEN_VEXT_VV_ENV(vfsgnjn_vv_d) | ||
647 | +GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2) | ||
648 | +GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4) | ||
649 | +GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8) | ||
650 | RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16) | ||
651 | RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32) | ||
652 | RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64) | ||
653 | -GEN_VEXT_VF(vfsgnjn_vf_h) | ||
654 | -GEN_VEXT_VF(vfsgnjn_vf_w) | ||
655 | -GEN_VEXT_VF(vfsgnjn_vf_d) | ||
656 | +GEN_VEXT_VF(vfsgnjn_vf_h, 2) | ||
657 | +GEN_VEXT_VF(vfsgnjn_vf_w, 4) | ||
658 | +GEN_VEXT_VF(vfsgnjn_vf_d, 8) | ||
659 | |||
660 | static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s) | ||
661 | { | ||
662 | @@ -XXX,XX +XXX,XX @@ static uint64_t fsgnjx64(uint64_t a, uint64_t b, float_status *s) | ||
663 | RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16) | ||
664 | RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32) | ||
665 | RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64) | ||
666 | -GEN_VEXT_VV_ENV(vfsgnjx_vv_h) | ||
667 | -GEN_VEXT_VV_ENV(vfsgnjx_vv_w) | ||
668 | -GEN_VEXT_VV_ENV(vfsgnjx_vv_d) | ||
669 | +GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2) | ||
670 | +GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4) | ||
671 | +GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8) | ||
672 | RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16) | ||
673 | RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32) | ||
674 | RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64) | ||
675 | -GEN_VEXT_VF(vfsgnjx_vf_h) | ||
676 | -GEN_VEXT_VF(vfsgnjx_vf_w) | ||
677 | -GEN_VEXT_VF(vfsgnjx_vf_d) | ||
678 | +GEN_VEXT_VF(vfsgnjx_vf_h, 2) | ||
679 | +GEN_VEXT_VF(vfsgnjx_vf_w, 4) | ||
680 | +GEN_VEXT_VF(vfsgnjx_vf_d, 8) | ||
681 | |||
682 | /* Vector Floating-Point Compare Instructions */ | ||
683 | #define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP) \ | ||
684 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ | ||
685 | { \ | ||
686 | uint32_t vm = vext_vm(desc); \ | ||
687 | uint32_t vl = env->vl; \ | ||
688 | + uint32_t total_elems = env_archcpu(env)->cfg.vlen; \ | ||
689 | + uint32_t vta_all_1s = vext_vta_all_1s(desc); \ | ||
690 | uint32_t i; \ | ||
691 | \ | ||
692 | for (i = env->vstart; i < vl; i++) { \ | ||
693 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ | ||
694 | DO_OP(s2, s1, &env->fp_status)); \ | ||
695 | } \ | ||
696 | env->vstart = 0; \ | ||
697 | + /* mask destination register are always tail-agnostic */ \ | ||
698 | + /* set tail elements to 1s */ \ | ||
699 | + if (vta_all_1s) { \ | ||
700 | + for (; i < total_elems; i++) { \ | ||
701 | + vext_set_elem_mask(vd, i, 1); \ | ||
702 | + } \ | ||
703 | + } \ | ||
704 | } | ||
705 | |||
706 | GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet) | ||
707 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ | ||
708 | { \ | ||
709 | uint32_t vm = vext_vm(desc); \ | ||
710 | uint32_t vl = env->vl; \ | ||
711 | + uint32_t total_elems = env_archcpu(env)->cfg.vlen; \ | ||
712 | + uint32_t vta_all_1s = vext_vta_all_1s(desc); \ | ||
713 | uint32_t i; \ | ||
714 | \ | ||
715 | for (i = env->vstart; i < vl; i++) { \ | ||
716 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ | ||
717 | DO_OP(s2, (ETYPE)s1, &env->fp_status)); \ | ||
718 | } \ | ||
719 | env->vstart = 0; \ | ||
720 | + /* mask destination register are always tail-agnostic */ \ | ||
721 | + /* set tail elements to 1s */ \ | ||
722 | + if (vta_all_1s) { \ | ||
723 | + for (; i < total_elems; i++) { \ | ||
724 | + vext_set_elem_mask(vd, i, 1); \ | ||
725 | + } \ | ||
726 | + } \ | ||
727 | } | ||
728 | |||
729 | GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet) | ||
730 | @@ -XXX,XX +XXX,XX @@ static void do_##NAME(void *vd, void *vs2, int i) \ | ||
731 | *((TD *)vd + HD(i)) = OP(s2); \ | ||
732 | } | ||
733 | |||
734 | -#define GEN_VEXT_V(NAME) \ | ||
735 | +#define GEN_VEXT_V(NAME, ESZ) \ | ||
736 | void HELPER(NAME)(void *vd, void *v0, void *vs2, \ | ||
737 | CPURISCVState *env, uint32_t desc) \ | ||
738 | { \ | ||
739 | uint32_t vm = vext_vm(desc); \ | ||
740 | uint32_t vl = env->vl; \ | ||
741 | + uint32_t total_elems = \ | ||
742 | + vext_get_total_elems(env, desc, ESZ); \ | ||
743 | + uint32_t vta = vext_vta(desc); \ | ||
744 | uint32_t i; \ | ||
745 | \ | ||
746 | for (i = env->vstart; i < vl; i++) { \ | ||
747 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs2, \ | ||
748 | do_##NAME(vd, vs2, i); \ | ||
749 | } \ | ||
750 | env->vstart = 0; \ | ||
751 | + /* set tail elements to 1s */ \ | ||
752 | + vext_set_elems_1s(vd, vta, vl * ESZ, \ | ||
753 | + total_elems * ESZ); \ | ||
754 | } | ||
755 | |||
756 | target_ulong fclass_h(uint64_t frs1) | ||
757 | @@ -XXX,XX +XXX,XX @@ target_ulong fclass_d(uint64_t frs1) | ||
758 | RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h) | ||
759 | RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s) | ||
760 | RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d) | ||
761 | -GEN_VEXT_V(vfclass_v_h) | ||
762 | -GEN_VEXT_V(vfclass_v_w) | ||
763 | -GEN_VEXT_V(vfclass_v_d) | ||
764 | +GEN_VEXT_V(vfclass_v_h, 2) | ||
765 | +GEN_VEXT_V(vfclass_v_w, 4) | ||
766 | +GEN_VEXT_V(vfclass_v_d, 8) | ||
767 | |||
768 | /* Vector Floating-Point Merge Instruction */ | ||
769 | + | ||
770 | #define GEN_VFMERGE_VF(NAME, ETYPE, H) \ | ||
771 | void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ | ||
772 | CPURISCVState *env, uint32_t desc) \ | ||
773 | { \ | ||
774 | uint32_t vm = vext_vm(desc); \ | ||
775 | uint32_t vl = env->vl; \ | ||
776 | + uint32_t esz = sizeof(ETYPE); \ | ||
777 | + uint32_t total_elems = \ | ||
778 | + vext_get_total_elems(env, desc, esz); \ | ||
779 | + uint32_t vta = vext_vta(desc); \ | ||
780 | uint32_t i; \ | ||
781 | \ | ||
782 | for (i = env->vstart; i < vl; i++) { \ | ||
783 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ | ||
784 | = (!vm && !vext_elem_mask(v0, i) ? s2 : s1); \ | ||
785 | } \ | ||
786 | env->vstart = 0; \ | ||
787 | + /* set tail elements to 1s */ \ | ||
788 | + vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ | ||
789 | } | ||
790 | |||
791 | GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2) | ||
792 | @@ -XXX,XX +XXX,XX @@ GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8) | ||
793 | RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16) | ||
794 | RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32) | ||
795 | RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64) | ||
796 | -GEN_VEXT_V_ENV(vfcvt_xu_f_v_h) | ||
797 | -GEN_VEXT_V_ENV(vfcvt_xu_f_v_w) | ||
798 | -GEN_VEXT_V_ENV(vfcvt_xu_f_v_d) | ||
799 | +GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2) | ||
800 | +GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4) | ||
801 | +GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8) | ||
802 | |||
803 | /* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */ | ||
804 | RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16) | ||
805 | RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32) | ||
806 | RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64) | ||
807 | -GEN_VEXT_V_ENV(vfcvt_x_f_v_h) | ||
808 | -GEN_VEXT_V_ENV(vfcvt_x_f_v_w) | ||
809 | -GEN_VEXT_V_ENV(vfcvt_x_f_v_d) | ||
810 | +GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2) | ||
811 | +GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4) | ||
812 | +GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8) | ||
813 | |||
814 | /* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */ | ||
815 | RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16) | ||
816 | RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32) | ||
817 | RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64) | ||
818 | -GEN_VEXT_V_ENV(vfcvt_f_xu_v_h) | ||
819 | -GEN_VEXT_V_ENV(vfcvt_f_xu_v_w) | ||
820 | -GEN_VEXT_V_ENV(vfcvt_f_xu_v_d) | ||
821 | +GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2) | ||
822 | +GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4) | ||
823 | +GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8) | ||
824 | |||
825 | /* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */ | ||
826 | RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16) | ||
827 | RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32) | ||
828 | RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64) | ||
829 | -GEN_VEXT_V_ENV(vfcvt_f_x_v_h) | ||
830 | -GEN_VEXT_V_ENV(vfcvt_f_x_v_w) | ||
831 | -GEN_VEXT_V_ENV(vfcvt_f_x_v_d) | ||
832 | +GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2) | ||
833 | +GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4) | ||
834 | +GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8) | ||
835 | |||
836 | /* Widening Floating-Point/Integer Type-Convert Instructions */ | ||
837 | /* (TD, T2, TX2) */ | ||
838 | @@ -XXX,XX +XXX,XX @@ GEN_VEXT_V_ENV(vfcvt_f_x_v_d) | ||
839 | /* vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer.*/ | ||
840 | RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32) | ||
841 | RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64) | ||
842 | -GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h) | ||
843 | -GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w) | ||
844 | +GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 4) | ||
845 | +GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 8) | ||
846 | |||
847 | /* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */ | ||
848 | RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32) | ||
849 | RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64) | ||
850 | -GEN_VEXT_V_ENV(vfwcvt_x_f_v_h) | ||
851 | -GEN_VEXT_V_ENV(vfwcvt_x_f_v_w) | ||
852 | +GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 4) | ||
853 | +GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 8) | ||
854 | |||
855 | /* vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float */ | ||
856 | RVVCALL(OPFVV1, vfwcvt_f_xu_v_b, WOP_UU_B, H2, H1, uint8_to_float16) | ||
857 | RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32) | ||
858 | RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64) | ||
859 | -GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b) | ||
860 | -GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h) | ||
861 | -GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w) | ||
862 | +GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b, 2) | ||
863 | +GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 4) | ||
864 | +GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 8) | ||
865 | |||
866 | /* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */ | ||
867 | RVVCALL(OPFVV1, vfwcvt_f_x_v_b, WOP_UU_B, H2, H1, int8_to_float16) | ||
868 | RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32) | ||
869 | RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64) | ||
870 | -GEN_VEXT_V_ENV(vfwcvt_f_x_v_b) | ||
871 | -GEN_VEXT_V_ENV(vfwcvt_f_x_v_h) | ||
872 | -GEN_VEXT_V_ENV(vfwcvt_f_x_v_w) | ||
873 | +GEN_VEXT_V_ENV(vfwcvt_f_x_v_b, 2) | ||
874 | +GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 4) | ||
875 | +GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 8) | ||
876 | |||
877 | /* | ||
878 | * vfwcvt.f.f.v vd, vs2, vm | ||
879 | @@ -XXX,XX +XXX,XX @@ static uint32_t vfwcvtffv16(uint16_t a, float_status *s) | ||
880 | |||
881 | RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16) | ||
882 | RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64) | ||
883 | -GEN_VEXT_V_ENV(vfwcvt_f_f_v_h) | ||
884 | -GEN_VEXT_V_ENV(vfwcvt_f_f_v_w) | ||
885 | +GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 4) | ||
886 | +GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 8) | ||
887 | |||
888 | /* Narrowing Floating-Point/Integer Type-Convert Instructions */ | ||
889 | /* (TD, T2, TX2) */ | ||
890 | @@ -XXX,XX +XXX,XX @@ GEN_VEXT_V_ENV(vfwcvt_f_f_v_w) | ||
891 | RVVCALL(OPFVV1, vfncvt_xu_f_w_b, NOP_UU_B, H1, H2, float16_to_uint8) | ||
892 | RVVCALL(OPFVV1, vfncvt_xu_f_w_h, NOP_UU_H, H2, H4, float32_to_uint16) | ||
893 | RVVCALL(OPFVV1, vfncvt_xu_f_w_w, NOP_UU_W, H4, H8, float64_to_uint32) | ||
894 | -GEN_VEXT_V_ENV(vfncvt_xu_f_w_b) | ||
895 | -GEN_VEXT_V_ENV(vfncvt_xu_f_w_h) | ||
896 | -GEN_VEXT_V_ENV(vfncvt_xu_f_w_w) | ||
897 | +GEN_VEXT_V_ENV(vfncvt_xu_f_w_b, 1) | ||
898 | +GEN_VEXT_V_ENV(vfncvt_xu_f_w_h, 2) | ||
899 | +GEN_VEXT_V_ENV(vfncvt_xu_f_w_w, 4) | ||
900 | |||
901 | /* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */ | ||
902 | RVVCALL(OPFVV1, vfncvt_x_f_w_b, NOP_UU_B, H1, H2, float16_to_int8) | ||
903 | RVVCALL(OPFVV1, vfncvt_x_f_w_h, NOP_UU_H, H2, H4, float32_to_int16) | ||
904 | RVVCALL(OPFVV1, vfncvt_x_f_w_w, NOP_UU_W, H4, H8, float64_to_int32) | ||
905 | -GEN_VEXT_V_ENV(vfncvt_x_f_w_b) | ||
906 | -GEN_VEXT_V_ENV(vfncvt_x_f_w_h) | ||
907 | -GEN_VEXT_V_ENV(vfncvt_x_f_w_w) | ||
908 | +GEN_VEXT_V_ENV(vfncvt_x_f_w_b, 1) | ||
909 | +GEN_VEXT_V_ENV(vfncvt_x_f_w_h, 2) | ||
910 | +GEN_VEXT_V_ENV(vfncvt_x_f_w_w, 4) | ||
911 | |||
912 | /* vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float */ | ||
913 | RVVCALL(OPFVV1, vfncvt_f_xu_w_h, NOP_UU_H, H2, H4, uint32_to_float16) | ||
914 | RVVCALL(OPFVV1, vfncvt_f_xu_w_w, NOP_UU_W, H4, H8, uint64_to_float32) | ||
915 | -GEN_VEXT_V_ENV(vfncvt_f_xu_w_h) | ||
916 | -GEN_VEXT_V_ENV(vfncvt_f_xu_w_w) | ||
917 | +GEN_VEXT_V_ENV(vfncvt_f_xu_w_h, 2) | ||
918 | +GEN_VEXT_V_ENV(vfncvt_f_xu_w_w, 4) | ||
919 | |||
920 | /* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */ | ||
921 | RVVCALL(OPFVV1, vfncvt_f_x_w_h, NOP_UU_H, H2, H4, int32_to_float16) | ||
922 | RVVCALL(OPFVV1, vfncvt_f_x_w_w, NOP_UU_W, H4, H8, int64_to_float32) | ||
923 | -GEN_VEXT_V_ENV(vfncvt_f_x_w_h) | ||
924 | -GEN_VEXT_V_ENV(vfncvt_f_x_w_w) | ||
925 | +GEN_VEXT_V_ENV(vfncvt_f_x_w_h, 2) | ||
926 | +GEN_VEXT_V_ENV(vfncvt_f_x_w_w, 4) | ||
927 | |||
928 | /* vfncvt.f.f.v vd, vs2, vm # Convert double float to single-width float. */ | ||
929 | static uint16_t vfncvtffv16(uint32_t a, float_status *s) | ||
930 | @@ -XXX,XX +XXX,XX @@ static uint16_t vfncvtffv16(uint32_t a, float_status *s) | ||
931 | |||
932 | RVVCALL(OPFVV1, vfncvt_f_f_w_h, NOP_UU_H, H2, H4, vfncvtffv16) | ||
933 | RVVCALL(OPFVV1, vfncvt_f_f_w_w, NOP_UU_W, H4, H8, float64_to_float32) | ||
934 | -GEN_VEXT_V_ENV(vfncvt_f_f_w_h) | ||
935 | -GEN_VEXT_V_ENV(vfncvt_f_f_w_w) | ||
936 | +GEN_VEXT_V_ENV(vfncvt_f_f_w_h, 2) | ||
937 | +GEN_VEXT_V_ENV(vfncvt_f_f_w_w, 4) | ||
938 | |||
939 | /* | ||
940 | *** Vector Reduction Operations | ||
941 | diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc | ||
942 | index XXXXXXX..XXXXXXX 100644 | ||
943 | --- a/target/riscv/insn_trans/trans_rvv.c.inc | ||
944 | +++ b/target/riscv/insn_trans/trans_rvv.c.inc | ||
945 | @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ | ||
946 | \ | ||
947 | data = FIELD_DP32(data, VDATA, VM, a->vm); \ | ||
948 | data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ | ||
949 | + data = FIELD_DP32(data, VDATA, VTA, s->vta); \ | ||
950 | + data = \ | ||
951 | + FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s);\ | ||
952 | tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ | ||
953 | vreg_ofs(s, a->rs1), \ | ||
954 | vreg_ofs(s, a->rs2), cpu_env, \ | ||
955 | @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ | ||
956 | gen_set_rm(s, RISCV_FRM_DYN); \ | ||
957 | data = FIELD_DP32(data, VDATA, VM, a->vm); \ | ||
958 | data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ | ||
959 | + data = FIELD_DP32(data, VDATA, VTA, s->vta); \ | ||
960 | + data = FIELD_DP32(data, VDATA, VTA_ALL_1S, \ | ||
961 | + s->cfg_vta_all_1s); \ | ||
962 | return opfvf_trans(a->rd, a->rs1, a->rs2, data, \ | ||
963 | fns[s->sew - 1], s); \ | ||
964 | } \ | ||
965 | @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ | ||
966 | \ | ||
967 | data = FIELD_DP32(data, VDATA, VM, a->vm); \ | ||
968 | data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ | ||
969 | + data = FIELD_DP32(data, VDATA, VTA, s->vta); \ | ||
970 | tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ | ||
971 | vreg_ofs(s, a->rs1), \ | ||
972 | vreg_ofs(s, a->rs2), cpu_env, \ | ||
973 | @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ | ||
974 | gen_set_rm(s, RISCV_FRM_DYN); \ | ||
975 | data = FIELD_DP32(data, VDATA, VM, a->vm); \ | ||
976 | data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ | ||
977 | + data = FIELD_DP32(data, VDATA, VTA, s->vta); \ | ||
978 | return opfvf_trans(a->rd, a->rs1, a->rs2, data, \ | ||
979 | fns[s->sew - 1], s); \ | ||
980 | } \ | ||
981 | @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ | ||
982 | \ | ||
983 | data = FIELD_DP32(data, VDATA, VM, a->vm); \ | ||
984 | data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ | ||
985 | + data = FIELD_DP32(data, VDATA, VTA, s->vta); \ | ||
986 | tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ | ||
987 | vreg_ofs(s, a->rs1), \ | ||
988 | vreg_ofs(s, a->rs2), cpu_env, \ | ||
989 | @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ | ||
990 | gen_set_rm(s, RISCV_FRM_DYN); \ | ||
991 | data = FIELD_DP32(data, VDATA, VM, a->vm); \ | ||
992 | data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ | ||
993 | + data = FIELD_DP32(data, VDATA, VTA, s->vta); \ | ||
994 | return opfvf_trans(a->rd, a->rs1, a->rs2, data, \ | ||
995 | fns[s->sew - 1], s); \ | ||
996 | } \ | ||
997 | @@ -XXX,XX +XXX,XX @@ static bool do_opfv(DisasContext *s, arg_rmr *a, | ||
998 | |||
999 | data = FIELD_DP32(data, VDATA, VM, a->vm); | ||
1000 | data = FIELD_DP32(data, VDATA, LMUL, s->lmul); | ||
1001 | + data = FIELD_DP32(data, VDATA, VTA, s->vta); | ||
1002 | tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), | ||
1003 | vreg_ofs(s, a->rs2), cpu_env, | ||
1004 | s->cfg_ptr->vlen / 8, | ||
1005 | @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ | ||
1006 | \ | ||
1007 | data = FIELD_DP32(data, VDATA, VM, a->vm); \ | ||
1008 | data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ | ||
1009 | + data = FIELD_DP32(data, VDATA, VTA, s->vta); \ | ||
1010 | tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ | ||
1011 | vreg_ofs(s, a->rs2), cpu_env, \ | ||
1012 | s->cfg_ptr->vlen / 8, \ | ||
1013 | @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ | ||
1014 | tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ | ||
1015 | \ | ||
1016 | data = FIELD_DP32(data, VDATA, VM, a->vm); \ | ||
1017 | + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ | ||
1018 | + data = FIELD_DP32(data, VDATA, VTA, s->vta); \ | ||
1019 | tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ | ||
1020 | vreg_ofs(s, a->rs2), cpu_env, \ | ||
1021 | s->cfg_ptr->vlen / 8, \ | ||
1022 | @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ | ||
1023 | \ | ||
1024 | data = FIELD_DP32(data, VDATA, VM, a->vm); \ | ||
1025 | data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ | ||
1026 | + data = FIELD_DP32(data, VDATA, VTA, s->vta); \ | ||
1027 | tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ | ||
1028 | vreg_ofs(s, a->rs2), cpu_env, \ | ||
1029 | s->cfg_ptr->vlen / 8, \ | ||
1030 | @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ | ||
1031 | tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ | ||
1032 | \ | ||
1033 | data = FIELD_DP32(data, VDATA, VM, a->vm); \ | ||
1034 | + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ | ||
1035 | + data = FIELD_DP32(data, VDATA, VTA, s->vta); \ | ||
1036 | tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ | ||
1037 | vreg_ofs(s, a->rs2), cpu_env, \ | ||
1038 | s->cfg_ptr->vlen / 8, \ | ||
1039 | -- | 291 | -- |
1040 | 2.36.1 | 292 | 2.41.0 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Rob Bradford <rbradford@rivosinc.com> | ||
1 | 2 | ||
3 | These are WARL fields - zero out the bits for unavailable counters and | ||
4 | special case the TM bit in mcountinhibit which is hardwired to zero. | ||
5 | This patch achieves this by modifying the value written so that any use | ||
6 | of the field will see the correctly masked bits. | ||
7 | |||
8 | Tested by modifying OpenSBI to write max value to these CSRs and upon | ||
9 | subsequent read the appropriate number of bits for number of PMUs is | ||
10 | enabled and the TM bit is zero in mcountinhibit. | ||
11 | |||
12 | Signed-off-by: Rob Bradford <rbradford@rivosinc.com> | ||
13 | Acked-by: Alistair Francis <alistair.francis@wdc.com> | ||
14 | Reviewed-by: Atish Patra <atishp@rivosinc.com> | ||
15 | Message-ID: <20230802124906.24197-1-rbradford@rivosinc.com> | ||
16 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | ||
17 | --- | ||
18 | target/riscv/csr.c | 11 +++++++++-- | ||
19 | 1 file changed, 9 insertions(+), 2 deletions(-) | ||
20 | |||
21 | diff --git a/target/riscv/csr.c b/target/riscv/csr.c | ||
22 | index XXXXXXX..XXXXXXX 100644 | ||
23 | --- a/target/riscv/csr.c | ||
24 | +++ b/target/riscv/csr.c | ||
25 | @@ -XXX,XX +XXX,XX @@ static RISCVException write_mcountinhibit(CPURISCVState *env, int csrno, | ||
26 | { | ||
27 | int cidx; | ||
28 | PMUCTRState *counter; | ||
29 | + RISCVCPU *cpu = env_archcpu(env); | ||
30 | |||
31 | - env->mcountinhibit = val; | ||
32 | + /* WARL register - disable unavailable counters; TM bit is always 0 */ | ||
33 | + env->mcountinhibit = | ||
34 | + val & (cpu->pmu_avail_ctrs | COUNTEREN_CY | COUNTEREN_IR); | ||
35 | |||
36 | /* Check if any other counter is also monitoring cycles/instructions */ | ||
37 | for (cidx = 0; cidx < RV_MAX_MHPMCOUNTERS; cidx++) { | ||
38 | @@ -XXX,XX +XXX,XX @@ static RISCVException read_mcounteren(CPURISCVState *env, int csrno, | ||
39 | static RISCVException write_mcounteren(CPURISCVState *env, int csrno, | ||
40 | target_ulong val) | ||
41 | { | ||
42 | - env->mcounteren = val; | ||
43 | + RISCVCPU *cpu = env_archcpu(env); | ||
44 | + | ||
45 | + /* WARL register - disable unavailable counters */ | ||
46 | + env->mcounteren = val & (cpu->pmu_avail_ctrs | COUNTEREN_CY | COUNTEREN_TM | | ||
47 | + COUNTEREN_IR); | ||
48 | return RISCV_EXCP_NONE; | ||
49 | } | ||
50 | |||
51 | -- | ||
52 | 2.41.0 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Jason Chien <jason.chien@sifive.com> | ||
1 | 2 | ||
3 | RVA23 Profiles states: | ||
4 | The RVA23 profiles are intended to be used for 64-bit application | ||
5 | processors that will run rich OS stacks from standard binary OS | ||
6 | distributions and with a substantial number of third-party binary user | ||
7 | applications that will be supported over a considerable length of time | ||
8 | in the field. | ||
9 | |||
10 | The chapter 4 of the unprivileged spec introduces the Zihintntl extension | ||
11 | and Zihintntl is a mandatory extension presented in RVA23 Profiles, whose | ||
12 | purpose is to enable application and operating system portability across | ||
13 | different implementations. Thus the DTS should contain the Zihintntl ISA | ||
14 | string in order to pass to software. | ||
15 | |||
16 | The unprivileged spec states: | ||
17 | Like any HINTs, these instructions may be freely ignored. Hence, although | ||
18 | they are described in terms of cache-based memory hierarchies, they do not | ||
19 | mandate the provision of caches. | ||
20 | |||
21 | These instructions are encoded with non-used opcode, e.g. ADD x0, x0, x2, | ||
22 | which QEMU already supports, and QEMU does not emulate cache. Therefore | ||
23 | these instructions can be considered as a no-op, and we only need to add | ||
24 | a new property for the Zihintntl extension. | ||
25 | |||
26 | Reviewed-by: Frank Chang <frank.chang@sifive.com> | ||
27 | Reviewed-by: Alistair Francis <alistair.francis@wdc.com> | ||
28 | Signed-off-by: Jason Chien <jason.chien@sifive.com> | ||
29 | Message-ID: <20230726074049.19505-2-jason.chien@sifive.com> | ||
30 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | ||
31 | --- | ||
32 | target/riscv/cpu_cfg.h | 1 + | ||
33 | target/riscv/cpu.c | 2 ++ | ||
34 | 2 files changed, 3 insertions(+) | ||
35 | |||
36 | diff --git a/target/riscv/cpu_cfg.h b/target/riscv/cpu_cfg.h | ||
37 | index XXXXXXX..XXXXXXX 100644 | ||
38 | --- a/target/riscv/cpu_cfg.h | ||
39 | +++ b/target/riscv/cpu_cfg.h | ||
40 | @@ -XXX,XX +XXX,XX @@ struct RISCVCPUConfig { | ||
41 | bool ext_icbom; | ||
42 | bool ext_icboz; | ||
43 | bool ext_zicond; | ||
44 | + bool ext_zihintntl; | ||
45 | bool ext_zihintpause; | ||
46 | bool ext_smstateen; | ||
47 | bool ext_sstc; | ||
48 | diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c | ||
49 | index XXXXXXX..XXXXXXX 100644 | ||
50 | --- a/target/riscv/cpu.c | ||
51 | +++ b/target/riscv/cpu.c | ||
52 | @@ -XXX,XX +XXX,XX @@ static const struct isa_ext_data isa_edata_arr[] = { | ||
53 | ISA_EXT_DATA_ENTRY(zicond, PRIV_VERSION_1_12_0, ext_zicond), | ||
54 | ISA_EXT_DATA_ENTRY(zicsr, PRIV_VERSION_1_10_0, ext_icsr), | ||
55 | ISA_EXT_DATA_ENTRY(zifencei, PRIV_VERSION_1_10_0, ext_ifencei), | ||
56 | + ISA_EXT_DATA_ENTRY(zihintntl, PRIV_VERSION_1_10_0, ext_zihintntl), | ||
57 | ISA_EXT_DATA_ENTRY(zihintpause, PRIV_VERSION_1_10_0, ext_zihintpause), | ||
58 | ISA_EXT_DATA_ENTRY(zmmul, PRIV_VERSION_1_12_0, ext_zmmul), | ||
59 | ISA_EXT_DATA_ENTRY(zawrs, PRIV_VERSION_1_12_0, ext_zawrs), | ||
60 | @@ -XXX,XX +XXX,XX @@ static Property riscv_cpu_extensions[] = { | ||
61 | DEFINE_PROP_BOOL("sscofpmf", RISCVCPU, cfg.ext_sscofpmf, false), | ||
62 | DEFINE_PROP_BOOL("Zifencei", RISCVCPU, cfg.ext_ifencei, true), | ||
63 | DEFINE_PROP_BOOL("Zicsr", RISCVCPU, cfg.ext_icsr, true), | ||
64 | + DEFINE_PROP_BOOL("Zihintntl", RISCVCPU, cfg.ext_zihintntl, true), | ||
65 | DEFINE_PROP_BOOL("Zihintpause", RISCVCPU, cfg.ext_zihintpause, true), | ||
66 | DEFINE_PROP_BOOL("Zawrs", RISCVCPU, cfg.ext_zawrs, true), | ||
67 | DEFINE_PROP_BOOL("Zfa", RISCVCPU, cfg.ext_zfa, true), | ||
68 | -- | ||
69 | 2.41.0 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: LIU Zhiwei <zhiwei_liu@linux.alibaba.com> | ||
1 | 2 | ||
3 | Commit a47842d ("riscv: Add support for the Zfa extension") implemented the zfa extension. | ||
4 | However, it has some typos for fleq.d and fltq.d. Both of them misused the fltq.s | ||
5 | helper function. | ||
6 | |||
7 | Fixes: a47842d ("riscv: Add support for the Zfa extension") | ||
8 | Signed-off-by: LIU Zhiwei <zhiwei_liu@linux.alibaba.com> | ||
9 | Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> | ||
10 | Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn> | ||
11 | Message-ID: <20230728003906.768-1-zhiwei_liu@linux.alibaba.com> | ||
12 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | ||
13 | --- | ||
14 | target/riscv/insn_trans/trans_rvzfa.c.inc | 4 ++-- | ||
15 | 1 file changed, 2 insertions(+), 2 deletions(-) | ||
16 | |||
17 | diff --git a/target/riscv/insn_trans/trans_rvzfa.c.inc b/target/riscv/insn_trans/trans_rvzfa.c.inc | ||
18 | index XXXXXXX..XXXXXXX 100644 | ||
19 | --- a/target/riscv/insn_trans/trans_rvzfa.c.inc | ||
20 | +++ b/target/riscv/insn_trans/trans_rvzfa.c.inc | ||
21 | @@ -XXX,XX +XXX,XX @@ bool trans_fleq_d(DisasContext *ctx, arg_fleq_d *a) | ||
22 | TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); | ||
23 | TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2); | ||
24 | |||
25 | - gen_helper_fltq_s(dest, cpu_env, src1, src2); | ||
26 | + gen_helper_fleq_d(dest, cpu_env, src1, src2); | ||
27 | gen_set_gpr(ctx, a->rd, dest); | ||
28 | return true; | ||
29 | } | ||
30 | @@ -XXX,XX +XXX,XX @@ bool trans_fltq_d(DisasContext *ctx, arg_fltq_d *a) | ||
31 | TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); | ||
32 | TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2); | ||
33 | |||
34 | - gen_helper_fltq_s(dest, cpu_env, src1, src2); | ||
35 | + gen_helper_fltq_d(dest, cpu_env, src1, src2); | ||
36 | gen_set_gpr(ctx, a->rd, dest); | ||
37 | return true; | ||
38 | } | ||
39 | -- | ||
40 | 2.41.0 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Jason Chien <jason.chien@sifive.com> | ||
1 | 2 | ||
3 | When writing the upper mtime, we should keep the original lower mtime | ||
4 | whose value is given by cpu_riscv_read_rtc() instead of | ||
5 | cpu_riscv_read_rtc_raw(). The same logic applies to writes to lower mtime. | ||
6 | |||
7 | Signed-off-by: Jason Chien <jason.chien@sifive.com> | ||
8 | Reviewed-by: Alistair Francis <alistair.francis@wdc.com> | ||
9 | Message-ID: <20230728082502.26439-1-jason.chien@sifive.com> | ||
10 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | ||
11 | --- | ||
12 | hw/intc/riscv_aclint.c | 5 +++-- | ||
13 | 1 file changed, 3 insertions(+), 2 deletions(-) | ||
14 | |||
15 | diff --git a/hw/intc/riscv_aclint.c b/hw/intc/riscv_aclint.c | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/hw/intc/riscv_aclint.c | ||
18 | +++ b/hw/intc/riscv_aclint.c | ||
19 | @@ -XXX,XX +XXX,XX @@ static void riscv_aclint_mtimer_write(void *opaque, hwaddr addr, | ||
20 | return; | ||
21 | } else if (addr == mtimer->time_base || addr == mtimer->time_base + 4) { | ||
22 | uint64_t rtc_r = cpu_riscv_read_rtc_raw(mtimer->timebase_freq); | ||
23 | + uint64_t rtc = cpu_riscv_read_rtc(mtimer); | ||
24 | |||
25 | if (addr == mtimer->time_base) { | ||
26 | if (size == 4) { | ||
27 | /* time_lo for RV32/RV64 */ | ||
28 | - mtimer->time_delta = ((rtc_r & ~0xFFFFFFFFULL) | value) - rtc_r; | ||
29 | + mtimer->time_delta = ((rtc & ~0xFFFFFFFFULL) | value) - rtc_r; | ||
30 | } else { | ||
31 | /* time for RV64 */ | ||
32 | mtimer->time_delta = value - rtc_r; | ||
33 | @@ -XXX,XX +XXX,XX @@ static void riscv_aclint_mtimer_write(void *opaque, hwaddr addr, | ||
34 | } else { | ||
35 | if (size == 4) { | ||
36 | /* time_hi for RV32/RV64 */ | ||
37 | - mtimer->time_delta = (value << 32 | (rtc_r & 0xFFFFFFFF)) - rtc_r; | ||
38 | + mtimer->time_delta = (value << 32 | (rtc & 0xFFFFFFFF)) - rtc_r; | ||
39 | } else { | ||
40 | qemu_log_mask(LOG_GUEST_ERROR, | ||
41 | "aclint-mtimer: invalid time_hi write: %08x", | ||
42 | -- | ||
43 | 2.41.0 | diff view generated by jsdifflib |
1 | From: Alistair Francis <alistair.francis@wdc.com> | 1 | From: Jason Chien <jason.chien@sifive.com> |
---|---|---|---|
2 | 2 | ||
3 | The variables whose values are given by cpu_riscv_read_rtc() should be named | ||
4 | "rtc". The variables whose value are given by cpu_riscv_read_rtc_raw() | ||
5 | should be named "rtc_r". | ||
6 | |||
7 | Signed-off-by: Jason Chien <jason.chien@sifive.com> | ||
8 | Reviewed-by: Alistair Francis <alistair.francis@wdc.com> | ||
9 | Message-ID: <20230728082502.26439-2-jason.chien@sifive.com> | ||
3 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | 10 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> |
4 | Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | ||
5 | Message-Id: <20220509091339.26016-1-alistair.francis@wdc.com> | ||
6 | --- | 11 | --- |
7 | MAINTAINERS | 1 + | 12 | hw/intc/riscv_aclint.c | 6 +++--- |
8 | 1 file changed, 1 insertion(+) | 13 | 1 file changed, 3 insertions(+), 3 deletions(-) |
9 | 14 | ||
10 | diff --git a/MAINTAINERS b/MAINTAINERS | 15 | diff --git a/hw/intc/riscv_aclint.c b/hw/intc/riscv_aclint.c |
11 | index XXXXXXX..XXXXXXX 100644 | 16 | index XXXXXXX..XXXXXXX 100644 |
12 | --- a/MAINTAINERS | 17 | --- a/hw/intc/riscv_aclint.c |
13 | +++ b/MAINTAINERS | 18 | +++ b/hw/intc/riscv_aclint.c |
14 | @@ -XXX,XX +XXX,XX @@ Generic Loader | 19 | @@ -XXX,XX +XXX,XX @@ static void riscv_aclint_mtimer_write_timecmp(RISCVAclintMTimerState *mtimer, |
15 | M: Alistair Francis <alistair@alistair23.me> | 20 | uint64_t next; |
16 | S: Maintained | 21 | uint64_t diff; |
17 | F: hw/core/generic-loader.c | 22 | |
18 | +F: hw/core/uboot_image.h | 23 | - uint64_t rtc_r = cpu_riscv_read_rtc(mtimer); |
19 | F: include/hw/core/generic-loader.h | 24 | + uint64_t rtc = cpu_riscv_read_rtc(mtimer); |
20 | F: docs/system/generic-loader.rst | 25 | |
26 | /* Compute the relative hartid w.r.t the socket */ | ||
27 | hartid = hartid - mtimer->hartid_base; | ||
28 | |||
29 | mtimer->timecmp[hartid] = value; | ||
30 | - if (mtimer->timecmp[hartid] <= rtc_r) { | ||
31 | + if (mtimer->timecmp[hartid] <= rtc) { | ||
32 | /* | ||
33 | * If we're setting an MTIMECMP value in the "past", | ||
34 | * immediately raise the timer interrupt | ||
35 | @@ -XXX,XX +XXX,XX @@ static void riscv_aclint_mtimer_write_timecmp(RISCVAclintMTimerState *mtimer, | ||
36 | |||
37 | /* otherwise, set up the future timer interrupt */ | ||
38 | qemu_irq_lower(mtimer->timer_irqs[hartid]); | ||
39 | - diff = mtimer->timecmp[hartid] - rtc_r; | ||
40 | + diff = mtimer->timecmp[hartid] - rtc; | ||
41 | /* back to ns (note args switched in muldiv64) */ | ||
42 | uint64_t ns_diff = muldiv64(diff, NANOSECONDS_PER_SECOND, timebase_freq); | ||
21 | 43 | ||
22 | -- | 44 | -- |
23 | 2.36.1 | 45 | 2.41.0 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: LIU Zhiwei <zhiwei_liu@linux.alibaba.com> | ||
1 | 2 | ||
3 | We should not use types dependend on host arch for target_ucontext. | ||
4 | This bug is found when run rv32 applications. | ||
5 | |||
6 | Signed-off-by: LIU Zhiwei <zhiwei_liu@linux.alibaba.com> | ||
7 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
8 | Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> | ||
9 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
10 | Message-ID: <20230811055438.1945-1-zhiwei_liu@linux.alibaba.com> | ||
11 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | ||
12 | --- | ||
13 | linux-user/riscv/signal.c | 4 ++-- | ||
14 | 1 file changed, 2 insertions(+), 2 deletions(-) | ||
15 | |||
16 | diff --git a/linux-user/riscv/signal.c b/linux-user/riscv/signal.c | ||
17 | index XXXXXXX..XXXXXXX 100644 | ||
18 | --- a/linux-user/riscv/signal.c | ||
19 | +++ b/linux-user/riscv/signal.c | ||
20 | @@ -XXX,XX +XXX,XX @@ struct target_sigcontext { | ||
21 | }; /* cf. riscv-linux:arch/riscv/include/uapi/asm/ptrace.h */ | ||
22 | |||
23 | struct target_ucontext { | ||
24 | - unsigned long uc_flags; | ||
25 | - struct target_ucontext *uc_link; | ||
26 | + abi_ulong uc_flags; | ||
27 | + abi_ptr uc_link; | ||
28 | target_stack_t uc_stack; | ||
29 | target_sigset_t uc_sigmask; | ||
30 | uint8_t __unused[1024 / 8 - sizeof(target_sigset_t)]; | ||
31 | -- | ||
32 | 2.41.0 | ||
33 | |||
34 | diff view generated by jsdifflib |
1 | From: Jamie Iles <jamie@nuviainc.com> | 1 | From: Yong-Xuan Wang <yongxuan.wang@sifive.com> |
---|---|---|---|
2 | 2 | ||
3 | Various loader functions return an int which limits images to 2GB which | 3 | In this patch, we create the APLIC and IMSIC FDT helper functions and |
4 | is fine for things like a BIOS/kernel image, but if we want to be able | 4 | remove M mode AIA devices when using KVM acceleration. |
5 | to load memory images or large ramdisks then any file over 2GB would | ||
6 | silently fail to load. | ||
7 | 5 | ||
8 | Cc: Luc Michel <lmichel@kalray.eu> | 6 | Signed-off-by: Yong-Xuan Wang <yongxuan.wang@sifive.com> |
9 | Signed-off-by: Jamie Iles <jamie@nuviainc.com> | 7 | Reviewed-by: Jim Shu <jim.shu@sifive.com> |
10 | Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com> | 8 | Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> |
11 | Reviewed-by: Luc Michel <lmichel@kalray.eu> | 9 | Reviewed-by: Andrew Jones <ajones@ventanamicro.com> |
12 | Reviewed-by: Alistair Francis <alistair.francis@wdc.com> | 10 | Message-ID: <20230727102439.22554-2-yongxuan.wang@sifive.com> |
13 | Message-Id: <20211111141141.3295094-2-jamie@nuviainc.com> | ||
14 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | 11 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> |
15 | --- | 12 | --- |
16 | include/hw/loader.h | 55 +++++++++++++-------------- | 13 | hw/riscv/virt.c | 290 +++++++++++++++++++++++------------------------- |
17 | hw/arm/armv7m.c | 2 +- | 14 | 1 file changed, 137 insertions(+), 153 deletions(-) |
18 | hw/arm/boot.c | 8 ++-- | ||
19 | hw/core/generic-loader.c | 2 +- | ||
20 | hw/core/loader.c | 81 +++++++++++++++++++++------------------- | ||
21 | hw/i386/x86.c | 2 +- | ||
22 | hw/riscv/boot.c | 5 ++- | ||
23 | 7 files changed, 80 insertions(+), 75 deletions(-) | ||
24 | 15 | ||
25 | diff --git a/include/hw/loader.h b/include/hw/loader.h | 16 | diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c |
26 | index XXXXXXX..XXXXXXX 100644 | 17 | index XXXXXXX..XXXXXXX 100644 |
27 | --- a/include/hw/loader.h | 18 | --- a/hw/riscv/virt.c |
28 | +++ b/include/hw/loader.h | 19 | +++ b/hw/riscv/virt.c |
29 | @@ -XXX,XX +XXX,XX @@ ssize_t load_image_size(const char *filename, void *addr, size_t size); | 20 | @@ -XXX,XX +XXX,XX @@ static uint32_t imsic_num_bits(uint32_t count) |
30 | * | ||
31 | * Returns the size of the loaded image on success, -1 otherwise. | ||
32 | */ | ||
33 | -int load_image_targphys_as(const char *filename, | ||
34 | - hwaddr addr, uint64_t max_sz, AddressSpace *as); | ||
35 | +ssize_t load_image_targphys_as(const char *filename, | ||
36 | + hwaddr addr, uint64_t max_sz, AddressSpace *as); | ||
37 | |||
38 | /**load_targphys_hex_as: | ||
39 | * @filename: Path to the .hex file | ||
40 | @@ -XXX,XX +XXX,XX @@ int load_image_targphys_as(const char *filename, | ||
41 | * | ||
42 | * Returns the size of the loaded .hex file on success, -1 otherwise. | ||
43 | */ | ||
44 | -int load_targphys_hex_as(const char *filename, hwaddr *entry, AddressSpace *as); | ||
45 | +ssize_t load_targphys_hex_as(const char *filename, hwaddr *entry, | ||
46 | + AddressSpace *as); | ||
47 | |||
48 | /** load_image_targphys: | ||
49 | * Same as load_image_targphys_as(), but doesn't allow the caller to specify | ||
50 | * an AddressSpace. | ||
51 | */ | ||
52 | -int load_image_targphys(const char *filename, hwaddr, | ||
53 | - uint64_t max_sz); | ||
54 | +ssize_t load_image_targphys(const char *filename, hwaddr, | ||
55 | + uint64_t max_sz); | ||
56 | |||
57 | /** | ||
58 | * load_image_mr: load an image into a memory region | ||
59 | @@ -XXX,XX +XXX,XX @@ int load_image_targphys(const char *filename, hwaddr, | ||
60 | * If the file is larger than the memory region's size the call will fail. | ||
61 | * Returns -1 on failure, or the size of the file. | ||
62 | */ | ||
63 | -int load_image_mr(const char *filename, MemoryRegion *mr); | ||
64 | +ssize_t load_image_mr(const char *filename, MemoryRegion *mr); | ||
65 | |||
66 | /* This is the limit on the maximum uncompressed image size that | ||
67 | * load_image_gzipped_buffer() and load_image_gzipped() will read. It prevents | ||
68 | @@ -XXX,XX +XXX,XX @@ int load_image_mr(const char *filename, MemoryRegion *mr); | ||
69 | */ | ||
70 | #define LOAD_IMAGE_MAX_GUNZIP_BYTES (256 << 20) | ||
71 | |||
72 | -int load_image_gzipped_buffer(const char *filename, uint64_t max_sz, | ||
73 | - uint8_t **buffer); | ||
74 | -int load_image_gzipped(const char *filename, hwaddr addr, uint64_t max_sz); | ||
75 | +ssize_t load_image_gzipped_buffer(const char *filename, uint64_t max_sz, | ||
76 | + uint8_t **buffer); | ||
77 | +ssize_t load_image_gzipped(const char *filename, hwaddr addr, uint64_t max_sz); | ||
78 | |||
79 | #define ELF_LOAD_FAILED -1 | ||
80 | #define ELF_LOAD_NOT_ELF -2 | ||
81 | @@ -XXX,XX +XXX,XX @@ ssize_t load_elf(const char *filename, | ||
82 | */ | ||
83 | void load_elf_hdr(const char *filename, void *hdr, bool *is64, Error **errp); | ||
84 | |||
85 | -int load_aout(const char *filename, hwaddr addr, int max_sz, | ||
86 | - int bswap_needed, hwaddr target_page_size); | ||
87 | +ssize_t load_aout(const char *filename, hwaddr addr, int max_sz, | ||
88 | + int bswap_needed, hwaddr target_page_size); | ||
89 | |||
90 | #define LOAD_UIMAGE_LOADADDR_INVALID (-1) | ||
91 | |||
92 | @@ -XXX,XX +XXX,XX @@ int load_aout(const char *filename, hwaddr addr, int max_sz, | ||
93 | * | ||
94 | * Returns the size of the loaded image on success, -1 otherwise. | ||
95 | */ | ||
96 | -int load_uimage_as(const char *filename, hwaddr *ep, | ||
97 | - hwaddr *loadaddr, int *is_linux, | ||
98 | - uint64_t (*translate_fn)(void *, uint64_t), | ||
99 | - void *translate_opaque, AddressSpace *as); | ||
100 | +ssize_t load_uimage_as(const char *filename, hwaddr *ep, | ||
101 | + hwaddr *loadaddr, int *is_linux, | ||
102 | + uint64_t (*translate_fn)(void *, uint64_t), | ||
103 | + void *translate_opaque, AddressSpace *as); | ||
104 | |||
105 | /** load_uimage: | ||
106 | * Same as load_uimage_as(), but doesn't allow the caller to specify an | ||
107 | * AddressSpace. | ||
108 | */ | ||
109 | -int load_uimage(const char *filename, hwaddr *ep, | ||
110 | - hwaddr *loadaddr, int *is_linux, | ||
111 | - uint64_t (*translate_fn)(void *, uint64_t), | ||
112 | - void *translate_opaque); | ||
113 | +ssize_t load_uimage(const char *filename, hwaddr *ep, | ||
114 | + hwaddr *loadaddr, int *is_linux, | ||
115 | + uint64_t (*translate_fn)(void *, uint64_t), | ||
116 | + void *translate_opaque); | ||
117 | |||
118 | /** | ||
119 | * load_ramdisk_as: | ||
120 | @@ -XXX,XX +XXX,XX @@ int load_uimage(const char *filename, hwaddr *ep, | ||
121 | * | ||
122 | * Returns the size of the loaded image on success, -1 otherwise. | ||
123 | */ | ||
124 | -int load_ramdisk_as(const char *filename, hwaddr addr, uint64_t max_sz, | ||
125 | - AddressSpace *as); | ||
126 | +ssize_t load_ramdisk_as(const char *filename, hwaddr addr, uint64_t max_sz, | ||
127 | + AddressSpace *as); | ||
128 | |||
129 | /** | ||
130 | * load_ramdisk: | ||
131 | * Same as load_ramdisk_as(), but doesn't allow the caller to specify | ||
132 | * an AddressSpace. | ||
133 | */ | ||
134 | -int load_ramdisk(const char *filename, hwaddr addr, uint64_t max_sz); | ||
135 | +ssize_t load_ramdisk(const char *filename, hwaddr addr, uint64_t max_sz); | ||
136 | |||
137 | ssize_t gunzip(void *dst, size_t dstlen, uint8_t *src, size_t srclen); | ||
138 | |||
139 | @@ -XXX,XX +XXX,XX @@ void pstrcpy_targphys(const char *name, | ||
140 | extern bool option_rom_has_mr; | ||
141 | extern bool rom_file_has_mr; | ||
142 | |||
143 | -int rom_add_file(const char *file, const char *fw_dir, | ||
144 | - hwaddr addr, int32_t bootindex, | ||
145 | - bool option_rom, MemoryRegion *mr, AddressSpace *as); | ||
146 | +ssize_t rom_add_file(const char *file, const char *fw_dir, | ||
147 | + hwaddr addr, int32_t bootindex, | ||
148 | + bool option_rom, MemoryRegion *mr, AddressSpace *as); | ||
149 | MemoryRegion *rom_add_blob(const char *name, const void *blob, size_t len, | ||
150 | size_t max_len, hwaddr addr, | ||
151 | const char *fw_file_name, | ||
152 | @@ -XXX,XX +XXX,XX @@ void hmp_info_roms(Monitor *mon, const QDict *qdict); | ||
153 | #define rom_add_blob_fixed_as(_f, _b, _l, _a, _as) \ | ||
154 | rom_add_blob(_f, _b, _l, _l, _a, NULL, NULL, NULL, _as, true) | ||
155 | |||
156 | -int rom_add_vga(const char *file); | ||
157 | -int rom_add_option(const char *file, int32_t bootindex); | ||
158 | +ssize_t rom_add_vga(const char *file); | ||
159 | +ssize_t rom_add_option(const char *file, int32_t bootindex); | ||
160 | |||
161 | /* This is the usual maximum in uboot, so if a uImage overflows this, it would | ||
162 | * overflow on real hardware too. */ | ||
163 | diff --git a/hw/arm/armv7m.c b/hw/arm/armv7m.c | ||
164 | index XXXXXXX..XXXXXXX 100644 | ||
165 | --- a/hw/arm/armv7m.c | ||
166 | +++ b/hw/arm/armv7m.c | ||
167 | @@ -XXX,XX +XXX,XX @@ static void armv7m_reset(void *opaque) | ||
168 | |||
169 | void armv7m_load_kernel(ARMCPU *cpu, const char *kernel_filename, int mem_size) | ||
170 | { | ||
171 | - int image_size; | ||
172 | + ssize_t image_size; | ||
173 | uint64_t entry; | ||
174 | int big_endian; | ||
175 | AddressSpace *as; | ||
176 | diff --git a/hw/arm/boot.c b/hw/arm/boot.c | ||
177 | index XXXXXXX..XXXXXXX 100644 | ||
178 | --- a/hw/arm/boot.c | ||
179 | +++ b/hw/arm/boot.c | ||
180 | @@ -XXX,XX +XXX,XX @@ static int do_arm_linux_init(Object *obj, void *opaque) | ||
181 | return 0; | ||
182 | } | ||
183 | |||
184 | -static int64_t arm_load_elf(struct arm_boot_info *info, uint64_t *pentry, | ||
185 | +static ssize_t arm_load_elf(struct arm_boot_info *info, uint64_t *pentry, | ||
186 | uint64_t *lowaddr, uint64_t *highaddr, | ||
187 | int elf_machine, AddressSpace *as) | ||
188 | { | ||
189 | @@ -XXX,XX +XXX,XX @@ static int64_t arm_load_elf(struct arm_boot_info *info, uint64_t *pentry, | ||
190 | } elf_header; | ||
191 | int data_swab = 0; | ||
192 | bool big_endian; | ||
193 | - int64_t ret = -1; | ||
194 | + ssize_t ret = -1; | ||
195 | Error *err = NULL; | ||
196 | |||
197 | |||
198 | @@ -XXX,XX +XXX,XX @@ static void arm_setup_direct_kernel_boot(ARMCPU *cpu, | ||
199 | /* Set up for a direct boot of a kernel image file. */ | ||
200 | CPUState *cs; | ||
201 | AddressSpace *as = arm_boot_address_space(cpu, info); | ||
202 | - int kernel_size; | ||
203 | + ssize_t kernel_size; | ||
204 | int initrd_size; | ||
205 | int is_linux = 0; | ||
206 | uint64_t elf_entry; | ||
207 | @@ -XXX,XX +XXX,XX @@ static void arm_setup_direct_kernel_boot(ARMCPU *cpu, | ||
208 | |||
209 | if (kernel_size > info->ram_size) { | ||
210 | error_report("kernel '%s' is too large to fit in RAM " | ||
211 | - "(kernel size %d, RAM size %" PRId64 ")", | ||
212 | + "(kernel size %zd, RAM size %" PRId64 ")", | ||
213 | info->kernel_filename, kernel_size, info->ram_size); | ||
214 | exit(1); | ||
215 | } | ||
216 | diff --git a/hw/core/generic-loader.c b/hw/core/generic-loader.c | ||
217 | index XXXXXXX..XXXXXXX 100644 | ||
218 | --- a/hw/core/generic-loader.c | ||
219 | +++ b/hw/core/generic-loader.c | ||
220 | @@ -XXX,XX +XXX,XX @@ static void generic_loader_realize(DeviceState *dev, Error **errp) | ||
221 | GenericLoaderState *s = GENERIC_LOADER(dev); | ||
222 | hwaddr entry; | ||
223 | int big_endian; | ||
224 | - int size = 0; | ||
225 | + ssize_t size = 0; | ||
226 | |||
227 | s->set_pc = false; | ||
228 | |||
229 | diff --git a/hw/core/loader.c b/hw/core/loader.c | ||
230 | index XXXXXXX..XXXXXXX 100644 | ||
231 | --- a/hw/core/loader.c | ||
232 | +++ b/hw/core/loader.c | ||
233 | @@ -XXX,XX +XXX,XX @@ ssize_t read_targphys(const char *name, | ||
234 | return did; | ||
235 | } | ||
236 | |||
237 | -int load_image_targphys(const char *filename, | ||
238 | - hwaddr addr, uint64_t max_sz) | ||
239 | +ssize_t load_image_targphys(const char *filename, | ||
240 | + hwaddr addr, uint64_t max_sz) | ||
241 | { | ||
242 | return load_image_targphys_as(filename, addr, max_sz, NULL); | ||
243 | } | ||
244 | |||
245 | /* return the size or -1 if error */ | ||
246 | -int load_image_targphys_as(const char *filename, | ||
247 | - hwaddr addr, uint64_t max_sz, AddressSpace *as) | ||
248 | +ssize_t load_image_targphys_as(const char *filename, | ||
249 | + hwaddr addr, uint64_t max_sz, AddressSpace *as) | ||
250 | { | ||
251 | - int size; | ||
252 | + ssize_t size; | ||
253 | |||
254 | size = get_image_size(filename); | ||
255 | if (size < 0 || size > max_sz) { | ||
256 | @@ -XXX,XX +XXX,XX @@ int load_image_targphys_as(const char *filename, | ||
257 | return size; | ||
258 | } | ||
259 | |||
260 | -int load_image_mr(const char *filename, MemoryRegion *mr) | ||
261 | +ssize_t load_image_mr(const char *filename, MemoryRegion *mr) | ||
262 | { | ||
263 | - int size; | ||
264 | + ssize_t size; | ||
265 | |||
266 | if (!memory_access_is_direct(mr, false)) { | ||
267 | /* Can only load an image into RAM or ROM */ | ||
268 | @@ -XXX,XX +XXX,XX @@ static void bswap_ahdr(struct exec *e) | ||
269 | : (_N_SEGMENT_ROUND (_N_TXTENDADDR(x, target_page_size), target_page_size))) | ||
270 | |||
271 | |||
272 | -int load_aout(const char *filename, hwaddr addr, int max_sz, | ||
273 | - int bswap_needed, hwaddr target_page_size) | ||
274 | +ssize_t load_aout(const char *filename, hwaddr addr, int max_sz, | ||
275 | + int bswap_needed, hwaddr target_page_size) | ||
276 | { | ||
277 | int fd; | ||
278 | ssize_t size, ret; | ||
279 | @@ -XXX,XX +XXX,XX @@ toosmall: | ||
280 | } | ||
281 | |||
282 | /* Load a U-Boot image. */ | ||
283 | -static int load_uboot_image(const char *filename, hwaddr *ep, hwaddr *loadaddr, | ||
284 | - int *is_linux, uint8_t image_type, | ||
285 | - uint64_t (*translate_fn)(void *, uint64_t), | ||
286 | - void *translate_opaque, AddressSpace *as) | ||
287 | +static ssize_t load_uboot_image(const char *filename, hwaddr *ep, | ||
288 | + hwaddr *loadaddr, int *is_linux, | ||
289 | + uint8_t image_type, | ||
290 | + uint64_t (*translate_fn)(void *, uint64_t), | ||
291 | + void *translate_opaque, AddressSpace *as) | ||
292 | { | ||
293 | int fd; | ||
294 | - int size; | ||
295 | + ssize_t size; | ||
296 | hwaddr address; | ||
297 | uboot_image_header_t h; | ||
298 | uboot_image_header_t *hdr = &h; | ||
299 | @@ -XXX,XX +XXX,XX @@ out: | ||
300 | return ret; | 21 | return ret; |
301 | } | 22 | } |
302 | 23 | ||
303 | -int load_uimage(const char *filename, hwaddr *ep, hwaddr *loadaddr, | 24 | -static void create_fdt_imsic(RISCVVirtState *s, const MemMapEntry *memmap, |
304 | - int *is_linux, | 25 | - uint32_t *phandle, uint32_t *intc_phandles, |
305 | - uint64_t (*translate_fn)(void *, uint64_t), | 26 | - uint32_t *msi_m_phandle, uint32_t *msi_s_phandle) |
306 | - void *translate_opaque) | 27 | +static void create_fdt_one_imsic(RISCVVirtState *s, hwaddr base_addr, |
307 | +ssize_t load_uimage(const char *filename, hwaddr *ep, hwaddr *loadaddr, | 28 | + uint32_t *intc_phandles, uint32_t msi_phandle, |
308 | + int *is_linux, | 29 | + bool m_mode, uint32_t imsic_guest_bits) |
309 | + uint64_t (*translate_fn)(void *, uint64_t), | ||
310 | + void *translate_opaque) | ||
311 | { | 30 | { |
312 | return load_uboot_image(filename, ep, loadaddr, is_linux, IH_TYPE_KERNEL, | 31 | int cpu, socket; |
313 | translate_fn, translate_opaque, NULL); | 32 | char *imsic_name; |
33 | MachineState *ms = MACHINE(s); | ||
34 | int socket_count = riscv_socket_count(ms); | ||
35 | - uint32_t imsic_max_hart_per_socket, imsic_guest_bits; | ||
36 | + uint32_t imsic_max_hart_per_socket; | ||
37 | uint32_t *imsic_cells, *imsic_regs, imsic_addr, imsic_size; | ||
38 | |||
39 | - *msi_m_phandle = (*phandle)++; | ||
40 | - *msi_s_phandle = (*phandle)++; | ||
41 | imsic_cells = g_new0(uint32_t, ms->smp.cpus * 2); | ||
42 | imsic_regs = g_new0(uint32_t, socket_count * 4); | ||
43 | |||
44 | - /* M-level IMSIC node */ | ||
45 | for (cpu = 0; cpu < ms->smp.cpus; cpu++) { | ||
46 | imsic_cells[cpu * 2 + 0] = cpu_to_be32(intc_phandles[cpu]); | ||
47 | - imsic_cells[cpu * 2 + 1] = cpu_to_be32(IRQ_M_EXT); | ||
48 | + imsic_cells[cpu * 2 + 1] = cpu_to_be32(m_mode ? IRQ_M_EXT : IRQ_S_EXT); | ||
49 | } | ||
50 | - imsic_max_hart_per_socket = 0; | ||
51 | - for (socket = 0; socket < socket_count; socket++) { | ||
52 | - imsic_addr = memmap[VIRT_IMSIC_M].base + | ||
53 | - socket * VIRT_IMSIC_GROUP_MAX_SIZE; | ||
54 | - imsic_size = IMSIC_HART_SIZE(0) * s->soc[socket].num_harts; | ||
55 | - imsic_regs[socket * 4 + 0] = 0; | ||
56 | - imsic_regs[socket * 4 + 1] = cpu_to_be32(imsic_addr); | ||
57 | - imsic_regs[socket * 4 + 2] = 0; | ||
58 | - imsic_regs[socket * 4 + 3] = cpu_to_be32(imsic_size); | ||
59 | - if (imsic_max_hart_per_socket < s->soc[socket].num_harts) { | ||
60 | - imsic_max_hart_per_socket = s->soc[socket].num_harts; | ||
61 | - } | ||
62 | - } | ||
63 | - imsic_name = g_strdup_printf("/soc/imsics@%lx", | ||
64 | - (unsigned long)memmap[VIRT_IMSIC_M].base); | ||
65 | - qemu_fdt_add_subnode(ms->fdt, imsic_name); | ||
66 | - qemu_fdt_setprop_string(ms->fdt, imsic_name, "compatible", | ||
67 | - "riscv,imsics"); | ||
68 | - qemu_fdt_setprop_cell(ms->fdt, imsic_name, "#interrupt-cells", | ||
69 | - FDT_IMSIC_INT_CELLS); | ||
70 | - qemu_fdt_setprop(ms->fdt, imsic_name, "interrupt-controller", | ||
71 | - NULL, 0); | ||
72 | - qemu_fdt_setprop(ms->fdt, imsic_name, "msi-controller", | ||
73 | - NULL, 0); | ||
74 | - qemu_fdt_setprop(ms->fdt, imsic_name, "interrupts-extended", | ||
75 | - imsic_cells, ms->smp.cpus * sizeof(uint32_t) * 2); | ||
76 | - qemu_fdt_setprop(ms->fdt, imsic_name, "reg", imsic_regs, | ||
77 | - socket_count * sizeof(uint32_t) * 4); | ||
78 | - qemu_fdt_setprop_cell(ms->fdt, imsic_name, "riscv,num-ids", | ||
79 | - VIRT_IRQCHIP_NUM_MSIS); | ||
80 | - if (socket_count > 1) { | ||
81 | - qemu_fdt_setprop_cell(ms->fdt, imsic_name, "riscv,hart-index-bits", | ||
82 | - imsic_num_bits(imsic_max_hart_per_socket)); | ||
83 | - qemu_fdt_setprop_cell(ms->fdt, imsic_name, "riscv,group-index-bits", | ||
84 | - imsic_num_bits(socket_count)); | ||
85 | - qemu_fdt_setprop_cell(ms->fdt, imsic_name, "riscv,group-index-shift", | ||
86 | - IMSIC_MMIO_GROUP_MIN_SHIFT); | ||
87 | - } | ||
88 | - qemu_fdt_setprop_cell(ms->fdt, imsic_name, "phandle", *msi_m_phandle); | ||
89 | - | ||
90 | - g_free(imsic_name); | ||
91 | |||
92 | - /* S-level IMSIC node */ | ||
93 | - for (cpu = 0; cpu < ms->smp.cpus; cpu++) { | ||
94 | - imsic_cells[cpu * 2 + 0] = cpu_to_be32(intc_phandles[cpu]); | ||
95 | - imsic_cells[cpu * 2 + 1] = cpu_to_be32(IRQ_S_EXT); | ||
96 | - } | ||
97 | - imsic_guest_bits = imsic_num_bits(s->aia_guests + 1); | ||
98 | imsic_max_hart_per_socket = 0; | ||
99 | for (socket = 0; socket < socket_count; socket++) { | ||
100 | - imsic_addr = memmap[VIRT_IMSIC_S].base + | ||
101 | - socket * VIRT_IMSIC_GROUP_MAX_SIZE; | ||
102 | + imsic_addr = base_addr + socket * VIRT_IMSIC_GROUP_MAX_SIZE; | ||
103 | imsic_size = IMSIC_HART_SIZE(imsic_guest_bits) * | ||
104 | s->soc[socket].num_harts; | ||
105 | imsic_regs[socket * 4 + 0] = 0; | ||
106 | @@ -XXX,XX +XXX,XX @@ static void create_fdt_imsic(RISCVVirtState *s, const MemMapEntry *memmap, | ||
107 | imsic_max_hart_per_socket = s->soc[socket].num_harts; | ||
108 | } | ||
109 | } | ||
110 | - imsic_name = g_strdup_printf("/soc/imsics@%lx", | ||
111 | - (unsigned long)memmap[VIRT_IMSIC_S].base); | ||
112 | + | ||
113 | + imsic_name = g_strdup_printf("/soc/imsics@%lx", (unsigned long)base_addr); | ||
114 | qemu_fdt_add_subnode(ms->fdt, imsic_name); | ||
115 | - qemu_fdt_setprop_string(ms->fdt, imsic_name, "compatible", | ||
116 | - "riscv,imsics"); | ||
117 | + qemu_fdt_setprop_string(ms->fdt, imsic_name, "compatible", "riscv,imsics"); | ||
118 | qemu_fdt_setprop_cell(ms->fdt, imsic_name, "#interrupt-cells", | ||
119 | - FDT_IMSIC_INT_CELLS); | ||
120 | - qemu_fdt_setprop(ms->fdt, imsic_name, "interrupt-controller", | ||
121 | - NULL, 0); | ||
122 | - qemu_fdt_setprop(ms->fdt, imsic_name, "msi-controller", | ||
123 | - NULL, 0); | ||
124 | + FDT_IMSIC_INT_CELLS); | ||
125 | + qemu_fdt_setprop(ms->fdt, imsic_name, "interrupt-controller", NULL, 0); | ||
126 | + qemu_fdt_setprop(ms->fdt, imsic_name, "msi-controller", NULL, 0); | ||
127 | qemu_fdt_setprop(ms->fdt, imsic_name, "interrupts-extended", | ||
128 | - imsic_cells, ms->smp.cpus * sizeof(uint32_t) * 2); | ||
129 | + imsic_cells, ms->smp.cpus * sizeof(uint32_t) * 2); | ||
130 | qemu_fdt_setprop(ms->fdt, imsic_name, "reg", imsic_regs, | ||
131 | - socket_count * sizeof(uint32_t) * 4); | ||
132 | + socket_count * sizeof(uint32_t) * 4); | ||
133 | qemu_fdt_setprop_cell(ms->fdt, imsic_name, "riscv,num-ids", | ||
134 | - VIRT_IRQCHIP_NUM_MSIS); | ||
135 | + VIRT_IRQCHIP_NUM_MSIS); | ||
136 | + | ||
137 | if (imsic_guest_bits) { | ||
138 | qemu_fdt_setprop_cell(ms->fdt, imsic_name, "riscv,guest-index-bits", | ||
139 | - imsic_guest_bits); | ||
140 | + imsic_guest_bits); | ||
141 | } | ||
142 | + | ||
143 | if (socket_count > 1) { | ||
144 | qemu_fdt_setprop_cell(ms->fdt, imsic_name, "riscv,hart-index-bits", | ||
145 | - imsic_num_bits(imsic_max_hart_per_socket)); | ||
146 | + imsic_num_bits(imsic_max_hart_per_socket)); | ||
147 | qemu_fdt_setprop_cell(ms->fdt, imsic_name, "riscv,group-index-bits", | ||
148 | - imsic_num_bits(socket_count)); | ||
149 | + imsic_num_bits(socket_count)); | ||
150 | qemu_fdt_setprop_cell(ms->fdt, imsic_name, "riscv,group-index-shift", | ||
151 | - IMSIC_MMIO_GROUP_MIN_SHIFT); | ||
152 | + IMSIC_MMIO_GROUP_MIN_SHIFT); | ||
153 | } | ||
154 | - qemu_fdt_setprop_cell(ms->fdt, imsic_name, "phandle", *msi_s_phandle); | ||
155 | - g_free(imsic_name); | ||
156 | + qemu_fdt_setprop_cell(ms->fdt, imsic_name, "phandle", msi_phandle); | ||
157 | |||
158 | + g_free(imsic_name); | ||
159 | g_free(imsic_regs); | ||
160 | g_free(imsic_cells); | ||
314 | } | 161 | } |
315 | 162 | ||
316 | -int load_uimage_as(const char *filename, hwaddr *ep, hwaddr *loadaddr, | 163 | -static void create_fdt_socket_aplic(RISCVVirtState *s, |
317 | - int *is_linux, | 164 | - const MemMapEntry *memmap, int socket, |
318 | - uint64_t (*translate_fn)(void *, uint64_t), | 165 | - uint32_t msi_m_phandle, |
319 | - void *translate_opaque, AddressSpace *as) | 166 | - uint32_t msi_s_phandle, |
320 | +ssize_t load_uimage_as(const char *filename, hwaddr *ep, hwaddr *loadaddr, | 167 | - uint32_t *phandle, |
321 | + int *is_linux, | 168 | - uint32_t *intc_phandles, |
322 | + uint64_t (*translate_fn)(void *, uint64_t), | 169 | - uint32_t *aplic_phandles) |
323 | + void *translate_opaque, AddressSpace *as) | 170 | +static void create_fdt_imsic(RISCVVirtState *s, const MemMapEntry *memmap, |
171 | + uint32_t *phandle, uint32_t *intc_phandles, | ||
172 | + uint32_t *msi_m_phandle, uint32_t *msi_s_phandle) | ||
173 | +{ | ||
174 | + *msi_m_phandle = (*phandle)++; | ||
175 | + *msi_s_phandle = (*phandle)++; | ||
176 | + | ||
177 | + if (!kvm_enabled()) { | ||
178 | + /* M-level IMSIC node */ | ||
179 | + create_fdt_one_imsic(s, memmap[VIRT_IMSIC_M].base, intc_phandles, | ||
180 | + *msi_m_phandle, true, 0); | ||
181 | + } | ||
182 | + | ||
183 | + /* S-level IMSIC node */ | ||
184 | + create_fdt_one_imsic(s, memmap[VIRT_IMSIC_S].base, intc_phandles, | ||
185 | + *msi_s_phandle, false, | ||
186 | + imsic_num_bits(s->aia_guests + 1)); | ||
187 | + | ||
188 | +} | ||
189 | + | ||
190 | +static void create_fdt_one_aplic(RISCVVirtState *s, int socket, | ||
191 | + unsigned long aplic_addr, uint32_t aplic_size, | ||
192 | + uint32_t msi_phandle, | ||
193 | + uint32_t *intc_phandles, | ||
194 | + uint32_t aplic_phandle, | ||
195 | + uint32_t aplic_child_phandle, | ||
196 | + bool m_mode) | ||
324 | { | 197 | { |
325 | return load_uboot_image(filename, ep, loadaddr, is_linux, IH_TYPE_KERNEL, | 198 | int cpu; |
326 | translate_fn, translate_opaque, as); | 199 | char *aplic_name; |
200 | uint32_t *aplic_cells; | ||
201 | - unsigned long aplic_addr; | ||
202 | MachineState *ms = MACHINE(s); | ||
203 | - uint32_t aplic_m_phandle, aplic_s_phandle; | ||
204 | |||
205 | - aplic_m_phandle = (*phandle)++; | ||
206 | - aplic_s_phandle = (*phandle)++; | ||
207 | aplic_cells = g_new0(uint32_t, s->soc[socket].num_harts * 2); | ||
208 | |||
209 | - /* M-level APLIC node */ | ||
210 | for (cpu = 0; cpu < s->soc[socket].num_harts; cpu++) { | ||
211 | aplic_cells[cpu * 2 + 0] = cpu_to_be32(intc_phandles[cpu]); | ||
212 | - aplic_cells[cpu * 2 + 1] = cpu_to_be32(IRQ_M_EXT); | ||
213 | + aplic_cells[cpu * 2 + 1] = cpu_to_be32(m_mode ? IRQ_M_EXT : IRQ_S_EXT); | ||
214 | } | ||
215 | - aplic_addr = memmap[VIRT_APLIC_M].base + | ||
216 | - (memmap[VIRT_APLIC_M].size * socket); | ||
217 | + | ||
218 | aplic_name = g_strdup_printf("/soc/aplic@%lx", aplic_addr); | ||
219 | qemu_fdt_add_subnode(ms->fdt, aplic_name); | ||
220 | qemu_fdt_setprop_string(ms->fdt, aplic_name, "compatible", "riscv,aplic"); | ||
221 | qemu_fdt_setprop_cell(ms->fdt, aplic_name, | ||
222 | - "#interrupt-cells", FDT_APLIC_INT_CELLS); | ||
223 | + "#interrupt-cells", FDT_APLIC_INT_CELLS); | ||
224 | qemu_fdt_setprop(ms->fdt, aplic_name, "interrupt-controller", NULL, 0); | ||
225 | + | ||
226 | if (s->aia_type == VIRT_AIA_TYPE_APLIC) { | ||
227 | qemu_fdt_setprop(ms->fdt, aplic_name, "interrupts-extended", | ||
228 | - aplic_cells, s->soc[socket].num_harts * sizeof(uint32_t) * 2); | ||
229 | + aplic_cells, | ||
230 | + s->soc[socket].num_harts * sizeof(uint32_t) * 2); | ||
231 | } else { | ||
232 | - qemu_fdt_setprop_cell(ms->fdt, aplic_name, "msi-parent", | ||
233 | - msi_m_phandle); | ||
234 | + qemu_fdt_setprop_cell(ms->fdt, aplic_name, "msi-parent", msi_phandle); | ||
235 | } | ||
236 | + | ||
237 | qemu_fdt_setprop_cells(ms->fdt, aplic_name, "reg", | ||
238 | - 0x0, aplic_addr, 0x0, memmap[VIRT_APLIC_M].size); | ||
239 | + 0x0, aplic_addr, 0x0, aplic_size); | ||
240 | qemu_fdt_setprop_cell(ms->fdt, aplic_name, "riscv,num-sources", | ||
241 | - VIRT_IRQCHIP_NUM_SOURCES); | ||
242 | - qemu_fdt_setprop_cell(ms->fdt, aplic_name, "riscv,children", | ||
243 | - aplic_s_phandle); | ||
244 | - qemu_fdt_setprop_cells(ms->fdt, aplic_name, "riscv,delegate", | ||
245 | - aplic_s_phandle, 0x1, VIRT_IRQCHIP_NUM_SOURCES); | ||
246 | + VIRT_IRQCHIP_NUM_SOURCES); | ||
247 | + | ||
248 | + if (aplic_child_phandle) { | ||
249 | + qemu_fdt_setprop_cell(ms->fdt, aplic_name, "riscv,children", | ||
250 | + aplic_child_phandle); | ||
251 | + qemu_fdt_setprop_cells(ms->fdt, aplic_name, "riscv,delegate", | ||
252 | + aplic_child_phandle, 0x1, | ||
253 | + VIRT_IRQCHIP_NUM_SOURCES); | ||
254 | + } | ||
255 | + | ||
256 | riscv_socket_fdt_write_id(ms, aplic_name, socket); | ||
257 | - qemu_fdt_setprop_cell(ms->fdt, aplic_name, "phandle", aplic_m_phandle); | ||
258 | + qemu_fdt_setprop_cell(ms->fdt, aplic_name, "phandle", aplic_phandle); | ||
259 | + | ||
260 | g_free(aplic_name); | ||
261 | + g_free(aplic_cells); | ||
262 | +} | ||
263 | |||
264 | - /* S-level APLIC node */ | ||
265 | - for (cpu = 0; cpu < s->soc[socket].num_harts; cpu++) { | ||
266 | - aplic_cells[cpu * 2 + 0] = cpu_to_be32(intc_phandles[cpu]); | ||
267 | - aplic_cells[cpu * 2 + 1] = cpu_to_be32(IRQ_S_EXT); | ||
268 | +static void create_fdt_socket_aplic(RISCVVirtState *s, | ||
269 | + const MemMapEntry *memmap, int socket, | ||
270 | + uint32_t msi_m_phandle, | ||
271 | + uint32_t msi_s_phandle, | ||
272 | + uint32_t *phandle, | ||
273 | + uint32_t *intc_phandles, | ||
274 | + uint32_t *aplic_phandles) | ||
275 | +{ | ||
276 | + char *aplic_name; | ||
277 | + unsigned long aplic_addr; | ||
278 | + MachineState *ms = MACHINE(s); | ||
279 | + uint32_t aplic_m_phandle, aplic_s_phandle; | ||
280 | + | ||
281 | + aplic_m_phandle = (*phandle)++; | ||
282 | + aplic_s_phandle = (*phandle)++; | ||
283 | + | ||
284 | + if (!kvm_enabled()) { | ||
285 | + /* M-level APLIC node */ | ||
286 | + aplic_addr = memmap[VIRT_APLIC_M].base + | ||
287 | + (memmap[VIRT_APLIC_M].size * socket); | ||
288 | + create_fdt_one_aplic(s, socket, aplic_addr, memmap[VIRT_APLIC_M].size, | ||
289 | + msi_m_phandle, intc_phandles, | ||
290 | + aplic_m_phandle, aplic_s_phandle, | ||
291 | + true); | ||
292 | } | ||
293 | + | ||
294 | + /* S-level APLIC node */ | ||
295 | aplic_addr = memmap[VIRT_APLIC_S].base + | ||
296 | (memmap[VIRT_APLIC_S].size * socket); | ||
297 | + create_fdt_one_aplic(s, socket, aplic_addr, memmap[VIRT_APLIC_S].size, | ||
298 | + msi_s_phandle, intc_phandles, | ||
299 | + aplic_s_phandle, 0, | ||
300 | + false); | ||
301 | + | ||
302 | aplic_name = g_strdup_printf("/soc/aplic@%lx", aplic_addr); | ||
303 | - qemu_fdt_add_subnode(ms->fdt, aplic_name); | ||
304 | - qemu_fdt_setprop_string(ms->fdt, aplic_name, "compatible", "riscv,aplic"); | ||
305 | - qemu_fdt_setprop_cell(ms->fdt, aplic_name, | ||
306 | - "#interrupt-cells", FDT_APLIC_INT_CELLS); | ||
307 | - qemu_fdt_setprop(ms->fdt, aplic_name, "interrupt-controller", NULL, 0); | ||
308 | - if (s->aia_type == VIRT_AIA_TYPE_APLIC) { | ||
309 | - qemu_fdt_setprop(ms->fdt, aplic_name, "interrupts-extended", | ||
310 | - aplic_cells, s->soc[socket].num_harts * sizeof(uint32_t) * 2); | ||
311 | - } else { | ||
312 | - qemu_fdt_setprop_cell(ms->fdt, aplic_name, "msi-parent", | ||
313 | - msi_s_phandle); | ||
314 | - } | ||
315 | - qemu_fdt_setprop_cells(ms->fdt, aplic_name, "reg", | ||
316 | - 0x0, aplic_addr, 0x0, memmap[VIRT_APLIC_S].size); | ||
317 | - qemu_fdt_setprop_cell(ms->fdt, aplic_name, "riscv,num-sources", | ||
318 | - VIRT_IRQCHIP_NUM_SOURCES); | ||
319 | - riscv_socket_fdt_write_id(ms, aplic_name, socket); | ||
320 | - qemu_fdt_setprop_cell(ms->fdt, aplic_name, "phandle", aplic_s_phandle); | ||
321 | |||
322 | if (!socket) { | ||
323 | platform_bus_add_all_fdt_nodes(ms->fdt, aplic_name, | ||
324 | @@ -XXX,XX +XXX,XX @@ static void create_fdt_socket_aplic(RISCVVirtState *s, | ||
325 | |||
326 | g_free(aplic_name); | ||
327 | |||
328 | - g_free(aplic_cells); | ||
329 | aplic_phandles[socket] = aplic_s_phandle; | ||
327 | } | 330 | } |
328 | 331 | ||
329 | /* Load a ramdisk. */ | 332 | @@ -XXX,XX +XXX,XX @@ static DeviceState *virt_create_aia(RISCVVirtAIAType aia_type, int aia_guests, |
330 | -int load_ramdisk(const char *filename, hwaddr addr, uint64_t max_sz) | 333 | int i; |
331 | +ssize_t load_ramdisk(const char *filename, hwaddr addr, uint64_t max_sz) | 334 | hwaddr addr; |
332 | { | 335 | uint32_t guest_bits; |
333 | return load_ramdisk_as(filename, addr, max_sz, NULL); | 336 | - DeviceState *aplic_m; |
337 | - bool msimode = (aia_type == VIRT_AIA_TYPE_APLIC_IMSIC) ? true : false; | ||
338 | + DeviceState *aplic_s = NULL; | ||
339 | + DeviceState *aplic_m = NULL; | ||
340 | + bool msimode = aia_type == VIRT_AIA_TYPE_APLIC_IMSIC; | ||
341 | |||
342 | if (msimode) { | ||
343 | - /* Per-socket M-level IMSICs */ | ||
344 | - addr = memmap[VIRT_IMSIC_M].base + socket * VIRT_IMSIC_GROUP_MAX_SIZE; | ||
345 | - for (i = 0; i < hart_count; i++) { | ||
346 | - riscv_imsic_create(addr + i * IMSIC_HART_SIZE(0), | ||
347 | - base_hartid + i, true, 1, | ||
348 | - VIRT_IRQCHIP_NUM_MSIS); | ||
349 | + if (!kvm_enabled()) { | ||
350 | + /* Per-socket M-level IMSICs */ | ||
351 | + addr = memmap[VIRT_IMSIC_M].base + | ||
352 | + socket * VIRT_IMSIC_GROUP_MAX_SIZE; | ||
353 | + for (i = 0; i < hart_count; i++) { | ||
354 | + riscv_imsic_create(addr + i * IMSIC_HART_SIZE(0), | ||
355 | + base_hartid + i, true, 1, | ||
356 | + VIRT_IRQCHIP_NUM_MSIS); | ||
357 | + } | ||
358 | } | ||
359 | |||
360 | /* Per-socket S-level IMSICs */ | ||
361 | @@ -XXX,XX +XXX,XX @@ static DeviceState *virt_create_aia(RISCVVirtAIAType aia_type, int aia_guests, | ||
362 | } | ||
363 | } | ||
364 | |||
365 | - /* Per-socket M-level APLIC */ | ||
366 | - aplic_m = riscv_aplic_create( | ||
367 | - memmap[VIRT_APLIC_M].base + socket * memmap[VIRT_APLIC_M].size, | ||
368 | - memmap[VIRT_APLIC_M].size, | ||
369 | - (msimode) ? 0 : base_hartid, | ||
370 | - (msimode) ? 0 : hart_count, | ||
371 | - VIRT_IRQCHIP_NUM_SOURCES, | ||
372 | - VIRT_IRQCHIP_NUM_PRIO_BITS, | ||
373 | - msimode, true, NULL); | ||
374 | - | ||
375 | - if (aplic_m) { | ||
376 | - /* Per-socket S-level APLIC */ | ||
377 | - riscv_aplic_create( | ||
378 | - memmap[VIRT_APLIC_S].base + socket * memmap[VIRT_APLIC_S].size, | ||
379 | - memmap[VIRT_APLIC_S].size, | ||
380 | - (msimode) ? 0 : base_hartid, | ||
381 | - (msimode) ? 0 : hart_count, | ||
382 | - VIRT_IRQCHIP_NUM_SOURCES, | ||
383 | - VIRT_IRQCHIP_NUM_PRIO_BITS, | ||
384 | - msimode, false, aplic_m); | ||
385 | + if (!kvm_enabled()) { | ||
386 | + /* Per-socket M-level APLIC */ | ||
387 | + aplic_m = riscv_aplic_create(memmap[VIRT_APLIC_M].base + | ||
388 | + socket * memmap[VIRT_APLIC_M].size, | ||
389 | + memmap[VIRT_APLIC_M].size, | ||
390 | + (msimode) ? 0 : base_hartid, | ||
391 | + (msimode) ? 0 : hart_count, | ||
392 | + VIRT_IRQCHIP_NUM_SOURCES, | ||
393 | + VIRT_IRQCHIP_NUM_PRIO_BITS, | ||
394 | + msimode, true, NULL); | ||
395 | } | ||
396 | |||
397 | - return aplic_m; | ||
398 | + /* Per-socket S-level APLIC */ | ||
399 | + aplic_s = riscv_aplic_create(memmap[VIRT_APLIC_S].base + | ||
400 | + socket * memmap[VIRT_APLIC_S].size, | ||
401 | + memmap[VIRT_APLIC_S].size, | ||
402 | + (msimode) ? 0 : base_hartid, | ||
403 | + (msimode) ? 0 : hart_count, | ||
404 | + VIRT_IRQCHIP_NUM_SOURCES, | ||
405 | + VIRT_IRQCHIP_NUM_PRIO_BITS, | ||
406 | + msimode, false, aplic_m); | ||
407 | + | ||
408 | + return kvm_enabled() ? aplic_s : aplic_m; | ||
334 | } | 409 | } |
335 | 410 | ||
336 | -int load_ramdisk_as(const char *filename, hwaddr addr, uint64_t max_sz, | 411 | static void create_platform_bus(RISCVVirtState *s, DeviceState *irqchip) |
337 | - AddressSpace *as) | ||
338 | +ssize_t load_ramdisk_as(const char *filename, hwaddr addr, uint64_t max_sz, | ||
339 | + AddressSpace *as) | ||
340 | { | ||
341 | return load_uboot_image(filename, NULL, &addr, NULL, IH_TYPE_RAMDISK, | ||
342 | NULL, NULL, as); | ||
343 | } | ||
344 | |||
345 | /* Load a gzip-compressed kernel to a dynamically allocated buffer. */ | ||
346 | -int load_image_gzipped_buffer(const char *filename, uint64_t max_sz, | ||
347 | - uint8_t **buffer) | ||
348 | +ssize_t load_image_gzipped_buffer(const char *filename, uint64_t max_sz, | ||
349 | + uint8_t **buffer) | ||
350 | { | ||
351 | uint8_t *compressed_data = NULL; | ||
352 | uint8_t *data = NULL; | ||
353 | @@ -XXX,XX +XXX,XX @@ int load_image_gzipped_buffer(const char *filename, uint64_t max_sz, | ||
354 | } | ||
355 | |||
356 | /* Load a gzip-compressed kernel. */ | ||
357 | -int load_image_gzipped(const char *filename, hwaddr addr, uint64_t max_sz) | ||
358 | +ssize_t load_image_gzipped(const char *filename, hwaddr addr, uint64_t max_sz) | ||
359 | { | ||
360 | - int bytes; | ||
361 | + ssize_t bytes; | ||
362 | uint8_t *data; | ||
363 | |||
364 | bytes = load_image_gzipped_buffer(filename, max_sz, &data); | ||
365 | @@ -XXX,XX +XXX,XX @@ static void *rom_set_mr(Rom *rom, Object *owner, const char *name, bool ro) | ||
366 | return data; | ||
367 | } | ||
368 | |||
369 | -int rom_add_file(const char *file, const char *fw_dir, | ||
370 | - hwaddr addr, int32_t bootindex, | ||
371 | - bool option_rom, MemoryRegion *mr, | ||
372 | - AddressSpace *as) | ||
373 | +ssize_t rom_add_file(const char *file, const char *fw_dir, | ||
374 | + hwaddr addr, int32_t bootindex, | ||
375 | + bool option_rom, MemoryRegion *mr, | ||
376 | + AddressSpace *as) | ||
377 | { | ||
378 | MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); | ||
379 | Rom *rom; | ||
380 | - int rc, fd = -1; | ||
381 | + ssize_t rc; | ||
382 | + int fd = -1; | ||
383 | char devpath[100]; | ||
384 | |||
385 | if (as && mr) { | ||
386 | @@ -XXX,XX +XXX,XX @@ int rom_add_file(const char *file, const char *fw_dir, | ||
387 | lseek(fd, 0, SEEK_SET); | ||
388 | rc = read(fd, rom->data, rom->datasize); | ||
389 | if (rc != rom->datasize) { | ||
390 | - fprintf(stderr, "rom: file %-20s: read error: rc=%d (expected %zd)\n", | ||
391 | + fprintf(stderr, "rom: file %-20s: read error: rc=%zd (expected %zd)\n", | ||
392 | rom->name, rc, rom->datasize); | ||
393 | goto err; | ||
394 | } | ||
395 | @@ -XXX,XX +XXX,XX @@ int rom_add_elf_program(const char *name, GMappedFile *mapped_file, void *data, | ||
396 | return 0; | ||
397 | } | ||
398 | |||
399 | -int rom_add_vga(const char *file) | ||
400 | +ssize_t rom_add_vga(const char *file) | ||
401 | { | ||
402 | return rom_add_file(file, "vgaroms", 0, -1, true, NULL, NULL); | ||
403 | } | ||
404 | |||
405 | -int rom_add_option(const char *file, int32_t bootindex) | ||
406 | +ssize_t rom_add_option(const char *file, int32_t bootindex) | ||
407 | { | ||
408 | return rom_add_file(file, "genroms", 0, bootindex, true, NULL, NULL); | ||
409 | } | ||
410 | @@ -XXX,XX +XXX,XX @@ out: | ||
411 | } | ||
412 | |||
413 | /* return size or -1 if error */ | ||
414 | -int load_targphys_hex_as(const char *filename, hwaddr *entry, AddressSpace *as) | ||
415 | +ssize_t load_targphys_hex_as(const char *filename, hwaddr *entry, | ||
416 | + AddressSpace *as) | ||
417 | { | ||
418 | gsize hex_blob_size; | ||
419 | gchar *hex_blob; | ||
420 | - int total_size = 0; | ||
421 | + ssize_t total_size = 0; | ||
422 | |||
423 | if (!g_file_get_contents(filename, &hex_blob, &hex_blob_size, NULL)) { | ||
424 | return -1; | ||
425 | diff --git a/hw/i386/x86.c b/hw/i386/x86.c | ||
426 | index XXXXXXX..XXXXXXX 100644 | ||
427 | --- a/hw/i386/x86.c | ||
428 | +++ b/hw/i386/x86.c | ||
429 | @@ -XXX,XX +XXX,XX @@ void x86_bios_rom_init(MachineState *ms, const char *default_firmware, | ||
430 | char *filename; | ||
431 | MemoryRegion *bios, *isa_bios; | ||
432 | int bios_size, isa_bios_size; | ||
433 | - int ret; | ||
434 | + ssize_t ret; | ||
435 | |||
436 | /* BIOS load */ | ||
437 | bios_name = ms->firmware ?: default_firmware; | ||
438 | diff --git a/hw/riscv/boot.c b/hw/riscv/boot.c | ||
439 | index XXXXXXX..XXXXXXX 100644 | ||
440 | --- a/hw/riscv/boot.c | ||
441 | +++ b/hw/riscv/boot.c | ||
442 | @@ -XXX,XX +XXX,XX @@ target_ulong riscv_load_firmware(const char *firmware_filename, | ||
443 | hwaddr firmware_load_addr, | ||
444 | symbol_fn_t sym_cb) | ||
445 | { | ||
446 | - uint64_t firmware_entry, firmware_size, firmware_end; | ||
447 | + uint64_t firmware_entry, firmware_end; | ||
448 | + ssize_t firmware_size; | ||
449 | |||
450 | if (load_elf_ram_sym(firmware_filename, NULL, NULL, NULL, | ||
451 | &firmware_entry, NULL, &firmware_end, NULL, | ||
452 | @@ -XXX,XX +XXX,XX @@ target_ulong riscv_load_kernel(const char *kernel_filename, | ||
453 | hwaddr riscv_load_initrd(const char *filename, uint64_t mem_size, | ||
454 | uint64_t kernel_entry, hwaddr *start) | ||
455 | { | ||
456 | - int size; | ||
457 | + ssize_t size; | ||
458 | |||
459 | /* | ||
460 | * We want to put the initrd far enough into RAM that when the | ||
461 | -- | 412 | -- |
462 | 2.36.1 | 413 | 2.41.0 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Yong-Xuan Wang <yongxuan.wang@sifive.com> | ||
1 | 2 | ||
3 | We check the in-kernel irqchip support when using KVM acceleration. | ||
4 | |||
5 | Signed-off-by: Yong-Xuan Wang <yongxuan.wang@sifive.com> | ||
6 | Reviewed-by: Jim Shu <jim.shu@sifive.com> | ||
7 | Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> | ||
8 | Reviewed-by: Andrew Jones <ajones@ventanamicro.com> | ||
9 | Message-ID: <20230727102439.22554-3-yongxuan.wang@sifive.com> | ||
10 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | ||
11 | --- | ||
12 | target/riscv/kvm.c | 10 +++++++++- | ||
13 | 1 file changed, 9 insertions(+), 1 deletion(-) | ||
14 | |||
15 | diff --git a/target/riscv/kvm.c b/target/riscv/kvm.c | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/target/riscv/kvm.c | ||
18 | +++ b/target/riscv/kvm.c | ||
19 | @@ -XXX,XX +XXX,XX @@ int kvm_arch_init(MachineState *ms, KVMState *s) | ||
20 | |||
21 | int kvm_arch_irqchip_create(KVMState *s) | ||
22 | { | ||
23 | - return 0; | ||
24 | + if (kvm_kernel_irqchip_split()) { | ||
25 | + error_report("-machine kernel_irqchip=split is not supported on RISC-V."); | ||
26 | + exit(1); | ||
27 | + } | ||
28 | + | ||
29 | + /* | ||
30 | + * We can create the VAIA using the newer device control API. | ||
31 | + */ | ||
32 | + return kvm_check_extension(s, KVM_CAP_DEVICE_CTRL); | ||
33 | } | ||
34 | |||
35 | int kvm_arch_process_async_events(CPUState *cs) | ||
36 | -- | ||
37 | 2.41.0 | diff view generated by jsdifflib |
1 | From: eopXD <yueh.ting.chen@gmail.com> | 1 | From: Yong-Xuan Wang <yongxuan.wang@sifive.com> |
---|---|---|---|
2 | 2 | ||
3 | Signed-off-by: eop Chen <eop.chen@sifive.com> | 3 | We create a vAIA chip by using the KVM_DEV_TYPE_RISCV_AIA and then set up |
4 | Reviewed-by: Frank Chang <frank.chang@sifive.com> | 4 | the chip with the KVM_DEV_RISCV_AIA_GRP_* APIs. |
5 | Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn> | 5 | We also extend KVM accelerator to specify the KVM AIA mode. The "riscv-aia" |
6 | Acked-by: Alistair Francis <alistair.francis@wdc.com> | 6 | parameter is passed along with --accel in QEMU command-line. |
7 | Message-Id: <165449614532.19704.7000832880482980398-10@git.sr.ht> | 7 | 1) "riscv-aia=emul": IMSIC is emulated by hypervisor |
8 | 2) "riscv-aia=hwaccel": use hardware guest IMSIC | ||
9 | 3) "riscv-aia=auto": use the hardware guest IMSICs whenever available | ||
10 | otherwise we fallback to software emulation. | ||
11 | |||
12 | Signed-off-by: Yong-Xuan Wang <yongxuan.wang@sifive.com> | ||
13 | Reviewed-by: Jim Shu <jim.shu@sifive.com> | ||
14 | Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> | ||
15 | Reviewed-by: Andrew Jones <ajones@ventanamicro.com> | ||
16 | Message-ID: <20230727102439.22554-4-yongxuan.wang@sifive.com> | ||
8 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | 17 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> |
9 | --- | 18 | --- |
10 | target/riscv/vector_helper.c | 20 ++++++++++++++++++++ | 19 | target/riscv/kvm_riscv.h | 4 + |
11 | target/riscv/insn_trans/trans_rvv.c.inc | 12 ++++++++---- | 20 | target/riscv/kvm.c | 186 +++++++++++++++++++++++++++++++++++++++ |
12 | 2 files changed, 28 insertions(+), 4 deletions(-) | 21 | 2 files changed, 190 insertions(+) |
13 | 22 | ||
14 | diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c | 23 | diff --git a/target/riscv/kvm_riscv.h b/target/riscv/kvm_riscv.h |
15 | index XXXXXXX..XXXXXXX 100644 | 24 | index XXXXXXX..XXXXXXX 100644 |
16 | --- a/target/riscv/vector_helper.c | 25 | --- a/target/riscv/kvm_riscv.h |
17 | +++ b/target/riscv/vector_helper.c | 26 | +++ b/target/riscv/kvm_riscv.h |
18 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env, \ | 27 | @@ -XXX,XX +XXX,XX @@ |
19 | uint32_t desc) \ | 28 | void kvm_riscv_init_user_properties(Object *cpu_obj); |
20 | { \ | 29 | void kvm_riscv_reset_vcpu(RISCVCPU *cpu); |
21 | uint32_t vl = env->vl; \ | 30 | void kvm_riscv_set_irq(RISCVCPU *cpu, int irq, int level); |
22 | + uint32_t esz = sizeof(ETYPE); \ | 31 | +void kvm_riscv_aia_create(MachineState *machine, uint64_t group_shift, |
23 | + uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ | 32 | + uint64_t aia_irq_num, uint64_t aia_msi_num, |
24 | + uint32_t vta = vext_vta(desc); \ | 33 | + uint64_t aplic_base, uint64_t imsic_base, |
25 | uint32_t i; \ | 34 | + uint64_t guest_num); |
26 | \ | 35 | |
27 | for (i = env->vstart; i < vl; i++) { \ | 36 | #endif |
28 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env, \ | 37 | diff --git a/target/riscv/kvm.c b/target/riscv/kvm.c |
29 | *((ETYPE *)vd + H(i)) = s1; \ | 38 | index XXXXXXX..XXXXXXX 100644 |
30 | } \ | 39 | --- a/target/riscv/kvm.c |
31 | env->vstart = 0; \ | 40 | +++ b/target/riscv/kvm.c |
32 | + /* set tail elements to 1s */ \ | 41 | @@ -XXX,XX +XXX,XX @@ |
33 | + vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ | 42 | #include "exec/address-spaces.h" |
43 | #include "hw/boards.h" | ||
44 | #include "hw/irq.h" | ||
45 | +#include "hw/intc/riscv_imsic.h" | ||
46 | #include "qemu/log.h" | ||
47 | #include "hw/loader.h" | ||
48 | #include "kvm_riscv.h" | ||
49 | @@ -XXX,XX +XXX,XX @@ | ||
50 | #include "chardev/char-fe.h" | ||
51 | #include "migration/migration.h" | ||
52 | #include "sysemu/runstate.h" | ||
53 | +#include "hw/riscv/numa.h" | ||
54 | |||
55 | static uint64_t kvm_riscv_reg_id(CPURISCVState *env, uint64_t type, | ||
56 | uint64_t idx) | ||
57 | @@ -XXX,XX +XXX,XX @@ bool kvm_arch_cpu_check_are_resettable(void) | ||
58 | return true; | ||
34 | } | 59 | } |
35 | 60 | ||
36 | GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t, H1) | 61 | +static int aia_mode; |
37 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env, \ | 62 | + |
38 | uint32_t desc) \ | 63 | +static const char *kvm_aia_mode_str(uint64_t mode) |
39 | { \ | 64 | +{ |
40 | uint32_t vl = env->vl; \ | 65 | + switch (mode) { |
41 | + uint32_t esz = sizeof(ETYPE); \ | 66 | + case KVM_DEV_RISCV_AIA_MODE_EMUL: |
42 | + uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ | 67 | + return "emul"; |
43 | + uint32_t vta = vext_vta(desc); \ | 68 | + case KVM_DEV_RISCV_AIA_MODE_HWACCEL: |
44 | uint32_t i; \ | 69 | + return "hwaccel"; |
45 | \ | 70 | + case KVM_DEV_RISCV_AIA_MODE_AUTO: |
46 | for (i = env->vstart; i < vl; i++) { \ | 71 | + default: |
47 | *((ETYPE *)vd + H(i)) = (ETYPE)s1; \ | 72 | + return "auto"; |
48 | } \ | 73 | + }; |
49 | env->vstart = 0; \ | 74 | +} |
50 | + /* set tail elements to 1s */ \ | 75 | + |
51 | + vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ | 76 | +static char *riscv_get_kvm_aia(Object *obj, Error **errp) |
77 | +{ | ||
78 | + return g_strdup(kvm_aia_mode_str(aia_mode)); | ||
79 | +} | ||
80 | + | ||
81 | +static void riscv_set_kvm_aia(Object *obj, const char *val, Error **errp) | ||
82 | +{ | ||
83 | + if (!strcmp(val, "emul")) { | ||
84 | + aia_mode = KVM_DEV_RISCV_AIA_MODE_EMUL; | ||
85 | + } else if (!strcmp(val, "hwaccel")) { | ||
86 | + aia_mode = KVM_DEV_RISCV_AIA_MODE_HWACCEL; | ||
87 | + } else if (!strcmp(val, "auto")) { | ||
88 | + aia_mode = KVM_DEV_RISCV_AIA_MODE_AUTO; | ||
89 | + } else { | ||
90 | + error_setg(errp, "Invalid KVM AIA mode"); | ||
91 | + error_append_hint(errp, "Valid values are emul, hwaccel, and auto.\n"); | ||
92 | + } | ||
93 | +} | ||
94 | + | ||
95 | void kvm_arch_accel_class_init(ObjectClass *oc) | ||
96 | { | ||
97 | + object_class_property_add_str(oc, "riscv-aia", riscv_get_kvm_aia, | ||
98 | + riscv_set_kvm_aia); | ||
99 | + object_class_property_set_description(oc, "riscv-aia", | ||
100 | + "Set KVM AIA mode. Valid values are " | ||
101 | + "emul, hwaccel, and auto. Default " | ||
102 | + "is auto."); | ||
103 | + object_property_set_default_str(object_class_property_find(oc, "riscv-aia"), | ||
104 | + "auto"); | ||
105 | +} | ||
106 | + | ||
107 | +void kvm_riscv_aia_create(MachineState *machine, uint64_t group_shift, | ||
108 | + uint64_t aia_irq_num, uint64_t aia_msi_num, | ||
109 | + uint64_t aplic_base, uint64_t imsic_base, | ||
110 | + uint64_t guest_num) | ||
111 | +{ | ||
112 | + int ret, i; | ||
113 | + int aia_fd = -1; | ||
114 | + uint64_t default_aia_mode; | ||
115 | + uint64_t socket_count = riscv_socket_count(machine); | ||
116 | + uint64_t max_hart_per_socket = 0; | ||
117 | + uint64_t socket, base_hart, hart_count, socket_imsic_base, imsic_addr; | ||
118 | + uint64_t socket_bits, hart_bits, guest_bits; | ||
119 | + | ||
120 | + aia_fd = kvm_create_device(kvm_state, KVM_DEV_TYPE_RISCV_AIA, false); | ||
121 | + | ||
122 | + if (aia_fd < 0) { | ||
123 | + error_report("Unable to create in-kernel irqchip"); | ||
124 | + exit(1); | ||
125 | + } | ||
126 | + | ||
127 | + ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_CONFIG, | ||
128 | + KVM_DEV_RISCV_AIA_CONFIG_MODE, | ||
129 | + &default_aia_mode, false, NULL); | ||
130 | + if (ret < 0) { | ||
131 | + error_report("KVM AIA: failed to get current KVM AIA mode"); | ||
132 | + exit(1); | ||
133 | + } | ||
134 | + qemu_log("KVM AIA: default mode is %s\n", | ||
135 | + kvm_aia_mode_str(default_aia_mode)); | ||
136 | + | ||
137 | + if (default_aia_mode != aia_mode) { | ||
138 | + ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_CONFIG, | ||
139 | + KVM_DEV_RISCV_AIA_CONFIG_MODE, | ||
140 | + &aia_mode, true, NULL); | ||
141 | + if (ret < 0) | ||
142 | + warn_report("KVM AIA: failed to set KVM AIA mode"); | ||
143 | + else | ||
144 | + qemu_log("KVM AIA: set current mode to %s\n", | ||
145 | + kvm_aia_mode_str(aia_mode)); | ||
146 | + } | ||
147 | + | ||
148 | + ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_CONFIG, | ||
149 | + KVM_DEV_RISCV_AIA_CONFIG_SRCS, | ||
150 | + &aia_irq_num, true, NULL); | ||
151 | + if (ret < 0) { | ||
152 | + error_report("KVM AIA: failed to set number of input irq lines"); | ||
153 | + exit(1); | ||
154 | + } | ||
155 | + | ||
156 | + ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_CONFIG, | ||
157 | + KVM_DEV_RISCV_AIA_CONFIG_IDS, | ||
158 | + &aia_msi_num, true, NULL); | ||
159 | + if (ret < 0) { | ||
160 | + error_report("KVM AIA: failed to set number of msi"); | ||
161 | + exit(1); | ||
162 | + } | ||
163 | + | ||
164 | + socket_bits = find_last_bit(&socket_count, BITS_PER_LONG) + 1; | ||
165 | + ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_CONFIG, | ||
166 | + KVM_DEV_RISCV_AIA_CONFIG_GROUP_BITS, | ||
167 | + &socket_bits, true, NULL); | ||
168 | + if (ret < 0) { | ||
169 | + error_report("KVM AIA: failed to set group_bits"); | ||
170 | + exit(1); | ||
171 | + } | ||
172 | + | ||
173 | + ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_CONFIG, | ||
174 | + KVM_DEV_RISCV_AIA_CONFIG_GROUP_SHIFT, | ||
175 | + &group_shift, true, NULL); | ||
176 | + if (ret < 0) { | ||
177 | + error_report("KVM AIA: failed to set group_shift"); | ||
178 | + exit(1); | ||
179 | + } | ||
180 | + | ||
181 | + guest_bits = guest_num == 0 ? 0 : | ||
182 | + find_last_bit(&guest_num, BITS_PER_LONG) + 1; | ||
183 | + ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_CONFIG, | ||
184 | + KVM_DEV_RISCV_AIA_CONFIG_GUEST_BITS, | ||
185 | + &guest_bits, true, NULL); | ||
186 | + if (ret < 0) { | ||
187 | + error_report("KVM AIA: failed to set guest_bits"); | ||
188 | + exit(1); | ||
189 | + } | ||
190 | + | ||
191 | + ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_ADDR, | ||
192 | + KVM_DEV_RISCV_AIA_ADDR_APLIC, | ||
193 | + &aplic_base, true, NULL); | ||
194 | + if (ret < 0) { | ||
195 | + error_report("KVM AIA: failed to set the base address of APLIC"); | ||
196 | + exit(1); | ||
197 | + } | ||
198 | + | ||
199 | + for (socket = 0; socket < socket_count; socket++) { | ||
200 | + socket_imsic_base = imsic_base + socket * (1U << group_shift); | ||
201 | + hart_count = riscv_socket_hart_count(machine, socket); | ||
202 | + base_hart = riscv_socket_first_hartid(machine, socket); | ||
203 | + | ||
204 | + if (max_hart_per_socket < hart_count) { | ||
205 | + max_hart_per_socket = hart_count; | ||
206 | + } | ||
207 | + | ||
208 | + for (i = 0; i < hart_count; i++) { | ||
209 | + imsic_addr = socket_imsic_base + i * IMSIC_HART_SIZE(guest_bits); | ||
210 | + ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_ADDR, | ||
211 | + KVM_DEV_RISCV_AIA_ADDR_IMSIC(i + base_hart), | ||
212 | + &imsic_addr, true, NULL); | ||
213 | + if (ret < 0) { | ||
214 | + error_report("KVM AIA: failed to set the IMSIC address for hart %d", i); | ||
215 | + exit(1); | ||
216 | + } | ||
217 | + } | ||
218 | + } | ||
219 | + | ||
220 | + hart_bits = find_last_bit(&max_hart_per_socket, BITS_PER_LONG) + 1; | ||
221 | + ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_CONFIG, | ||
222 | + KVM_DEV_RISCV_AIA_CONFIG_HART_BITS, | ||
223 | + &hart_bits, true, NULL); | ||
224 | + if (ret < 0) { | ||
225 | + error_report("KVM AIA: failed to set hart_bits"); | ||
226 | + exit(1); | ||
227 | + } | ||
228 | + | ||
229 | + if (kvm_has_gsi_routing()) { | ||
230 | + for (uint64_t idx = 0; idx < aia_irq_num + 1; ++idx) { | ||
231 | + /* KVM AIA only has one APLIC instance */ | ||
232 | + kvm_irqchip_add_irq_route(kvm_state, idx, 0, idx); | ||
233 | + } | ||
234 | + kvm_gsi_routing_allowed = true; | ||
235 | + kvm_irqchip_commit_routes(kvm_state); | ||
236 | + } | ||
237 | + | ||
238 | + ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_CTRL, | ||
239 | + KVM_DEV_RISCV_AIA_CTRL_INIT, | ||
240 | + NULL, true, NULL); | ||
241 | + if (ret < 0) { | ||
242 | + error_report("KVM AIA: initialized fail"); | ||
243 | + exit(1); | ||
244 | + } | ||
245 | + | ||
246 | + kvm_msi_via_irqfd_allowed = kvm_irqfds_enabled(); | ||
52 | } | 247 | } |
53 | |||
54 | GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t, H1) | ||
55 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ | ||
56 | CPURISCVState *env, uint32_t desc) \ | ||
57 | { \ | ||
58 | uint32_t vl = env->vl; \ | ||
59 | + uint32_t esz = sizeof(ETYPE); \ | ||
60 | + uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ | ||
61 | + uint32_t vta = vext_vta(desc); \ | ||
62 | uint32_t i; \ | ||
63 | \ | ||
64 | for (i = env->vstart; i < vl; i++) { \ | ||
65 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ | ||
66 | *((ETYPE *)vd + H(i)) = *(vt + H(i)); \ | ||
67 | } \ | ||
68 | env->vstart = 0; \ | ||
69 | + /* set tail elements to 1s */ \ | ||
70 | + vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ | ||
71 | } | ||
72 | |||
73 | GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t, H1) | ||
74 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ | ||
75 | void *vs2, CPURISCVState *env, uint32_t desc) \ | ||
76 | { \ | ||
77 | uint32_t vl = env->vl; \ | ||
78 | + uint32_t esz = sizeof(ETYPE); \ | ||
79 | + uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ | ||
80 | + uint32_t vta = vext_vta(desc); \ | ||
81 | uint32_t i; \ | ||
82 | \ | ||
83 | for (i = env->vstart; i < vl; i++) { \ | ||
84 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ | ||
85 | *((ETYPE *)vd + H(i)) = d; \ | ||
86 | } \ | ||
87 | env->vstart = 0; \ | ||
88 | + /* set tail elements to 1s */ \ | ||
89 | + vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ | ||
90 | } | ||
91 | |||
92 | GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t, H1) | ||
93 | diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc | ||
94 | index XXXXXXX..XXXXXXX 100644 | ||
95 | --- a/target/riscv/insn_trans/trans_rvv.c.inc | ||
96 | +++ b/target/riscv/insn_trans/trans_rvv.c.inc | ||
97 | @@ -XXX,XX +XXX,XX @@ static bool trans_vmv_v_v(DisasContext *s, arg_vmv_v_v *a) | ||
98 | vext_check_isa_ill(s) && | ||
99 | /* vmv.v.v has rs2 = 0 and vm = 1 */ | ||
100 | vext_check_sss(s, a->rd, a->rs1, 0, 1)) { | ||
101 | - if (s->vl_eq_vlmax) { | ||
102 | + if (s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) { | ||
103 | tcg_gen_gvec_mov(s->sew, vreg_ofs(s, a->rd), | ||
104 | vreg_ofs(s, a->rs1), | ||
105 | MAXSZ(s), MAXSZ(s)); | ||
106 | } else { | ||
107 | uint32_t data = FIELD_DP32(0, VDATA, LMUL, s->lmul); | ||
108 | + data = FIELD_DP32(data, VDATA, VTA, s->vta); | ||
109 | static gen_helper_gvec_2_ptr * const fns[4] = { | ||
110 | gen_helper_vmv_v_v_b, gen_helper_vmv_v_v_h, | ||
111 | gen_helper_vmv_v_v_w, gen_helper_vmv_v_v_d, | ||
112 | @@ -XXX,XX +XXX,XX @@ static bool trans_vmv_v_x(DisasContext *s, arg_vmv_v_x *a) | ||
113 | |||
114 | s1 = get_gpr(s, a->rs1, EXT_SIGN); | ||
115 | |||
116 | - if (s->vl_eq_vlmax) { | ||
117 | + if (s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) { | ||
118 | tcg_gen_gvec_dup_tl(s->sew, vreg_ofs(s, a->rd), | ||
119 | MAXSZ(s), MAXSZ(s), s1); | ||
120 | } else { | ||
121 | @@ -XXX,XX +XXX,XX @@ static bool trans_vmv_v_x(DisasContext *s, arg_vmv_v_x *a) | ||
122 | TCGv_i64 s1_i64 = tcg_temp_new_i64(); | ||
123 | TCGv_ptr dest = tcg_temp_new_ptr(); | ||
124 | uint32_t data = FIELD_DP32(0, VDATA, LMUL, s->lmul); | ||
125 | + data = FIELD_DP32(data, VDATA, VTA, s->vta); | ||
126 | static gen_helper_vmv_vx * const fns[4] = { | ||
127 | gen_helper_vmv_v_x_b, gen_helper_vmv_v_x_h, | ||
128 | gen_helper_vmv_v_x_w, gen_helper_vmv_v_x_d, | ||
129 | @@ -XXX,XX +XXX,XX @@ static bool trans_vmv_v_i(DisasContext *s, arg_vmv_v_i *a) | ||
130 | /* vmv.v.i has rs2 = 0 and vm = 1 */ | ||
131 | vext_check_ss(s, a->rd, 0, 1)) { | ||
132 | int64_t simm = sextract64(a->rs1, 0, 5); | ||
133 | - if (s->vl_eq_vlmax) { | ||
134 | + if (s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) { | ||
135 | tcg_gen_gvec_dup_imm(s->sew, vreg_ofs(s, a->rd), | ||
136 | MAXSZ(s), MAXSZ(s), simm); | ||
137 | mark_vs_dirty(s); | ||
138 | @@ -XXX,XX +XXX,XX @@ static bool trans_vmv_v_i(DisasContext *s, arg_vmv_v_i *a) | ||
139 | TCGv_i64 s1; | ||
140 | TCGv_ptr dest; | ||
141 | uint32_t data = FIELD_DP32(0, VDATA, LMUL, s->lmul); | ||
142 | + data = FIELD_DP32(data, VDATA, VTA, s->vta); | ||
143 | static gen_helper_vmv_vx * const fns[4] = { | ||
144 | gen_helper_vmv_v_x_b, gen_helper_vmv_v_x_h, | ||
145 | gen_helper_vmv_v_x_w, gen_helper_vmv_v_x_d, | ||
146 | @@ -XXX,XX +XXX,XX @@ static bool trans_vfmv_v_f(DisasContext *s, arg_vfmv_v_f *a) | ||
147 | |||
148 | TCGv_i64 t1; | ||
149 | |||
150 | - if (s->vl_eq_vlmax) { | ||
151 | + if (s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) { | ||
152 | t1 = tcg_temp_new_i64(); | ||
153 | /* NaN-box f[rs1] */ | ||
154 | do_nanbox(s, t1, cpu_fpr[a->rs1]); | ||
155 | @@ -XXX,XX +XXX,XX @@ static bool trans_vfmv_v_f(DisasContext *s, arg_vfmv_v_f *a) | ||
156 | TCGv_ptr dest; | ||
157 | TCGv_i32 desc; | ||
158 | uint32_t data = FIELD_DP32(0, VDATA, LMUL, s->lmul); | ||
159 | + data = FIELD_DP32(data, VDATA, VTA, s->vta); | ||
160 | static gen_helper_vmv_vx * const fns[3] = { | ||
161 | gen_helper_vmv_v_x_h, | ||
162 | gen_helper_vmv_v_x_w, | ||
163 | -- | 248 | -- |
164 | 2.36.1 | 249 | 2.41.0 | diff view generated by jsdifflib |
1 | From: eopXD <yueh.ting.chen@gmail.com> | 1 | From: Yong-Xuan Wang <yongxuan.wang@sifive.com> |
---|---|---|---|
2 | 2 | ||
3 | `vmadc` and `vmsbc` produces a mask value, they always operate with | 3 | KVM AIA can't emulate APLIC only. When "aia=aplic" parameter is passed, |
4 | a tail agnostic policy. | 4 | APLIC devices is emulated by QEMU. For "aia=aplic-imsic", remove the |
5 | mmio operations of APLIC when using KVM AIA and send wired interrupt | ||
6 | signal via KVM_IRQ_LINE API. | ||
7 | After KVM AIA enabled, MSI messages are delivered by KVM_SIGNAL_MSI API | ||
8 | when the IMSICs receive mmio write requests. | ||
5 | 9 | ||
6 | Signed-off-by: eop Chen <eop.chen@sifive.com> | 10 | Signed-off-by: Yong-Xuan Wang <yongxuan.wang@sifive.com> |
7 | Reviewed-by: Frank Chang <frank.chang@sifive.com> | 11 | Reviewed-by: Jim Shu <jim.shu@sifive.com> |
8 | Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn> | 12 | Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> |
9 | Acked-by: Alistair Francis <alistair.francis@wdc.com> | 13 | Reviewed-by: Andrew Jones <ajones@ventanamicro.com> |
10 | Message-Id: <165449614532.19704.7000832880482980398-7@git.sr.ht> | 14 | Message-ID: <20230727102439.22554-5-yongxuan.wang@sifive.com> |
11 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | 15 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> |
12 | --- | 16 | --- |
13 | target/riscv/internals.h | 5 +- | 17 | hw/intc/riscv_aplic.c | 56 ++++++++++++++++++++++++++++++------------- |
14 | target/riscv/vector_helper.c | 314 +++++++++++++----------- | 18 | hw/intc/riscv_imsic.c | 25 +++++++++++++++---- |
15 | target/riscv/insn_trans/trans_rvv.c.inc | 13 +- | 19 | 2 files changed, 61 insertions(+), 20 deletions(-) |
16 | 3 files changed, 190 insertions(+), 142 deletions(-) | ||
17 | 20 | ||
18 | diff --git a/target/riscv/internals.h b/target/riscv/internals.h | 21 | diff --git a/hw/intc/riscv_aplic.c b/hw/intc/riscv_aplic.c |
19 | index XXXXXXX..XXXXXXX 100644 | 22 | index XXXXXXX..XXXXXXX 100644 |
20 | --- a/target/riscv/internals.h | 23 | --- a/hw/intc/riscv_aplic.c |
21 | +++ b/target/riscv/internals.h | 24 | +++ b/hw/intc/riscv_aplic.c |
22 | @@ -XXX,XX +XXX,XX @@ | 25 | @@ -XXX,XX +XXX,XX @@ |
23 | FIELD(VDATA, VM, 0, 1) | 26 | #include "hw/irq.h" |
24 | FIELD(VDATA, LMUL, 1, 3) | 27 | #include "target/riscv/cpu.h" |
25 | FIELD(VDATA, VTA, 4, 1) | 28 | #include "sysemu/sysemu.h" |
26 | -FIELD(VDATA, NF, 5, 4) | 29 | +#include "sysemu/kvm.h" |
27 | -FIELD(VDATA, WD, 5, 1) | 30 | #include "migration/vmstate.h" |
28 | +FIELD(VDATA, VTA_ALL_1S, 5, 1) | 31 | |
29 | +FIELD(VDATA, NF, 6, 4) | 32 | #define APLIC_MAX_IDC (1UL << 14) |
30 | +FIELD(VDATA, WD, 6, 1) | 33 | @@ -XXX,XX +XXX,XX @@ |
31 | 34 | ||
32 | /* float point classify helpers */ | 35 | #define APLIC_IDC_CLAIMI 0x1c |
33 | target_ulong fclass_h(uint64_t frs1); | 36 | |
34 | diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c | 37 | +/* |
35 | index XXXXXXX..XXXXXXX 100644 | 38 | + * KVM AIA only supports APLIC MSI, fallback to QEMU emulation if we want to use |
36 | --- a/target/riscv/vector_helper.c | 39 | + * APLIC Wired. |
37 | +++ b/target/riscv/vector_helper.c | 40 | + */ |
38 | @@ -XXX,XX +XXX,XX @@ static inline uint32_t vext_vta(uint32_t desc) | 41 | +static bool is_kvm_aia(bool msimode) |
39 | return FIELD_EX32(simd_data(desc), VDATA, VTA); | ||
40 | } | ||
41 | |||
42 | +static inline uint32_t vext_vta_all_1s(uint32_t desc) | ||
43 | +{ | 42 | +{ |
44 | + return FIELD_EX32(simd_data(desc), VDATA, VTA_ALL_1S); | 43 | + return kvm_irqchip_in_kernel() && msimode; |
45 | +} | 44 | +} |
46 | + | 45 | + |
47 | /* | 46 | static uint32_t riscv_aplic_read_input_word(RISCVAPLICState *aplic, |
48 | * Get the maximum number of elements can be operated. | 47 | uint32_t word) |
49 | * | ||
50 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB) | ||
51 | |||
52 | static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2, | ||
53 | CPURISCVState *env, uint32_t desc, | ||
54 | - opivx2_fn fn) | ||
55 | + opivx2_fn fn, uint32_t esz) | ||
56 | { | 48 | { |
57 | uint32_t vm = vext_vm(desc); | 49 | @@ -XXX,XX +XXX,XX @@ static uint32_t riscv_aplic_idc_claimi(RISCVAPLICState *aplic, uint32_t idc) |
58 | uint32_t vl = env->vl; | 50 | return topi; |
59 | + uint32_t total_elems = vext_get_total_elems(env, desc, esz); | ||
60 | + uint32_t vta = vext_vta(desc); | ||
61 | uint32_t i; | ||
62 | |||
63 | for (i = env->vstart; i < vl; i++) { | ||
64 | @@ -XXX,XX +XXX,XX @@ static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2, | ||
65 | fn(vd, s1, vs2, i); | ||
66 | } | ||
67 | env->vstart = 0; | ||
68 | + /* set tail elements to 1s */ | ||
69 | + vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); | ||
70 | } | 51 | } |
71 | 52 | ||
72 | /* generate the helpers for OPIVX */ | 53 | +static void riscv_kvm_aplic_request(void *opaque, int irq, int level) |
73 | -#define GEN_VEXT_VX(NAME) \ | 54 | +{ |
74 | +#define GEN_VEXT_VX(NAME, ESZ) \ | 55 | + kvm_set_irq(kvm_state, irq, !!level); |
75 | void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ | ||
76 | void *vs2, CPURISCVState *env, \ | ||
77 | uint32_t desc) \ | ||
78 | { \ | ||
79 | do_vext_vx(vd, v0, s1, vs2, env, desc, \ | ||
80 | - do_##NAME); \ | ||
81 | -} | ||
82 | - | ||
83 | -GEN_VEXT_VX(vadd_vx_b) | ||
84 | -GEN_VEXT_VX(vadd_vx_h) | ||
85 | -GEN_VEXT_VX(vadd_vx_w) | ||
86 | -GEN_VEXT_VX(vadd_vx_d) | ||
87 | -GEN_VEXT_VX(vsub_vx_b) | ||
88 | -GEN_VEXT_VX(vsub_vx_h) | ||
89 | -GEN_VEXT_VX(vsub_vx_w) | ||
90 | -GEN_VEXT_VX(vsub_vx_d) | ||
91 | -GEN_VEXT_VX(vrsub_vx_b) | ||
92 | -GEN_VEXT_VX(vrsub_vx_h) | ||
93 | -GEN_VEXT_VX(vrsub_vx_w) | ||
94 | -GEN_VEXT_VX(vrsub_vx_d) | ||
95 | + do_##NAME, ESZ); \ | ||
96 | +} | 56 | +} |
97 | + | 57 | + |
98 | +GEN_VEXT_VX(vadd_vx_b, 1) | 58 | static void riscv_aplic_request(void *opaque, int irq, int level) |
99 | +GEN_VEXT_VX(vadd_vx_h, 2) | ||
100 | +GEN_VEXT_VX(vadd_vx_w, 4) | ||
101 | +GEN_VEXT_VX(vadd_vx_d, 8) | ||
102 | +GEN_VEXT_VX(vsub_vx_b, 1) | ||
103 | +GEN_VEXT_VX(vsub_vx_h, 2) | ||
104 | +GEN_VEXT_VX(vsub_vx_w, 4) | ||
105 | +GEN_VEXT_VX(vsub_vx_d, 8) | ||
106 | +GEN_VEXT_VX(vrsub_vx_b, 1) | ||
107 | +GEN_VEXT_VX(vrsub_vx_h, 2) | ||
108 | +GEN_VEXT_VX(vrsub_vx_w, 4) | ||
109 | +GEN_VEXT_VX(vrsub_vx_d, 8) | ||
110 | |||
111 | void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc) | ||
112 | { | 59 | { |
113 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD) | 60 | bool update = false; |
114 | RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB) | 61 | @@ -XXX,XX +XXX,XX @@ static void riscv_aplic_realize(DeviceState *dev, Error **errp) |
115 | RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB) | 62 | uint32_t i; |
116 | RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB) | 63 | RISCVAPLICState *aplic = RISCV_APLIC(dev); |
117 | -GEN_VEXT_VX(vwaddu_vx_b) | 64 | |
118 | -GEN_VEXT_VX(vwaddu_vx_h) | 65 | - aplic->bitfield_words = (aplic->num_irqs + 31) >> 5; |
119 | -GEN_VEXT_VX(vwaddu_vx_w) | 66 | - aplic->sourcecfg = g_new0(uint32_t, aplic->num_irqs); |
120 | -GEN_VEXT_VX(vwsubu_vx_b) | 67 | - aplic->state = g_new0(uint32_t, aplic->num_irqs); |
121 | -GEN_VEXT_VX(vwsubu_vx_h) | 68 | - aplic->target = g_new0(uint32_t, aplic->num_irqs); |
122 | -GEN_VEXT_VX(vwsubu_vx_w) | 69 | - if (!aplic->msimode) { |
123 | -GEN_VEXT_VX(vwadd_vx_b) | 70 | - for (i = 0; i < aplic->num_irqs; i++) { |
124 | -GEN_VEXT_VX(vwadd_vx_h) | 71 | - aplic->target[i] = 1; |
125 | -GEN_VEXT_VX(vwadd_vx_w) | 72 | + if (!is_kvm_aia(aplic->msimode)) { |
126 | -GEN_VEXT_VX(vwsub_vx_b) | 73 | + aplic->bitfield_words = (aplic->num_irqs + 31) >> 5; |
127 | -GEN_VEXT_VX(vwsub_vx_h) | 74 | + aplic->sourcecfg = g_new0(uint32_t, aplic->num_irqs); |
128 | -GEN_VEXT_VX(vwsub_vx_w) | 75 | + aplic->state = g_new0(uint32_t, aplic->num_irqs); |
129 | -GEN_VEXT_VX(vwaddu_wx_b) | 76 | + aplic->target = g_new0(uint32_t, aplic->num_irqs); |
130 | -GEN_VEXT_VX(vwaddu_wx_h) | 77 | + if (!aplic->msimode) { |
131 | -GEN_VEXT_VX(vwaddu_wx_w) | 78 | + for (i = 0; i < aplic->num_irqs; i++) { |
132 | -GEN_VEXT_VX(vwsubu_wx_b) | 79 | + aplic->target[i] = 1; |
133 | -GEN_VEXT_VX(vwsubu_wx_h) | 80 | + } |
134 | -GEN_VEXT_VX(vwsubu_wx_w) | 81 | } |
135 | -GEN_VEXT_VX(vwadd_wx_b) | 82 | - } |
136 | -GEN_VEXT_VX(vwadd_wx_h) | 83 | - aplic->idelivery = g_new0(uint32_t, aplic->num_harts); |
137 | -GEN_VEXT_VX(vwadd_wx_w) | 84 | - aplic->iforce = g_new0(uint32_t, aplic->num_harts); |
138 | -GEN_VEXT_VX(vwsub_wx_b) | 85 | - aplic->ithreshold = g_new0(uint32_t, aplic->num_harts); |
139 | -GEN_VEXT_VX(vwsub_wx_h) | 86 | + aplic->idelivery = g_new0(uint32_t, aplic->num_harts); |
140 | -GEN_VEXT_VX(vwsub_wx_w) | 87 | + aplic->iforce = g_new0(uint32_t, aplic->num_harts); |
141 | +GEN_VEXT_VX(vwaddu_vx_b, 2) | 88 | + aplic->ithreshold = g_new0(uint32_t, aplic->num_harts); |
142 | +GEN_VEXT_VX(vwaddu_vx_h, 4) | 89 | |
143 | +GEN_VEXT_VX(vwaddu_vx_w, 8) | 90 | - memory_region_init_io(&aplic->mmio, OBJECT(dev), &riscv_aplic_ops, aplic, |
144 | +GEN_VEXT_VX(vwsubu_vx_b, 2) | 91 | - TYPE_RISCV_APLIC, aplic->aperture_size); |
145 | +GEN_VEXT_VX(vwsubu_vx_h, 4) | 92 | - sysbus_init_mmio(SYS_BUS_DEVICE(dev), &aplic->mmio); |
146 | +GEN_VEXT_VX(vwsubu_vx_w, 8) | 93 | + memory_region_init_io(&aplic->mmio, OBJECT(dev), &riscv_aplic_ops, |
147 | +GEN_VEXT_VX(vwadd_vx_b, 2) | 94 | + aplic, TYPE_RISCV_APLIC, aplic->aperture_size); |
148 | +GEN_VEXT_VX(vwadd_vx_h, 4) | 95 | + sysbus_init_mmio(SYS_BUS_DEVICE(dev), &aplic->mmio); |
149 | +GEN_VEXT_VX(vwadd_vx_w, 8) | 96 | + } |
150 | +GEN_VEXT_VX(vwsub_vx_b, 2) | 97 | |
151 | +GEN_VEXT_VX(vwsub_vx_h, 4) | 98 | /* |
152 | +GEN_VEXT_VX(vwsub_vx_w, 8) | 99 | * Only root APLICs have hardware IRQ lines. All non-root APLICs |
153 | +GEN_VEXT_VX(vwaddu_wx_b, 2) | 100 | * have IRQ lines delegated by their parent APLIC. |
154 | +GEN_VEXT_VX(vwaddu_wx_h, 4) | 101 | */ |
155 | +GEN_VEXT_VX(vwaddu_wx_w, 8) | 102 | if (!aplic->parent) { |
156 | +GEN_VEXT_VX(vwsubu_wx_b, 2) | 103 | - qdev_init_gpio_in(dev, riscv_aplic_request, aplic->num_irqs); |
157 | +GEN_VEXT_VX(vwsubu_wx_h, 4) | 104 | + if (is_kvm_aia(aplic->msimode)) { |
158 | +GEN_VEXT_VX(vwsubu_wx_w, 8) | 105 | + qdev_init_gpio_in(dev, riscv_kvm_aplic_request, aplic->num_irqs); |
159 | +GEN_VEXT_VX(vwadd_wx_b, 2) | 106 | + } else { |
160 | +GEN_VEXT_VX(vwadd_wx_h, 4) | 107 | + qdev_init_gpio_in(dev, riscv_aplic_request, aplic->num_irqs); |
161 | +GEN_VEXT_VX(vwadd_wx_w, 8) | 108 | + } |
162 | +GEN_VEXT_VX(vwsub_wx_b, 2) | 109 | } |
163 | +GEN_VEXT_VX(vwsub_wx_h, 4) | 110 | |
164 | +GEN_VEXT_VX(vwsub_wx_w, 8) | 111 | /* Create output IRQ lines for non-MSI mode */ |
165 | 112 | @@ -XXX,XX +XXX,XX @@ DeviceState *riscv_aplic_create(hwaddr addr, hwaddr size, | |
166 | /* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */ | 113 | qdev_prop_set_bit(dev, "mmode", mmode); |
167 | #define DO_VADC(N, M, C) (N + M + C) | 114 | |
168 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ | 115 | sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); |
169 | CPURISCVState *env, uint32_t desc) \ | 116 | - sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, addr); |
170 | { \ | 117 | + |
171 | uint32_t vl = env->vl; \ | 118 | + if (!is_kvm_aia(msimode)) { |
172 | + uint32_t esz = sizeof(ETYPE); \ | 119 | + sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, addr); |
173 | + uint32_t total_elems = \ | 120 | + } |
174 | + vext_get_total_elems(env, desc, esz); \ | 121 | |
175 | + uint32_t vta = vext_vta(desc); \ | 122 | if (parent) { |
176 | uint32_t i; \ | 123 | riscv_aplic_add_child(parent, dev); |
177 | \ | 124 | diff --git a/hw/intc/riscv_imsic.c b/hw/intc/riscv_imsic.c |
178 | for (i = env->vstart; i < vl; i++) { \ | ||
179 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ | ||
180 | *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry); \ | ||
181 | } \ | ||
182 | env->vstart = 0; \ | ||
183 | + /* set tail elements to 1s */ \ | ||
184 | + vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ | ||
185 | } | ||
186 | |||
187 | GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t, H1, DO_VADC) | ||
188 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ | ||
189 | CPURISCVState *env, uint32_t desc) \ | ||
190 | { \ | ||
191 | uint32_t vl = env->vl; \ | ||
192 | + uint32_t esz = sizeof(ETYPE); \ | ||
193 | + uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ | ||
194 | + uint32_t vta = vext_vta(desc); \ | ||
195 | uint32_t i; \ | ||
196 | \ | ||
197 | for (i = env->vstart; i < vl; i++) { \ | ||
198 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ | ||
199 | *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry);\ | ||
200 | } \ | ||
201 | env->vstart = 0; \ | ||
202 | + /* set tail elements to 1s */ \ | ||
203 | + vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ | ||
204 | } | ||
205 | |||
206 | GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t, H1, DO_VADC) | ||
207 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ | ||
208 | { \ | ||
209 | uint32_t vl = env->vl; \ | ||
210 | uint32_t vm = vext_vm(desc); \ | ||
211 | + uint32_t total_elems = env_archcpu(env)->cfg.vlen; \ | ||
212 | + uint32_t vta_all_1s = vext_vta_all_1s(desc); \ | ||
213 | uint32_t i; \ | ||
214 | \ | ||
215 | for (i = env->vstart; i < vl; i++) { \ | ||
216 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ | ||
217 | vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry)); \ | ||
218 | } \ | ||
219 | env->vstart = 0; \ | ||
220 | + /* mask destination register are always tail-agnostic */ \ | ||
221 | + /* set tail elements to 1s */ \ | ||
222 | + if (vta_all_1s) { \ | ||
223 | + for (; i < total_elems; i++) { \ | ||
224 | + vext_set_elem_mask(vd, i, 1); \ | ||
225 | + } \ | ||
226 | + } \ | ||
227 | } | ||
228 | |||
229 | GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t, H1, DO_MADC) | ||
230 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ | ||
231 | { \ | ||
232 | uint32_t vl = env->vl; \ | ||
233 | uint32_t vm = vext_vm(desc); \ | ||
234 | + uint32_t total_elems = env_archcpu(env)->cfg.vlen; \ | ||
235 | + uint32_t vta_all_1s = vext_vta_all_1s(desc); \ | ||
236 | uint32_t i; \ | ||
237 | \ | ||
238 | for (i = env->vstart; i < vl; i++) { \ | ||
239 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ | ||
240 | DO_OP(s2, (ETYPE)(target_long)s1, carry)); \ | ||
241 | } \ | ||
242 | env->vstart = 0; \ | ||
243 | + /* mask destination register are always tail-agnostic */ \ | ||
244 | + /* set tail elements to 1s */ \ | ||
245 | + if (vta_all_1s) { \ | ||
246 | + for (; i < total_elems; i++) { \ | ||
247 | + vext_set_elem_mask(vd, i, 1); \ | ||
248 | + } \ | ||
249 | + } \ | ||
250 | } | ||
251 | |||
252 | GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t, H1, DO_MADC) | ||
253 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR) | ||
254 | RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR) | ||
255 | RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR) | ||
256 | RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR) | ||
257 | -GEN_VEXT_VX(vand_vx_b) | ||
258 | -GEN_VEXT_VX(vand_vx_h) | ||
259 | -GEN_VEXT_VX(vand_vx_w) | ||
260 | -GEN_VEXT_VX(vand_vx_d) | ||
261 | -GEN_VEXT_VX(vor_vx_b) | ||
262 | -GEN_VEXT_VX(vor_vx_h) | ||
263 | -GEN_VEXT_VX(vor_vx_w) | ||
264 | -GEN_VEXT_VX(vor_vx_d) | ||
265 | -GEN_VEXT_VX(vxor_vx_b) | ||
266 | -GEN_VEXT_VX(vxor_vx_h) | ||
267 | -GEN_VEXT_VX(vxor_vx_w) | ||
268 | -GEN_VEXT_VX(vxor_vx_d) | ||
269 | +GEN_VEXT_VX(vand_vx_b, 1) | ||
270 | +GEN_VEXT_VX(vand_vx_h, 2) | ||
271 | +GEN_VEXT_VX(vand_vx_w, 4) | ||
272 | +GEN_VEXT_VX(vand_vx_d, 8) | ||
273 | +GEN_VEXT_VX(vor_vx_b, 1) | ||
274 | +GEN_VEXT_VX(vor_vx_h, 2) | ||
275 | +GEN_VEXT_VX(vor_vx_w, 4) | ||
276 | +GEN_VEXT_VX(vor_vx_d, 8) | ||
277 | +GEN_VEXT_VX(vxor_vx_b, 1) | ||
278 | +GEN_VEXT_VX(vxor_vx_h, 2) | ||
279 | +GEN_VEXT_VX(vxor_vx_w, 4) | ||
280 | +GEN_VEXT_VX(vxor_vx_d, 8) | ||
281 | |||
282 | /* Vector Single-Width Bit Shift Instructions */ | ||
283 | #define DO_SLL(N, M) (N << (M)) | ||
284 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX) | ||
285 | RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX) | ||
286 | RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX) | ||
287 | RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX) | ||
288 | -GEN_VEXT_VX(vminu_vx_b) | ||
289 | -GEN_VEXT_VX(vminu_vx_h) | ||
290 | -GEN_VEXT_VX(vminu_vx_w) | ||
291 | -GEN_VEXT_VX(vminu_vx_d) | ||
292 | -GEN_VEXT_VX(vmin_vx_b) | ||
293 | -GEN_VEXT_VX(vmin_vx_h) | ||
294 | -GEN_VEXT_VX(vmin_vx_w) | ||
295 | -GEN_VEXT_VX(vmin_vx_d) | ||
296 | -GEN_VEXT_VX(vmaxu_vx_b) | ||
297 | -GEN_VEXT_VX(vmaxu_vx_h) | ||
298 | -GEN_VEXT_VX(vmaxu_vx_w) | ||
299 | -GEN_VEXT_VX(vmaxu_vx_d) | ||
300 | -GEN_VEXT_VX(vmax_vx_b) | ||
301 | -GEN_VEXT_VX(vmax_vx_h) | ||
302 | -GEN_VEXT_VX(vmax_vx_w) | ||
303 | -GEN_VEXT_VX(vmax_vx_d) | ||
304 | +GEN_VEXT_VX(vminu_vx_b, 1) | ||
305 | +GEN_VEXT_VX(vminu_vx_h, 2) | ||
306 | +GEN_VEXT_VX(vminu_vx_w, 4) | ||
307 | +GEN_VEXT_VX(vminu_vx_d, 8) | ||
308 | +GEN_VEXT_VX(vmin_vx_b, 1) | ||
309 | +GEN_VEXT_VX(vmin_vx_h, 2) | ||
310 | +GEN_VEXT_VX(vmin_vx_w, 4) | ||
311 | +GEN_VEXT_VX(vmin_vx_d, 8) | ||
312 | +GEN_VEXT_VX(vmaxu_vx_b, 1) | ||
313 | +GEN_VEXT_VX(vmaxu_vx_h, 2) | ||
314 | +GEN_VEXT_VX(vmaxu_vx_w, 4) | ||
315 | +GEN_VEXT_VX(vmaxu_vx_d, 8) | ||
316 | +GEN_VEXT_VX(vmax_vx_b, 1) | ||
317 | +GEN_VEXT_VX(vmax_vx_h, 2) | ||
318 | +GEN_VEXT_VX(vmax_vx_w, 4) | ||
319 | +GEN_VEXT_VX(vmax_vx_d, 8) | ||
320 | |||
321 | /* Vector Single-Width Integer Multiply Instructions */ | ||
322 | #define DO_MUL(N, M) (N * M) | ||
323 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b) | ||
324 | RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h) | ||
325 | RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w) | ||
326 | RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d) | ||
327 | -GEN_VEXT_VX(vmul_vx_b) | ||
328 | -GEN_VEXT_VX(vmul_vx_h) | ||
329 | -GEN_VEXT_VX(vmul_vx_w) | ||
330 | -GEN_VEXT_VX(vmul_vx_d) | ||
331 | -GEN_VEXT_VX(vmulh_vx_b) | ||
332 | -GEN_VEXT_VX(vmulh_vx_h) | ||
333 | -GEN_VEXT_VX(vmulh_vx_w) | ||
334 | -GEN_VEXT_VX(vmulh_vx_d) | ||
335 | -GEN_VEXT_VX(vmulhu_vx_b) | ||
336 | -GEN_VEXT_VX(vmulhu_vx_h) | ||
337 | -GEN_VEXT_VX(vmulhu_vx_w) | ||
338 | -GEN_VEXT_VX(vmulhu_vx_d) | ||
339 | -GEN_VEXT_VX(vmulhsu_vx_b) | ||
340 | -GEN_VEXT_VX(vmulhsu_vx_h) | ||
341 | -GEN_VEXT_VX(vmulhsu_vx_w) | ||
342 | -GEN_VEXT_VX(vmulhsu_vx_d) | ||
343 | +GEN_VEXT_VX(vmul_vx_b, 1) | ||
344 | +GEN_VEXT_VX(vmul_vx_h, 2) | ||
345 | +GEN_VEXT_VX(vmul_vx_w, 4) | ||
346 | +GEN_VEXT_VX(vmul_vx_d, 8) | ||
347 | +GEN_VEXT_VX(vmulh_vx_b, 1) | ||
348 | +GEN_VEXT_VX(vmulh_vx_h, 2) | ||
349 | +GEN_VEXT_VX(vmulh_vx_w, 4) | ||
350 | +GEN_VEXT_VX(vmulh_vx_d, 8) | ||
351 | +GEN_VEXT_VX(vmulhu_vx_b, 1) | ||
352 | +GEN_VEXT_VX(vmulhu_vx_h, 2) | ||
353 | +GEN_VEXT_VX(vmulhu_vx_w, 4) | ||
354 | +GEN_VEXT_VX(vmulhu_vx_d, 8) | ||
355 | +GEN_VEXT_VX(vmulhsu_vx_b, 1) | ||
356 | +GEN_VEXT_VX(vmulhsu_vx_h, 2) | ||
357 | +GEN_VEXT_VX(vmulhsu_vx_w, 4) | ||
358 | +GEN_VEXT_VX(vmulhsu_vx_d, 8) | ||
359 | |||
360 | /* Vector Integer Divide Instructions */ | ||
361 | #define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M) | ||
362 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM) | ||
363 | RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM) | ||
364 | RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM) | ||
365 | RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM) | ||
366 | -GEN_VEXT_VX(vdivu_vx_b) | ||
367 | -GEN_VEXT_VX(vdivu_vx_h) | ||
368 | -GEN_VEXT_VX(vdivu_vx_w) | ||
369 | -GEN_VEXT_VX(vdivu_vx_d) | ||
370 | -GEN_VEXT_VX(vdiv_vx_b) | ||
371 | -GEN_VEXT_VX(vdiv_vx_h) | ||
372 | -GEN_VEXT_VX(vdiv_vx_w) | ||
373 | -GEN_VEXT_VX(vdiv_vx_d) | ||
374 | -GEN_VEXT_VX(vremu_vx_b) | ||
375 | -GEN_VEXT_VX(vremu_vx_h) | ||
376 | -GEN_VEXT_VX(vremu_vx_w) | ||
377 | -GEN_VEXT_VX(vremu_vx_d) | ||
378 | -GEN_VEXT_VX(vrem_vx_b) | ||
379 | -GEN_VEXT_VX(vrem_vx_h) | ||
380 | -GEN_VEXT_VX(vrem_vx_w) | ||
381 | -GEN_VEXT_VX(vrem_vx_d) | ||
382 | +GEN_VEXT_VX(vdivu_vx_b, 1) | ||
383 | +GEN_VEXT_VX(vdivu_vx_h, 2) | ||
384 | +GEN_VEXT_VX(vdivu_vx_w, 4) | ||
385 | +GEN_VEXT_VX(vdivu_vx_d, 8) | ||
386 | +GEN_VEXT_VX(vdiv_vx_b, 1) | ||
387 | +GEN_VEXT_VX(vdiv_vx_h, 2) | ||
388 | +GEN_VEXT_VX(vdiv_vx_w, 4) | ||
389 | +GEN_VEXT_VX(vdiv_vx_d, 8) | ||
390 | +GEN_VEXT_VX(vremu_vx_b, 1) | ||
391 | +GEN_VEXT_VX(vremu_vx_h, 2) | ||
392 | +GEN_VEXT_VX(vremu_vx_w, 4) | ||
393 | +GEN_VEXT_VX(vremu_vx_d, 8) | ||
394 | +GEN_VEXT_VX(vrem_vx_b, 1) | ||
395 | +GEN_VEXT_VX(vrem_vx_h, 2) | ||
396 | +GEN_VEXT_VX(vrem_vx_w, 4) | ||
397 | +GEN_VEXT_VX(vrem_vx_d, 8) | ||
398 | |||
399 | /* Vector Widening Integer Multiply Instructions */ | ||
400 | RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL) | ||
401 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL) | ||
402 | RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL) | ||
403 | RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL) | ||
404 | RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL) | ||
405 | -GEN_VEXT_VX(vwmul_vx_b) | ||
406 | -GEN_VEXT_VX(vwmul_vx_h) | ||
407 | -GEN_VEXT_VX(vwmul_vx_w) | ||
408 | -GEN_VEXT_VX(vwmulu_vx_b) | ||
409 | -GEN_VEXT_VX(vwmulu_vx_h) | ||
410 | -GEN_VEXT_VX(vwmulu_vx_w) | ||
411 | -GEN_VEXT_VX(vwmulsu_vx_b) | ||
412 | -GEN_VEXT_VX(vwmulsu_vx_h) | ||
413 | -GEN_VEXT_VX(vwmulsu_vx_w) | ||
414 | +GEN_VEXT_VX(vwmul_vx_b, 2) | ||
415 | +GEN_VEXT_VX(vwmul_vx_h, 4) | ||
416 | +GEN_VEXT_VX(vwmul_vx_w, 8) | ||
417 | +GEN_VEXT_VX(vwmulu_vx_b, 2) | ||
418 | +GEN_VEXT_VX(vwmulu_vx_h, 4) | ||
419 | +GEN_VEXT_VX(vwmulu_vx_w, 8) | ||
420 | +GEN_VEXT_VX(vwmulsu_vx_b, 2) | ||
421 | +GEN_VEXT_VX(vwmulsu_vx_h, 4) | ||
422 | +GEN_VEXT_VX(vwmulsu_vx_w, 8) | ||
423 | |||
424 | /* Vector Single-Width Integer Multiply-Add Instructions */ | ||
425 | #define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ | ||
426 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB) | ||
427 | RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB) | ||
428 | RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB) | ||
429 | RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB) | ||
430 | -GEN_VEXT_VX(vmacc_vx_b) | ||
431 | -GEN_VEXT_VX(vmacc_vx_h) | ||
432 | -GEN_VEXT_VX(vmacc_vx_w) | ||
433 | -GEN_VEXT_VX(vmacc_vx_d) | ||
434 | -GEN_VEXT_VX(vnmsac_vx_b) | ||
435 | -GEN_VEXT_VX(vnmsac_vx_h) | ||
436 | -GEN_VEXT_VX(vnmsac_vx_w) | ||
437 | -GEN_VEXT_VX(vnmsac_vx_d) | ||
438 | -GEN_VEXT_VX(vmadd_vx_b) | ||
439 | -GEN_VEXT_VX(vmadd_vx_h) | ||
440 | -GEN_VEXT_VX(vmadd_vx_w) | ||
441 | -GEN_VEXT_VX(vmadd_vx_d) | ||
442 | -GEN_VEXT_VX(vnmsub_vx_b) | ||
443 | -GEN_VEXT_VX(vnmsub_vx_h) | ||
444 | -GEN_VEXT_VX(vnmsub_vx_w) | ||
445 | -GEN_VEXT_VX(vnmsub_vx_d) | ||
446 | +GEN_VEXT_VX(vmacc_vx_b, 1) | ||
447 | +GEN_VEXT_VX(vmacc_vx_h, 2) | ||
448 | +GEN_VEXT_VX(vmacc_vx_w, 4) | ||
449 | +GEN_VEXT_VX(vmacc_vx_d, 8) | ||
450 | +GEN_VEXT_VX(vnmsac_vx_b, 1) | ||
451 | +GEN_VEXT_VX(vnmsac_vx_h, 2) | ||
452 | +GEN_VEXT_VX(vnmsac_vx_w, 4) | ||
453 | +GEN_VEXT_VX(vnmsac_vx_d, 8) | ||
454 | +GEN_VEXT_VX(vmadd_vx_b, 1) | ||
455 | +GEN_VEXT_VX(vmadd_vx_h, 2) | ||
456 | +GEN_VEXT_VX(vmadd_vx_w, 4) | ||
457 | +GEN_VEXT_VX(vmadd_vx_d, 8) | ||
458 | +GEN_VEXT_VX(vnmsub_vx_b, 1) | ||
459 | +GEN_VEXT_VX(vnmsub_vx_h, 2) | ||
460 | +GEN_VEXT_VX(vnmsub_vx_w, 4) | ||
461 | +GEN_VEXT_VX(vnmsub_vx_d, 8) | ||
462 | |||
463 | /* Vector Widening Integer Multiply-Add Instructions */ | ||
464 | RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC) | ||
465 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC) | ||
466 | RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC) | ||
467 | RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC) | ||
468 | RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC) | ||
469 | -GEN_VEXT_VX(vwmaccu_vx_b) | ||
470 | -GEN_VEXT_VX(vwmaccu_vx_h) | ||
471 | -GEN_VEXT_VX(vwmaccu_vx_w) | ||
472 | -GEN_VEXT_VX(vwmacc_vx_b) | ||
473 | -GEN_VEXT_VX(vwmacc_vx_h) | ||
474 | -GEN_VEXT_VX(vwmacc_vx_w) | ||
475 | -GEN_VEXT_VX(vwmaccsu_vx_b) | ||
476 | -GEN_VEXT_VX(vwmaccsu_vx_h) | ||
477 | -GEN_VEXT_VX(vwmaccsu_vx_w) | ||
478 | -GEN_VEXT_VX(vwmaccus_vx_b) | ||
479 | -GEN_VEXT_VX(vwmaccus_vx_h) | ||
480 | -GEN_VEXT_VX(vwmaccus_vx_w) | ||
481 | +GEN_VEXT_VX(vwmaccu_vx_b, 2) | ||
482 | +GEN_VEXT_VX(vwmaccu_vx_h, 4) | ||
483 | +GEN_VEXT_VX(vwmaccu_vx_w, 8) | ||
484 | +GEN_VEXT_VX(vwmacc_vx_b, 2) | ||
485 | +GEN_VEXT_VX(vwmacc_vx_h, 4) | ||
486 | +GEN_VEXT_VX(vwmacc_vx_w, 8) | ||
487 | +GEN_VEXT_VX(vwmaccsu_vx_b, 2) | ||
488 | +GEN_VEXT_VX(vwmaccsu_vx_h, 4) | ||
489 | +GEN_VEXT_VX(vwmaccsu_vx_w, 8) | ||
490 | +GEN_VEXT_VX(vwmaccus_vx_b, 2) | ||
491 | +GEN_VEXT_VX(vwmaccus_vx_h, 4) | ||
492 | +GEN_VEXT_VX(vwmaccus_vx_w, 8) | ||
493 | |||
494 | /* Vector Integer Merge and Move Instructions */ | ||
495 | #define GEN_VEXT_VMV_VV(NAME, ETYPE, H) \ | ||
496 | diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc | ||
497 | index XXXXXXX..XXXXXXX 100644 | 125 | index XXXXXXX..XXXXXXX 100644 |
498 | --- a/target/riscv/insn_trans/trans_rvv.c.inc | 126 | --- a/hw/intc/riscv_imsic.c |
499 | +++ b/target/riscv/insn_trans/trans_rvv.c.inc | 127 | +++ b/hw/intc/riscv_imsic.c |
500 | @@ -XXX,XX +XXX,XX @@ static bool opivx_trans(uint32_t vd, uint32_t rs1, uint32_t vs2, uint32_t vm, | 128 | @@ -XXX,XX +XXX,XX @@ |
501 | 129 | #include "target/riscv/cpu.h" | |
502 | data = FIELD_DP32(data, VDATA, VM, vm); | 130 | #include "target/riscv/cpu_bits.h" |
503 | data = FIELD_DP32(data, VDATA, LMUL, s->lmul); | 131 | #include "sysemu/sysemu.h" |
504 | + data = FIELD_DP32(data, VDATA, VTA, s->vta); | 132 | +#include "sysemu/kvm.h" |
505 | + data = FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s); | 133 | #include "migration/vmstate.h" |
506 | desc = tcg_constant_i32(simd_desc(s->cfg_ptr->vlen / 8, | 134 | |
507 | s->cfg_ptr->vlen / 8, data)); | 135 | #define IMSIC_MMIO_PAGE_LE 0x00 |
508 | 136 | @@ -XXX,XX +XXX,XX @@ static void riscv_imsic_write(void *opaque, hwaddr addr, uint64_t value, | |
509 | @@ -XXX,XX +XXX,XX @@ do_opivx_gvec(DisasContext *s, arg_rmrr *a, GVecGen2sFn *gvec_fn, | 137 | goto err; |
510 | return false; | ||
511 | } | 138 | } |
512 | 139 | ||
513 | - if (a->vm && s->vl_eq_vlmax) { | 140 | +#if defined(CONFIG_KVM) |
514 | + if (a->vm && s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) { | 141 | + if (kvm_irqchip_in_kernel()) { |
515 | TCGv_i64 src1 = tcg_temp_new_i64(); | 142 | + struct kvm_msi msi; |
516 | 143 | + | |
517 | tcg_gen_ext_tl_i64(src1, get_gpr(s, a->rs1, EXT_SIGN)); | 144 | + msi.address_lo = extract64(imsic->mmio.addr + addr, 0, 32); |
518 | @@ -XXX,XX +XXX,XX @@ static bool opivi_trans(uint32_t vd, uint32_t imm, uint32_t vs2, uint32_t vm, | 145 | + msi.address_hi = extract64(imsic->mmio.addr + addr, 32, 32); |
519 | 146 | + msi.data = le32_to_cpu(value); | |
520 | data = FIELD_DP32(data, VDATA, VM, vm); | 147 | + |
521 | data = FIELD_DP32(data, VDATA, LMUL, s->lmul); | 148 | + kvm_vm_ioctl(kvm_state, KVM_SIGNAL_MSI, &msi); |
522 | + data = FIELD_DP32(data, VDATA, VTA, s->vta); | 149 | + |
523 | + data = FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s); | 150 | + return; |
524 | desc = tcg_constant_i32(simd_desc(s->cfg_ptr->vlen / 8, | 151 | + } |
525 | s->cfg_ptr->vlen / 8, data)); | 152 | +#endif |
526 | 153 | + | |
527 | @@ -XXX,XX +XXX,XX @@ do_opivi_gvec(DisasContext *s, arg_rmrr *a, GVecGen2iFn *gvec_fn, | 154 | /* Writes only supported for MSI little-endian registers */ |
528 | return false; | 155 | page = addr >> IMSIC_MMIO_PAGE_SHIFT; |
529 | } | 156 | if ((addr & (IMSIC_MMIO_PAGE_SZ - 1)) == IMSIC_MMIO_PAGE_LE) { |
530 | 157 | @@ -XXX,XX +XXX,XX @@ static void riscv_imsic_realize(DeviceState *dev, Error **errp) | |
531 | - if (a->vm && s->vl_eq_vlmax) { | 158 | CPUState *cpu = cpu_by_arch_id(imsic->hartid); |
532 | + if (a->vm && s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) { | 159 | CPURISCVState *env = cpu ? cpu->env_ptr : NULL; |
533 | gvec_fn(s->sew, vreg_ofs(s, a->rd), vreg_ofs(s, a->rs2), | 160 | |
534 | extract_imm(s, a->rs1, imm_mode), MAXSZ(s), MAXSZ(s)); | 161 | - imsic->num_eistate = imsic->num_pages * imsic->num_irqs; |
535 | mark_vs_dirty(s); | 162 | - imsic->eidelivery = g_new0(uint32_t, imsic->num_pages); |
536 | @@ -XXX,XX +XXX,XX @@ static bool do_opivv_widen(DisasContext *s, arg_rmrr *a, | 163 | - imsic->eithreshold = g_new0(uint32_t, imsic->num_pages); |
537 | 164 | - imsic->eistate = g_new0(uint32_t, imsic->num_eistate); | |
538 | data = FIELD_DP32(data, VDATA, VM, a->vm); | 165 | + if (!kvm_irqchip_in_kernel()) { |
539 | data = FIELD_DP32(data, VDATA, LMUL, s->lmul); | 166 | + imsic->num_eistate = imsic->num_pages * imsic->num_irqs; |
540 | + data = FIELD_DP32(data, VDATA, VTA, s->vta); | 167 | + imsic->eidelivery = g_new0(uint32_t, imsic->num_pages); |
541 | tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), | 168 | + imsic->eithreshold = g_new0(uint32_t, imsic->num_pages); |
542 | vreg_ofs(s, a->rs1), | 169 | + imsic->eistate = g_new0(uint32_t, imsic->num_eistate); |
543 | vreg_ofs(s, a->rs2), | 170 | + } |
544 | @@ -XXX,XX +XXX,XX @@ static bool do_opiwv_widen(DisasContext *s, arg_rmrr *a, | 171 | |
545 | 172 | memory_region_init_io(&imsic->mmio, OBJECT(dev), &riscv_imsic_ops, | |
546 | data = FIELD_DP32(data, VDATA, VM, a->vm); | 173 | imsic, TYPE_RISCV_IMSIC, |
547 | data = FIELD_DP32(data, VDATA, LMUL, s->lmul); | ||
548 | + data = FIELD_DP32(data, VDATA, VTA, s->vta); | ||
549 | tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), | ||
550 | vreg_ofs(s, a->rs1), | ||
551 | vreg_ofs(s, a->rs2), | ||
552 | @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ | ||
553 | \ | ||
554 | data = FIELD_DP32(data, VDATA, VM, a->vm); \ | ||
555 | data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ | ||
556 | + data = FIELD_DP32(data, VDATA, VTA, s->vta); \ | ||
557 | + data = \ | ||
558 | + FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s);\ | ||
559 | tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ | ||
560 | vreg_ofs(s, a->rs1), \ | ||
561 | vreg_ofs(s, a->rs2), cpu_env, \ | ||
562 | -- | 174 | -- |
563 | 2.36.1 | 175 | 2.41.0 | diff view generated by jsdifflib |
1 | From: Atish Patra <atishp@rivosinc.com> | 1 | From: Yong-Xuan Wang <yongxuan.wang@sifive.com> |
---|---|---|---|
2 | 2 | ||
3 | fw_cfg DT node is generated after the create_fdt without any check | 3 | Select KVM AIA when the host kernel has in-kernel AIA chip support. |
4 | if the DT is being loaded from the commandline. This results in | 4 | Since KVM AIA only has one APLIC instance, we map the QEMU APLIC |
5 | FDT_ERR_EXISTS error if dtb is loaded from the commandline. | 5 | devices to KVM APLIC. |
6 | 6 | ||
7 | Generate fw_cfg node only if the DT is not loaded from the commandline. | 7 | Signed-off-by: Yong-Xuan Wang <yongxuan.wang@sifive.com> |
8 | 8 | Reviewed-by: Jim Shu <jim.shu@sifive.com> | |
9 | Signed-off-by: Atish Patra <atishp@rivosinc.com> | 9 | Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> |
10 | Reviewed-by: Alistair Francis <alistair.francis@wdc.com> | 10 | Reviewed-by: Andrew Jones <ajones@ventanamicro.com> |
11 | Message-Id: <20220526203500.847165-1-atishp@rivosinc.com> | 11 | Message-ID: <20230727102439.22554-6-yongxuan.wang@sifive.com> |
12 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | 12 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> |
13 | --- | 13 | --- |
14 | hw/riscv/virt.c | 28 ++++++++++++++++++---------- | 14 | hw/riscv/virt.c | 94 +++++++++++++++++++++++++++++++++---------------- |
15 | 1 file changed, 18 insertions(+), 10 deletions(-) | 15 | 1 file changed, 63 insertions(+), 31 deletions(-) |
16 | 16 | ||
17 | diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c | 17 | diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c |
18 | index XXXXXXX..XXXXXXX 100644 | 18 | index XXXXXXX..XXXXXXX 100644 |
19 | --- a/hw/riscv/virt.c | 19 | --- a/hw/riscv/virt.c |
20 | +++ b/hw/riscv/virt.c | 20 | +++ b/hw/riscv/virt.c |
21 | @@ -XXX,XX +XXX,XX @@ static void create_fdt_flash(RISCVVirtState *s, const MemMapEntry *memmap) | 21 | @@ -XXX,XX +XXX,XX @@ |
22 | g_free(name); | 22 | #include "hw/riscv/virt.h" |
23 | } | 23 | #include "hw/riscv/boot.h" |
24 | 24 | #include "hw/riscv/numa.h" | |
25 | +static void create_fdt_fw_cfg(RISCVVirtState *s, const MemMapEntry *memmap) | 25 | +#include "kvm_riscv.h" |
26 | #include "hw/intc/riscv_aclint.h" | ||
27 | #include "hw/intc/riscv_aplic.h" | ||
28 | #include "hw/intc/riscv_imsic.h" | ||
29 | @@ -XXX,XX +XXX,XX @@ | ||
30 | #error "Can't accommodate all IMSIC groups in address space" | ||
31 | #endif | ||
32 | |||
33 | +/* KVM AIA only supports APLIC MSI. APLIC Wired is always emulated by QEMU. */ | ||
34 | +static bool virt_use_kvm_aia(RISCVVirtState *s) | ||
26 | +{ | 35 | +{ |
27 | + char *nodename; | 36 | + return kvm_irqchip_in_kernel() && s->aia_type == VIRT_AIA_TYPE_APLIC_IMSIC; |
28 | + MachineState *mc = MACHINE(s); | ||
29 | + hwaddr base = memmap[VIRT_FW_CFG].base; | ||
30 | + hwaddr size = memmap[VIRT_FW_CFG].size; | ||
31 | + | ||
32 | + nodename = g_strdup_printf("/fw-cfg@%" PRIx64, base); | ||
33 | + qemu_fdt_add_subnode(mc->fdt, nodename); | ||
34 | + qemu_fdt_setprop_string(mc->fdt, nodename, | ||
35 | + "compatible", "qemu,fw-cfg-mmio"); | ||
36 | + qemu_fdt_setprop_sized_cells(mc->fdt, nodename, "reg", | ||
37 | + 2, base, 2, size); | ||
38 | + qemu_fdt_setprop(mc->fdt, nodename, "dma-coherent", NULL, 0); | ||
39 | + g_free(nodename); | ||
40 | +} | 37 | +} |
41 | + | 38 | + |
42 | static void create_fdt(RISCVVirtState *s, const MemMapEntry *memmap, | 39 | static const MemMapEntry virt_memmap[] = { |
43 | uint64_t mem_size, const char *cmdline, bool is_32_bit) | 40 | [VIRT_DEBUG] = { 0x0, 0x100 }, |
41 | [VIRT_MROM] = { 0x1000, 0xf000 }, | ||
42 | @@ -XXX,XX +XXX,XX @@ static void create_fdt_one_aplic(RISCVVirtState *s, int socket, | ||
43 | uint32_t *intc_phandles, | ||
44 | uint32_t aplic_phandle, | ||
45 | uint32_t aplic_child_phandle, | ||
46 | - bool m_mode) | ||
47 | + bool m_mode, int num_harts) | ||
44 | { | 48 | { |
45 | @@ -XXX,XX +XXX,XX @@ static void create_fdt(RISCVVirtState *s, const MemMapEntry *memmap, | 49 | int cpu; |
46 | create_fdt_rtc(s, memmap, irq_mmio_phandle); | 50 | char *aplic_name; |
47 | 51 | uint32_t *aplic_cells; | |
48 | create_fdt_flash(s, memmap); | 52 | MachineState *ms = MACHINE(s); |
49 | + create_fdt_fw_cfg(s, memmap); | 53 | |
50 | 54 | - aplic_cells = g_new0(uint32_t, s->soc[socket].num_harts * 2); | |
51 | update_bootargs: | 55 | + aplic_cells = g_new0(uint32_t, num_harts * 2); |
52 | if (cmdline && *cmdline) { | 56 | |
53 | @@ -XXX,XX +XXX,XX @@ static inline DeviceState *gpex_pcie_init(MemoryRegion *sys_mem, | 57 | - for (cpu = 0; cpu < s->soc[socket].num_harts; cpu++) { |
54 | static FWCfgState *create_fw_cfg(const MachineState *mc) | 58 | + for (cpu = 0; cpu < num_harts; cpu++) { |
59 | aplic_cells[cpu * 2 + 0] = cpu_to_be32(intc_phandles[cpu]); | ||
60 | aplic_cells[cpu * 2 + 1] = cpu_to_be32(m_mode ? IRQ_M_EXT : IRQ_S_EXT); | ||
61 | } | ||
62 | @@ -XXX,XX +XXX,XX @@ static void create_fdt_one_aplic(RISCVVirtState *s, int socket, | ||
63 | |||
64 | if (s->aia_type == VIRT_AIA_TYPE_APLIC) { | ||
65 | qemu_fdt_setprop(ms->fdt, aplic_name, "interrupts-extended", | ||
66 | - aplic_cells, | ||
67 | - s->soc[socket].num_harts * sizeof(uint32_t) * 2); | ||
68 | + aplic_cells, num_harts * sizeof(uint32_t) * 2); | ||
69 | } else { | ||
70 | qemu_fdt_setprop_cell(ms->fdt, aplic_name, "msi-parent", msi_phandle); | ||
71 | } | ||
72 | @@ -XXX,XX +XXX,XX @@ static void create_fdt_socket_aplic(RISCVVirtState *s, | ||
73 | uint32_t msi_s_phandle, | ||
74 | uint32_t *phandle, | ||
75 | uint32_t *intc_phandles, | ||
76 | - uint32_t *aplic_phandles) | ||
77 | + uint32_t *aplic_phandles, | ||
78 | + int num_harts) | ||
55 | { | 79 | { |
56 | hwaddr base = virt_memmap[VIRT_FW_CFG].base; | 80 | char *aplic_name; |
57 | - hwaddr size = virt_memmap[VIRT_FW_CFG].size; | 81 | unsigned long aplic_addr; |
58 | FWCfgState *fw_cfg; | 82 | @@ -XXX,XX +XXX,XX @@ static void create_fdt_socket_aplic(RISCVVirtState *s, |
59 | - char *nodename; | 83 | create_fdt_one_aplic(s, socket, aplic_addr, memmap[VIRT_APLIC_M].size, |
60 | 84 | msi_m_phandle, intc_phandles, | |
61 | fw_cfg = fw_cfg_init_mem_wide(base + 8, base, 8, base + 16, | 85 | aplic_m_phandle, aplic_s_phandle, |
62 | &address_space_memory); | 86 | - true); |
63 | fw_cfg_add_i16(fw_cfg, FW_CFG_NB_CPUS, (uint16_t)mc->smp.cpus); | 87 | + true, num_harts); |
64 | 88 | } | |
65 | - nodename = g_strdup_printf("/fw-cfg@%" PRIx64, base); | 89 | |
66 | - qemu_fdt_add_subnode(mc->fdt, nodename); | 90 | /* S-level APLIC node */ |
67 | - qemu_fdt_setprop_string(mc->fdt, nodename, | 91 | @@ -XXX,XX +XXX,XX @@ static void create_fdt_socket_aplic(RISCVVirtState *s, |
68 | - "compatible", "qemu,fw-cfg-mmio"); | 92 | create_fdt_one_aplic(s, socket, aplic_addr, memmap[VIRT_APLIC_S].size, |
69 | - qemu_fdt_setprop_sized_cells(mc->fdt, nodename, "reg", | 93 | msi_s_phandle, intc_phandles, |
70 | - 2, base, 2, size); | 94 | aplic_s_phandle, 0, |
71 | - qemu_fdt_setprop(mc->fdt, nodename, "dma-coherent", NULL, 0); | 95 | - false); |
72 | - g_free(nodename); | 96 | + false, num_harts); |
73 | return fw_cfg; | 97 | |
74 | } | 98 | aplic_name = g_strdup_printf("/soc/aplic@%lx", aplic_addr); |
75 | 99 | ||
100 | @@ -XXX,XX +XXX,XX @@ static void create_fdt_sockets(RISCVVirtState *s, const MemMapEntry *memmap, | ||
101 | *msi_pcie_phandle = msi_s_phandle; | ||
102 | } | ||
103 | |||
104 | - phandle_pos = ms->smp.cpus; | ||
105 | - for (socket = (socket_count - 1); socket >= 0; socket--) { | ||
106 | - phandle_pos -= s->soc[socket].num_harts; | ||
107 | - | ||
108 | - if (s->aia_type == VIRT_AIA_TYPE_NONE) { | ||
109 | - create_fdt_socket_plic(s, memmap, socket, phandle, | ||
110 | - &intc_phandles[phandle_pos], xplic_phandles); | ||
111 | - } else { | ||
112 | - create_fdt_socket_aplic(s, memmap, socket, | ||
113 | - msi_m_phandle, msi_s_phandle, phandle, | ||
114 | - &intc_phandles[phandle_pos], xplic_phandles); | ||
115 | + /* KVM AIA only has one APLIC instance */ | ||
116 | + if (virt_use_kvm_aia(s)) { | ||
117 | + create_fdt_socket_aplic(s, memmap, 0, | ||
118 | + msi_m_phandle, msi_s_phandle, phandle, | ||
119 | + &intc_phandles[0], xplic_phandles, | ||
120 | + ms->smp.cpus); | ||
121 | + } else { | ||
122 | + phandle_pos = ms->smp.cpus; | ||
123 | + for (socket = (socket_count - 1); socket >= 0; socket--) { | ||
124 | + phandle_pos -= s->soc[socket].num_harts; | ||
125 | + | ||
126 | + if (s->aia_type == VIRT_AIA_TYPE_NONE) { | ||
127 | + create_fdt_socket_plic(s, memmap, socket, phandle, | ||
128 | + &intc_phandles[phandle_pos], | ||
129 | + xplic_phandles); | ||
130 | + } else { | ||
131 | + create_fdt_socket_aplic(s, memmap, socket, | ||
132 | + msi_m_phandle, msi_s_phandle, phandle, | ||
133 | + &intc_phandles[phandle_pos], | ||
134 | + xplic_phandles, | ||
135 | + s->soc[socket].num_harts); | ||
136 | + } | ||
137 | } | ||
138 | } | ||
139 | |||
140 | g_free(intc_phandles); | ||
141 | |||
142 | - for (socket = 0; socket < socket_count; socket++) { | ||
143 | - if (socket == 0) { | ||
144 | - *irq_mmio_phandle = xplic_phandles[socket]; | ||
145 | - *irq_virtio_phandle = xplic_phandles[socket]; | ||
146 | - *irq_pcie_phandle = xplic_phandles[socket]; | ||
147 | - } | ||
148 | - if (socket == 1) { | ||
149 | - *irq_virtio_phandle = xplic_phandles[socket]; | ||
150 | - *irq_pcie_phandle = xplic_phandles[socket]; | ||
151 | - } | ||
152 | - if (socket == 2) { | ||
153 | - *irq_pcie_phandle = xplic_phandles[socket]; | ||
154 | + if (virt_use_kvm_aia(s)) { | ||
155 | + *irq_mmio_phandle = xplic_phandles[0]; | ||
156 | + *irq_virtio_phandle = xplic_phandles[0]; | ||
157 | + *irq_pcie_phandle = xplic_phandles[0]; | ||
158 | + } else { | ||
159 | + for (socket = 0; socket < socket_count; socket++) { | ||
160 | + if (socket == 0) { | ||
161 | + *irq_mmio_phandle = xplic_phandles[socket]; | ||
162 | + *irq_virtio_phandle = xplic_phandles[socket]; | ||
163 | + *irq_pcie_phandle = xplic_phandles[socket]; | ||
164 | + } | ||
165 | + if (socket == 1) { | ||
166 | + *irq_virtio_phandle = xplic_phandles[socket]; | ||
167 | + *irq_pcie_phandle = xplic_phandles[socket]; | ||
168 | + } | ||
169 | + if (socket == 2) { | ||
170 | + *irq_pcie_phandle = xplic_phandles[socket]; | ||
171 | + } | ||
172 | } | ||
173 | } | ||
174 | |||
175 | @@ -XXX,XX +XXX,XX @@ static void virt_machine_init(MachineState *machine) | ||
176 | } | ||
177 | } | ||
178 | |||
179 | + if (virt_use_kvm_aia(s)) { | ||
180 | + kvm_riscv_aia_create(machine, IMSIC_MMIO_GROUP_MIN_SHIFT, | ||
181 | + VIRT_IRQCHIP_NUM_SOURCES, VIRT_IRQCHIP_NUM_MSIS, | ||
182 | + memmap[VIRT_APLIC_S].base, | ||
183 | + memmap[VIRT_IMSIC_S].base, | ||
184 | + s->aia_guests); | ||
185 | + } | ||
186 | + | ||
187 | if (riscv_is_32bit(&s->soc[0])) { | ||
188 | #if HOST_LONG_BITS == 64 | ||
189 | /* limit RAM size in a 32-bit system */ | ||
76 | -- | 190 | -- |
77 | 2.36.1 | 191 | 2.41.0 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Conor Dooley <conor.dooley@microchip.com> | ||
1 | 2 | ||
3 | On a dtb dumped from the virt machine, dt-validate complains: | ||
4 | soc: pmu: {'riscv,event-to-mhpmcounters': [[1, 1, 524281], [2, 2, 524284], [65561, 65561, 524280], [65563, 65563, 524280], [65569, 65569, 524280]], 'compatible': ['riscv,pmu']} should not be valid under {'type': 'object'} | ||
5 | from schema $id: http://devicetree.org/schemas/simple-bus.yaml# | ||
6 | That's pretty cryptic, but running the dtb back through dtc produces | ||
7 | something a lot more reasonable: | ||
8 | Warning (simple_bus_reg): /soc/pmu: missing or empty reg/ranges property | ||
9 | |||
10 | Moving the riscv,pmu node out of the soc bus solves the problem. | ||
11 | |||
12 | Signed-off-by: Conor Dooley <conor.dooley@microchip.com> | ||
13 | Acked-by: Alistair Francis <alistair.francis@wdc.com> | ||
14 | Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> | ||
15 | Message-ID: <20230727-groom-decline-2c57ce42841c@spud> | ||
16 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | ||
17 | --- | ||
18 | hw/riscv/virt.c | 2 +- | ||
19 | 1 file changed, 1 insertion(+), 1 deletion(-) | ||
20 | |||
21 | diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c | ||
22 | index XXXXXXX..XXXXXXX 100644 | ||
23 | --- a/hw/riscv/virt.c | ||
24 | +++ b/hw/riscv/virt.c | ||
25 | @@ -XXX,XX +XXX,XX @@ static void create_fdt_pmu(RISCVVirtState *s) | ||
26 | MachineState *ms = MACHINE(s); | ||
27 | RISCVCPU hart = s->soc[0].harts[0]; | ||
28 | |||
29 | - pmu_name = g_strdup_printf("/soc/pmu"); | ||
30 | + pmu_name = g_strdup_printf("/pmu"); | ||
31 | qemu_fdt_add_subnode(ms->fdt, pmu_name); | ||
32 | qemu_fdt_setprop_string(ms->fdt, pmu_name, "compatible", "riscv,pmu"); | ||
33 | riscv_pmu_generate_fdt_node(ms->fdt, hart.cfg.pmu_num, pmu_name); | ||
34 | -- | ||
35 | 2.41.0 | diff view generated by jsdifflib |
1 | From: Andrew Bresticker <abrestic@rivosinc.com> | 1 | From: Weiwei Li <liweiwei@iscas.ac.cn> |
---|---|---|---|
2 | 2 | ||
3 | Whether or not VSEIP is pending isn't reflected in env->mip and must | 3 | The Svadu specification updated the name of the *envcfg bit from |
4 | instead be determined from hstatus.vgein and hgeip. As a result a | 4 | HADE to ADUE. |
5 | CPU in WFI won't wake on a VSEIP, which violates the WFI behavior as | ||
6 | specified in the privileged ISA. Just use riscv_cpu_all_pending() | ||
7 | instead, which already accounts for VSEIP. | ||
8 | 5 | ||
9 | Signed-off-by: Andrew Bresticker <abrestic@rivosinc.com> | 6 | Signed-off-by: Weiwei Li <liweiwei@iscas.ac.cn> |
10 | Reviewed-by: Alistair Francis <alistair.francis@wdc.com> | 7 | Signed-off-by: Junqiang Wang <wangjunqiang@iscas.ac.cn> |
11 | Message-Id: <20220531210544.181322-1-abrestic@rivosinc.com> | 8 | Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> |
9 | Message-ID: <20230816141916.66898-1-liweiwei@iscas.ac.cn> | ||
12 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | 10 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> |
13 | --- | 11 | --- |
14 | target/riscv/cpu.h | 1 + | 12 | target/riscv/cpu_bits.h | 8 ++++---- |
15 | target/riscv/cpu.c | 2 +- | 13 | target/riscv/cpu.c | 4 ++-- |
16 | target/riscv/cpu_helper.c | 2 +- | 14 | target/riscv/cpu_helper.c | 6 +++--- |
17 | 3 files changed, 3 insertions(+), 2 deletions(-) | 15 | target/riscv/csr.c | 12 ++++++------ |
16 | 4 files changed, 15 insertions(+), 15 deletions(-) | ||
18 | 17 | ||
19 | diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h | 18 | diff --git a/target/riscv/cpu_bits.h b/target/riscv/cpu_bits.h |
20 | index XXXXXXX..XXXXXXX 100644 | 19 | index XXXXXXX..XXXXXXX 100644 |
21 | --- a/target/riscv/cpu.h | 20 | --- a/target/riscv/cpu_bits.h |
22 | +++ b/target/riscv/cpu.h | 21 | +++ b/target/riscv/cpu_bits.h |
23 | @@ -XXX,XX +XXX,XX @@ int riscv_cpu_gdb_read_register(CPUState *cpu, GByteArray *buf, int reg); | 22 | @@ -XXX,XX +XXX,XX @@ typedef enum RISCVException { |
24 | int riscv_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg); | 23 | #define MENVCFG_CBIE (3UL << 4) |
25 | int riscv_cpu_hviprio_index2irq(int index, int *out_irq, int *out_rdzero); | 24 | #define MENVCFG_CBCFE BIT(6) |
26 | uint8_t riscv_cpu_default_priority(int irq); | 25 | #define MENVCFG_CBZE BIT(7) |
27 | +uint64_t riscv_cpu_all_pending(CPURISCVState *env); | 26 | -#define MENVCFG_HADE (1ULL << 61) |
28 | int riscv_cpu_mirq_pending(CPURISCVState *env); | 27 | +#define MENVCFG_ADUE (1ULL << 61) |
29 | int riscv_cpu_sirq_pending(CPURISCVState *env); | 28 | #define MENVCFG_PBMTE (1ULL << 62) |
30 | int riscv_cpu_vsirq_pending(CPURISCVState *env); | 29 | #define MENVCFG_STCE (1ULL << 63) |
30 | |||
31 | /* For RV32 */ | ||
32 | -#define MENVCFGH_HADE BIT(29) | ||
33 | +#define MENVCFGH_ADUE BIT(29) | ||
34 | #define MENVCFGH_PBMTE BIT(30) | ||
35 | #define MENVCFGH_STCE BIT(31) | ||
36 | |||
37 | @@ -XXX,XX +XXX,XX @@ typedef enum RISCVException { | ||
38 | #define HENVCFG_CBIE MENVCFG_CBIE | ||
39 | #define HENVCFG_CBCFE MENVCFG_CBCFE | ||
40 | #define HENVCFG_CBZE MENVCFG_CBZE | ||
41 | -#define HENVCFG_HADE MENVCFG_HADE | ||
42 | +#define HENVCFG_ADUE MENVCFG_ADUE | ||
43 | #define HENVCFG_PBMTE MENVCFG_PBMTE | ||
44 | #define HENVCFG_STCE MENVCFG_STCE | ||
45 | |||
46 | /* For RV32 */ | ||
47 | -#define HENVCFGH_HADE MENVCFGH_HADE | ||
48 | +#define HENVCFGH_ADUE MENVCFGH_ADUE | ||
49 | #define HENVCFGH_PBMTE MENVCFGH_PBMTE | ||
50 | #define HENVCFGH_STCE MENVCFGH_STCE | ||
51 | |||
31 | diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c | 52 | diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c |
32 | index XXXXXXX..XXXXXXX 100644 | 53 | index XXXXXXX..XXXXXXX 100644 |
33 | --- a/target/riscv/cpu.c | 54 | --- a/target/riscv/cpu.c |
34 | +++ b/target/riscv/cpu.c | 55 | +++ b/target/riscv/cpu.c |
35 | @@ -XXX,XX +XXX,XX @@ static bool riscv_cpu_has_work(CPUState *cs) | 56 | @@ -XXX,XX +XXX,XX @@ static void riscv_cpu_reset_hold(Object *obj) |
36 | * Definition of the WFI instruction requires it to ignore the privilege | 57 | env->two_stage_lookup = false; |
37 | * mode and delegation registers, but respect individual enables | 58 | |
38 | */ | 59 | env->menvcfg = (cpu->cfg.ext_svpbmt ? MENVCFG_PBMTE : 0) | |
39 | - return (env->mip & env->mie) != 0; | 60 | - (cpu->cfg.ext_svadu ? MENVCFG_HADE : 0); |
40 | + return riscv_cpu_all_pending(env) != 0; | 61 | + (cpu->cfg.ext_svadu ? MENVCFG_ADUE : 0); |
41 | #else | 62 | env->henvcfg = (cpu->cfg.ext_svpbmt ? HENVCFG_PBMTE : 0) | |
42 | return true; | 63 | - (cpu->cfg.ext_svadu ? HENVCFG_HADE : 0); |
43 | #endif | 64 | + (cpu->cfg.ext_svadu ? HENVCFG_ADUE : 0); |
65 | |||
66 | /* Initialized default priorities of local interrupts. */ | ||
67 | for (i = 0; i < ARRAY_SIZE(env->miprio); i++) { | ||
44 | diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c | 68 | diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c |
45 | index XXXXXXX..XXXXXXX 100644 | 69 | index XXXXXXX..XXXXXXX 100644 |
46 | --- a/target/riscv/cpu_helper.c | 70 | --- a/target/riscv/cpu_helper.c |
47 | +++ b/target/riscv/cpu_helper.c | 71 | +++ b/target/riscv/cpu_helper.c |
48 | @@ -XXX,XX +XXX,XX @@ static int riscv_cpu_pending_to_irq(CPURISCVState *env, | 72 | @@ -XXX,XX +XXX,XX @@ static int get_physical_address(CPURISCVState *env, hwaddr *physical, |
49 | return best_irq; | 73 | } |
74 | |||
75 | bool pbmte = env->menvcfg & MENVCFG_PBMTE; | ||
76 | - bool hade = env->menvcfg & MENVCFG_HADE; | ||
77 | + bool adue = env->menvcfg & MENVCFG_ADUE; | ||
78 | |||
79 | if (first_stage && two_stage && env->virt_enabled) { | ||
80 | pbmte = pbmte && (env->henvcfg & HENVCFG_PBMTE); | ||
81 | - hade = hade && (env->henvcfg & HENVCFG_HADE); | ||
82 | + adue = adue && (env->henvcfg & HENVCFG_ADUE); | ||
83 | } | ||
84 | |||
85 | int ptshift = (levels - 1) * ptidxbits; | ||
86 | @@ -XXX,XX +XXX,XX @@ restart: | ||
87 | |||
88 | /* Page table updates need to be atomic with MTTCG enabled */ | ||
89 | if (updated_pte != pte && !is_debug) { | ||
90 | - if (!hade) { | ||
91 | + if (!adue) { | ||
92 | return TRANSLATE_FAIL; | ||
93 | } | ||
94 | |||
95 | diff --git a/target/riscv/csr.c b/target/riscv/csr.c | ||
96 | index XXXXXXX..XXXXXXX 100644 | ||
97 | --- a/target/riscv/csr.c | ||
98 | +++ b/target/riscv/csr.c | ||
99 | @@ -XXX,XX +XXX,XX @@ static RISCVException write_menvcfg(CPURISCVState *env, int csrno, | ||
100 | if (riscv_cpu_mxl(env) == MXL_RV64) { | ||
101 | mask |= (cfg->ext_svpbmt ? MENVCFG_PBMTE : 0) | | ||
102 | (cfg->ext_sstc ? MENVCFG_STCE : 0) | | ||
103 | - (cfg->ext_svadu ? MENVCFG_HADE : 0); | ||
104 | + (cfg->ext_svadu ? MENVCFG_ADUE : 0); | ||
105 | } | ||
106 | env->menvcfg = (env->menvcfg & ~mask) | (val & mask); | ||
107 | |||
108 | @@ -XXX,XX +XXX,XX @@ static RISCVException write_menvcfgh(CPURISCVState *env, int csrno, | ||
109 | const RISCVCPUConfig *cfg = riscv_cpu_cfg(env); | ||
110 | uint64_t mask = (cfg->ext_svpbmt ? MENVCFG_PBMTE : 0) | | ||
111 | (cfg->ext_sstc ? MENVCFG_STCE : 0) | | ||
112 | - (cfg->ext_svadu ? MENVCFG_HADE : 0); | ||
113 | + (cfg->ext_svadu ? MENVCFG_ADUE : 0); | ||
114 | uint64_t valh = (uint64_t)val << 32; | ||
115 | |||
116 | env->menvcfg = (env->menvcfg & ~mask) | (valh & mask); | ||
117 | @@ -XXX,XX +XXX,XX @@ static RISCVException read_henvcfg(CPURISCVState *env, int csrno, | ||
118 | * henvcfg.stce is read_only 0 when menvcfg.stce = 0 | ||
119 | * henvcfg.hade is read_only 0 when menvcfg.hade = 0 | ||
120 | */ | ||
121 | - *val = env->henvcfg & (~(HENVCFG_PBMTE | HENVCFG_STCE | HENVCFG_HADE) | | ||
122 | + *val = env->henvcfg & (~(HENVCFG_PBMTE | HENVCFG_STCE | HENVCFG_ADUE) | | ||
123 | env->menvcfg); | ||
124 | return RISCV_EXCP_NONE; | ||
50 | } | 125 | } |
51 | 126 | @@ -XXX,XX +XXX,XX @@ static RISCVException write_henvcfg(CPURISCVState *env, int csrno, | |
52 | -static uint64_t riscv_cpu_all_pending(CPURISCVState *env) | 127 | } |
53 | +uint64_t riscv_cpu_all_pending(CPURISCVState *env) | 128 | |
129 | if (riscv_cpu_mxl(env) == MXL_RV64) { | ||
130 | - mask |= env->menvcfg & (HENVCFG_PBMTE | HENVCFG_STCE | HENVCFG_HADE); | ||
131 | + mask |= env->menvcfg & (HENVCFG_PBMTE | HENVCFG_STCE | HENVCFG_ADUE); | ||
132 | } | ||
133 | |||
134 | env->henvcfg = (env->henvcfg & ~mask) | (val & mask); | ||
135 | @@ -XXX,XX +XXX,XX @@ static RISCVException read_henvcfgh(CPURISCVState *env, int csrno, | ||
136 | return ret; | ||
137 | } | ||
138 | |||
139 | - *val = (env->henvcfg & (~(HENVCFG_PBMTE | HENVCFG_STCE | HENVCFG_HADE) | | ||
140 | + *val = (env->henvcfg & (~(HENVCFG_PBMTE | HENVCFG_STCE | HENVCFG_ADUE) | | ||
141 | env->menvcfg)) >> 32; | ||
142 | return RISCV_EXCP_NONE; | ||
143 | } | ||
144 | @@ -XXX,XX +XXX,XX @@ static RISCVException write_henvcfgh(CPURISCVState *env, int csrno, | ||
145 | target_ulong val) | ||
54 | { | 146 | { |
55 | uint32_t gein = get_field(env->hstatus, HSTATUS_VGEIN); | 147 | uint64_t mask = env->menvcfg & (HENVCFG_PBMTE | HENVCFG_STCE | |
56 | uint64_t vsgein = (env->hgeip & (1ULL << gein)) ? MIP_VSEIP : 0; | 148 | - HENVCFG_HADE); |
149 | + HENVCFG_ADUE); | ||
150 | uint64_t valh = (uint64_t)val << 32; | ||
151 | RISCVException ret; | ||
152 | |||
57 | -- | 153 | -- |
58 | 2.36.1 | 154 | 2.41.0 | diff view generated by jsdifflib |
1 | From: Alistair Francis <alistair.francis@wdc.com> | 1 | From: Daniel Henrique Barboza <dbarboza@ventanamicro.com> |
---|---|---|---|
2 | 2 | ||
3 | There are currently two types of RISC-V CPUs: | 3 | In the same emulated RISC-V host, the 'host' KVM CPU takes 4 times |
4 | - Generic CPUs (base or any) that allow complete custimisation | 4 | longer to boot than the 'rv64' KVM CPU. |
5 | - "Named" CPUs that match existing hardware | ||
6 | 5 | ||
7 | Users can use the base CPUs to custimise the extensions that they want, for | 6 | The reason is an unintended behavior of riscv_cpu_satp_mode_finalize() |
8 | example -cpu rv64,v=true. | 7 | when satp_mode.supported = 0, i.e. when cpu_init() does not set |
8 | satp_mode_max_supported(). satp_mode_max_from_map(map) does: | ||
9 | 9 | ||
10 | We originally exposed these as part of the named CPUs as well, but that was | 10 | 31 - __builtin_clz(map) |
11 | by accident. | ||
12 | 11 | ||
13 | Exposing the CPU properties to named CPUs means that we accidently | 12 | This means that, if satp_mode.supported = 0, satp_mode_supported_max |
14 | enable extensions that don't exist on the CPUs by default. For example | 13 | wil be '31 - 32'. But this is C, so satp_mode_supported_max will gladly |
15 | the SiFive E CPU currently support the zba extension, which is a bug. | 14 | set it to UINT_MAX (4294967295). After that, if the user didn't set a |
15 | satp_mode, set_satp_mode_default_map(cpu) will make | ||
16 | 16 | ||
17 | This patch instead only exposes the CPU extensions to the generic CPUs. | 17 | cfg.satp_mode.map = cfg.satp_mode.supported |
18 | 18 | ||
19 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | 19 | So satp_mode.map = 0. And then satp_mode_map_max will be set to |
20 | Reviewed-by: Bin Meng <bmeng.cn@gmail.com> | 20 | satp_mode_max_from_map(cpu->cfg.satp_mode.map), i.e. also UINT_MAX. The |
21 | Message-Id: <20220608061437.314434-1-alistair.francis@opensource.wdc.com> | 21 | guard "satp_mode_map_max > satp_mode_supported_max" doesn't protect us |
22 | here since both are UINT_MAX. | ||
23 | |||
24 | And finally we have 2 loops: | ||
25 | |||
26 | for (int i = satp_mode_map_max - 1; i >= 0; --i) { | ||
27 | |||
28 | Which are, in fact, 2 loops from UINT_MAX -1 to -1. This is where the | ||
29 | extra delay when booting the 'host' CPU is coming from. | ||
30 | |||
31 | Commit 43d1de32f8 already set a precedence for satp_mode.supported = 0 | ||
32 | in a different manner. We're doing the same here. If supported == 0, | ||
33 | interpret as 'the CPU wants the OS to handle satp mode alone' and skip | ||
34 | satp_mode_finalize(). | ||
35 | |||
36 | We'll also put a guard in satp_mode_max_from_map() to assert out if map | ||
37 | is 0 since the function is not ready to deal with it. | ||
38 | |||
39 | Cc: Alexandre Ghiti <alexghiti@rivosinc.com> | ||
40 | Fixes: 6f23aaeb9b ("riscv: Allow user to set the satp mode") | ||
41 | Signed-off-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> | ||
42 | Reviewed-by: Andrew Jones <ajones@ventanamicro.com> | ||
43 | Message-ID: <20230817152903.694926-1-dbarboza@ventanamicro.com> | ||
22 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | 44 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> |
23 | --- | 45 | --- |
24 | target/riscv/cpu.c | 57 +++++++++++++++++++++++++++++++++++++--------- | 46 | target/riscv/cpu.c | 23 ++++++++++++++++++++--- |
25 | 1 file changed, 46 insertions(+), 11 deletions(-) | 47 | 1 file changed, 20 insertions(+), 3 deletions(-) |
26 | 48 | ||
27 | diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c | 49 | diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c |
28 | index XXXXXXX..XXXXXXX 100644 | 50 | index XXXXXXX..XXXXXXX 100644 |
29 | --- a/target/riscv/cpu.c | 51 | --- a/target/riscv/cpu.c |
30 | +++ b/target/riscv/cpu.c | 52 | +++ b/target/riscv/cpu.c |
31 | @@ -XXX,XX +XXX,XX @@ static const char * const riscv_intr_names[] = { | 53 | @@ -XXX,XX +XXX,XX @@ static uint8_t satp_mode_from_str(const char *satp_mode_str) |
32 | "reserved" | 54 | |
33 | }; | 55 | uint8_t satp_mode_max_from_map(uint32_t map) |
34 | 56 | { | |
35 | +static void register_cpu_props(DeviceState *dev); | 57 | + /* |
58 | + * 'map = 0' will make us return (31 - 32), which C will | ||
59 | + * happily overflow to UINT_MAX. There's no good result to | ||
60 | + * return if 'map = 0' (e.g. returning 0 will be ambiguous | ||
61 | + * with the result for 'map = 1'). | ||
62 | + * | ||
63 | + * Assert out if map = 0. Callers will have to deal with | ||
64 | + * it outside of this function. | ||
65 | + */ | ||
66 | + g_assert(map > 0); | ||
36 | + | 67 | + |
37 | const char *riscv_cpu_get_trap_name(target_ulong cause, bool async) | 68 | /* map here has at least one bit set, so no problem with clz */ |
69 | return 31 - __builtin_clz(map); | ||
70 | } | ||
71 | @@ -XXX,XX +XXX,XX @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp) | ||
72 | static void riscv_cpu_satp_mode_finalize(RISCVCPU *cpu, Error **errp) | ||
38 | { | 73 | { |
39 | if (async) { | 74 | bool rv32 = riscv_cpu_mxl(&cpu->env) == MXL_RV32; |
40 | @@ -XXX,XX +XXX,XX @@ static void riscv_any_cpu_init(Object *obj) | 75 | - uint8_t satp_mode_map_max; |
41 | set_misa(env, MXL_RV64, RVI | RVM | RVA | RVF | RVD | RVC | RVU); | 76 | - uint8_t satp_mode_supported_max = |
42 | #endif | 77 | - satp_mode_max_from_map(cpu->cfg.satp_mode.supported); |
43 | set_priv_version(env, PRIV_VERSION_1_12_0); | 78 | + uint8_t satp_mode_map_max, satp_mode_supported_max; |
44 | + register_cpu_props(DEVICE(obj)); | ||
45 | } | ||
46 | |||
47 | #if defined(TARGET_RISCV64) | ||
48 | @@ -XXX,XX +XXX,XX @@ static void rv64_base_cpu_init(Object *obj) | ||
49 | CPURISCVState *env = &RISCV_CPU(obj)->env; | ||
50 | /* We set this in the realise function */ | ||
51 | set_misa(env, MXL_RV64, 0); | ||
52 | + register_cpu_props(DEVICE(obj)); | ||
53 | } | ||
54 | |||
55 | static void rv64_sifive_u_cpu_init(Object *obj) | ||
56 | @@ -XXX,XX +XXX,XX @@ static void rv64_sifive_u_cpu_init(Object *obj) | ||
57 | static void rv64_sifive_e_cpu_init(Object *obj) | ||
58 | { | ||
59 | CPURISCVState *env = &RISCV_CPU(obj)->env; | ||
60 | + RISCVCPU *cpu = RISCV_CPU(obj); | ||
61 | + | 79 | + |
62 | set_misa(env, MXL_RV64, RVI | RVM | RVA | RVC | RVU); | 80 | + /* The CPU wants the OS to decide which satp mode to use */ |
63 | set_priv_version(env, PRIV_VERSION_1_10_0); | 81 | + if (cpu->cfg.satp_mode.supported == 0) { |
64 | - qdev_prop_set_bit(DEVICE(obj), "mmu", false); | 82 | + return; |
65 | + cpu->cfg.mmu = false; | 83 | + } |
66 | } | ||
67 | |||
68 | static void rv128_base_cpu_init(Object *obj) | ||
69 | @@ -XXX,XX +XXX,XX @@ static void rv128_base_cpu_init(Object *obj) | ||
70 | CPURISCVState *env = &RISCV_CPU(obj)->env; | ||
71 | /* We set this in the realise function */ | ||
72 | set_misa(env, MXL_RV128, 0); | ||
73 | + register_cpu_props(DEVICE(obj)); | ||
74 | } | ||
75 | #else | ||
76 | static void rv32_base_cpu_init(Object *obj) | ||
77 | @@ -XXX,XX +XXX,XX @@ static void rv32_base_cpu_init(Object *obj) | ||
78 | CPURISCVState *env = &RISCV_CPU(obj)->env; | ||
79 | /* We set this in the realise function */ | ||
80 | set_misa(env, MXL_RV32, 0); | ||
81 | + register_cpu_props(DEVICE(obj)); | ||
82 | } | ||
83 | |||
84 | static void rv32_sifive_u_cpu_init(Object *obj) | ||
85 | @@ -XXX,XX +XXX,XX @@ static void rv32_sifive_u_cpu_init(Object *obj) | ||
86 | static void rv32_sifive_e_cpu_init(Object *obj) | ||
87 | { | ||
88 | CPURISCVState *env = &RISCV_CPU(obj)->env; | ||
89 | + RISCVCPU *cpu = RISCV_CPU(obj); | ||
90 | + | 84 | + |
91 | set_misa(env, MXL_RV32, RVI | RVM | RVA | RVC | RVU); | 85 | + satp_mode_supported_max = |
92 | set_priv_version(env, PRIV_VERSION_1_10_0); | 86 | + satp_mode_max_from_map(cpu->cfg.satp_mode.supported); |
93 | - qdev_prop_set_bit(DEVICE(obj), "mmu", false); | 87 | |
94 | + cpu->cfg.mmu = false; | 88 | if (cpu->cfg.satp_mode.map == 0) { |
95 | } | 89 | if (cpu->cfg.satp_mode.init == 0) { |
96 | |||
97 | static void rv32_ibex_cpu_init(Object *obj) | ||
98 | { | ||
99 | CPURISCVState *env = &RISCV_CPU(obj)->env; | ||
100 | + RISCVCPU *cpu = RISCV_CPU(obj); | ||
101 | + | ||
102 | set_misa(env, MXL_RV32, RVI | RVM | RVC | RVU); | ||
103 | set_priv_version(env, PRIV_VERSION_1_10_0); | ||
104 | - qdev_prop_set_bit(DEVICE(obj), "mmu", false); | ||
105 | - qdev_prop_set_bit(DEVICE(obj), "x-epmp", true); | ||
106 | + cpu->cfg.mmu = false; | ||
107 | + cpu->cfg.epmp = true; | ||
108 | } | ||
109 | |||
110 | static void rv32_imafcu_nommu_cpu_init(Object *obj) | ||
111 | { | ||
112 | CPURISCVState *env = &RISCV_CPU(obj)->env; | ||
113 | + RISCVCPU *cpu = RISCV_CPU(obj); | ||
114 | + | ||
115 | set_misa(env, MXL_RV32, RVI | RVM | RVA | RVF | RVC | RVU); | ||
116 | set_priv_version(env, PRIV_VERSION_1_10_0); | ||
117 | set_resetvec(env, DEFAULT_RSTVEC); | ||
118 | - qdev_prop_set_bit(DEVICE(obj), "mmu", false); | ||
119 | + cpu->cfg.mmu = false; | ||
120 | } | ||
121 | #endif | ||
122 | |||
123 | @@ -XXX,XX +XXX,XX @@ static void riscv_host_cpu_init(Object *obj) | ||
124 | #elif defined(TARGET_RISCV64) | ||
125 | set_misa(env, MXL_RV64, 0); | ||
126 | #endif | ||
127 | + register_cpu_props(DEVICE(obj)); | ||
128 | } | ||
129 | #endif | ||
130 | |||
131 | @@ -XXX,XX +XXX,XX @@ static void riscv_cpu_init(Object *obj) | ||
132 | { | ||
133 | RISCVCPU *cpu = RISCV_CPU(obj); | ||
134 | |||
135 | + cpu->cfg.ext_counters = true; | ||
136 | + cpu->cfg.ext_ifencei = true; | ||
137 | + cpu->cfg.ext_icsr = true; | ||
138 | + cpu->cfg.mmu = true; | ||
139 | + cpu->cfg.pmp = true; | ||
140 | + | ||
141 | cpu_set_cpustate_pointers(cpu); | ||
142 | |||
143 | #ifndef CONFIG_USER_ONLY | ||
144 | @@ -XXX,XX +XXX,XX @@ static void riscv_cpu_init(Object *obj) | ||
145 | #endif /* CONFIG_USER_ONLY */ | ||
146 | } | ||
147 | |||
148 | -static Property riscv_cpu_properties[] = { | ||
149 | +static Property riscv_cpu_extensions[] = { | ||
150 | /* Defaults for standard extensions */ | ||
151 | DEFINE_PROP_BOOL("i", RISCVCPU, cfg.ext_i, true), | ||
152 | DEFINE_PROP_BOOL("e", RISCVCPU, cfg.ext_e, false), | ||
153 | @@ -XXX,XX +XXX,XX @@ static Property riscv_cpu_properties[] = { | ||
154 | DEFINE_PROP_BOOL("Zve64f", RISCVCPU, cfg.ext_zve64f, false), | ||
155 | DEFINE_PROP_BOOL("mmu", RISCVCPU, cfg.mmu, true), | ||
156 | DEFINE_PROP_BOOL("pmp", RISCVCPU, cfg.pmp, true), | ||
157 | - DEFINE_PROP_BOOL("debug", RISCVCPU, cfg.debug, true), | ||
158 | |||
159 | DEFINE_PROP_STRING("priv_spec", RISCVCPU, cfg.priv_spec), | ||
160 | DEFINE_PROP_STRING("vext_spec", RISCVCPU, cfg.vext_spec), | ||
161 | DEFINE_PROP_UINT16("vlen", RISCVCPU, cfg.vlen, 128), | ||
162 | DEFINE_PROP_UINT16("elen", RISCVCPU, cfg.elen, 64), | ||
163 | |||
164 | - DEFINE_PROP_UINT32("mvendorid", RISCVCPU, cfg.mvendorid, 0), | ||
165 | - DEFINE_PROP_UINT64("marchid", RISCVCPU, cfg.marchid, RISCV_CPU_MARCHID), | ||
166 | - DEFINE_PROP_UINT64("mimpid", RISCVCPU, cfg.mimpid, RISCV_CPU_MIMPID), | ||
167 | - | ||
168 | DEFINE_PROP_BOOL("svinval", RISCVCPU, cfg.ext_svinval, false), | ||
169 | DEFINE_PROP_BOOL("svnapot", RISCVCPU, cfg.ext_svnapot, false), | ||
170 | DEFINE_PROP_BOOL("svpbmt", RISCVCPU, cfg.ext_svpbmt, false), | ||
171 | @@ -XXX,XX +XXX,XX @@ static Property riscv_cpu_properties[] = { | ||
172 | DEFINE_PROP_BOOL("x-epmp", RISCVCPU, cfg.epmp, false), | ||
173 | DEFINE_PROP_BOOL("x-aia", RISCVCPU, cfg.aia, false), | ||
174 | |||
175 | + DEFINE_PROP_END_OF_LIST(), | ||
176 | +}; | ||
177 | + | ||
178 | +static void register_cpu_props(DeviceState *dev) | ||
179 | +{ | ||
180 | + Property *prop; | ||
181 | + | ||
182 | + for (prop = riscv_cpu_extensions; prop && prop->name; prop++) { | ||
183 | + qdev_property_add_static(dev, prop); | ||
184 | + } | ||
185 | +} | ||
186 | + | ||
187 | +static Property riscv_cpu_properties[] = { | ||
188 | + DEFINE_PROP_BOOL("debug", RISCVCPU, cfg.debug, true), | ||
189 | + | ||
190 | + DEFINE_PROP_UINT32("mvendorid", RISCVCPU, cfg.mvendorid, 0), | ||
191 | + DEFINE_PROP_UINT64("marchid", RISCVCPU, cfg.marchid, RISCV_CPU_MARCHID), | ||
192 | + DEFINE_PROP_UINT64("mimpid", RISCVCPU, cfg.mimpid, RISCV_CPU_MIMPID), | ||
193 | + | ||
194 | DEFINE_PROP_UINT64("resetvec", RISCVCPU, cfg.resetvec, DEFAULT_RSTVEC), | ||
195 | |||
196 | DEFINE_PROP_BOOL("short-isa-string", RISCVCPU, cfg.short_isa_string, false), | ||
197 | -- | 90 | -- |
198 | 2.36.1 | 91 | 2.41.0 | diff view generated by jsdifflib |
1 | From: Weiwei Li <liweiwei@iscas.ac.cn> | 1 | From: Vineet Gupta <vineetg@rivosinc.com> |
---|---|---|---|
2 | 2 | ||
3 | Add support for the zmmul extension v0.1. This extension includes all | 3 | zicond is now codegen supported in both llvm and gcc. |
4 | multiplication operations from the M extension but not the divide ops. | ||
5 | 4 | ||
6 | Signed-off-by: Weiwei Li <liweiwei@iscas.ac.cn> | 5 | This change allows seamless enabling/testing of zicond in downstream |
7 | Signed-off-by: Junqiang Wang <wangjunqiang@iscas.ac.cn> | 6 | projects. e.g. currently riscv-gnu-toolchain parses elf attributes |
8 | Reviewed-by: Víctor Colombo <victor.colombo@eldorado.org.br> | 7 | to create a cmdline for qemu but fails short of enabling it because of |
8 | the "x-" prefix. | ||
9 | |||
10 | Signed-off-by: Vineet Gupta <vineetg@rivosinc.com> | ||
11 | Message-ID: <20230808181715.436395-1-vineetg@rivosinc.com> | ||
9 | Reviewed-by: Alistair Francis <alistair.francis@wdc.com> | 12 | Reviewed-by: Alistair Francis <alistair.francis@wdc.com> |
10 | Message-Id: <20220531030732.3850-1-liweiwei@iscas.ac.cn> | ||
11 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | 13 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> |
12 | --- | 14 | --- |
13 | target/riscv/cpu.h | 1 + | 15 | target/riscv/cpu.c | 2 +- |
14 | target/riscv/cpu.c | 7 +++++++ | 16 | 1 file changed, 1 insertion(+), 1 deletion(-) |
15 | target/riscv/insn_trans/trans_rvm.c.inc | 18 ++++++++++++------ | ||
16 | 3 files changed, 20 insertions(+), 6 deletions(-) | ||
17 | 17 | ||
18 | diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h | ||
19 | index XXXXXXX..XXXXXXX 100644 | ||
20 | --- a/target/riscv/cpu.h | ||
21 | +++ b/target/riscv/cpu.h | ||
22 | @@ -XXX,XX +XXX,XX @@ struct RISCVCPUConfig { | ||
23 | bool ext_zhinxmin; | ||
24 | bool ext_zve32f; | ||
25 | bool ext_zve64f; | ||
26 | + bool ext_zmmul; | ||
27 | |||
28 | uint32_t mvendorid; | ||
29 | uint64_t marchid; | ||
30 | diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c | 18 | diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c |
31 | index XXXXXXX..XXXXXXX 100644 | 19 | index XXXXXXX..XXXXXXX 100644 |
32 | --- a/target/riscv/cpu.c | 20 | --- a/target/riscv/cpu.c |
33 | +++ b/target/riscv/cpu.c | 21 | +++ b/target/riscv/cpu.c |
34 | @@ -XXX,XX +XXX,XX @@ static void riscv_cpu_realize(DeviceState *dev, Error **errp) | 22 | @@ -XXX,XX +XXX,XX @@ static Property riscv_cpu_extensions[] = { |
35 | cpu->cfg.ext_ifencei = true; | 23 | DEFINE_PROP_BOOL("zcf", RISCVCPU, cfg.ext_zcf, false), |
36 | } | 24 | DEFINE_PROP_BOOL("zcmp", RISCVCPU, cfg.ext_zcmp, false), |
37 | 25 | DEFINE_PROP_BOOL("zcmt", RISCVCPU, cfg.ext_zcmt, false), | |
38 | + if (cpu->cfg.ext_m && cpu->cfg.ext_zmmul) { | 26 | + DEFINE_PROP_BOOL("zicond", RISCVCPU, cfg.ext_zicond, false), |
39 | + warn_report("Zmmul will override M"); | 27 | |
40 | + cpu->cfg.ext_m = false; | 28 | /* Vendor-specific custom extensions */ |
41 | + } | 29 | DEFINE_PROP_BOOL("xtheadba", RISCVCPU, cfg.ext_xtheadba, false), |
42 | + | 30 | @@ -XXX,XX +XXX,XX @@ static Property riscv_cpu_extensions[] = { |
43 | if (cpu->cfg.ext_i && cpu->cfg.ext_e) { | 31 | DEFINE_PROP_BOOL("xventanacondops", RISCVCPU, cfg.ext_XVentanaCondOps, false), |
44 | error_setg(errp, | ||
45 | "I and E extensions are incompatible"); | ||
46 | @@ -XXX,XX +XXX,XX @@ static Property riscv_cpu_properties[] = { | ||
47 | 32 | ||
48 | /* These are experimental so mark with 'x-' */ | 33 | /* These are experimental so mark with 'x-' */ |
49 | DEFINE_PROP_BOOL("x-j", RISCVCPU, cfg.ext_j, false), | 34 | - DEFINE_PROP_BOOL("x-zicond", RISCVCPU, cfg.ext_zicond, false), |
50 | + DEFINE_PROP_BOOL("x-zmmul", RISCVCPU, cfg.ext_zmmul, false), | 35 | |
51 | /* ePMP 0.9.3 */ | 36 | /* ePMP 0.9.3 */ |
52 | DEFINE_PROP_BOOL("x-epmp", RISCVCPU, cfg.epmp, false), | 37 | DEFINE_PROP_BOOL("x-epmp", RISCVCPU, cfg.epmp, false), |
53 | DEFINE_PROP_BOOL("x-aia", RISCVCPU, cfg.aia, false), | ||
54 | @@ -XXX,XX +XXX,XX @@ static void riscv_isa_string_ext(RISCVCPU *cpu, char **isa_str, int max_str_len) | ||
55 | struct isa_ext_data isa_edata_arr[] = { | ||
56 | ISA_EDATA_ENTRY(zicsr, ext_icsr), | ||
57 | ISA_EDATA_ENTRY(zifencei, ext_ifencei), | ||
58 | + ISA_EDATA_ENTRY(zmmul, ext_zmmul), | ||
59 | ISA_EDATA_ENTRY(zfh, ext_zfh), | ||
60 | ISA_EDATA_ENTRY(zfhmin, ext_zfhmin), | ||
61 | ISA_EDATA_ENTRY(zfinx, ext_zfinx), | ||
62 | diff --git a/target/riscv/insn_trans/trans_rvm.c.inc b/target/riscv/insn_trans/trans_rvm.c.inc | ||
63 | index XXXXXXX..XXXXXXX 100644 | ||
64 | --- a/target/riscv/insn_trans/trans_rvm.c.inc | ||
65 | +++ b/target/riscv/insn_trans/trans_rvm.c.inc | ||
66 | @@ -XXX,XX +XXX,XX @@ | ||
67 | * this program. If not, see <http://www.gnu.org/licenses/>. | ||
68 | */ | ||
69 | |||
70 | +#define REQUIRE_M_OR_ZMMUL(ctx) do { \ | ||
71 | + if (!ctx->cfg_ptr->ext_zmmul && !has_ext(ctx, RVM)) { \ | ||
72 | + return false; \ | ||
73 | + } \ | ||
74 | +} while (0) | ||
75 | + | ||
76 | static void gen_mulhu_i128(TCGv r2, TCGv r3, TCGv al, TCGv ah, TCGv bl, TCGv bh) | ||
77 | { | ||
78 | TCGv tmpl = tcg_temp_new(); | ||
79 | @@ -XXX,XX +XXX,XX @@ static void gen_mul_i128(TCGv rl, TCGv rh, | ||
80 | |||
81 | static bool trans_mul(DisasContext *ctx, arg_mul *a) | ||
82 | { | ||
83 | - REQUIRE_EXT(ctx, RVM); | ||
84 | + REQUIRE_M_OR_ZMMUL(ctx); | ||
85 | return gen_arith(ctx, a, EXT_NONE, tcg_gen_mul_tl, gen_mul_i128); | ||
86 | } | ||
87 | |||
88 | @@ -XXX,XX +XXX,XX @@ static void gen_mulh_w(TCGv ret, TCGv s1, TCGv s2) | ||
89 | |||
90 | static bool trans_mulh(DisasContext *ctx, arg_mulh *a) | ||
91 | { | ||
92 | - REQUIRE_EXT(ctx, RVM); | ||
93 | + REQUIRE_M_OR_ZMMUL(ctx); | ||
94 | return gen_arith_per_ol(ctx, a, EXT_SIGN, gen_mulh, gen_mulh_w, | ||
95 | gen_mulh_i128); | ||
96 | } | ||
97 | @@ -XXX,XX +XXX,XX @@ static void gen_mulhsu_w(TCGv ret, TCGv arg1, TCGv arg2) | ||
98 | |||
99 | static bool trans_mulhsu(DisasContext *ctx, arg_mulhsu *a) | ||
100 | { | ||
101 | - REQUIRE_EXT(ctx, RVM); | ||
102 | + REQUIRE_M_OR_ZMMUL(ctx); | ||
103 | return gen_arith_per_ol(ctx, a, EXT_NONE, gen_mulhsu, gen_mulhsu_w, | ||
104 | gen_mulhsu_i128); | ||
105 | } | ||
106 | @@ -XXX,XX +XXX,XX @@ static void gen_mulhu(TCGv ret, TCGv s1, TCGv s2) | ||
107 | |||
108 | static bool trans_mulhu(DisasContext *ctx, arg_mulhu *a) | ||
109 | { | ||
110 | - REQUIRE_EXT(ctx, RVM); | ||
111 | + REQUIRE_M_OR_ZMMUL(ctx); | ||
112 | /* gen_mulh_w works for either sign as input. */ | ||
113 | return gen_arith_per_ol(ctx, a, EXT_ZERO, gen_mulhu, gen_mulh_w, | ||
114 | gen_mulhu_i128); | ||
115 | @@ -XXX,XX +XXX,XX @@ static bool trans_remu(DisasContext *ctx, arg_remu *a) | ||
116 | static bool trans_mulw(DisasContext *ctx, arg_mulw *a) | ||
117 | { | ||
118 | REQUIRE_64_OR_128BIT(ctx); | ||
119 | - REQUIRE_EXT(ctx, RVM); | ||
120 | + REQUIRE_M_OR_ZMMUL(ctx); | ||
121 | ctx->ol = MXL_RV32; | ||
122 | return gen_arith(ctx, a, EXT_NONE, tcg_gen_mul_tl, NULL); | ||
123 | } | ||
124 | @@ -XXX,XX +XXX,XX @@ static bool trans_remuw(DisasContext *ctx, arg_remuw *a) | ||
125 | static bool trans_muld(DisasContext *ctx, arg_muld *a) | ||
126 | { | ||
127 | REQUIRE_128BIT(ctx); | ||
128 | - REQUIRE_EXT(ctx, RVM); | ||
129 | + REQUIRE_M_OR_ZMMUL(ctx); | ||
130 | ctx->ol = MXL_RV64; | ||
131 | return gen_arith(ctx, a, EXT_SIGN, tcg_gen_mul_tl, NULL); | ||
132 | } | ||
133 | -- | 38 | -- |
134 | 2.36.1 | 39 | 2.41.0 | diff view generated by jsdifflib |
1 | From: Alistair Francis <alistair.francis@wdc.com> | 1 | From: Daniel Henrique Barboza <dbarboza@ventanamicro.com> |
---|---|---|---|
2 | 2 | ||
3 | Since commit ad40be27 "target/riscv: Support start kernel directly by | 3 | A build with --enable-debug and without KVM will fail as follows: |
4 | KVM" we have been overflowing the addr_config on "M,MS..." | ||
5 | configurations, as reported https://gitlab.com/qemu-project/qemu/-/issues/1050. | ||
6 | 4 | ||
7 | This commit changes the loop in sifive_plic_create() from iterating over | 5 | /usr/bin/ld: libqemu-riscv64-softmmu.fa.p/hw_riscv_virt.c.o: in function `virt_machine_init': |
8 | the number of harts to just iterating over the addr_config. The | 6 | ./qemu/build/../hw/riscv/virt.c:1465: undefined reference to `kvm_riscv_aia_create' |
9 | addr_config is based on the hart_config, and will contain interrup details | ||
10 | for all harts. This way we can't iterate past the end of addr_config. | ||
11 | 7 | ||
12 | Fixes: ad40be27084536 ("target/riscv: Support start kernel directly by KVM") | 8 | This happens because the code block with "if virt_use_kvm_aia(s)" isn't |
13 | Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1050 | 9 | being ignored by the debug build, resulting in an undefined reference to |
10 | a KVM only function. | ||
11 | |||
12 | Add a 'kvm_enabled()' conditional together with virt_use_kvm_aia() will | ||
13 | make the compiler crop the kvm_riscv_aia_create() call entirely from a | ||
14 | non-KVM build. Note that adding the 'kvm_enabled()' conditional inside | ||
15 | virt_use_kvm_aia() won't fix the build because this function would need | ||
16 | to be inlined multiple times to make the compiler zero out the entire | ||
17 | block. | ||
18 | |||
19 | While we're at it, use kvm_enabled() in all instances where | ||
20 | virt_use_kvm_aia() is checked to allow the compiler to elide these other | ||
21 | kvm-only instances as well. | ||
22 | |||
23 | Suggested-by: Richard Henderson <richard.henderson@linaro.org> | ||
24 | Fixes: dbdb99948e ("target/riscv: select KVM AIA in riscv virt machine") | ||
25 | Signed-off-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> | ||
26 | Reviewed-by: Andrew Jones <ajones@ventanamicro.com> | ||
27 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
28 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
29 | Message-ID: <20230830133503.711138-2-dbarboza@ventanamicro.com> | ||
14 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | 30 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> |
15 | Reviewed-by: Mingwang Li <limingwang@huawei.com> | ||
16 | Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | ||
17 | Message-Id: <20220601013631.196854-1-alistair.francis@opensource.wdc.com> | ||
18 | --- | 31 | --- |
19 | hw/intc/sifive_plic.c | 19 +++++++++---------- | 32 | hw/riscv/virt.c | 6 +++--- |
20 | 1 file changed, 9 insertions(+), 10 deletions(-) | 33 | 1 file changed, 3 insertions(+), 3 deletions(-) |
21 | 34 | ||
22 | diff --git a/hw/intc/sifive_plic.c b/hw/intc/sifive_plic.c | 35 | diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c |
23 | index XXXXXXX..XXXXXXX 100644 | 36 | index XXXXXXX..XXXXXXX 100644 |
24 | --- a/hw/intc/sifive_plic.c | 37 | --- a/hw/riscv/virt.c |
25 | +++ b/hw/intc/sifive_plic.c | 38 | +++ b/hw/riscv/virt.c |
26 | @@ -XXX,XX +XXX,XX @@ DeviceState *sifive_plic_create(hwaddr addr, char *hart_config, | 39 | @@ -XXX,XX +XXX,XX @@ static void create_fdt_sockets(RISCVVirtState *s, const MemMapEntry *memmap, |
27 | uint32_t context_stride, uint32_t aperture_size) | 40 | } |
28 | { | 41 | |
29 | DeviceState *dev = qdev_new(TYPE_SIFIVE_PLIC); | 42 | /* KVM AIA only has one APLIC instance */ |
30 | - int i, j = 0; | 43 | - if (virt_use_kvm_aia(s)) { |
31 | + int i; | 44 | + if (kvm_enabled() && virt_use_kvm_aia(s)) { |
32 | SiFivePLICState *plic; | 45 | create_fdt_socket_aplic(s, memmap, 0, |
33 | 46 | msi_m_phandle, msi_s_phandle, phandle, | |
34 | assert(enable_stride == (enable_stride & -enable_stride)); | 47 | &intc_phandles[0], xplic_phandles, |
35 | @@ -XXX,XX +XXX,XX @@ DeviceState *sifive_plic_create(hwaddr addr, char *hart_config, | 48 | @@ -XXX,XX +XXX,XX @@ static void create_fdt_sockets(RISCVVirtState *s, const MemMapEntry *memmap, |
36 | sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, addr); | 49 | |
37 | 50 | g_free(intc_phandles); | |
38 | plic = SIFIVE_PLIC(dev); | 51 | |
39 | - for (i = 0; i < num_harts; i++) { | 52 | - if (virt_use_kvm_aia(s)) { |
40 | - CPUState *cpu = qemu_get_cpu(hartid_base + i); | 53 | + if (kvm_enabled() && virt_use_kvm_aia(s)) { |
41 | 54 | *irq_mmio_phandle = xplic_phandles[0]; | |
42 | - if (plic->addr_config[j].mode == PLICMode_M) { | 55 | *irq_virtio_phandle = xplic_phandles[0]; |
43 | - j++; | 56 | *irq_pcie_phandle = xplic_phandles[0]; |
44 | - qdev_connect_gpio_out(dev, num_harts + i, | 57 | @@ -XXX,XX +XXX,XX @@ static void virt_machine_init(MachineState *machine) |
45 | + for (i = 0; i < plic->num_addrs; i++) { | ||
46 | + int cpu_num = plic->addr_config[i].hartid; | ||
47 | + CPUState *cpu = qemu_get_cpu(hartid_base + cpu_num); | ||
48 | + | ||
49 | + if (plic->addr_config[i].mode == PLICMode_M) { | ||
50 | + qdev_connect_gpio_out(dev, num_harts + cpu_num, | ||
51 | qdev_get_gpio_in(DEVICE(cpu), IRQ_M_EXT)); | ||
52 | } | ||
53 | - | ||
54 | - if (plic->addr_config[j].mode == PLICMode_S) { | ||
55 | - j++; | ||
56 | - qdev_connect_gpio_out(dev, i, | ||
57 | + if (plic->addr_config[i].mode == PLICMode_S) { | ||
58 | + qdev_connect_gpio_out(dev, cpu_num, | ||
59 | qdev_get_gpio_in(DEVICE(cpu), IRQ_S_EXT)); | ||
60 | } | 58 | } |
61 | } | 59 | } |
60 | |||
61 | - if (virt_use_kvm_aia(s)) { | ||
62 | + if (kvm_enabled() && virt_use_kvm_aia(s)) { | ||
63 | kvm_riscv_aia_create(machine, IMSIC_MMIO_GROUP_MIN_SHIFT, | ||
64 | VIRT_IRQCHIP_NUM_SOURCES, VIRT_IRQCHIP_NUM_MSIS, | ||
65 | memmap[VIRT_APLIC_S].base, | ||
62 | -- | 66 | -- |
63 | 2.36.1 | 67 | 2.41.0 |
68 | |||
69 | diff view generated by jsdifflib |
1 | From: eopXD <yueh.ting.chen@gmail.com> | 1 | From: Daniel Henrique Barboza <dbarboza@ventanamicro.com> |
---|---|---|---|
2 | 2 | ||
3 | Compares write mask registers, and so always operate under a tail- | 3 | Commit 6df0b37e2ab breaks a --enable-debug build in a non-KVM |
4 | agnostic policy. | 4 | environment with the following error: |
5 | 5 | ||
6 | Signed-off-by: eop Chen <eop.chen@sifive.com> | 6 | /usr/bin/ld: libqemu-riscv64-softmmu.fa.p/hw_intc_riscv_aplic.c.o: in function `riscv_kvm_aplic_request': |
7 | Reviewed-by: Frank Chang <frank.chang@sifive.com> | 7 | ./qemu/build/../hw/intc/riscv_aplic.c:486: undefined reference to `kvm_set_irq' |
8 | Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn> | 8 | collect2: error: ld returned 1 exit status |
9 | Acked-by: Alistair Francis <alistair.francis@wdc.com> | 9 | |
10 | Message-Id: <165449614532.19704.7000832880482980398-9@git.sr.ht> | 10 | This happens because the debug build will poke into the |
11 | 'if (is_kvm_aia(aplic->msimode))' block and fail to find a reference to | ||
12 | the KVM only function riscv_kvm_aplic_request(). | ||
13 | |||
14 | There are multiple solutions to fix this. We'll go with the same | ||
15 | solution from the previous patch, i.e. add a kvm_enabled() conditional | ||
16 | to filter out the block. But there's a catch: riscv_kvm_aplic_request() | ||
17 | is a local function that would end up being used if the compiler crops | ||
18 | the block, and this won't work. Quoting Richard Henderson's explanation | ||
19 | in [1]: | ||
20 | |||
21 | "(...) the compiler won't eliminate entire unused functions with -O0" | ||
22 | |||
23 | We'll solve it by moving riscv_kvm_aplic_request() to kvm.c and add its | ||
24 | declaration in kvm_riscv.h, where all other KVM specific public | ||
25 | functions are already declared. Other archs handles KVM specific code in | ||
26 | this manner and we expect to do the same from now on. | ||
27 | |||
28 | [1] https://lore.kernel.org/qemu-riscv/d2f1ad02-eb03-138f-9d08-db676deeed05@linaro.org/ | ||
29 | |||
30 | Signed-off-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> | ||
31 | Reviewed-by: Andrew Jones <ajones@ventanamicro.com> | ||
32 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
33 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
34 | Message-ID: <20230830133503.711138-3-dbarboza@ventanamicro.com> | ||
11 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | 35 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> |
12 | --- | 36 | --- |
13 | target/riscv/vector_helper.c | 18 ++++++++++++++++++ | 37 | target/riscv/kvm_riscv.h | 1 + |
14 | 1 file changed, 18 insertions(+) | 38 | hw/intc/riscv_aplic.c | 8 ++------ |
39 | target/riscv/kvm.c | 5 +++++ | ||
40 | 3 files changed, 8 insertions(+), 6 deletions(-) | ||
15 | 41 | ||
16 | diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c | 42 | diff --git a/target/riscv/kvm_riscv.h b/target/riscv/kvm_riscv.h |
17 | index XXXXXXX..XXXXXXX 100644 | 43 | index XXXXXXX..XXXXXXX 100644 |
18 | --- a/target/riscv/vector_helper.c | 44 | --- a/target/riscv/kvm_riscv.h |
19 | +++ b/target/riscv/vector_helper.c | 45 | +++ b/target/riscv/kvm_riscv.h |
20 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ | 46 | @@ -XXX,XX +XXX,XX @@ void kvm_riscv_aia_create(MachineState *machine, uint64_t group_shift, |
21 | { \ | 47 | uint64_t aia_irq_num, uint64_t aia_msi_num, |
22 | uint32_t vm = vext_vm(desc); \ | 48 | uint64_t aplic_base, uint64_t imsic_base, |
23 | uint32_t vl = env->vl; \ | 49 | uint64_t guest_num); |
24 | + uint32_t total_elems = env_archcpu(env)->cfg.vlen; \ | 50 | +void riscv_kvm_aplic_request(void *opaque, int irq, int level); |
25 | + uint32_t vta_all_1s = vext_vta_all_1s(desc); \ | 51 | |
26 | uint32_t i; \ | 52 | #endif |
27 | \ | 53 | diff --git a/hw/intc/riscv_aplic.c b/hw/intc/riscv_aplic.c |
28 | for (i = env->vstart; i < vl; i++) { \ | 54 | index XXXXXXX..XXXXXXX 100644 |
29 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ | 55 | --- a/hw/intc/riscv_aplic.c |
30 | vext_set_elem_mask(vd, i, DO_OP(s2, s1)); \ | 56 | +++ b/hw/intc/riscv_aplic.c |
31 | } \ | 57 | @@ -XXX,XX +XXX,XX @@ |
32 | env->vstart = 0; \ | 58 | #include "target/riscv/cpu.h" |
33 | + /* mask destination register are always tail-agnostic */ \ | 59 | #include "sysemu/sysemu.h" |
34 | + /* set tail elements to 1s */ \ | 60 | #include "sysemu/kvm.h" |
35 | + if (vta_all_1s) { \ | 61 | +#include "kvm_riscv.h" |
36 | + for (; i < total_elems; i++) { \ | 62 | #include "migration/vmstate.h" |
37 | + vext_set_elem_mask(vd, i, 1); \ | 63 | |
38 | + } \ | 64 | #define APLIC_MAX_IDC (1UL << 14) |
39 | + } \ | 65 | @@ -XXX,XX +XXX,XX @@ static uint32_t riscv_aplic_idc_claimi(RISCVAPLICState *aplic, uint32_t idc) |
66 | return topi; | ||
40 | } | 67 | } |
41 | 68 | ||
42 | GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t, H1, DO_MSEQ) | 69 | -static void riscv_kvm_aplic_request(void *opaque, int irq, int level) |
43 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ | 70 | -{ |
44 | { \ | 71 | - kvm_set_irq(kvm_state, irq, !!level); |
45 | uint32_t vm = vext_vm(desc); \ | 72 | -} |
46 | uint32_t vl = env->vl; \ | 73 | - |
47 | + uint32_t total_elems = env_archcpu(env)->cfg.vlen; \ | 74 | static void riscv_aplic_request(void *opaque, int irq, int level) |
48 | + uint32_t vta_all_1s = vext_vta_all_1s(desc); \ | 75 | { |
49 | uint32_t i; \ | 76 | bool update = false; |
50 | \ | 77 | @@ -XXX,XX +XXX,XX @@ static void riscv_aplic_realize(DeviceState *dev, Error **errp) |
51 | for (i = env->vstart; i < vl; i++) { \ | 78 | * have IRQ lines delegated by their parent APLIC. |
52 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ | 79 | */ |
53 | DO_OP(s2, (ETYPE)(target_long)s1)); \ | 80 | if (!aplic->parent) { |
54 | } \ | 81 | - if (is_kvm_aia(aplic->msimode)) { |
55 | env->vstart = 0; \ | 82 | + if (kvm_enabled() && is_kvm_aia(aplic->msimode)) { |
56 | + /* mask destination register are always tail-agnostic */ \ | 83 | qdev_init_gpio_in(dev, riscv_kvm_aplic_request, aplic->num_irqs); |
57 | + /* set tail elements to 1s */ \ | 84 | } else { |
58 | + if (vta_all_1s) { \ | 85 | qdev_init_gpio_in(dev, riscv_aplic_request, aplic->num_irqs); |
59 | + for (; i < total_elems; i++) { \ | 86 | diff --git a/target/riscv/kvm.c b/target/riscv/kvm.c |
60 | + vext_set_elem_mask(vd, i, 1); \ | 87 | index XXXXXXX..XXXXXXX 100644 |
61 | + } \ | 88 | --- a/target/riscv/kvm.c |
62 | + } \ | 89 | +++ b/target/riscv/kvm.c |
63 | } | 90 | @@ -XXX,XX +XXX,XX @@ |
64 | 91 | #include "sysemu/runstate.h" | |
65 | GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t, H1, DO_MSEQ) | 92 | #include "hw/riscv/numa.h" |
93 | |||
94 | +void riscv_kvm_aplic_request(void *opaque, int irq, int level) | ||
95 | +{ | ||
96 | + kvm_set_irq(kvm_state, irq, !!level); | ||
97 | +} | ||
98 | + | ||
99 | static uint64_t kvm_riscv_reg_id(CPURISCVState *env, uint64_t type, | ||
100 | uint64_t idx) | ||
101 | { | ||
66 | -- | 102 | -- |
67 | 2.36.1 | 103 | 2.41.0 |
104 | |||
105 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Robbin Ehn <rehn@rivosinc.com> | ||
1 | 2 | ||
3 | This patch adds the new extensions in | ||
4 | linux 6.5 to the hwprobe syscall. | ||
5 | |||
6 | And fixes RVC check to OR with correct value. | ||
7 | The previous variable contains 0 therefore it | ||
8 | did work. | ||
9 | |||
10 | Signed-off-by: Robbin Ehn <rehn@rivosinc.com> | ||
11 | Acked-by: Richard Henderson <richard.henderson@linaro.org> | ||
12 | Acked-by: Alistair Francis <alistair.francis@wdc.com> | ||
13 | Message-ID: <bc82203b72d7efb30f1b4a8f9eb3d94699799dc8.camel@rivosinc.com> | ||
14 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | ||
15 | --- | ||
16 | linux-user/syscall.c | 14 +++++++++++++- | ||
17 | 1 file changed, 13 insertions(+), 1 deletion(-) | ||
18 | |||
19 | diff --git a/linux-user/syscall.c b/linux-user/syscall.c | ||
20 | index XXXXXXX..XXXXXXX 100644 | ||
21 | --- a/linux-user/syscall.c | ||
22 | +++ b/linux-user/syscall.c | ||
23 | @@ -XXX,XX +XXX,XX @@ static int do_getdents64(abi_long dirfd, abi_long arg2, abi_long count) | ||
24 | #define RISCV_HWPROBE_KEY_IMA_EXT_0 4 | ||
25 | #define RISCV_HWPROBE_IMA_FD (1 << 0) | ||
26 | #define RISCV_HWPROBE_IMA_C (1 << 1) | ||
27 | +#define RISCV_HWPROBE_IMA_V (1 << 2) | ||
28 | +#define RISCV_HWPROBE_EXT_ZBA (1 << 3) | ||
29 | +#define RISCV_HWPROBE_EXT_ZBB (1 << 4) | ||
30 | +#define RISCV_HWPROBE_EXT_ZBS (1 << 5) | ||
31 | |||
32 | #define RISCV_HWPROBE_KEY_CPUPERF_0 5 | ||
33 | #define RISCV_HWPROBE_MISALIGNED_UNKNOWN (0 << 0) | ||
34 | @@ -XXX,XX +XXX,XX @@ static void risc_hwprobe_fill_pairs(CPURISCVState *env, | ||
35 | riscv_has_ext(env, RVD) ? | ||
36 | RISCV_HWPROBE_IMA_FD : 0; | ||
37 | value |= riscv_has_ext(env, RVC) ? | ||
38 | - RISCV_HWPROBE_IMA_C : pair->value; | ||
39 | + RISCV_HWPROBE_IMA_C : 0; | ||
40 | + value |= riscv_has_ext(env, RVV) ? | ||
41 | + RISCV_HWPROBE_IMA_V : 0; | ||
42 | + value |= cfg->ext_zba ? | ||
43 | + RISCV_HWPROBE_EXT_ZBA : 0; | ||
44 | + value |= cfg->ext_zbb ? | ||
45 | + RISCV_HWPROBE_EXT_ZBB : 0; | ||
46 | + value |= cfg->ext_zbs ? | ||
47 | + RISCV_HWPROBE_EXT_ZBS : 0; | ||
48 | __put_user(value, &pair->value); | ||
49 | break; | ||
50 | case RISCV_HWPROBE_KEY_CPUPERF_0: | ||
51 | -- | ||
52 | 2.41.0 | diff view generated by jsdifflib |
1 | From: eopXD <yueh.ting.chen@gmail.com> | 1 | From: Ard Biesheuvel <ardb@kernel.org> |
---|---|---|---|
2 | 2 | ||
3 | No functional change intended in this commit. | 3 | Use the accelerated SubBytes/ShiftRows/AddRoundKey AES helper to |
4 | implement the first half of the key schedule derivation. This does not | ||
5 | actually involve shifting rows, so clone the same value into all four | ||
6 | columns of the AES vector to counter that operation. | ||
4 | 7 | ||
5 | Signed-off-by: eop Chen <eop.chen@sifive.com> | 8 | Cc: Richard Henderson <richard.henderson@linaro.org> |
6 | Reviewed-by: Alistair Francis <alistair.francis@wdc.com> | 9 | Cc: Philippe Mathieu-Daudé <philmd@linaro.org> |
7 | Message-Id: <165449614532.19704.7000832880482980398-2@git.sr.ht> | 10 | Cc: Palmer Dabbelt <palmer@dabbelt.com> |
11 | Cc: Alistair Francis <alistair.francis@wdc.com> | ||
12 | Signed-off-by: Ard Biesheuvel <ardb@kernel.org> | ||
13 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
14 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
15 | Message-ID: <20230831154118.138727-1-ardb@kernel.org> | ||
8 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | 16 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> |
9 | --- | 17 | --- |
10 | target/riscv/vector_helper.c | 35 ++++++++++++++++------------------- | 18 | target/riscv/crypto_helper.c | 17 +++++------------ |
11 | 1 file changed, 16 insertions(+), 19 deletions(-) | 19 | 1 file changed, 5 insertions(+), 12 deletions(-) |
12 | 20 | ||
13 | diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c | 21 | diff --git a/target/riscv/crypto_helper.c b/target/riscv/crypto_helper.c |
14 | index XXXXXXX..XXXXXXX 100644 | 22 | index XXXXXXX..XXXXXXX 100644 |
15 | --- a/target/riscv/vector_helper.c | 23 | --- a/target/riscv/crypto_helper.c |
16 | +++ b/target/riscv/vector_helper.c | 24 | +++ b/target/riscv/crypto_helper.c |
17 | @@ -XXX,XX +XXX,XX @@ vext_ldst_stride(void *vd, void *v0, target_ulong base, | 25 | @@ -XXX,XX +XXX,XX @@ target_ulong HELPER(aes64ks1i)(target_ulong rs1, target_ulong rnum) |
18 | target_ulong stride, CPURISCVState *env, | 26 | |
19 | uint32_t desc, uint32_t vm, | 27 | uint8_t enc_rnum = rnum; |
20 | vext_ldst_elem_fn *ldst_elem, | 28 | uint32_t temp = (RS1 >> 32) & 0xFFFFFFFF; |
21 | - uint32_t esz, uintptr_t ra, MMUAccessType access_type) | 29 | - uint8_t rcon_ = 0; |
22 | + uint32_t esz, uintptr_t ra) | 30 | - target_ulong result; |
23 | { | 31 | + AESState t, rc = {}; |
24 | uint32_t i, k; | 32 | |
25 | uint32_t nf = vext_nf(desc); | 33 | if (enc_rnum != 0xA) { |
26 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void * v0, target_ulong base, \ | 34 | temp = ror32(temp, 8); /* Rotate right by 8 */ |
27 | { \ | 35 | - rcon_ = round_consts[enc_rnum]; |
28 | uint32_t vm = vext_vm(desc); \ | 36 | + rc.w[0] = rc.w[1] = round_consts[enc_rnum]; |
29 | vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN, \ | 37 | } |
30 | - ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD); \ | 38 | |
31 | + ctzl(sizeof(ETYPE)), GETPC()); \ | 39 | - temp = ((uint32_t)AES_sbox[(temp >> 24) & 0xFF] << 24) | |
40 | - ((uint32_t)AES_sbox[(temp >> 16) & 0xFF] << 16) | | ||
41 | - ((uint32_t)AES_sbox[(temp >> 8) & 0xFF] << 8) | | ||
42 | - ((uint32_t)AES_sbox[(temp >> 0) & 0xFF] << 0); | ||
43 | + t.w[0] = t.w[1] = t.w[2] = t.w[3] = temp; | ||
44 | + aesenc_SB_SR_AK(&t, &t, &rc, false); | ||
45 | |||
46 | - temp ^= rcon_; | ||
47 | - | ||
48 | - result = ((uint64_t)temp << 32) | temp; | ||
49 | - | ||
50 | - return result; | ||
51 | + return t.d[0]; | ||
32 | } | 52 | } |
33 | 53 | ||
34 | GEN_VEXT_LD_STRIDE(vlse8_v, int8_t, lde_b) | 54 | target_ulong HELPER(aes64im)(target_ulong rs1) |
35 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ | ||
36 | { \ | ||
37 | uint32_t vm = vext_vm(desc); \ | ||
38 | vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN, \ | ||
39 | - ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_STORE); \ | ||
40 | + ctzl(sizeof(ETYPE)), GETPC()); \ | ||
41 | } | ||
42 | |||
43 | GEN_VEXT_ST_STRIDE(vsse8_v, int8_t, ste_b) | ||
44 | @@ -XXX,XX +XXX,XX @@ GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d) | ||
45 | static void | ||
46 | vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, | ||
47 | vext_ldst_elem_fn *ldst_elem, uint32_t esz, uint32_t evl, | ||
48 | - uintptr_t ra, MMUAccessType access_type) | ||
49 | + uintptr_t ra) | ||
50 | { | ||
51 | uint32_t i, k; | ||
52 | uint32_t nf = vext_nf(desc); | ||
53 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \ | ||
54 | { \ | ||
55 | uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \ | ||
56 | vext_ldst_stride(vd, v0, base, stride, env, desc, false, LOAD_FN, \ | ||
57 | - ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD); \ | ||
58 | + ctzl(sizeof(ETYPE)), GETPC()); \ | ||
59 | } \ | ||
60 | \ | ||
61 | void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ | ||
62 | CPURISCVState *env, uint32_t desc) \ | ||
63 | { \ | ||
64 | vext_ldst_us(vd, base, env, desc, LOAD_FN, \ | ||
65 | - ctzl(sizeof(ETYPE)), env->vl, GETPC(), MMU_DATA_LOAD); \ | ||
66 | + ctzl(sizeof(ETYPE)), env->vl, GETPC()); \ | ||
67 | } | ||
68 | |||
69 | GEN_VEXT_LD_US(vle8_v, int8_t, lde_b) | ||
70 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \ | ||
71 | { \ | ||
72 | uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \ | ||
73 | vext_ldst_stride(vd, v0, base, stride, env, desc, false, STORE_FN, \ | ||
74 | - ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_STORE); \ | ||
75 | + ctzl(sizeof(ETYPE)), GETPC()); \ | ||
76 | } \ | ||
77 | \ | ||
78 | void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ | ||
79 | CPURISCVState *env, uint32_t desc) \ | ||
80 | { \ | ||
81 | vext_ldst_us(vd, base, env, desc, STORE_FN, \ | ||
82 | - ctzl(sizeof(ETYPE)), env->vl, GETPC(), MMU_DATA_STORE); \ | ||
83 | + ctzl(sizeof(ETYPE)), env->vl, GETPC()); \ | ||
84 | } | ||
85 | |||
86 | GEN_VEXT_ST_US(vse8_v, int8_t, ste_b) | ||
87 | @@ -XXX,XX +XXX,XX @@ void HELPER(vlm_v)(void *vd, void *v0, target_ulong base, | ||
88 | /* evl = ceil(vl/8) */ | ||
89 | uint8_t evl = (env->vl + 7) >> 3; | ||
90 | vext_ldst_us(vd, base, env, desc, lde_b, | ||
91 | - 0, evl, GETPC(), MMU_DATA_LOAD); | ||
92 | + 0, evl, GETPC()); | ||
93 | } | ||
94 | |||
95 | void HELPER(vsm_v)(void *vd, void *v0, target_ulong base, | ||
96 | @@ -XXX,XX +XXX,XX @@ void HELPER(vsm_v)(void *vd, void *v0, target_ulong base, | ||
97 | /* evl = ceil(vl/8) */ | ||
98 | uint8_t evl = (env->vl + 7) >> 3; | ||
99 | vext_ldst_us(vd, base, env, desc, ste_b, | ||
100 | - 0, evl, GETPC(), MMU_DATA_STORE); | ||
101 | + 0, evl, GETPC()); | ||
102 | } | ||
103 | |||
104 | /* | ||
105 | @@ -XXX,XX +XXX,XX @@ vext_ldst_index(void *vd, void *v0, target_ulong base, | ||
106 | void *vs2, CPURISCVState *env, uint32_t desc, | ||
107 | vext_get_index_addr get_index_addr, | ||
108 | vext_ldst_elem_fn *ldst_elem, | ||
109 | - uint32_t esz, uintptr_t ra, MMUAccessType access_type) | ||
110 | + uint32_t esz, uintptr_t ra) | ||
111 | { | ||
112 | uint32_t i, k; | ||
113 | uint32_t nf = vext_nf(desc); | ||
114 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ | ||
115 | void *vs2, CPURISCVState *env, uint32_t desc) \ | ||
116 | { \ | ||
117 | vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \ | ||
118 | - LOAD_FN, ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD); \ | ||
119 | + LOAD_FN, ctzl(sizeof(ETYPE)), GETPC()); \ | ||
120 | } | ||
121 | |||
122 | GEN_VEXT_LD_INDEX(vlxei8_8_v, int8_t, idx_b, lde_b) | ||
123 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ | ||
124 | { \ | ||
125 | vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \ | ||
126 | STORE_FN, ctzl(sizeof(ETYPE)), \ | ||
127 | - GETPC(), MMU_DATA_STORE); \ | ||
128 | + GETPC()); \ | ||
129 | } | ||
130 | |||
131 | GEN_VEXT_ST_INDEX(vsxei8_8_v, int8_t, idx_b, ste_b) | ||
132 | @@ -XXX,XX +XXX,XX @@ GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d) | ||
133 | */ | ||
134 | static void | ||
135 | vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, | ||
136 | - vext_ldst_elem_fn *ldst_elem, uint32_t esz, uintptr_t ra, | ||
137 | - MMUAccessType access_type) | ||
138 | + vext_ldst_elem_fn *ldst_elem, uint32_t esz, uintptr_t ra) | ||
139 | { | ||
140 | uint32_t i, k, off, pos; | ||
141 | uint32_t nf = vext_nf(desc); | ||
142 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, target_ulong base, \ | ||
143 | CPURISCVState *env, uint32_t desc) \ | ||
144 | { \ | ||
145 | vext_ldst_whole(vd, base, env, desc, LOAD_FN, \ | ||
146 | - ctzl(sizeof(ETYPE)), GETPC(), \ | ||
147 | - MMU_DATA_LOAD); \ | ||
148 | + ctzl(sizeof(ETYPE)), GETPC()); \ | ||
149 | } | ||
150 | |||
151 | GEN_VEXT_LD_WHOLE(vl1re8_v, int8_t, lde_b) | ||
152 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, target_ulong base, \ | ||
153 | CPURISCVState *env, uint32_t desc) \ | ||
154 | { \ | ||
155 | vext_ldst_whole(vd, base, env, desc, STORE_FN, \ | ||
156 | - ctzl(sizeof(ETYPE)), GETPC(), \ | ||
157 | - MMU_DATA_STORE); \ | ||
158 | + ctzl(sizeof(ETYPE)), GETPC()); \ | ||
159 | } | ||
160 | |||
161 | GEN_VEXT_ST_WHOLE(vs1r_v, int8_t, ste_b) | ||
162 | -- | 55 | -- |
163 | 2.36.1 | 56 | 2.41.0 |
57 | |||
58 | diff view generated by jsdifflib |
1 | From: Frédéric Pétrot <frederic.petrot@univ-grenoble-alpes.fr> | 1 | From: Akihiko Odaki <akihiko.odaki@daynix.com> |
---|---|---|---|
2 | 2 | ||
3 | Add an MXL_RV128 case in two switches so that no error is triggered when | 3 | riscv_trigger_init() had been called on reset events that can happen |
4 | using the -cpu x-rv128 option. | 4 | several times for a CPU and it allocated timers for itrigger. If old |
5 | timers were present, they were simply overwritten by the new timers, | ||
6 | resulting in a memory leak. | ||
5 | 7 | ||
6 | Signed-off-by: Frédéric Pétrot <frederic.petrot@univ-grenoble-alpes.fr> | 8 | Divide riscv_trigger_init() into two functions, namely |
7 | Acked-by: Alistair Francis <alistair.francis@wdc.com> | 9 | riscv_trigger_realize() and riscv_trigger_reset() and call them in |
8 | Reviewed-by: Bin Meng <bmeng.cn@gmail.com> | 10 | appropriate timing. The timer allocation will happen only once for a |
9 | Message-Id: <20220602155246.38837-1-frederic.petrot@univ-grenoble-alpes.fr> | 11 | CPU in riscv_trigger_realize(). |
12 | |||
13 | Fixes: 5a4ae64cac ("target/riscv: Add itrigger support when icount is enabled") | ||
14 | Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com> | ||
15 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
16 | Reviewed-by: LIU Zhiwei <zhiwei_liu@linux.alibaba.com> | ||
17 | Reviewed-by: Alistair Francis <alistair.francis@wdc.com> | ||
18 | Message-ID: <20230818034059.9146-1-akihiko.odaki@daynix.com> | ||
10 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | 19 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> |
11 | --- | 20 | --- |
12 | target/riscv/debug.c | 2 ++ | 21 | target/riscv/debug.h | 3 ++- |
13 | 1 file changed, 2 insertions(+) | 22 | target/riscv/cpu.c | 8 +++++++- |
23 | target/riscv/debug.c | 15 ++++++++++++--- | ||
24 | 3 files changed, 21 insertions(+), 5 deletions(-) | ||
14 | 25 | ||
26 | diff --git a/target/riscv/debug.h b/target/riscv/debug.h | ||
27 | index XXXXXXX..XXXXXXX 100644 | ||
28 | --- a/target/riscv/debug.h | ||
29 | +++ b/target/riscv/debug.h | ||
30 | @@ -XXX,XX +XXX,XX @@ void riscv_cpu_debug_excp_handler(CPUState *cs); | ||
31 | bool riscv_cpu_debug_check_breakpoint(CPUState *cs); | ||
32 | bool riscv_cpu_debug_check_watchpoint(CPUState *cs, CPUWatchpoint *wp); | ||
33 | |||
34 | -void riscv_trigger_init(CPURISCVState *env); | ||
35 | +void riscv_trigger_realize(CPURISCVState *env); | ||
36 | +void riscv_trigger_reset_hold(CPURISCVState *env); | ||
37 | |||
38 | bool riscv_itrigger_enabled(CPURISCVState *env); | ||
39 | void riscv_itrigger_update_priv(CPURISCVState *env); | ||
40 | diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c | ||
41 | index XXXXXXX..XXXXXXX 100644 | ||
42 | --- a/target/riscv/cpu.c | ||
43 | +++ b/target/riscv/cpu.c | ||
44 | @@ -XXX,XX +XXX,XX @@ static void riscv_cpu_reset_hold(Object *obj) | ||
45 | |||
46 | #ifndef CONFIG_USER_ONLY | ||
47 | if (cpu->cfg.debug) { | ||
48 | - riscv_trigger_init(env); | ||
49 | + riscv_trigger_reset_hold(env); | ||
50 | } | ||
51 | |||
52 | if (kvm_enabled()) { | ||
53 | @@ -XXX,XX +XXX,XX @@ static void riscv_cpu_realize(DeviceState *dev, Error **errp) | ||
54 | |||
55 | riscv_cpu_register_gdb_regs_for_features(cs); | ||
56 | |||
57 | +#ifndef CONFIG_USER_ONLY | ||
58 | + if (cpu->cfg.debug) { | ||
59 | + riscv_trigger_realize(&cpu->env); | ||
60 | + } | ||
61 | +#endif | ||
62 | + | ||
63 | qemu_init_vcpu(cs); | ||
64 | cpu_reset(cs); | ||
65 | |||
15 | diff --git a/target/riscv/debug.c b/target/riscv/debug.c | 66 | diff --git a/target/riscv/debug.c b/target/riscv/debug.c |
16 | index XXXXXXX..XXXXXXX 100644 | 67 | index XXXXXXX..XXXXXXX 100644 |
17 | --- a/target/riscv/debug.c | 68 | --- a/target/riscv/debug.c |
18 | +++ b/target/riscv/debug.c | 69 | +++ b/target/riscv/debug.c |
19 | @@ -XXX,XX +XXX,XX @@ static inline target_ulong trigger_type(CPURISCVState *env, | 70 | @@ -XXX,XX +XXX,XX @@ bool riscv_cpu_debug_check_watchpoint(CPUState *cs, CPUWatchpoint *wp) |
20 | tdata1 = RV32_TYPE(type); | 71 | return false; |
21 | break; | 72 | } |
22 | case MXL_RV64: | 73 | |
23 | + case MXL_RV128: | 74 | -void riscv_trigger_init(CPURISCVState *env) |
24 | tdata1 = RV64_TYPE(type); | 75 | +void riscv_trigger_realize(CPURISCVState *env) |
25 | break; | 76 | +{ |
26 | default: | 77 | + int i; |
27 | @@ -XXX,XX +XXX,XX @@ static target_ulong tdata1_validate(CPURISCVState *env, target_ulong val, | 78 | + |
28 | tdata1 = RV32_TYPE(t); | 79 | + for (i = 0; i < RV_MAX_TRIGGERS; i++) { |
29 | break; | 80 | + env->itrigger_timer[i] = timer_new_ns(QEMU_CLOCK_VIRTUAL, |
30 | case MXL_RV64: | 81 | + riscv_itrigger_timer_cb, env); |
31 | + case MXL_RV128: | 82 | + } |
32 | type = extract64(val, 60, 4); | 83 | +} |
33 | dmode = extract64(val, 59, 1); | 84 | + |
34 | tdata1 = RV64_TYPE(t); | 85 | +void riscv_trigger_reset_hold(CPURISCVState *env) |
86 | { | ||
87 | target_ulong tdata1 = build_tdata1(env, TRIGGER_TYPE_AD_MATCH, 0, 0); | ||
88 | int i; | ||
89 | @@ -XXX,XX +XXX,XX @@ void riscv_trigger_init(CPURISCVState *env) | ||
90 | env->tdata3[i] = 0; | ||
91 | env->cpu_breakpoint[i] = NULL; | ||
92 | env->cpu_watchpoint[i] = NULL; | ||
93 | - env->itrigger_timer[i] = timer_new_ns(QEMU_CLOCK_VIRTUAL, | ||
94 | - riscv_itrigger_timer_cb, env); | ||
95 | + timer_del(env->itrigger_timer[i]); | ||
96 | } | ||
97 | } | ||
35 | -- | 98 | -- |
36 | 2.36.1 | 99 | 2.41.0 |
100 | |||
101 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Leon Schuermann <leons@opentitan.org> | ||
1 | 2 | ||
3 | When the rule-lock bypass (RLB) bit is set in the mseccfg CSR, the PMP | ||
4 | configuration lock bits must not apply. While this behavior is | ||
5 | implemented for the pmpcfgX CSRs, this bit is not respected for | ||
6 | changes to the pmpaddrX CSRs. This patch ensures that pmpaddrX CSR | ||
7 | writes work even on locked regions when the global rule-lock bypass is | ||
8 | enabled. | ||
9 | |||
10 | Signed-off-by: Leon Schuermann <leons@opentitan.org> | ||
11 | Reviewed-by: Mayuresh Chitale <mchitale@ventanamicro.com> | ||
12 | Reviewed-by: Alistair Francis <alistair.francis@wdc.com> | ||
13 | Message-ID: <20230829215046.1430463-1-leon@is.currently.online> | ||
14 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | ||
15 | --- | ||
16 | target/riscv/pmp.c | 4 ++++ | ||
17 | 1 file changed, 4 insertions(+) | ||
18 | |||
19 | diff --git a/target/riscv/pmp.c b/target/riscv/pmp.c | ||
20 | index XXXXXXX..XXXXXXX 100644 | ||
21 | --- a/target/riscv/pmp.c | ||
22 | +++ b/target/riscv/pmp.c | ||
23 | @@ -XXX,XX +XXX,XX @@ static inline uint8_t pmp_get_a_field(uint8_t cfg) | ||
24 | */ | ||
25 | static inline int pmp_is_locked(CPURISCVState *env, uint32_t pmp_index) | ||
26 | { | ||
27 | + /* mseccfg.RLB is set */ | ||
28 | + if (MSECCFG_RLB_ISSET(env)) { | ||
29 | + return 0; | ||
30 | + } | ||
31 | |||
32 | if (env->pmp_state.pmp[pmp_index].cfg_reg & PMP_LOCK) { | ||
33 | return 1; | ||
34 | -- | ||
35 | 2.41.0 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Tommy Wu <tommy.wu@sifive.com> | ||
1 | 2 | ||
3 | According to the new spec, when vsiselect has a reserved value, attempts | ||
4 | from M-mode or HS-mode to access vsireg, or from VS-mode to access | ||
5 | sireg, should preferably raise an illegal instruction exception. | ||
6 | |||
7 | Signed-off-by: Tommy Wu <tommy.wu@sifive.com> | ||
8 | Reviewed-by: Frank Chang <frank.chang@sifive.com> | ||
9 | Message-ID: <20230816061647.600672-1-tommy.wu@sifive.com> | ||
10 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | ||
11 | --- | ||
12 | target/riscv/csr.c | 7 +++++-- | ||
13 | 1 file changed, 5 insertions(+), 2 deletions(-) | ||
14 | |||
15 | diff --git a/target/riscv/csr.c b/target/riscv/csr.c | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/target/riscv/csr.c | ||
18 | +++ b/target/riscv/csr.c | ||
19 | @@ -XXX,XX +XXX,XX @@ static int rmw_iprio(target_ulong xlen, | ||
20 | static int rmw_xireg(CPURISCVState *env, int csrno, target_ulong *val, | ||
21 | target_ulong new_val, target_ulong wr_mask) | ||
22 | { | ||
23 | - bool virt; | ||
24 | + bool virt, isel_reserved; | ||
25 | uint8_t *iprio; | ||
26 | int ret = -EINVAL; | ||
27 | target_ulong priv, isel, vgein; | ||
28 | @@ -XXX,XX +XXX,XX @@ static int rmw_xireg(CPURISCVState *env, int csrno, target_ulong *val, | ||
29 | |||
30 | /* Decode register details from CSR number */ | ||
31 | virt = false; | ||
32 | + isel_reserved = false; | ||
33 | switch (csrno) { | ||
34 | case CSR_MIREG: | ||
35 | iprio = env->miprio; | ||
36 | @@ -XXX,XX +XXX,XX @@ static int rmw_xireg(CPURISCVState *env, int csrno, target_ulong *val, | ||
37 | riscv_cpu_mxl_bits(env)), | ||
38 | val, new_val, wr_mask); | ||
39 | } | ||
40 | + } else { | ||
41 | + isel_reserved = true; | ||
42 | } | ||
43 | |||
44 | done: | ||
45 | if (ret) { | ||
46 | - return (env->virt_enabled && virt) ? | ||
47 | + return (env->virt_enabled && virt && !isel_reserved) ? | ||
48 | RISCV_EXCP_VIRT_INSTRUCTION_FAULT : RISCV_EXCP_ILLEGAL_INST; | ||
49 | } | ||
50 | return RISCV_EXCP_NONE; | ||
51 | -- | ||
52 | 2.41.0 | diff view generated by jsdifflib |
1 | From: eopXD <yueh.ting.chen@gmail.com> | 1 | From: Nikita Shubin <n.shubin@yadro.com> |
---|---|---|---|
2 | 2 | ||
3 | Signed-off-by: eop Chen <eop.chen@sifive.com> | 3 | As per ISA: |
4 | Reviewed-by: Frank Chang <frank.chang@sifive.com> | 4 | |
5 | Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn> | 5 | "For CSRRWI, if rd=x0, then the instruction shall not read the CSR and |
6 | Acked-by: Alistair Francis <alistair.francis@wdc.com> | 6 | shall not cause any of the side effects that might occur on a CSR read." |
7 | Message-Id: <165449614532.19704.7000832880482980398-15@git.sr.ht> | 7 | |
8 | trans_csrrwi() and trans_csrrw() call do_csrw() if rd=x0, do_csrw() calls | ||
9 | riscv_csrrw_do64(), via helper_csrw() passing NULL as *ret_value. | ||
10 | |||
11 | Signed-off-by: Nikita Shubin <n.shubin@yadro.com> | ||
12 | Reviewed-by: Alistair Francis <alistair.francis@wdc.com> | ||
13 | Message-ID: <20230808090914.17634-1-nikita.shubin@maquefel.me> | ||
8 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | 14 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> |
9 | --- | 15 | --- |
10 | target/riscv/vector_helper.c | 40 +++++++++++++++++++++++++ | 16 | target/riscv/csr.c | 24 +++++++++++++++--------- |
11 | target/riscv/insn_trans/trans_rvv.c.inc | 7 +++-- | 17 | 1 file changed, 15 insertions(+), 9 deletions(-) |
12 | 2 files changed, 45 insertions(+), 2 deletions(-) | ||
13 | 18 | ||
14 | diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c | 19 | diff --git a/target/riscv/csr.c b/target/riscv/csr.c |
15 | index XXXXXXX..XXXXXXX 100644 | 20 | index XXXXXXX..XXXXXXX 100644 |
16 | --- a/target/riscv/vector_helper.c | 21 | --- a/target/riscv/csr.c |
17 | +++ b/target/riscv/vector_helper.c | 22 | +++ b/target/riscv/csr.c |
18 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ | 23 | @@ -XXX,XX +XXX,XX @@ static RISCVException riscv_csrrw_do64(CPURISCVState *env, int csrno, |
19 | { \ | 24 | target_ulong write_mask) |
20 | uint32_t vm = vext_vm(desc); \ | 25 | { |
21 | uint32_t vl = env->vl; \ | 26 | RISCVException ret; |
22 | + uint32_t esz = sizeof(ETYPE); \ | 27 | - target_ulong old_value; |
23 | + uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ | 28 | + target_ulong old_value = 0; |
24 | + uint32_t vta = vext_vta(desc); \ | 29 | |
25 | target_ulong offset = s1, i_min, i; \ | 30 | /* execute combined read/write operation if it exists */ |
26 | \ | 31 | if (csr_ops[csrno].op) { |
27 | i_min = MAX(env->vstart, offset); \ | 32 | return csr_ops[csrno].op(env, csrno, ret_value, new_value, write_mask); |
28 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ | ||
29 | } \ | ||
30 | *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset)); \ | ||
31 | } \ | ||
32 | + /* set tail elements to 1s */ \ | ||
33 | + vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ | ||
34 | } | ||
35 | |||
36 | /* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */ | ||
37 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ | ||
38 | uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \ | ||
39 | uint32_t vm = vext_vm(desc); \ | ||
40 | uint32_t vl = env->vl; \ | ||
41 | + uint32_t esz = sizeof(ETYPE); \ | ||
42 | + uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ | ||
43 | + uint32_t vta = vext_vta(desc); \ | ||
44 | target_ulong i_max, i; \ | ||
45 | \ | ||
46 | i_max = MAX(MIN(s1 < vlmax ? vlmax - s1 : 0, vl), env->vstart); \ | ||
47 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ | ||
48 | } \ | ||
49 | \ | ||
50 | env->vstart = 0; \ | ||
51 | + /* set tail elements to 1s */ \ | ||
52 | + vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ | ||
53 | } | ||
54 | |||
55 | /* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */ | ||
56 | @@ -XXX,XX +XXX,XX @@ static void vslide1up_##BITWIDTH(void *vd, void *v0, target_ulong s1, \ | ||
57 | typedef uint##BITWIDTH##_t ETYPE; \ | ||
58 | uint32_t vm = vext_vm(desc); \ | ||
59 | uint32_t vl = env->vl; \ | ||
60 | + uint32_t esz = sizeof(ETYPE); \ | ||
61 | + uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ | ||
62 | + uint32_t vta = vext_vta(desc); \ | ||
63 | uint32_t i; \ | ||
64 | \ | ||
65 | for (i = env->vstart; i < vl; i++) { \ | ||
66 | @@ -XXX,XX +XXX,XX @@ static void vslide1up_##BITWIDTH(void *vd, void *v0, target_ulong s1, \ | ||
67 | } \ | ||
68 | } \ | ||
69 | env->vstart = 0; \ | ||
70 | + /* set tail elements to 1s */ \ | ||
71 | + vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ | ||
72 | } | ||
73 | |||
74 | GEN_VEXT_VSLIE1UP(8, H1) | ||
75 | @@ -XXX,XX +XXX,XX @@ static void vslide1down_##BITWIDTH(void *vd, void *v0, target_ulong s1, \ | ||
76 | typedef uint##BITWIDTH##_t ETYPE; \ | ||
77 | uint32_t vm = vext_vm(desc); \ | ||
78 | uint32_t vl = env->vl; \ | ||
79 | + uint32_t esz = sizeof(ETYPE); \ | ||
80 | + uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ | ||
81 | + uint32_t vta = vext_vta(desc); \ | ||
82 | uint32_t i; \ | ||
83 | \ | ||
84 | for (i = env->vstart; i < vl; i++) { \ | ||
85 | @@ -XXX,XX +XXX,XX @@ static void vslide1down_##BITWIDTH(void *vd, void *v0, target_ulong s1, \ | ||
86 | } \ | ||
87 | } \ | ||
88 | env->vstart = 0; \ | ||
89 | + /* set tail elements to 1s */ \ | ||
90 | + vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ | ||
91 | } | ||
92 | |||
93 | GEN_VEXT_VSLIDE1DOWN(8, H1) | ||
94 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ | ||
95 | uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(TS2))); \ | ||
96 | uint32_t vm = vext_vm(desc); \ | ||
97 | uint32_t vl = env->vl; \ | ||
98 | + uint32_t esz = sizeof(TS2); \ | ||
99 | + uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ | ||
100 | + uint32_t vta = vext_vta(desc); \ | ||
101 | uint64_t index; \ | ||
102 | uint32_t i; \ | ||
103 | \ | ||
104 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ | ||
105 | } \ | ||
106 | } \ | ||
107 | env->vstart = 0; \ | ||
108 | + /* set tail elements to 1s */ \ | ||
109 | + vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ | ||
110 | } | ||
111 | |||
112 | /* vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; */ | ||
113 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ | ||
114 | uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \ | ||
115 | uint32_t vm = vext_vm(desc); \ | ||
116 | uint32_t vl = env->vl; \ | ||
117 | + uint32_t esz = sizeof(ETYPE); \ | ||
118 | + uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ | ||
119 | + uint32_t vta = vext_vta(desc); \ | ||
120 | uint64_t index = s1; \ | ||
121 | uint32_t i; \ | ||
122 | \ | ||
123 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ | ||
124 | } \ | ||
125 | } \ | ||
126 | env->vstart = 0; \ | ||
127 | + /* set tail elements to 1s */ \ | ||
128 | + vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ | ||
129 | } | ||
130 | |||
131 | /* vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[rs1] */ | ||
132 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ | ||
133 | CPURISCVState *env, uint32_t desc) \ | ||
134 | { \ | ||
135 | uint32_t vl = env->vl; \ | ||
136 | + uint32_t esz = sizeof(ETYPE); \ | ||
137 | + uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ | ||
138 | + uint32_t vta = vext_vta(desc); \ | ||
139 | uint32_t num = 0, i; \ | ||
140 | \ | ||
141 | for (i = env->vstart; i < vl; i++) { \ | ||
142 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ | ||
143 | num++; \ | ||
144 | } \ | ||
145 | env->vstart = 0; \ | ||
146 | + /* set tail elements to 1s */ \ | ||
147 | + vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ | ||
148 | } | ||
149 | |||
150 | /* Compress into vd elements of vs2 where vs1 is enabled */ | ||
151 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs2, \ | ||
152 | { \ | ||
153 | uint32_t vl = env->vl; \ | ||
154 | uint32_t vm = vext_vm(desc); \ | ||
155 | + uint32_t esz = sizeof(ETYPE); \ | ||
156 | + uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ | ||
157 | + uint32_t vta = vext_vta(desc); \ | ||
158 | uint32_t i; \ | ||
159 | \ | ||
160 | for (i = env->vstart; i < vl; i++) { \ | ||
161 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs2, \ | ||
162 | *((ETYPE *)vd + HD(i)) = *((DTYPE *)vs2 + HS1(i)); \ | ||
163 | } \ | ||
164 | env->vstart = 0; \ | ||
165 | + /* set tail elements to 1s */ \ | ||
166 | + vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ | ||
167 | } | ||
168 | |||
169 | GEN_VEXT_INT_EXT(vzext_vf2_h, uint16_t, uint8_t, H2, H1) | ||
170 | diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc | ||
171 | index XXXXXXX..XXXXXXX 100644 | ||
172 | --- a/target/riscv/insn_trans/trans_rvv.c.inc | ||
173 | +++ b/target/riscv/insn_trans/trans_rvv.c.inc | ||
174 | @@ -XXX,XX +XXX,XX @@ static bool trans_vrgather_vx(DisasContext *s, arg_rmrr *a) | ||
175 | return false; | ||
176 | } | 33 | } |
177 | 34 | ||
178 | - if (a->vm && s->vl_eq_vlmax) { | 35 | - /* if no accessor exists then return failure */ |
179 | + if (a->vm && s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) { | 36 | - if (!csr_ops[csrno].read) { |
180 | int scale = s->lmul - (s->sew + 3); | 37 | - return RISCV_EXCP_ILLEGAL_INST; |
181 | int vlmax = s->cfg_ptr->vlen >> -scale; | 38 | - } |
182 | TCGv_i64 dest = tcg_temp_new_i64(); | 39 | - /* read old value */ |
183 | @@ -XXX,XX +XXX,XX @@ static bool trans_vrgather_vi(DisasContext *s, arg_rmrr *a) | 40 | - ret = csr_ops[csrno].read(env, csrno, &old_value); |
184 | return false; | 41 | - if (ret != RISCV_EXCP_NONE) { |
42 | - return ret; | ||
43 | + /* | ||
44 | + * ret_value == NULL means that rd=x0 and we're coming from helper_csrw() | ||
45 | + * and we can't throw side effects caused by CSR reads. | ||
46 | + */ | ||
47 | + if (ret_value) { | ||
48 | + /* if no accessor exists then return failure */ | ||
49 | + if (!csr_ops[csrno].read) { | ||
50 | + return RISCV_EXCP_ILLEGAL_INST; | ||
51 | + } | ||
52 | + /* read old value */ | ||
53 | + ret = csr_ops[csrno].read(env, csrno, &old_value); | ||
54 | + if (ret != RISCV_EXCP_NONE) { | ||
55 | + return ret; | ||
56 | + } | ||
185 | } | 57 | } |
186 | 58 | ||
187 | - if (a->vm && s->vl_eq_vlmax) { | 59 | /* write value if writable and write mask set, otherwise drop writes */ |
188 | + if (a->vm && s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) { | ||
189 | int scale = s->lmul - (s->sew + 3); | ||
190 | int vlmax = s->cfg_ptr->vlen >> -scale; | ||
191 | if (a->rs1 >= vlmax) { | ||
192 | @@ -XXX,XX +XXX,XX @@ static bool trans_vcompress_vm(DisasContext *s, arg_r *a) | ||
193 | tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); | ||
194 | |||
195 | data = FIELD_DP32(data, VDATA, LMUL, s->lmul); | ||
196 | + data = FIELD_DP32(data, VDATA, VTA, s->vta); | ||
197 | tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), | ||
198 | vreg_ofs(s, a->rs1), vreg_ofs(s, a->rs2), | ||
199 | cpu_env, s->cfg_ptr->vlen / 8, | ||
200 | @@ -XXX,XX +XXX,XX @@ static bool int_ext_op(DisasContext *s, arg_rmr *a, uint8_t seq) | ||
201 | } | ||
202 | |||
203 | data = FIELD_DP32(data, VDATA, VM, a->vm); | ||
204 | + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); | ||
205 | + data = FIELD_DP32(data, VDATA, VTA, s->vta); | ||
206 | |||
207 | tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), | ||
208 | vreg_ofs(s, a->rs2), cpu_env, | ||
209 | -- | 60 | -- |
210 | 2.36.1 | 61 | 2.41.0 | diff view generated by jsdifflib |