1 | The following changes since commit 553cf5d7c47bee05a3dec9461c1f8430316d516b: | 1 | The following changes since commit c5ea91da443b458352c1b629b490ee6631775cb4: |
---|---|---|---|
2 | 2 | ||
3 | Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20200626' into staging (2020-06-26 18:22:36 +0100) | 3 | Merge tag 'pull-trivial-patches' of https://gitlab.com/mjt0k/qemu into staging (2023-09-08 10:06:25 -0400) |
4 | 4 | ||
5 | are available in the Git repository at: | 5 | are available in the Git repository at: |
6 | 6 | ||
7 | git@github.com:alistair23/qemu.git tags/pull-riscv-to-apply-20200626-1 | 7 | https://github.com/alistair23/qemu.git tags/pull-riscv-to-apply-20230911 |
8 | 8 | ||
9 | for you to fetch changes up to b39d59434ea10649fdb9e0a339c30c76e38c5e17: | 9 | for you to fetch changes up to e7a03409f29e2da59297d55afbaec98c96e43e3a: |
10 | 10 | ||
11 | target/riscv: configure and turn on vector extension from command line (2020-06-26 14:22:15 -0700) | 11 | target/riscv: don't read CSR in riscv_csrrw_do64 (2023-09-11 11:45:55 +1000) |
12 | 12 | ||
13 | ---------------------------------------------------------------- | 13 | ---------------------------------------------------------------- |
14 | This PR contains two patches to improve PLIC support in QEMU. | 14 | First RISC-V PR for 8.2 |
15 | 15 | ||
16 | The rest of the PR is adding support for the v0.7.1 RISC-V vector | 16 | * Remove 'host' CPU from TCG |
17 | extensions. This is experimental support as the vector extensions are | 17 | * riscv_htif Fixup printing on big endian hosts |
18 | still in a draft state. | 18 | * Add zmmul isa string |
19 | * Add smepmp isa string | ||
20 | * Fix page_check_range use in fault-only-first | ||
21 | * Use existing lookup tables for MixColumns | ||
22 | * Add RISC-V vector cryptographic instruction set support | ||
23 | * Implement WARL behaviour for mcountinhibit/mcounteren | ||
24 | * Add Zihintntl extension ISA string to DTS | ||
25 | * Fix zfa fleq.d and fltq.d | ||
26 | * Fix upper/lower mtime write calculation | ||
27 | * Make rtc variable names consistent | ||
28 | * Use abi type for linux-user target_ucontext | ||
29 | * Add RISC-V KVM AIA Support | ||
30 | * Fix riscv,pmu DT node path in the virt machine | ||
31 | * Update CSR bits name for svadu extension | ||
32 | * Mark zicond non-experimental | ||
33 | * Fix satp_mode_finalize() when satp_mode.supported = 0 | ||
34 | * Fix non-KVM --enable-debug build | ||
35 | * Add new extensions to hwprobe | ||
36 | * Use accelerated helper for AES64KS1I | ||
37 | * Allocate itrigger timers only once | ||
38 | * Respect mseccfg.RLB for pmpaddrX changes | ||
39 | * Align the AIA model to v1.0 ratified spec | ||
40 | * Don't read the CSR in riscv_csrrw_do64 | ||
19 | 41 | ||
20 | ---------------------------------------------------------------- | 42 | ---------------------------------------------------------------- |
21 | Jessica Clarke (2): | 43 | Akihiko Odaki (1): |
22 | riscv: plic: Honour source priorities | 44 | target/riscv: Allocate itrigger timers only once |
23 | riscv: plic: Add a couple of mising sifive_plic_update calls | ||
24 | 45 | ||
25 | LIU Zhiwei (61): | 46 | Ard Biesheuvel (2): |
26 | target/riscv: add vector extension field in CPURISCVState | 47 | target/riscv: Use existing lookup tables for MixColumns |
27 | target/riscv: implementation-defined constant parameters | 48 | target/riscv: Use accelerated helper for AES64KS1I |
28 | target/riscv: support vector extension csr | ||
29 | target/riscv: add vector configure instruction | ||
30 | target/riscv: add an internals.h header | ||
31 | target/riscv: add vector stride load and store instructions | ||
32 | target/riscv: add vector index load and store instructions | ||
33 | target/riscv: add fault-only-first unit stride load | ||
34 | target/riscv: add vector amo operations | ||
35 | target/riscv: vector single-width integer add and subtract | ||
36 | target/riscv: vector widening integer add and subtract | ||
37 | target/riscv: vector integer add-with-carry / subtract-with-borrow instructions | ||
38 | target/riscv: vector bitwise logical instructions | ||
39 | target/riscv: vector single-width bit shift instructions | ||
40 | target/riscv: vector narrowing integer right shift instructions | ||
41 | target/riscv: vector integer comparison instructions | ||
42 | target/riscv: vector integer min/max instructions | ||
43 | target/riscv: vector single-width integer multiply instructions | ||
44 | target/riscv: vector integer divide instructions | ||
45 | target/riscv: vector widening integer multiply instructions | ||
46 | target/riscv: vector single-width integer multiply-add instructions | ||
47 | target/riscv: vector widening integer multiply-add instructions | ||
48 | target/riscv: vector integer merge and move instructions | ||
49 | target/riscv: vector single-width saturating add and subtract | ||
50 | target/riscv: vector single-width averaging add and subtract | ||
51 | target/riscv: vector single-width fractional multiply with rounding and saturation | ||
52 | target/riscv: vector widening saturating scaled multiply-add | ||
53 | target/riscv: vector single-width scaling shift instructions | ||
54 | target/riscv: vector narrowing fixed-point clip instructions | ||
55 | target/riscv: vector single-width floating-point add/subtract instructions | ||
56 | target/riscv: vector widening floating-point add/subtract instructions | ||
57 | target/riscv: vector single-width floating-point multiply/divide instructions | ||
58 | target/riscv: vector widening floating-point multiply | ||
59 | target/riscv: vector single-width floating-point fused multiply-add instructions | ||
60 | target/riscv: vector widening floating-point fused multiply-add instructions | ||
61 | target/riscv: vector floating-point square-root instruction | ||
62 | target/riscv: vector floating-point min/max instructions | ||
63 | target/riscv: vector floating-point sign-injection instructions | ||
64 | target/riscv: vector floating-point compare instructions | ||
65 | target/riscv: vector floating-point classify instructions | ||
66 | target/riscv: vector floating-point merge instructions | ||
67 | target/riscv: vector floating-point/integer type-convert instructions | ||
68 | target/riscv: widening floating-point/integer type-convert instructions | ||
69 | target/riscv: narrowing floating-point/integer type-convert instructions | ||
70 | target/riscv: vector single-width integer reduction instructions | ||
71 | target/riscv: vector wideing integer reduction instructions | ||
72 | target/riscv: vector single-width floating-point reduction instructions | ||
73 | target/riscv: vector widening floating-point reduction instructions | ||
74 | target/riscv: vector mask-register logical instructions | ||
75 | target/riscv: vector mask population count vmpopc | ||
76 | target/riscv: vmfirst find-first-set mask bit | ||
77 | target/riscv: set-X-first mask bit | ||
78 | target/riscv: vector iota instruction | ||
79 | target/riscv: vector element index instruction | ||
80 | target/riscv: integer extract instruction | ||
81 | target/riscv: integer scalar move instruction | ||
82 | target/riscv: floating-point scalar move instructions | ||
83 | target/riscv: vector slide instructions | ||
84 | target/riscv: vector register gather instruction | ||
85 | target/riscv: vector compress instruction | ||
86 | target/riscv: configure and turn on vector extension from command line | ||
87 | 49 | ||
88 | target/riscv/cpu.h | 82 +- | 50 | Conor Dooley (1): |
89 | target/riscv/cpu_bits.h | 15 + | 51 | hw/riscv: virt: Fix riscv,pmu DT node path |
90 | target/riscv/helper.h | 1069 +++++++ | ||
91 | target/riscv/internals.h | 41 + | ||
92 | target/riscv/insn32-64.decode | 11 + | ||
93 | target/riscv/insn32.decode | 372 +++ | ||
94 | hw/riscv/sifive_plic.c | 20 +- | ||
95 | target/riscv/cpu.c | 50 + | ||
96 | target/riscv/csr.c | 75 +- | ||
97 | target/riscv/fpu_helper.c | 33 +- | ||
98 | target/riscv/insn_trans/trans_rvv.inc.c | 2888 ++++++++++++++++++ | ||
99 | target/riscv/translate.c | 27 +- | ||
100 | target/riscv/vector_helper.c | 4899 +++++++++++++++++++++++++++++++ | ||
101 | target/riscv/Makefile.objs | 2 +- | ||
102 | 14 files changed, 9534 insertions(+), 50 deletions(-) | ||
103 | create mode 100644 target/riscv/internals.h | ||
104 | create mode 100644 target/riscv/insn_trans/trans_rvv.inc.c | ||
105 | create mode 100644 target/riscv/vector_helper.c | ||
106 | 52 | ||
53 | Daniel Henrique Barboza (6): | ||
54 | target/riscv/cpu.c: do not run 'host' CPU with TCG | ||
55 | target/riscv/cpu.c: add zmmul isa string | ||
56 | target/riscv/cpu.c: add smepmp isa string | ||
57 | target/riscv: fix satp_mode_finalize() when satp_mode.supported = 0 | ||
58 | hw/riscv/virt.c: fix non-KVM --enable-debug build | ||
59 | hw/intc/riscv_aplic.c fix non-KVM --enable-debug build | ||
60 | |||
61 | Dickon Hood (2): | ||
62 | target/riscv: Refactor translation of vector-widening instruction | ||
63 | target/riscv: Add Zvbb ISA extension support | ||
64 | |||
65 | Jason Chien (3): | ||
66 | target/riscv: Add Zihintntl extension ISA string to DTS | ||
67 | hw/intc: Fix upper/lower mtime write calculation | ||
68 | hw/intc: Make rtc variable names consistent | ||
69 | |||
70 | Kiran Ostrolenk (4): | ||
71 | target/riscv: Refactor some of the generic vector functionality | ||
72 | target/riscv: Refactor vector-vector translation macro | ||
73 | target/riscv: Refactor some of the generic vector functionality | ||
74 | target/riscv: Add Zvknh ISA extension support | ||
75 | |||
76 | LIU Zhiwei (3): | ||
77 | target/riscv: Fix page_check_range use in fault-only-first | ||
78 | target/riscv: Fix zfa fleq.d and fltq.d | ||
79 | linux-user/riscv: Use abi type for target_ucontext | ||
80 | |||
81 | Lawrence Hunter (2): | ||
82 | target/riscv: Add Zvbc ISA extension support | ||
83 | target/riscv: Add Zvksh ISA extension support | ||
84 | |||
85 | Leon Schuermann (1): | ||
86 | target/riscv/pmp.c: respect mseccfg.RLB for pmpaddrX changes | ||
87 | |||
88 | Max Chou (3): | ||
89 | crypto: Create sm4_subword | ||
90 | crypto: Add SM4 constant parameter CK | ||
91 | target/riscv: Add Zvksed ISA extension support | ||
92 | |||
93 | Nazar Kazakov (4): | ||
94 | target/riscv: Remove redundant "cpu_vl == 0" checks | ||
95 | target/riscv: Move vector translation checks | ||
96 | target/riscv: Add Zvkned ISA extension support | ||
97 | target/riscv: Add Zvkg ISA extension support | ||
98 | |||
99 | Nikita Shubin (1): | ||
100 | target/riscv: don't read CSR in riscv_csrrw_do64 | ||
101 | |||
102 | Rob Bradford (1): | ||
103 | target/riscv: Implement WARL behaviour for mcountinhibit/mcounteren | ||
104 | |||
105 | Robbin Ehn (1): | ||
106 | linux-user/riscv: Add new extensions to hwprobe | ||
107 | |||
108 | Thomas Huth (2): | ||
109 | hw/char/riscv_htif: Fix printing of console characters on big endian hosts | ||
110 | hw/char/riscv_htif: Fix the console syscall on big endian hosts | ||
111 | |||
112 | Tommy Wu (1): | ||
113 | target/riscv: Align the AIA model to v1.0 ratified spec | ||
114 | |||
115 | Vineet Gupta (1): | ||
116 | riscv: zicond: make non-experimental | ||
117 | |||
118 | Weiwei Li (1): | ||
119 | target/riscv: Update CSR bits name for svadu extension | ||
120 | |||
121 | Yong-Xuan Wang (5): | ||
122 | target/riscv: support the AIA device emulation with KVM enabled | ||
123 | target/riscv: check the in-kernel irqchip support | ||
124 | target/riscv: Create an KVM AIA irqchip | ||
125 | target/riscv: update APLIC and IMSIC to support KVM AIA | ||
126 | target/riscv: select KVM AIA in riscv virt machine | ||
127 | |||
128 | include/crypto/aes.h | 7 + | ||
129 | include/crypto/sm4.h | 9 + | ||
130 | target/riscv/cpu_bits.h | 8 +- | ||
131 | target/riscv/cpu_cfg.h | 9 + | ||
132 | target/riscv/debug.h | 3 +- | ||
133 | target/riscv/helper.h | 98 +++ | ||
134 | target/riscv/kvm_riscv.h | 5 + | ||
135 | target/riscv/vector_internals.h | 228 +++++++ | ||
136 | target/riscv/insn32.decode | 58 ++ | ||
137 | crypto/aes.c | 4 +- | ||
138 | crypto/sm4.c | 10 + | ||
139 | hw/char/riscv_htif.c | 12 +- | ||
140 | hw/intc/riscv_aclint.c | 11 +- | ||
141 | hw/intc/riscv_aplic.c | 52 +- | ||
142 | hw/intc/riscv_imsic.c | 25 +- | ||
143 | hw/riscv/virt.c | 374 ++++++------ | ||
144 | linux-user/riscv/signal.c | 4 +- | ||
145 | linux-user/syscall.c | 14 +- | ||
146 | target/arm/tcg/crypto_helper.c | 10 +- | ||
147 | target/riscv/cpu.c | 83 ++- | ||
148 | target/riscv/cpu_helper.c | 6 +- | ||
149 | target/riscv/crypto_helper.c | 51 +- | ||
150 | target/riscv/csr.c | 54 +- | ||
151 | target/riscv/debug.c | 15 +- | ||
152 | target/riscv/kvm.c | 201 ++++++- | ||
153 | target/riscv/pmp.c | 4 + | ||
154 | target/riscv/translate.c | 1 + | ||
155 | target/riscv/vcrypto_helper.c | 970 ++++++++++++++++++++++++++++++ | ||
156 | target/riscv/vector_helper.c | 245 +------- | ||
157 | target/riscv/vector_internals.c | 81 +++ | ||
158 | target/riscv/insn_trans/trans_rvv.c.inc | 171 +++--- | ||
159 | target/riscv/insn_trans/trans_rvvk.c.inc | 606 +++++++++++++++++++ | ||
160 | target/riscv/insn_trans/trans_rvzfa.c.inc | 4 +- | ||
161 | target/riscv/meson.build | 4 +- | ||
162 | 34 files changed, 2785 insertions(+), 652 deletions(-) | ||
163 | create mode 100644 target/riscv/vector_internals.h | ||
164 | create mode 100644 target/riscv/vcrypto_helper.c | ||
165 | create mode 100644 target/riscv/vector_internals.c | ||
166 | create mode 100644 target/riscv/insn_trans/trans_rvvk.c.inc | diff view generated by jsdifflib |
1 | From: LIU Zhiwei <zhiwei_liu@c-sky.com> | 1 | From: Daniel Henrique Barboza <dbarboza@ventanamicro.com> |
---|---|---|---|
2 | 2 | ||
3 | Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> | 3 | The 'host' CPU is available in a CONFIG_KVM build and it's currently |
4 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 4 | available for all accels, but is a KVM only CPU. This means that in a |
5 | Message-id: 20200623215920.2594-60-zhiwei_liu@c-sky.com | 5 | RISC-V KVM capable host we can do things like this: |
6 | |||
7 | $ ./build/qemu-system-riscv64 -M virt,accel=tcg -cpu host --nographic | ||
8 | qemu-system-riscv64: H extension requires priv spec 1.12.0 | ||
9 | |||
10 | This CPU does not have a priv spec because we don't filter its extensions | ||
11 | via priv spec. We shouldn't be reaching riscv_cpu_realize_tcg() at all | ||
12 | with the 'host' CPU. | ||
13 | |||
14 | We don't have a way to filter the 'host' CPU out of the available CPU | ||
15 | options (-cpu help) if the build includes both KVM and TCG. What we can | ||
16 | do is to error out during riscv_cpu_realize_tcg() if the user chooses | ||
17 | the 'host' CPU with accel=tcg: | ||
18 | |||
19 | $ ./build/qemu-system-riscv64 -M virt,accel=tcg -cpu host --nographic | ||
20 | qemu-system-riscv64: 'host' CPU is not compatible with TCG acceleration | ||
21 | |||
22 | Signed-off-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> | ||
23 | Reviewed-by: Alistair Francis <alistair.francis@wdc.com> | ||
24 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
25 | Message-Id: <20230721133411.474105-1-dbarboza@ventanamicro.com> | ||
6 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | 26 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> |
7 | --- | 27 | --- |
8 | target/riscv/helper.h | 9 +++ | 28 | target/riscv/cpu.c | 5 +++++ |
9 | target/riscv/insn32.decode | 3 + | 29 | 1 file changed, 5 insertions(+) |
10 | target/riscv/insn_trans/trans_rvv.inc.c | 78 +++++++++++++++++++++++++ | ||
11 | target/riscv/vector_helper.c | 60 +++++++++++++++++++ | ||
12 | 4 files changed, 150 insertions(+) | ||
13 | 30 | ||
14 | diff --git a/target/riscv/helper.h b/target/riscv/helper.h | 31 | diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c |
15 | index XXXXXXX..XXXXXXX 100644 | 32 | index XXXXXXX..XXXXXXX 100644 |
16 | --- a/target/riscv/helper.h | 33 | --- a/target/riscv/cpu.c |
17 | +++ b/target/riscv/helper.h | 34 | +++ b/target/riscv/cpu.c |
18 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vslide1down_vx_b, void, ptr, ptr, tl, ptr, env, i32) | 35 | @@ -XXX,XX +XXX,XX @@ static void riscv_cpu_realize_tcg(DeviceState *dev, Error **errp) |
19 | DEF_HELPER_6(vslide1down_vx_h, void, ptr, ptr, tl, ptr, env, i32) | 36 | CPURISCVState *env = &cpu->env; |
20 | DEF_HELPER_6(vslide1down_vx_w, void, ptr, ptr, tl, ptr, env, i32) | 37 | Error *local_err = NULL; |
21 | DEF_HELPER_6(vslide1down_vx_d, void, ptr, ptr, tl, ptr, env, i32) | 38 | |
22 | + | 39 | + if (object_dynamic_cast(OBJECT(dev), TYPE_RISCV_CPU_HOST)) { |
23 | +DEF_HELPER_6(vrgather_vv_b, void, ptr, ptr, ptr, ptr, env, i32) | 40 | + error_setg(errp, "'host' CPU is not compatible with TCG acceleration"); |
24 | +DEF_HELPER_6(vrgather_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | 41 | + return; |
25 | +DEF_HELPER_6(vrgather_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
26 | +DEF_HELPER_6(vrgather_vv_d, void, ptr, ptr, ptr, ptr, env, i32) | ||
27 | +DEF_HELPER_6(vrgather_vx_b, void, ptr, ptr, tl, ptr, env, i32) | ||
28 | +DEF_HELPER_6(vrgather_vx_h, void, ptr, ptr, tl, ptr, env, i32) | ||
29 | +DEF_HELPER_6(vrgather_vx_w, void, ptr, ptr, tl, ptr, env, i32) | ||
30 | +DEF_HELPER_6(vrgather_vx_d, void, ptr, ptr, tl, ptr, env, i32) | ||
31 | diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode | ||
32 | index XXXXXXX..XXXXXXX 100644 | ||
33 | --- a/target/riscv/insn32.decode | ||
34 | +++ b/target/riscv/insn32.decode | ||
35 | @@ -XXX,XX +XXX,XX @@ vslide1up_vx 001110 . ..... ..... 110 ..... 1010111 @r_vm | ||
36 | vslidedown_vx 001111 . ..... ..... 100 ..... 1010111 @r_vm | ||
37 | vslidedown_vi 001111 . ..... ..... 011 ..... 1010111 @r_vm | ||
38 | vslide1down_vx 001111 . ..... ..... 110 ..... 1010111 @r_vm | ||
39 | +vrgather_vv 001100 . ..... ..... 000 ..... 1010111 @r_vm | ||
40 | +vrgather_vx 001100 . ..... ..... 100 ..... 1010111 @r_vm | ||
41 | +vrgather_vi 001100 . ..... ..... 011 ..... 1010111 @r_vm | ||
42 | |||
43 | vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm | ||
44 | vsetvl 1000000 ..... ..... 111 ..... 1010111 @r | ||
45 | diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c | ||
46 | index XXXXXXX..XXXXXXX 100644 | ||
47 | --- a/target/riscv/insn_trans/trans_rvv.inc.c | ||
48 | +++ b/target/riscv/insn_trans/trans_rvv.inc.c | ||
49 | @@ -XXX,XX +XXX,XX @@ GEN_OPIVI_TRANS(vslideup_vi, 1, vslideup_vx, slideup_check) | ||
50 | GEN_OPIVX_TRANS(vslidedown_vx, opivx_check) | ||
51 | GEN_OPIVX_TRANS(vslide1down_vx, opivx_check) | ||
52 | GEN_OPIVI_TRANS(vslidedown_vi, 1, vslidedown_vx, opivx_check) | ||
53 | + | ||
54 | +/* Vector Register Gather Instruction */ | ||
55 | +static bool vrgather_vv_check(DisasContext *s, arg_rmrr *a) | ||
56 | +{ | ||
57 | + return (vext_check_isa_ill(s) && | ||
58 | + vext_check_overlap_mask(s, a->rd, a->vm, true) && | ||
59 | + vext_check_reg(s, a->rd, false) && | ||
60 | + vext_check_reg(s, a->rs1, false) && | ||
61 | + vext_check_reg(s, a->rs2, false) && | ||
62 | + (a->rd != a->rs2) && (a->rd != a->rs1)); | ||
63 | +} | ||
64 | + | ||
65 | +GEN_OPIVV_TRANS(vrgather_vv, vrgather_vv_check) | ||
66 | + | ||
67 | +static bool vrgather_vx_check(DisasContext *s, arg_rmrr *a) | ||
68 | +{ | ||
69 | + return (vext_check_isa_ill(s) && | ||
70 | + vext_check_overlap_mask(s, a->rd, a->vm, true) && | ||
71 | + vext_check_reg(s, a->rd, false) && | ||
72 | + vext_check_reg(s, a->rs2, false) && | ||
73 | + (a->rd != a->rs2)); | ||
74 | +} | ||
75 | + | ||
76 | +/* vrgather.vx vd, vs2, rs1, vm # vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[rs1] */ | ||
77 | +static bool trans_vrgather_vx(DisasContext *s, arg_rmrr *a) | ||
78 | +{ | ||
79 | + if (!vrgather_vx_check(s, a)) { | ||
80 | + return false; | ||
81 | + } | 42 | + } |
82 | + | 43 | + |
83 | + if (a->vm && s->vl_eq_vlmax) { | 44 | riscv_cpu_validate_misa_mxl(cpu, &local_err); |
84 | + int vlmax = s->vlen / s->mlen; | 45 | if (local_err != NULL) { |
85 | + TCGv_i64 dest = tcg_temp_new_i64(); | 46 | error_propagate(errp, local_err); |
86 | + | ||
87 | + if (a->rs1 == 0) { | ||
88 | + vec_element_loadi(s, dest, a->rs2, 0); | ||
89 | + } else { | ||
90 | + vec_element_loadx(s, dest, a->rs2, cpu_gpr[a->rs1], vlmax); | ||
91 | + } | ||
92 | + | ||
93 | + tcg_gen_gvec_dup_i64(s->sew, vreg_ofs(s, a->rd), | ||
94 | + MAXSZ(s), MAXSZ(s), dest); | ||
95 | + tcg_temp_free_i64(dest); | ||
96 | + } else { | ||
97 | + static gen_helper_opivx * const fns[4] = { | ||
98 | + gen_helper_vrgather_vx_b, gen_helper_vrgather_vx_h, | ||
99 | + gen_helper_vrgather_vx_w, gen_helper_vrgather_vx_d | ||
100 | + }; | ||
101 | + return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fns[s->sew], s); | ||
102 | + } | ||
103 | + return true; | ||
104 | +} | ||
105 | + | ||
106 | +/* vrgather.vi vd, vs2, imm, vm # vd[i] = (imm >= VLMAX) ? 0 : vs2[imm] */ | ||
107 | +static bool trans_vrgather_vi(DisasContext *s, arg_rmrr *a) | ||
108 | +{ | ||
109 | + if (!vrgather_vx_check(s, a)) { | ||
110 | + return false; | ||
111 | + } | ||
112 | + | ||
113 | + if (a->vm && s->vl_eq_vlmax) { | ||
114 | + if (a->rs1 >= s->vlen / s->mlen) { | ||
115 | + tcg_gen_gvec_dup_imm(SEW64, vreg_ofs(s, a->rd), | ||
116 | + MAXSZ(s), MAXSZ(s), 0); | ||
117 | + } else { | ||
118 | + tcg_gen_gvec_dup_mem(s->sew, vreg_ofs(s, a->rd), | ||
119 | + endian_ofs(s, a->rs2, a->rs1), | ||
120 | + MAXSZ(s), MAXSZ(s)); | ||
121 | + } | ||
122 | + } else { | ||
123 | + static gen_helper_opivx * const fns[4] = { | ||
124 | + gen_helper_vrgather_vx_b, gen_helper_vrgather_vx_h, | ||
125 | + gen_helper_vrgather_vx_w, gen_helper_vrgather_vx_d | ||
126 | + }; | ||
127 | + return opivi_trans(a->rd, a->rs1, a->rs2, a->vm, fns[s->sew], s, 1); | ||
128 | + } | ||
129 | + return true; | ||
130 | +} | ||
131 | diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c | ||
132 | index XXXXXXX..XXXXXXX 100644 | ||
133 | --- a/target/riscv/vector_helper.c | ||
134 | +++ b/target/riscv/vector_helper.c | ||
135 | @@ -XXX,XX +XXX,XX @@ GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, uint8_t, H1, clearb) | ||
136 | GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, uint16_t, H2, clearh) | ||
137 | GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, uint32_t, H4, clearl) | ||
138 | GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, uint64_t, H8, clearq) | ||
139 | + | ||
140 | +/* Vector Register Gather Instruction */ | ||
141 | +#define GEN_VEXT_VRGATHER_VV(NAME, ETYPE, H, CLEAR_FN) \ | ||
142 | +void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ | ||
143 | + CPURISCVState *env, uint32_t desc) \ | ||
144 | +{ \ | ||
145 | + uint32_t mlen = vext_mlen(desc); \ | ||
146 | + uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen; \ | ||
147 | + uint32_t vm = vext_vm(desc); \ | ||
148 | + uint32_t vl = env->vl; \ | ||
149 | + uint32_t index, i; \ | ||
150 | + \ | ||
151 | + for (i = 0; i < vl; i++) { \ | ||
152 | + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ | ||
153 | + continue; \ | ||
154 | + } \ | ||
155 | + index = *((ETYPE *)vs1 + H(i)); \ | ||
156 | + if (index >= vlmax) { \ | ||
157 | + *((ETYPE *)vd + H(i)) = 0; \ | ||
158 | + } else { \ | ||
159 | + *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index)); \ | ||
160 | + } \ | ||
161 | + } \ | ||
162 | + CLEAR_FN(vd, vl, vl * sizeof(ETYPE), vlmax * sizeof(ETYPE)); \ | ||
163 | +} | ||
164 | + | ||
165 | +/* vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; */ | ||
166 | +GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t, H1, clearb) | ||
167 | +GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, H2, clearh) | ||
168 | +GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, H4, clearl) | ||
169 | +GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, H8, clearq) | ||
170 | + | ||
171 | +#define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H, CLEAR_FN) \ | ||
172 | +void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ | ||
173 | + CPURISCVState *env, uint32_t desc) \ | ||
174 | +{ \ | ||
175 | + uint32_t mlen = vext_mlen(desc); \ | ||
176 | + uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen; \ | ||
177 | + uint32_t vm = vext_vm(desc); \ | ||
178 | + uint32_t vl = env->vl; \ | ||
179 | + uint32_t index = s1, i; \ | ||
180 | + \ | ||
181 | + for (i = 0; i < vl; i++) { \ | ||
182 | + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ | ||
183 | + continue; \ | ||
184 | + } \ | ||
185 | + if (index >= vlmax) { \ | ||
186 | + *((ETYPE *)vd + H(i)) = 0; \ | ||
187 | + } else { \ | ||
188 | + *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index)); \ | ||
189 | + } \ | ||
190 | + } \ | ||
191 | + CLEAR_FN(vd, vl, vl * sizeof(ETYPE), vlmax * sizeof(ETYPE)); \ | ||
192 | +} | ||
193 | + | ||
194 | +/* vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[rs1] */ | ||
195 | +GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t, H1, clearb) | ||
196 | +GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2, clearh) | ||
197 | +GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4, clearl) | ||
198 | +GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8, clearq) | ||
199 | -- | 47 | -- |
200 | 2.27.0 | 48 | 2.41.0 |
201 | 49 | ||
202 | 50 | diff view generated by jsdifflib |
1 | From: LIU Zhiwei <zhiwei_liu@c-sky.com> | 1 | From: Thomas Huth <thuth@redhat.com> |
---|---|---|---|
2 | 2 | ||
3 | Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> | 3 | The character that should be printed is stored in the 64 bit "payload" |
4 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 4 | variable. The code currently tries to print it by taking the address |
5 | Message-id: 20200623215920.2594-58-zhiwei_liu@c-sky.com | 5 | of the variable and passing this pointer to qemu_chr_fe_write(). However, |
6 | this only works on little endian hosts where the least significant bits | ||
7 | are stored on the lowest address. To do this in a portable way, we have | ||
8 | to store the value in a uint8_t variable instead. | ||
9 | |||
10 | Fixes: 5033606780 ("RISC-V HTIF Console") | ||
11 | Signed-off-by: Thomas Huth <thuth@redhat.com> | ||
12 | Reviewed-by: Alistair Francis <alistair.francis@wdc.com> | ||
13 | Reviewed-by: Bin Meng <bmeng@tinylab.org> | ||
14 | Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> | ||
15 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
16 | Message-Id: <20230721094720.902454-2-thuth@redhat.com> | ||
6 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | 17 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> |
7 | --- | 18 | --- |
8 | target/riscv/insn32.decode | 3 ++ | 19 | hw/char/riscv_htif.c | 3 ++- |
9 | target/riscv/insn_trans/trans_rvv.inc.c | 49 +++++++++++++++++++++++++ | 20 | 1 file changed, 2 insertions(+), 1 deletion(-) |
10 | 2 files changed, 52 insertions(+) | ||
11 | 21 | ||
12 | diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode | 22 | diff --git a/hw/char/riscv_htif.c b/hw/char/riscv_htif.c |
13 | index XXXXXXX..XXXXXXX 100644 | 23 | index XXXXXXX..XXXXXXX 100644 |
14 | --- a/target/riscv/insn32.decode | 24 | --- a/hw/char/riscv_htif.c |
15 | +++ b/target/riscv/insn32.decode | 25 | +++ b/hw/char/riscv_htif.c |
16 | @@ -XXX,XX +XXX,XX @@ | 26 | @@ -XXX,XX +XXX,XX @@ static void htif_handle_tohost_write(HTIFState *s, uint64_t val_written) |
17 | @r2_vm ...... vm:1 ..... ..... ... ..... ....... &rmr %rs2 %rd | 27 | s->tohost = 0; /* clear to indicate we read */ |
18 | @r1_vm ...... vm:1 ..... ..... ... ..... ....... %rd | 28 | return; |
19 | @r_nfvm ... ... vm:1 ..... ..... ... ..... ....... &rnfvm %nf %rs2 %rs1 %rd | 29 | } else if (cmd == HTIF_CONSOLE_CMD_PUTC) { |
20 | +@r2rd ....... ..... ..... ... ..... ....... %rs2 %rd | 30 | - qemu_chr_fe_write(&s->chr, (uint8_t *)&payload, 1); |
21 | @r_vm ...... vm:1 ..... ..... ... ..... ....... &rmrr %rs2 %rs1 %rd | 31 | + uint8_t ch = (uint8_t)payload; |
22 | @r_vm_1 ...... . ..... ..... ... ..... ....... &rmrr vm=1 %rs2 %rs1 %rd | 32 | + qemu_chr_fe_write(&s->chr, &ch, 1); |
23 | @r_vm_0 ...... . ..... ..... ... ..... ....... &rmrr vm=0 %rs2 %rs1 %rd | 33 | resp = 0x100 | (uint8_t)payload; |
24 | @@ -XXX,XX +XXX,XX @@ viota_m 010110 . ..... 10000 010 ..... 1010111 @r2_vm | 34 | } else { |
25 | vid_v 010110 . 00000 10001 010 ..... 1010111 @r1_vm | 35 | qemu_log("HTIF device %d: unknown command\n", device); |
26 | vext_x_v 001100 1 ..... ..... 010 ..... 1010111 @r | ||
27 | vmv_s_x 001101 1 00000 ..... 110 ..... 1010111 @r2 | ||
28 | +vfmv_f_s 001100 1 ..... 00000 001 ..... 1010111 @r2rd | ||
29 | +vfmv_s_f 001101 1 00000 ..... 101 ..... 1010111 @r2 | ||
30 | |||
31 | vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm | ||
32 | vsetvl 1000000 ..... ..... 111 ..... 1010111 @r | ||
33 | diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c | ||
34 | index XXXXXXX..XXXXXXX 100644 | ||
35 | --- a/target/riscv/insn_trans/trans_rvv.inc.c | ||
36 | +++ b/target/riscv/insn_trans/trans_rvv.inc.c | ||
37 | @@ -XXX,XX +XXX,XX @@ static bool trans_vmv_s_x(DisasContext *s, arg_vmv_s_x *a) | ||
38 | } | ||
39 | return false; | ||
40 | } | ||
41 | + | ||
42 | +/* Floating-Point Scalar Move Instructions */ | ||
43 | +static bool trans_vfmv_f_s(DisasContext *s, arg_vfmv_f_s *a) | ||
44 | +{ | ||
45 | + if (!s->vill && has_ext(s, RVF) && | ||
46 | + (s->mstatus_fs != 0) && (s->sew != 0)) { | ||
47 | + unsigned int len = 8 << s->sew; | ||
48 | + | ||
49 | + vec_element_loadi(s, cpu_fpr[a->rd], a->rs2, 0); | ||
50 | + if (len < 64) { | ||
51 | + tcg_gen_ori_i64(cpu_fpr[a->rd], cpu_fpr[a->rd], | ||
52 | + MAKE_64BIT_MASK(len, 64 - len)); | ||
53 | + } | ||
54 | + | ||
55 | + mark_fs_dirty(s); | ||
56 | + return true; | ||
57 | + } | ||
58 | + return false; | ||
59 | +} | ||
60 | + | ||
61 | +/* vfmv.s.f vd, rs1 # vd[0] = rs1 (vs2=0) */ | ||
62 | +static bool trans_vfmv_s_f(DisasContext *s, arg_vfmv_s_f *a) | ||
63 | +{ | ||
64 | + if (!s->vill && has_ext(s, RVF) && (s->sew != 0)) { | ||
65 | + TCGv_i64 t1; | ||
66 | + /* The instructions ignore LMUL and vector register group. */ | ||
67 | + uint32_t vlmax = s->vlen >> 3; | ||
68 | + | ||
69 | + /* if vl == 0, skip vector register write back */ | ||
70 | + TCGLabel *over = gen_new_label(); | ||
71 | + tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); | ||
72 | + | ||
73 | + /* zeroed all elements */ | ||
74 | + tcg_gen_gvec_dup_imm(SEW64, vreg_ofs(s, a->rd), vlmax, vlmax, 0); | ||
75 | + | ||
76 | + /* NaN-box f[rs1] as necessary for SEW */ | ||
77 | + t1 = tcg_temp_new_i64(); | ||
78 | + if (s->sew == MO_64 && !has_ext(s, RVD)) { | ||
79 | + tcg_gen_ori_i64(t1, cpu_fpr[a->rs1], MAKE_64BIT_MASK(32, 32)); | ||
80 | + } else { | ||
81 | + tcg_gen_mov_i64(t1, cpu_fpr[a->rs1]); | ||
82 | + } | ||
83 | + vec_element_storei(s, a->rd, 0, t1); | ||
84 | + tcg_temp_free_i64(t1); | ||
85 | + gen_set_label(over); | ||
86 | + return true; | ||
87 | + } | ||
88 | + return false; | ||
89 | +} | ||
90 | -- | 36 | -- |
91 | 2.27.0 | 37 | 2.41.0 |
92 | 38 | ||
93 | 39 | diff view generated by jsdifflib |
1 | From: LIU Zhiwei <zhiwei_liu@c-sky.com> | 1 | From: Thomas Huth <thuth@redhat.com> |
---|---|---|---|
2 | 2 | ||
3 | Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> | 3 | Values that have been read via cpu_physical_memory_read() from the |
4 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 4 | guest's memory have to be swapped in case the host endianness differs |
5 | Message-id: 20200623215920.2594-56-zhiwei_liu@c-sky.com | 5 | from the guest. |
6 | |||
7 | Fixes: a6e13e31d5 ("riscv_htif: Support console output via proxy syscall") | ||
8 | Signed-off-by: Thomas Huth <thuth@redhat.com> | ||
9 | Reviewed-by: Alistair Francis <alistair.francis@wdc.com> | ||
10 | Reviewed-by: Bin Meng <bmeng@tinylab.org> | ||
11 | Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> | ||
12 | Message-Id: <20230721094720.902454-3-thuth@redhat.com> | ||
6 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | 13 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> |
7 | --- | 14 | --- |
8 | target/riscv/insn32.decode | 1 + | 15 | hw/char/riscv_htif.c | 9 +++++---- |
9 | target/riscv/insn_trans/trans_rvv.inc.c | 116 ++++++++++++++++++++++++ | 16 | 1 file changed, 5 insertions(+), 4 deletions(-) |
10 | 2 files changed, 117 insertions(+) | ||
11 | 17 | ||
12 | diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode | 18 | diff --git a/hw/char/riscv_htif.c b/hw/char/riscv_htif.c |
13 | index XXXXXXX..XXXXXXX 100644 | 19 | index XXXXXXX..XXXXXXX 100644 |
14 | --- a/target/riscv/insn32.decode | 20 | --- a/hw/char/riscv_htif.c |
15 | +++ b/target/riscv/insn32.decode | 21 | +++ b/hw/char/riscv_htif.c |
16 | @@ -XXX,XX +XXX,XX @@ vmsif_m 010110 . ..... 00011 010 ..... 1010111 @r2_vm | 22 | @@ -XXX,XX +XXX,XX @@ |
17 | vmsof_m 010110 . ..... 00010 010 ..... 1010111 @r2_vm | 23 | #include "qemu/timer.h" |
18 | viota_m 010110 . ..... 10000 010 ..... 1010111 @r2_vm | 24 | #include "qemu/error-report.h" |
19 | vid_v 010110 . 00000 10001 010 ..... 1010111 @r1_vm | 25 | #include "exec/address-spaces.h" |
20 | +vext_x_v 001100 1 ..... ..... 010 ..... 1010111 @r | 26 | +#include "exec/tswap.h" |
21 | 27 | #include "sysemu/dma.h" | |
22 | vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm | 28 | |
23 | vsetvl 1000000 ..... ..... 111 ..... 1010111 @r | 29 | #define RISCV_DEBUG_HTIF 0 |
24 | diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c | 30 | @@ -XXX,XX +XXX,XX @@ static void htif_handle_tohost_write(HTIFState *s, uint64_t val_written) |
25 | index XXXXXXX..XXXXXXX 100644 | 31 | } else { |
26 | --- a/target/riscv/insn_trans/trans_rvv.inc.c | 32 | uint64_t syscall[8]; |
27 | +++ b/target/riscv/insn_trans/trans_rvv.inc.c | 33 | cpu_physical_memory_read(payload, syscall, sizeof(syscall)); |
28 | @@ -XXX,XX +XXX,XX @@ static bool trans_vid_v(DisasContext *s, arg_vid_v *a) | 34 | - if (syscall[0] == PK_SYS_WRITE && |
29 | } | 35 | - syscall[1] == HTIF_DEV_CONSOLE && |
30 | return false; | 36 | - syscall[3] == HTIF_CONSOLE_CMD_PUTC) { |
31 | } | 37 | + if (tswap64(syscall[0]) == PK_SYS_WRITE && |
32 | + | 38 | + tswap64(syscall[1]) == HTIF_DEV_CONSOLE && |
33 | +/* | 39 | + tswap64(syscall[3]) == HTIF_CONSOLE_CMD_PUTC) { |
34 | + *** Vector Permutation Instructions | 40 | uint8_t ch; |
35 | + */ | 41 | - cpu_physical_memory_read(syscall[2], &ch, 1); |
36 | + | 42 | + cpu_physical_memory_read(tswap64(syscall[2]), &ch, 1); |
37 | +/* Integer Extract Instruction */ | 43 | qemu_chr_fe_write(&s->chr, &ch, 1); |
38 | + | 44 | resp = 0x100 | (uint8_t)payload; |
39 | +static void load_element(TCGv_i64 dest, TCGv_ptr base, | 45 | } else { |
40 | + int ofs, int sew) | ||
41 | +{ | ||
42 | + switch (sew) { | ||
43 | + case MO_8: | ||
44 | + tcg_gen_ld8u_i64(dest, base, ofs); | ||
45 | + break; | ||
46 | + case MO_16: | ||
47 | + tcg_gen_ld16u_i64(dest, base, ofs); | ||
48 | + break; | ||
49 | + case MO_32: | ||
50 | + tcg_gen_ld32u_i64(dest, base, ofs); | ||
51 | + break; | ||
52 | + case MO_64: | ||
53 | + tcg_gen_ld_i64(dest, base, ofs); | ||
54 | + break; | ||
55 | + default: | ||
56 | + g_assert_not_reached(); | ||
57 | + break; | ||
58 | + } | ||
59 | +} | ||
60 | + | ||
61 | +/* offset of the idx element with base register r */ | ||
62 | +static uint32_t endian_ofs(DisasContext *s, int r, int idx) | ||
63 | +{ | ||
64 | +#ifdef HOST_WORDS_BIGENDIAN | ||
65 | + return vreg_ofs(s, r) + ((idx ^ (7 >> s->sew)) << s->sew); | ||
66 | +#else | ||
67 | + return vreg_ofs(s, r) + (idx << s->sew); | ||
68 | +#endif | ||
69 | +} | ||
70 | + | ||
71 | +/* adjust the index according to the endian */ | ||
72 | +static void endian_adjust(TCGv_i32 ofs, int sew) | ||
73 | +{ | ||
74 | +#ifdef HOST_WORDS_BIGENDIAN | ||
75 | + tcg_gen_xori_i32(ofs, ofs, 7 >> sew); | ||
76 | +#endif | ||
77 | +} | ||
78 | + | ||
79 | +/* Load idx >= VLMAX ? 0 : vreg[idx] */ | ||
80 | +static void vec_element_loadx(DisasContext *s, TCGv_i64 dest, | ||
81 | + int vreg, TCGv idx, int vlmax) | ||
82 | +{ | ||
83 | + TCGv_i32 ofs = tcg_temp_new_i32(); | ||
84 | + TCGv_ptr base = tcg_temp_new_ptr(); | ||
85 | + TCGv_i64 t_idx = tcg_temp_new_i64(); | ||
86 | + TCGv_i64 t_vlmax, t_zero; | ||
87 | + | ||
88 | + /* | ||
89 | + * Mask the index to the length so that we do | ||
90 | + * not produce an out-of-range load. | ||
91 | + */ | ||
92 | + tcg_gen_trunc_tl_i32(ofs, idx); | ||
93 | + tcg_gen_andi_i32(ofs, ofs, vlmax - 1); | ||
94 | + | ||
95 | + /* Convert the index to an offset. */ | ||
96 | + endian_adjust(ofs, s->sew); | ||
97 | + tcg_gen_shli_i32(ofs, ofs, s->sew); | ||
98 | + | ||
99 | + /* Convert the index to a pointer. */ | ||
100 | + tcg_gen_ext_i32_ptr(base, ofs); | ||
101 | + tcg_gen_add_ptr(base, base, cpu_env); | ||
102 | + | ||
103 | + /* Perform the load. */ | ||
104 | + load_element(dest, base, | ||
105 | + vreg_ofs(s, vreg), s->sew); | ||
106 | + tcg_temp_free_ptr(base); | ||
107 | + tcg_temp_free_i32(ofs); | ||
108 | + | ||
109 | + /* Flush out-of-range indexing to zero. */ | ||
110 | + t_vlmax = tcg_const_i64(vlmax); | ||
111 | + t_zero = tcg_const_i64(0); | ||
112 | + tcg_gen_extu_tl_i64(t_idx, idx); | ||
113 | + | ||
114 | + tcg_gen_movcond_i64(TCG_COND_LTU, dest, t_idx, | ||
115 | + t_vlmax, dest, t_zero); | ||
116 | + | ||
117 | + tcg_temp_free_i64(t_vlmax); | ||
118 | + tcg_temp_free_i64(t_zero); | ||
119 | + tcg_temp_free_i64(t_idx); | ||
120 | +} | ||
121 | + | ||
122 | +static void vec_element_loadi(DisasContext *s, TCGv_i64 dest, | ||
123 | + int vreg, int idx) | ||
124 | +{ | ||
125 | + load_element(dest, cpu_env, endian_ofs(s, vreg, idx), s->sew); | ||
126 | +} | ||
127 | + | ||
128 | +static bool trans_vext_x_v(DisasContext *s, arg_r *a) | ||
129 | +{ | ||
130 | + TCGv_i64 tmp = tcg_temp_new_i64(); | ||
131 | + TCGv dest = tcg_temp_new(); | ||
132 | + | ||
133 | + if (a->rs1 == 0) { | ||
134 | + /* Special case vmv.x.s rd, vs2. */ | ||
135 | + vec_element_loadi(s, tmp, a->rs2, 0); | ||
136 | + } else { | ||
137 | + /* This instruction ignores LMUL and vector register groups */ | ||
138 | + int vlmax = s->vlen >> (3 + s->sew); | ||
139 | + vec_element_loadx(s, tmp, a->rs2, cpu_gpr[a->rs1], vlmax); | ||
140 | + } | ||
141 | + tcg_gen_trunc_i64_tl(dest, tmp); | ||
142 | + gen_set_gpr(a->rd, dest); | ||
143 | + | ||
144 | + tcg_temp_free(dest); | ||
145 | + tcg_temp_free_i64(tmp); | ||
146 | + return true; | ||
147 | +} | ||
148 | -- | 46 | -- |
149 | 2.27.0 | 47 | 2.41.0 |
150 | |||
151 | diff view generated by jsdifflib |
1 | From: LIU Zhiwei <zhiwei_liu@c-sky.com> | 1 | From: Daniel Henrique Barboza <dbarboza@ventanamicro.com> |
---|---|---|---|
2 | 2 | ||
3 | Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> | 3 | zmmul was promoted from experimental to ratified in commit 6d00ffad4e95. |
4 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 4 | Add a riscv,isa string for it. |
5 | Message-id: 20200623215920.2594-55-zhiwei_liu@c-sky.com | 5 | |
6 | Fixes: 6d00ffad4e95 ("target/riscv: move zmmul out of the experimental properties") | ||
7 | Signed-off-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> | ||
8 | Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn> | ||
9 | Reviewed-by: Alistair Francis <alistair.francis@wdc.com> | ||
10 | Message-Id: <20230720132424.371132-2-dbarboza@ventanamicro.com> | ||
6 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | 11 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> |
7 | --- | 12 | --- |
8 | target/riscv/helper.h | 5 +++++ | 13 | target/riscv/cpu.c | 1 + |
9 | target/riscv/insn32.decode | 2 ++ | 14 | 1 file changed, 1 insertion(+) |
10 | target/riscv/insn_trans/trans_rvv.inc.c | 25 +++++++++++++++++++++++++ | ||
11 | target/riscv/vector_helper.c | 24 ++++++++++++++++++++++++ | ||
12 | 4 files changed, 56 insertions(+) | ||
13 | 15 | ||
14 | diff --git a/target/riscv/helper.h b/target/riscv/helper.h | 16 | diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c |
15 | index XXXXXXX..XXXXXXX 100644 | 17 | index XXXXXXX..XXXXXXX 100644 |
16 | --- a/target/riscv/helper.h | 18 | --- a/target/riscv/cpu.c |
17 | +++ b/target/riscv/helper.h | 19 | +++ b/target/riscv/cpu.c |
18 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_5(viota_m_b, void, ptr, ptr, ptr, env, i32) | 20 | @@ -XXX,XX +XXX,XX @@ static const struct isa_ext_data isa_edata_arr[] = { |
19 | DEF_HELPER_5(viota_m_h, void, ptr, ptr, ptr, env, i32) | 21 | ISA_EXT_DATA_ENTRY(zicsr, PRIV_VERSION_1_10_0, ext_icsr), |
20 | DEF_HELPER_5(viota_m_w, void, ptr, ptr, ptr, env, i32) | 22 | ISA_EXT_DATA_ENTRY(zifencei, PRIV_VERSION_1_10_0, ext_ifencei), |
21 | DEF_HELPER_5(viota_m_d, void, ptr, ptr, ptr, env, i32) | 23 | ISA_EXT_DATA_ENTRY(zihintpause, PRIV_VERSION_1_10_0, ext_zihintpause), |
22 | + | 24 | + ISA_EXT_DATA_ENTRY(zmmul, PRIV_VERSION_1_12_0, ext_zmmul), |
23 | +DEF_HELPER_4(vid_v_b, void, ptr, ptr, env, i32) | 25 | ISA_EXT_DATA_ENTRY(zawrs, PRIV_VERSION_1_12_0, ext_zawrs), |
24 | +DEF_HELPER_4(vid_v_h, void, ptr, ptr, env, i32) | 26 | ISA_EXT_DATA_ENTRY(zfa, PRIV_VERSION_1_12_0, ext_zfa), |
25 | +DEF_HELPER_4(vid_v_w, void, ptr, ptr, env, i32) | 27 | ISA_EXT_DATA_ENTRY(zfbfmin, PRIV_VERSION_1_12_0, ext_zfbfmin), |
26 | +DEF_HELPER_4(vid_v_d, void, ptr, ptr, env, i32) | ||
27 | diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode | ||
28 | index XXXXXXX..XXXXXXX 100644 | ||
29 | --- a/target/riscv/insn32.decode | ||
30 | +++ b/target/riscv/insn32.decode | ||
31 | @@ -XXX,XX +XXX,XX @@ | ||
32 | @r2 ....... ..... ..... ... ..... ....... %rs1 %rd | ||
33 | @r2_nfvm ... ... vm:1 ..... ..... ... ..... ....... &r2nfvm %nf %rs1 %rd | ||
34 | @r2_vm ...... vm:1 ..... ..... ... ..... ....... &rmr %rs2 %rd | ||
35 | +@r1_vm ...... vm:1 ..... ..... ... ..... ....... %rd | ||
36 | @r_nfvm ... ... vm:1 ..... ..... ... ..... ....... &rnfvm %nf %rs2 %rs1 %rd | ||
37 | @r_vm ...... vm:1 ..... ..... ... ..... ....... &rmrr %rs2 %rs1 %rd | ||
38 | @r_vm_1 ...... . ..... ..... ... ..... ....... &rmrr vm=1 %rs2 %rs1 %rd | ||
39 | @@ -XXX,XX +XXX,XX @@ vmsbf_m 010110 . ..... 00001 010 ..... 1010111 @r2_vm | ||
40 | vmsif_m 010110 . ..... 00011 010 ..... 1010111 @r2_vm | ||
41 | vmsof_m 010110 . ..... 00010 010 ..... 1010111 @r2_vm | ||
42 | viota_m 010110 . ..... 10000 010 ..... 1010111 @r2_vm | ||
43 | +vid_v 010110 . 00000 10001 010 ..... 1010111 @r1_vm | ||
44 | |||
45 | vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm | ||
46 | vsetvl 1000000 ..... ..... 111 ..... 1010111 @r | ||
47 | diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c | ||
48 | index XXXXXXX..XXXXXXX 100644 | ||
49 | --- a/target/riscv/insn_trans/trans_rvv.inc.c | ||
50 | +++ b/target/riscv/insn_trans/trans_rvv.inc.c | ||
51 | @@ -XXX,XX +XXX,XX @@ static bool trans_viota_m(DisasContext *s, arg_viota_m *a) | ||
52 | } | ||
53 | return false; | ||
54 | } | ||
55 | + | ||
56 | +/* Vector Element Index Instruction */ | ||
57 | +static bool trans_vid_v(DisasContext *s, arg_vid_v *a) | ||
58 | +{ | ||
59 | + if (vext_check_isa_ill(s) && | ||
60 | + vext_check_reg(s, a->rd, false) && | ||
61 | + vext_check_overlap_mask(s, a->rd, a->vm, false)) { | ||
62 | + uint32_t data = 0; | ||
63 | + TCGLabel *over = gen_new_label(); | ||
64 | + tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); | ||
65 | + | ||
66 | + data = FIELD_DP32(data, VDATA, MLEN, s->mlen); | ||
67 | + data = FIELD_DP32(data, VDATA, VM, a->vm); | ||
68 | + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); | ||
69 | + static gen_helper_gvec_2_ptr * const fns[4] = { | ||
70 | + gen_helper_vid_v_b, gen_helper_vid_v_h, | ||
71 | + gen_helper_vid_v_w, gen_helper_vid_v_d, | ||
72 | + }; | ||
73 | + tcg_gen_gvec_2_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), | ||
74 | + cpu_env, 0, s->vlen / 8, data, fns[s->sew]); | ||
75 | + gen_set_label(over); | ||
76 | + return true; | ||
77 | + } | ||
78 | + return false; | ||
79 | +} | ||
80 | diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c | ||
81 | index XXXXXXX..XXXXXXX 100644 | ||
82 | --- a/target/riscv/vector_helper.c | ||
83 | +++ b/target/riscv/vector_helper.c | ||
84 | @@ -XXX,XX +XXX,XX @@ GEN_VEXT_VIOTA_M(viota_m_b, uint8_t, H1, clearb) | ||
85 | GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2, clearh) | ||
86 | GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4, clearl) | ||
87 | GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8, clearq) | ||
88 | + | ||
89 | +/* Vector Element Index Instruction */ | ||
90 | +#define GEN_VEXT_VID_V(NAME, ETYPE, H, CLEAR_FN) \ | ||
91 | +void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc) \ | ||
92 | +{ \ | ||
93 | + uint32_t mlen = vext_mlen(desc); \ | ||
94 | + uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen; \ | ||
95 | + uint32_t vm = vext_vm(desc); \ | ||
96 | + uint32_t vl = env->vl; \ | ||
97 | + int i; \ | ||
98 | + \ | ||
99 | + for (i = 0; i < vl; i++) { \ | ||
100 | + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ | ||
101 | + continue; \ | ||
102 | + } \ | ||
103 | + *((ETYPE *)vd + H(i)) = i; \ | ||
104 | + } \ | ||
105 | + CLEAR_FN(vd, vl, vl * sizeof(ETYPE), vlmax * sizeof(ETYPE)); \ | ||
106 | +} | ||
107 | + | ||
108 | +GEN_VEXT_VID_V(vid_v_b, uint8_t, H1, clearb) | ||
109 | +GEN_VEXT_VID_V(vid_v_h, uint16_t, H2, clearh) | ||
110 | +GEN_VEXT_VID_V(vid_v_w, uint32_t, H4, clearl) | ||
111 | +GEN_VEXT_VID_V(vid_v_d, uint64_t, H8, clearq) | ||
112 | -- | 28 | -- |
113 | 2.27.0 | 29 | 2.41.0 |
114 | |||
115 | diff view generated by jsdifflib |
1 | From: LIU Zhiwei <zhiwei_liu@c-sky.com> | 1 | From: Daniel Henrique Barboza <dbarboza@ventanamicro.com> |
---|---|---|---|
2 | 2 | ||
3 | Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> | 3 | The cpu->cfg.epmp extension is still experimental, but it already has a |
4 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 4 | 'smepmp' riscv,isa string. Add it. |
5 | Message-id: 20200623215920.2594-54-zhiwei_liu@c-sky.com | 5 | |
6 | Signed-off-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> | ||
7 | Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn> | ||
8 | Reviewed-by: Alistair Francis <alistair.francis@wdc.com> | ||
9 | Message-Id: <20230720132424.371132-3-dbarboza@ventanamicro.com> | ||
6 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | 10 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> |
7 | --- | 11 | --- |
8 | target/riscv/helper.h | 5 +++++ | 12 | target/riscv/cpu.c | 1 + |
9 | target/riscv/insn32.decode | 1 + | 13 | 1 file changed, 1 insertion(+) |
10 | target/riscv/insn_trans/trans_rvv.inc.c | 27 +++++++++++++++++++++++ | ||
11 | target/riscv/vector_helper.c | 29 +++++++++++++++++++++++++ | ||
12 | 4 files changed, 62 insertions(+) | ||
13 | 14 | ||
14 | diff --git a/target/riscv/helper.h b/target/riscv/helper.h | 15 | diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c |
15 | index XXXXXXX..XXXXXXX 100644 | 16 | index XXXXXXX..XXXXXXX 100644 |
16 | --- a/target/riscv/helper.h | 17 | --- a/target/riscv/cpu.c |
17 | +++ b/target/riscv/helper.h | 18 | +++ b/target/riscv/cpu.c |
18 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_4(vmfirst_m, tl, ptr, ptr, env, i32) | 19 | @@ -XXX,XX +XXX,XX @@ static const struct isa_ext_data isa_edata_arr[] = { |
19 | DEF_HELPER_5(vmsbf_m, void, ptr, ptr, ptr, env, i32) | 20 | ISA_EXT_DATA_ENTRY(zhinx, PRIV_VERSION_1_12_0, ext_zhinx), |
20 | DEF_HELPER_5(vmsif_m, void, ptr, ptr, ptr, env, i32) | 21 | ISA_EXT_DATA_ENTRY(zhinxmin, PRIV_VERSION_1_12_0, ext_zhinxmin), |
21 | DEF_HELPER_5(vmsof_m, void, ptr, ptr, ptr, env, i32) | 22 | ISA_EXT_DATA_ENTRY(smaia, PRIV_VERSION_1_12_0, ext_smaia), |
22 | + | 23 | + ISA_EXT_DATA_ENTRY(smepmp, PRIV_VERSION_1_12_0, epmp), |
23 | +DEF_HELPER_5(viota_m_b, void, ptr, ptr, ptr, env, i32) | 24 | ISA_EXT_DATA_ENTRY(smstateen, PRIV_VERSION_1_12_0, ext_smstateen), |
24 | +DEF_HELPER_5(viota_m_h, void, ptr, ptr, ptr, env, i32) | 25 | ISA_EXT_DATA_ENTRY(ssaia, PRIV_VERSION_1_12_0, ext_ssaia), |
25 | +DEF_HELPER_5(viota_m_w, void, ptr, ptr, ptr, env, i32) | 26 | ISA_EXT_DATA_ENTRY(sscofpmf, PRIV_VERSION_1_12_0, ext_sscofpmf), |
26 | +DEF_HELPER_5(viota_m_d, void, ptr, ptr, ptr, env, i32) | ||
27 | diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode | ||
28 | index XXXXXXX..XXXXXXX 100644 | ||
29 | --- a/target/riscv/insn32.decode | ||
30 | +++ b/target/riscv/insn32.decode | ||
31 | @@ -XXX,XX +XXX,XX @@ vmfirst_m 010101 . ..... ----- 010 ..... 1010111 @r2_vm | ||
32 | vmsbf_m 010110 . ..... 00001 010 ..... 1010111 @r2_vm | ||
33 | vmsif_m 010110 . ..... 00011 010 ..... 1010111 @r2_vm | ||
34 | vmsof_m 010110 . ..... 00010 010 ..... 1010111 @r2_vm | ||
35 | +viota_m 010110 . ..... 10000 010 ..... 1010111 @r2_vm | ||
36 | |||
37 | vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm | ||
38 | vsetvl 1000000 ..... ..... 111 ..... 1010111 @r | ||
39 | diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c | ||
40 | index XXXXXXX..XXXXXXX 100644 | ||
41 | --- a/target/riscv/insn_trans/trans_rvv.inc.c | ||
42 | +++ b/target/riscv/insn_trans/trans_rvv.inc.c | ||
43 | @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ | ||
44 | GEN_M_TRANS(vmsbf_m) | ||
45 | GEN_M_TRANS(vmsif_m) | ||
46 | GEN_M_TRANS(vmsof_m) | ||
47 | + | ||
48 | +/* Vector Iota Instruction */ | ||
49 | +static bool trans_viota_m(DisasContext *s, arg_viota_m *a) | ||
50 | +{ | ||
51 | + if (vext_check_isa_ill(s) && | ||
52 | + vext_check_reg(s, a->rd, false) && | ||
53 | + vext_check_overlap_group(a->rd, 1 << s->lmul, a->rs2, 1) && | ||
54 | + (a->vm != 0 || a->rd != 0)) { | ||
55 | + uint32_t data = 0; | ||
56 | + TCGLabel *over = gen_new_label(); | ||
57 | + tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); | ||
58 | + | ||
59 | + data = FIELD_DP32(data, VDATA, MLEN, s->mlen); | ||
60 | + data = FIELD_DP32(data, VDATA, VM, a->vm); | ||
61 | + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); | ||
62 | + static gen_helper_gvec_3_ptr * const fns[4] = { | ||
63 | + gen_helper_viota_m_b, gen_helper_viota_m_h, | ||
64 | + gen_helper_viota_m_w, gen_helper_viota_m_d, | ||
65 | + }; | ||
66 | + tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), | ||
67 | + vreg_ofs(s, a->rs2), cpu_env, 0, | ||
68 | + s->vlen / 8, data, fns[s->sew]); | ||
69 | + gen_set_label(over); | ||
70 | + return true; | ||
71 | + } | ||
72 | + return false; | ||
73 | +} | ||
74 | diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c | ||
75 | index XXXXXXX..XXXXXXX 100644 | ||
76 | --- a/target/riscv/vector_helper.c | ||
77 | +++ b/target/riscv/vector_helper.c | ||
78 | @@ -XXX,XX +XXX,XX @@ void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, | ||
79 | { | ||
80 | vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST); | ||
81 | } | ||
82 | + | ||
83 | +/* Vector Iota Instruction */ | ||
84 | +#define GEN_VEXT_VIOTA_M(NAME, ETYPE, H, CLEAR_FN) \ | ||
85 | +void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env, \ | ||
86 | + uint32_t desc) \ | ||
87 | +{ \ | ||
88 | + uint32_t mlen = vext_mlen(desc); \ | ||
89 | + uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen; \ | ||
90 | + uint32_t vm = vext_vm(desc); \ | ||
91 | + uint32_t vl = env->vl; \ | ||
92 | + uint32_t sum = 0; \ | ||
93 | + int i; \ | ||
94 | + \ | ||
95 | + for (i = 0; i < vl; i++) { \ | ||
96 | + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ | ||
97 | + continue; \ | ||
98 | + } \ | ||
99 | + *((ETYPE *)vd + H(i)) = sum; \ | ||
100 | + if (vext_elem_mask(vs2, mlen, i)) { \ | ||
101 | + sum++; \ | ||
102 | + } \ | ||
103 | + } \ | ||
104 | + CLEAR_FN(vd, vl, vl * sizeof(ETYPE), vlmax * sizeof(ETYPE)); \ | ||
105 | +} | ||
106 | + | ||
107 | +GEN_VEXT_VIOTA_M(viota_m_b, uint8_t, H1, clearb) | ||
108 | +GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2, clearh) | ||
109 | +GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4, clearl) | ||
110 | +GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8, clearq) | ||
111 | -- | 27 | -- |
112 | 2.27.0 | 28 | 2.41.0 |
113 | |||
114 | diff view generated by jsdifflib |
1 | From: LIU Zhiwei <zhiwei_liu@c-sky.com> | 1 | From: LIU Zhiwei <zhiwei_liu@linux.alibaba.com> |
---|---|---|---|
2 | 2 | ||
3 | Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> | 3 | Commit bef6f008b98(accel/tcg: Return bool from page_check_range) converts |
4 | integer return value to bool type. However, it wrongly converted the use | ||
5 | of the API in riscv fault-only-first, where page_check_range <= 0, should | ||
6 | be converted to !page_check_range. | ||
7 | |||
8 | Signed-off-by: LIU Zhiwei <zhiwei_liu@linux.alibaba.com> | ||
4 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 9 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
5 | Message-id: 20200623215920.2594-52-zhiwei_liu@c-sky.com | 10 | Message-ID: <20230729031618.821-1-zhiwei_liu@linux.alibaba.com> |
6 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | 11 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> |
7 | --- | 12 | --- |
8 | target/riscv/helper.h | 2 ++ | 13 | target/riscv/vector_helper.c | 2 +- |
9 | target/riscv/insn32.decode | 1 + | 14 | 1 file changed, 1 insertion(+), 1 deletion(-) |
10 | target/riscv/insn_trans/trans_rvv.inc.c | 32 +++++++++++++++++++++++++ | ||
11 | target/riscv/vector_helper.c | 19 +++++++++++++++ | ||
12 | 4 files changed, 54 insertions(+) | ||
13 | 15 | ||
14 | diff --git a/target/riscv/helper.h b/target/riscv/helper.h | ||
15 | index XXXXXXX..XXXXXXX 100644 | ||
16 | --- a/target/riscv/helper.h | ||
17 | +++ b/target/riscv/helper.h | ||
18 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vmornot_mm, void, ptr, ptr, ptr, ptr, env, i32) | ||
19 | DEF_HELPER_6(vmxnor_mm, void, ptr, ptr, ptr, ptr, env, i32) | ||
20 | |||
21 | DEF_HELPER_4(vmpopc_m, tl, ptr, ptr, env, i32) | ||
22 | + | ||
23 | +DEF_HELPER_4(vmfirst_m, tl, ptr, ptr, env, i32) | ||
24 | diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode | ||
25 | index XXXXXXX..XXXXXXX 100644 | ||
26 | --- a/target/riscv/insn32.decode | ||
27 | +++ b/target/riscv/insn32.decode | ||
28 | @@ -XXX,XX +XXX,XX @@ vmnor_mm 011110 - ..... ..... 010 ..... 1010111 @r | ||
29 | vmornot_mm 011100 - ..... ..... 010 ..... 1010111 @r | ||
30 | vmxnor_mm 011111 - ..... ..... 010 ..... 1010111 @r | ||
31 | vmpopc_m 010100 . ..... ----- 010 ..... 1010111 @r2_vm | ||
32 | +vmfirst_m 010101 . ..... ----- 010 ..... 1010111 @r2_vm | ||
33 | |||
34 | vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm | ||
35 | vsetvl 1000000 ..... ..... 111 ..... 1010111 @r | ||
36 | diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c | ||
37 | index XXXXXXX..XXXXXXX 100644 | ||
38 | --- a/target/riscv/insn_trans/trans_rvv.inc.c | ||
39 | +++ b/target/riscv/insn_trans/trans_rvv.inc.c | ||
40 | @@ -XXX,XX +XXX,XX @@ static bool trans_vmpopc_m(DisasContext *s, arg_rmr *a) | ||
41 | } | ||
42 | return false; | ||
43 | } | ||
44 | + | ||
45 | +/* vmfirst find-first-set mask bit */ | ||
46 | +static bool trans_vmfirst_m(DisasContext *s, arg_rmr *a) | ||
47 | +{ | ||
48 | + if (vext_check_isa_ill(s)) { | ||
49 | + TCGv_ptr src2, mask; | ||
50 | + TCGv dst; | ||
51 | + TCGv_i32 desc; | ||
52 | + uint32_t data = 0; | ||
53 | + data = FIELD_DP32(data, VDATA, MLEN, s->mlen); | ||
54 | + data = FIELD_DP32(data, VDATA, VM, a->vm); | ||
55 | + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); | ||
56 | + | ||
57 | + mask = tcg_temp_new_ptr(); | ||
58 | + src2 = tcg_temp_new_ptr(); | ||
59 | + dst = tcg_temp_new(); | ||
60 | + desc = tcg_const_i32(simd_desc(0, s->vlen / 8, data)); | ||
61 | + | ||
62 | + tcg_gen_addi_ptr(src2, cpu_env, vreg_ofs(s, a->rs2)); | ||
63 | + tcg_gen_addi_ptr(mask, cpu_env, vreg_ofs(s, 0)); | ||
64 | + | ||
65 | + gen_helper_vmfirst_m(dst, mask, src2, cpu_env, desc); | ||
66 | + gen_set_gpr(a->rd, dst); | ||
67 | + | ||
68 | + tcg_temp_free_ptr(mask); | ||
69 | + tcg_temp_free_ptr(src2); | ||
70 | + tcg_temp_free(dst); | ||
71 | + tcg_temp_free_i32(desc); | ||
72 | + return true; | ||
73 | + } | ||
74 | + return false; | ||
75 | +} | ||
76 | diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c | 16 | diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c |
77 | index XXXXXXX..XXXXXXX 100644 | 17 | index XXXXXXX..XXXXXXX 100644 |
78 | --- a/target/riscv/vector_helper.c | 18 | --- a/target/riscv/vector_helper.c |
79 | +++ b/target/riscv/vector_helper.c | 19 | +++ b/target/riscv/vector_helper.c |
80 | @@ -XXX,XX +XXX,XX @@ target_ulong HELPER(vmpopc_m)(void *v0, void *vs2, CPURISCVState *env, | 20 | @@ -XXX,XX +XXX,XX @@ vext_ldff(void *vd, void *v0, target_ulong base, |
81 | } | 21 | cpu_mmu_index(env, false)); |
82 | return cnt; | 22 | if (host) { |
83 | } | 23 | #ifdef CONFIG_USER_ONLY |
84 | + | 24 | - if (page_check_range(addr, offset, PAGE_READ)) { |
85 | +/* vmfirst find-first-set mask bit*/ | 25 | + if (!page_check_range(addr, offset, PAGE_READ)) { |
86 | +target_ulong HELPER(vmfirst_m)(void *v0, void *vs2, CPURISCVState *env, | 26 | vl = i; |
87 | + uint32_t desc) | 27 | goto ProbeSuccess; |
88 | +{ | 28 | } |
89 | + uint32_t mlen = vext_mlen(desc); | ||
90 | + uint32_t vm = vext_vm(desc); | ||
91 | + uint32_t vl = env->vl; | ||
92 | + int i; | ||
93 | + | ||
94 | + for (i = 0; i < vl; i++) { | ||
95 | + if (vm || vext_elem_mask(v0, mlen, i)) { | ||
96 | + if (vext_elem_mask(vs2, mlen, i)) { | ||
97 | + return i; | ||
98 | + } | ||
99 | + } | ||
100 | + } | ||
101 | + return -1LL; | ||
102 | +} | ||
103 | -- | 29 | -- |
104 | 2.27.0 | 30 | 2.41.0 |
105 | |||
106 | diff view generated by jsdifflib |
1 | From: LIU Zhiwei <zhiwei_liu@c-sky.com> | 1 | From: Ard Biesheuvel <ardb@kernel.org> |
---|---|---|---|
2 | 2 | ||
3 | Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> | 3 | The AES MixColumns and InvMixColumns operations are relatively |
4 | expensive 4x4 matrix multiplications in GF(2^8), which is why C | ||
5 | implementations usually rely on precomputed lookup tables rather than | ||
6 | performing the calculations on demand. | ||
7 | |||
8 | Given that we already carry those tables in QEMU, we can just grab the | ||
9 | right value in the implementation of the RISC-V AES32 instructions. Note | ||
10 | that the tables in question are permuted according to the respective | ||
11 | Sbox, so we can omit the Sbox lookup as well in this case. | ||
12 | |||
13 | Cc: Richard Henderson <richard.henderson@linaro.org> | ||
14 | Cc: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
15 | Cc: Zewen Ye <lustrew@foxmail.com> | ||
16 | Cc: Weiwei Li <liweiwei@iscas.ac.cn> | ||
17 | Cc: Junqiang Wang <wangjunqiang@iscas.ac.cn> | ||
18 | Signed-off-by: Ard Biesheuvel <ardb@kernel.org> | ||
4 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 19 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
5 | Message-id: 20200623215920.2594-57-zhiwei_liu@c-sky.com | 20 | Message-ID: <20230731084043.1791984-1-ardb@kernel.org> |
6 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | 21 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> |
7 | --- | 22 | --- |
8 | target/riscv/internals.h | 6 +++ | 23 | include/crypto/aes.h | 7 +++++++ |
9 | target/riscv/insn32.decode | 1 + | 24 | crypto/aes.c | 4 ++-- |
10 | target/riscv/insn_trans/trans_rvv.inc.c | 60 +++++++++++++++++++++++++ | 25 | target/riscv/crypto_helper.c | 34 ++++------------------------------ |
11 | 3 files changed, 67 insertions(+) | 26 | 3 files changed, 13 insertions(+), 32 deletions(-) |
12 | 27 | ||
13 | diff --git a/target/riscv/internals.h b/target/riscv/internals.h | 28 | diff --git a/include/crypto/aes.h b/include/crypto/aes.h |
14 | index XXXXXXX..XXXXXXX 100644 | 29 | index XXXXXXX..XXXXXXX 100644 |
15 | --- a/target/riscv/internals.h | 30 | --- a/include/crypto/aes.h |
16 | +++ b/target/riscv/internals.h | 31 | +++ b/include/crypto/aes.h |
17 | @@ -XXX,XX +XXX,XX @@ FIELD(VDATA, WD, 11, 1) | 32 | @@ -XXX,XX +XXX,XX @@ void AES_decrypt(const unsigned char *in, unsigned char *out, |
18 | target_ulong fclass_h(uint64_t frs1); | 33 | extern const uint8_t AES_sbox[256]; |
19 | target_ulong fclass_s(uint64_t frs1); | 34 | extern const uint8_t AES_isbox[256]; |
20 | target_ulong fclass_d(uint64_t frs1); | 35 | |
36 | +/* | ||
37 | +AES_Te0[x] = S [x].[02, 01, 01, 03]; | ||
38 | +AES_Td0[x] = Si[x].[0e, 09, 0d, 0b]; | ||
39 | +*/ | ||
21 | + | 40 | + |
22 | +#define SEW8 0 | 41 | +extern const uint32_t AES_Te0[256], AES_Td0[256]; |
23 | +#define SEW16 1 | ||
24 | +#define SEW32 2 | ||
25 | +#define SEW64 3 | ||
26 | + | 42 | + |
27 | #endif | 43 | #endif |
28 | diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode | 44 | diff --git a/crypto/aes.c b/crypto/aes.c |
29 | index XXXXXXX..XXXXXXX 100644 | 45 | index XXXXXXX..XXXXXXX 100644 |
30 | --- a/target/riscv/insn32.decode | 46 | --- a/crypto/aes.c |
31 | +++ b/target/riscv/insn32.decode | 47 | +++ b/crypto/aes.c |
32 | @@ -XXX,XX +XXX,XX @@ vmsof_m 010110 . ..... 00010 010 ..... 1010111 @r2_vm | 48 | @@ -XXX,XX +XXX,XX @@ AES_Td3[x] = Si[x].[09, 0d, 0b, 0e]; |
33 | viota_m 010110 . ..... 10000 010 ..... 1010111 @r2_vm | 49 | AES_Td4[x] = Si[x].[01, 01, 01, 01]; |
34 | vid_v 010110 . 00000 10001 010 ..... 1010111 @r1_vm | 50 | */ |
35 | vext_x_v 001100 1 ..... ..... 010 ..... 1010111 @r | 51 | |
36 | +vmv_s_x 001101 1 00000 ..... 110 ..... 1010111 @r2 | 52 | -static const uint32_t AES_Te0[256] = { |
37 | 53 | +const uint32_t AES_Te0[256] = { | |
38 | vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm | 54 | 0xc66363a5U, 0xf87c7c84U, 0xee777799U, 0xf67b7b8dU, |
39 | vsetvl 1000000 ..... ..... 111 ..... 1010111 @r | 55 | 0xfff2f20dU, 0xd66b6bbdU, 0xde6f6fb1U, 0x91c5c554U, |
40 | diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c | 56 | 0x60303050U, 0x02010103U, 0xce6767a9U, 0x562b2b7dU, |
57 | @@ -XXX,XX +XXX,XX @@ static const uint32_t AES_Te4[256] = { | ||
58 | 0xb0b0b0b0U, 0x54545454U, 0xbbbbbbbbU, 0x16161616U, | ||
59 | }; | ||
60 | |||
61 | -static const uint32_t AES_Td0[256] = { | ||
62 | +const uint32_t AES_Td0[256] = { | ||
63 | 0x51f4a750U, 0x7e416553U, 0x1a17a4c3U, 0x3a275e96U, | ||
64 | 0x3bab6bcbU, 0x1f9d45f1U, 0xacfa58abU, 0x4be30393U, | ||
65 | 0x2030fa55U, 0xad766df6U, 0x88cc7691U, 0xf5024c25U, | ||
66 | diff --git a/target/riscv/crypto_helper.c b/target/riscv/crypto_helper.c | ||
41 | index XXXXXXX..XXXXXXX 100644 | 67 | index XXXXXXX..XXXXXXX 100644 |
42 | --- a/target/riscv/insn_trans/trans_rvv.inc.c | 68 | --- a/target/riscv/crypto_helper.c |
43 | +++ b/target/riscv/insn_trans/trans_rvv.inc.c | 69 | +++ b/target/riscv/crypto_helper.c |
44 | @@ -XXX,XX +XXX,XX @@ static bool trans_vext_x_v(DisasContext *s, arg_r *a) | 70 | @@ -XXX,XX +XXX,XX @@ |
45 | tcg_temp_free_i64(tmp); | 71 | #include "crypto/aes-round.h" |
46 | return true; | 72 | #include "crypto/sm4.h" |
47 | } | 73 | |
48 | + | 74 | -#define AES_XTIME(a) \ |
49 | +/* Integer Scalar Move Instruction */ | 75 | - ((a << 1) ^ ((a & 0x80) ? 0x1b : 0)) |
50 | + | 76 | - |
51 | +static void store_element(TCGv_i64 val, TCGv_ptr base, | 77 | -#define AES_GFMUL(a, b) (( \ |
52 | + int ofs, int sew) | 78 | - (((b) & 0x1) ? (a) : 0) ^ \ |
53 | +{ | 79 | - (((b) & 0x2) ? AES_XTIME(a) : 0) ^ \ |
54 | + switch (sew) { | 80 | - (((b) & 0x4) ? AES_XTIME(AES_XTIME(a)) : 0) ^ \ |
55 | + case MO_8: | 81 | - (((b) & 0x8) ? AES_XTIME(AES_XTIME(AES_XTIME(a))) : 0)) & 0xFF) |
56 | + tcg_gen_st8_i64(val, base, ofs); | 82 | - |
57 | + break; | 83 | -static inline uint32_t aes_mixcolumn_byte(uint8_t x, bool fwd) |
58 | + case MO_16: | 84 | -{ |
59 | + tcg_gen_st16_i64(val, base, ofs); | 85 | - uint32_t u; |
60 | + break; | 86 | - |
61 | + case MO_32: | 87 | - if (fwd) { |
62 | + tcg_gen_st32_i64(val, base, ofs); | 88 | - u = (AES_GFMUL(x, 3) << 24) | (x << 16) | (x << 8) | |
63 | + break; | 89 | - (AES_GFMUL(x, 2) << 0); |
64 | + case MO_64: | 90 | - } else { |
65 | + tcg_gen_st_i64(val, base, ofs); | 91 | - u = (AES_GFMUL(x, 0xb) << 24) | (AES_GFMUL(x, 0xd) << 16) | |
66 | + break; | 92 | - (AES_GFMUL(x, 0x9) << 8) | (AES_GFMUL(x, 0xe) << 0); |
67 | + default: | 93 | - } |
68 | + g_assert_not_reached(); | 94 | - return u; |
69 | + break; | 95 | -} |
70 | + } | 96 | - |
71 | +} | 97 | #define sext32_xlen(x) (target_ulong)(int32_t)(x) |
72 | + | 98 | |
73 | +/* | 99 | static inline target_ulong aes32_operation(target_ulong shamt, |
74 | + * Store vreg[idx] = val. | 100 | @@ -XXX,XX +XXX,XX @@ static inline target_ulong aes32_operation(target_ulong shamt, |
75 | + * The index must be in range of VLMAX. | 101 | bool enc, bool mix) |
76 | + */ | 102 | { |
77 | +static void vec_element_storei(DisasContext *s, int vreg, | 103 | uint8_t si = rs2 >> shamt; |
78 | + int idx, TCGv_i64 val) | 104 | - uint8_t so; |
79 | +{ | 105 | uint32_t mixed; |
80 | + store_element(val, cpu_env, endian_ofs(s, vreg, idx), s->sew); | 106 | target_ulong res; |
81 | +} | 107 | |
82 | + | 108 | if (enc) { |
83 | +/* vmv.s.x vd, rs1 # vd[0] = rs1 */ | 109 | - so = AES_sbox[si]; |
84 | +static bool trans_vmv_s_x(DisasContext *s, arg_vmv_s_x *a) | 110 | if (mix) { |
85 | +{ | 111 | - mixed = aes_mixcolumn_byte(so, true); |
86 | + if (vext_check_isa_ill(s)) { | 112 | + mixed = be32_to_cpu(AES_Te0[si]); |
87 | + /* This instruction ignores LMUL and vector register groups */ | 113 | } else { |
88 | + int maxsz = s->vlen >> 3; | 114 | - mixed = so; |
89 | + TCGv_i64 t1; | 115 | + mixed = AES_sbox[si]; |
90 | + TCGLabel *over = gen_new_label(); | 116 | } |
91 | + | 117 | } else { |
92 | + tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); | 118 | - so = AES_isbox[si]; |
93 | + tcg_gen_gvec_dup_imm(SEW64, vreg_ofs(s, a->rd), maxsz, maxsz, 0); | 119 | if (mix) { |
94 | + if (a->rs1 == 0) { | 120 | - mixed = aes_mixcolumn_byte(so, false); |
95 | + goto done; | 121 | + mixed = be32_to_cpu(AES_Td0[si]); |
96 | + } | 122 | } else { |
97 | + | 123 | - mixed = so; |
98 | + t1 = tcg_temp_new_i64(); | 124 | + mixed = AES_isbox[si]; |
99 | + tcg_gen_extu_tl_i64(t1, cpu_gpr[a->rs1]); | 125 | } |
100 | + vec_element_storei(s, a->rd, 0, t1); | 126 | } |
101 | + tcg_temp_free_i64(t1); | 127 | mixed = rol32(mixed, shamt); |
102 | + done: | ||
103 | + gen_set_label(over); | ||
104 | + return true; | ||
105 | + } | ||
106 | + return false; | ||
107 | +} | ||
108 | -- | 128 | -- |
109 | 2.27.0 | 129 | 2.41.0 |
110 | 130 | ||
111 | 131 | diff view generated by jsdifflib |
1 | From: LIU Zhiwei <zhiwei_liu@c-sky.com> | 1 | From: Kiran Ostrolenk <kiran.ostrolenk@codethink.co.uk> |
---|---|---|---|
2 | 2 | ||
3 | Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> | 3 | Take some functions/macros out of `vector_helper` and put them in a new |
4 | Reviewed-by: Alistair Francis <alistair.francis@wdc.com> | 4 | module called `vector_internals`. This ensures they can be used by both |
5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 5 | vector and vector-crypto helpers (latter implemented in subsequent |
6 | Message-id: 20200623215920.2594-11-zhiwei_liu@c-sky.com | 6 | commits). |
7 | |||
8 | Signed-off-by: Kiran Ostrolenk <kiran.ostrolenk@codethink.co.uk> | ||
9 | Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn> | ||
10 | Signed-off-by: Max Chou <max.chou@sifive.com> | ||
11 | Acked-by: Alistair Francis <alistair.francis@wdc.com> | ||
12 | Message-ID: <20230711165917.2629866-2-max.chou@sifive.com> | ||
7 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | 13 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> |
8 | --- | 14 | --- |
9 | target/riscv/helper.h | 25 ++ | 15 | target/riscv/vector_internals.h | 182 +++++++++++++++++++++++++++++ |
10 | target/riscv/insn32.decode | 10 + | 16 | target/riscv/vector_helper.c | 201 +------------------------------- |
11 | target/riscv/insn_trans/trans_rvv.inc.c | 291 ++++++++++++++++++++++++ | 17 | target/riscv/vector_internals.c | 81 +++++++++++++ |
12 | target/riscv/vector_helper.c | 183 +++++++++++++++ | 18 | target/riscv/meson.build | 1 + |
13 | 4 files changed, 509 insertions(+) | 19 | 4 files changed, 265 insertions(+), 200 deletions(-) |
20 | create mode 100644 target/riscv/vector_internals.h | ||
21 | create mode 100644 target/riscv/vector_internals.c | ||
14 | 22 | ||
15 | diff --git a/target/riscv/helper.h b/target/riscv/helper.h | 23 | diff --git a/target/riscv/vector_internals.h b/target/riscv/vector_internals.h |
16 | index XXXXXXX..XXXXXXX 100644 | 24 | new file mode 100644 |
17 | --- a/target/riscv/helper.h | 25 | index XXXXXXX..XXXXXXX |
18 | +++ b/target/riscv/helper.h | 26 | --- /dev/null |
19 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vamominw_v_w, void, ptr, ptr, tl, ptr, env, i32) | 27 | +++ b/target/riscv/vector_internals.h |
20 | DEF_HELPER_6(vamomaxw_v_w, void, ptr, ptr, tl, ptr, env, i32) | ||
21 | DEF_HELPER_6(vamominuw_v_w, void, ptr, ptr, tl, ptr, env, i32) | ||
22 | DEF_HELPER_6(vamomaxuw_v_w, void, ptr, ptr, tl, ptr, env, i32) | ||
23 | + | ||
24 | +DEF_HELPER_6(vadd_vv_b, void, ptr, ptr, ptr, ptr, env, i32) | ||
25 | +DEF_HELPER_6(vadd_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | ||
26 | +DEF_HELPER_6(vadd_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
27 | +DEF_HELPER_6(vadd_vv_d, void, ptr, ptr, ptr, ptr, env, i32) | ||
28 | +DEF_HELPER_6(vsub_vv_b, void, ptr, ptr, ptr, ptr, env, i32) | ||
29 | +DEF_HELPER_6(vsub_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | ||
30 | +DEF_HELPER_6(vsub_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
31 | +DEF_HELPER_6(vsub_vv_d, void, ptr, ptr, ptr, ptr, env, i32) | ||
32 | +DEF_HELPER_6(vadd_vx_b, void, ptr, ptr, tl, ptr, env, i32) | ||
33 | +DEF_HELPER_6(vadd_vx_h, void, ptr, ptr, tl, ptr, env, i32) | ||
34 | +DEF_HELPER_6(vadd_vx_w, void, ptr, ptr, tl, ptr, env, i32) | ||
35 | +DEF_HELPER_6(vadd_vx_d, void, ptr, ptr, tl, ptr, env, i32) | ||
36 | +DEF_HELPER_6(vsub_vx_b, void, ptr, ptr, tl, ptr, env, i32) | ||
37 | +DEF_HELPER_6(vsub_vx_h, void, ptr, ptr, tl, ptr, env, i32) | ||
38 | +DEF_HELPER_6(vsub_vx_w, void, ptr, ptr, tl, ptr, env, i32) | ||
39 | +DEF_HELPER_6(vsub_vx_d, void, ptr, ptr, tl, ptr, env, i32) | ||
40 | +DEF_HELPER_6(vrsub_vx_b, void, ptr, ptr, tl, ptr, env, i32) | ||
41 | +DEF_HELPER_6(vrsub_vx_h, void, ptr, ptr, tl, ptr, env, i32) | ||
42 | +DEF_HELPER_6(vrsub_vx_w, void, ptr, ptr, tl, ptr, env, i32) | ||
43 | +DEF_HELPER_6(vrsub_vx_d, void, ptr, ptr, tl, ptr, env, i32) | ||
44 | +DEF_HELPER_FLAGS_4(vec_rsubs8, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
45 | +DEF_HELPER_FLAGS_4(vec_rsubs16, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
46 | +DEF_HELPER_FLAGS_4(vec_rsubs32, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
47 | +DEF_HELPER_FLAGS_4(vec_rsubs64, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
48 | diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode | ||
49 | index XXXXXXX..XXXXXXX 100644 | ||
50 | --- a/target/riscv/insn32.decode | ||
51 | +++ b/target/riscv/insn32.decode | ||
52 | @@ -XXX,XX +XXX,XX @@ | 28 | @@ -XXX,XX +XXX,XX @@ |
53 | &u imm rd | 29 | +/* |
54 | &shift shamt rs1 rd | 30 | + * RISC-V Vector Extension Internals |
55 | &atomic aq rl rs2 rs1 rd | 31 | + * |
56 | +&rmrr vm rd rs1 rs2 | 32 | + * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved. |
57 | &rwdvm vm wd rd rs1 rs2 | 33 | + * |
58 | &r2nfvm vm rd rs1 nf | 34 | + * This program is free software; you can redistribute it and/or modify it |
59 | &rnfvm vm rd rs1 rs2 nf | 35 | + * under the terms and conditions of the GNU General Public License, |
60 | @@ -XXX,XX +XXX,XX @@ | 36 | + * version 2 or later, as published by the Free Software Foundation. |
61 | @r2 ....... ..... ..... ... ..... ....... %rs1 %rd | 37 | + * |
62 | @r2_nfvm ... ... vm:1 ..... ..... ... ..... ....... &r2nfvm %nf %rs1 %rd | 38 | + * This program is distributed in the hope it will be useful, but WITHOUT |
63 | @r_nfvm ... ... vm:1 ..... ..... ... ..... ....... &rnfvm %nf %rs2 %rs1 %rd | 39 | + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
64 | +@r_vm ...... vm:1 ..... ..... ... ..... ....... &rmrr %rs2 %rs1 %rd | 40 | + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for |
65 | @r_wdvm ..... wd:1 vm:1 ..... ..... ... ..... ....... &rwdvm %rs2 %rs1 %rd | 41 | + * more details. |
66 | @r2_zimm . zimm:11 ..... ... ..... ....... %rs1 %rd | 42 | + * |
67 | 43 | + * You should have received a copy of the GNU General Public License along with | |
68 | @@ -XXX,XX +XXX,XX @@ vamominuw_v 11000 . . ..... ..... 110 ..... 0101111 @r_wdvm | 44 | + * this program. If not, see <http://www.gnu.org/licenses/>. |
69 | vamomaxuw_v 11100 . . ..... ..... 110 ..... 0101111 @r_wdvm | 45 | + */ |
70 | 46 | + | |
71 | # *** new major opcode OP-V *** | 47 | +#ifndef TARGET_RISCV_VECTOR_INTERNALS_H |
72 | +vadd_vv 000000 . ..... ..... 000 ..... 1010111 @r_vm | 48 | +#define TARGET_RISCV_VECTOR_INTERNALS_H |
73 | +vadd_vx 000000 . ..... ..... 100 ..... 1010111 @r_vm | 49 | + |
74 | +vadd_vi 000000 . ..... ..... 011 ..... 1010111 @r_vm | 50 | +#include "qemu/osdep.h" |
75 | +vsub_vv 000010 . ..... ..... 000 ..... 1010111 @r_vm | 51 | +#include "qemu/bitops.h" |
76 | +vsub_vx 000010 . ..... ..... 100 ..... 1010111 @r_vm | 52 | +#include "cpu.h" |
77 | +vrsub_vx 000011 . ..... ..... 100 ..... 1010111 @r_vm | 53 | +#include "tcg/tcg-gvec-desc.h" |
78 | +vrsub_vi 000011 . ..... ..... 011 ..... 1010111 @r_vm | 54 | +#include "internals.h" |
79 | + | 55 | + |
80 | vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm | 56 | +static inline uint32_t vext_nf(uint32_t desc) |
81 | vsetvl 1000000 ..... ..... 111 ..... 1010111 @r | 57 | +{ |
82 | diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c | 58 | + return FIELD_EX32(simd_data(desc), VDATA, NF); |
83 | index XXXXXXX..XXXXXXX 100644 | 59 | +} |
84 | --- a/target/riscv/insn_trans/trans_rvv.inc.c | 60 | + |
85 | +++ b/target/riscv/insn_trans/trans_rvv.inc.c | 61 | +/* |
86 | @@ -XXX,XX +XXX,XX @@ GEN_VEXT_TRANS(vamomaxd_v, 15, rwdvm, amo_op, amo_check) | 62 | + * Note that vector data is stored in host-endian 64-bit chunks, |
87 | GEN_VEXT_TRANS(vamominud_v, 16, rwdvm, amo_op, amo_check) | 63 | + * so addressing units smaller than that needs a host-endian fixup. |
88 | GEN_VEXT_TRANS(vamomaxud_v, 17, rwdvm, amo_op, amo_check) | 64 | + */ |
89 | #endif | 65 | +#if HOST_BIG_ENDIAN |
90 | + | 66 | +#define H1(x) ((x) ^ 7) |
91 | +/* | 67 | +#define H1_2(x) ((x) ^ 6) |
92 | + *** Vector Integer Arithmetic Instructions | 68 | +#define H1_4(x) ((x) ^ 4) |
93 | + */ | 69 | +#define H2(x) ((x) ^ 3) |
94 | +#define MAXSZ(s) (s->vlen >> (3 - s->lmul)) | 70 | +#define H4(x) ((x) ^ 1) |
95 | + | 71 | +#define H8(x) ((x)) |
96 | +static bool opivv_check(DisasContext *s, arg_rmrr *a) | 72 | +#else |
97 | +{ | 73 | +#define H1(x) (x) |
98 | + return (vext_check_isa_ill(s) && | 74 | +#define H1_2(x) (x) |
99 | + vext_check_overlap_mask(s, a->rd, a->vm, false) && | 75 | +#define H1_4(x) (x) |
100 | + vext_check_reg(s, a->rd, false) && | 76 | +#define H2(x) (x) |
101 | + vext_check_reg(s, a->rs2, false) && | 77 | +#define H4(x) (x) |
102 | + vext_check_reg(s, a->rs1, false)); | 78 | +#define H8(x) (x) |
103 | +} | 79 | +#endif |
104 | + | 80 | + |
105 | +typedef void GVecGen3Fn(unsigned, uint32_t, uint32_t, | 81 | +/* |
106 | + uint32_t, uint32_t, uint32_t); | 82 | + * Encode LMUL to lmul as following: |
107 | + | 83 | + * LMUL vlmul lmul |
108 | +static inline bool | 84 | + * 1 000 0 |
109 | +do_opivv_gvec(DisasContext *s, arg_rmrr *a, GVecGen3Fn *gvec_fn, | 85 | + * 2 001 1 |
110 | + gen_helper_gvec_4_ptr *fn) | 86 | + * 4 010 2 |
111 | +{ | 87 | + * 8 011 3 |
112 | + TCGLabel *over = gen_new_label(); | 88 | + * - 100 - |
113 | + if (!opivv_check(s, a)) { | 89 | + * 1/8 101 -3 |
114 | + return false; | 90 | + * 1/4 110 -2 |
115 | + } | 91 | + * 1/2 111 -1 |
116 | + | 92 | + */ |
117 | + tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); | 93 | +static inline int32_t vext_lmul(uint32_t desc) |
118 | + | 94 | +{ |
119 | + if (a->vm && s->vl_eq_vlmax) { | 95 | + return sextract32(FIELD_EX32(simd_data(desc), VDATA, LMUL), 0, 3); |
120 | + gvec_fn(s->sew, vreg_ofs(s, a->rd), | 96 | +} |
121 | + vreg_ofs(s, a->rs2), vreg_ofs(s, a->rs1), | 97 | + |
122 | + MAXSZ(s), MAXSZ(s)); | 98 | +static inline uint32_t vext_vm(uint32_t desc) |
123 | + } else { | 99 | +{ |
124 | + uint32_t data = 0; | 100 | + return FIELD_EX32(simd_data(desc), VDATA, VM); |
125 | + | 101 | +} |
126 | + data = FIELD_DP32(data, VDATA, MLEN, s->mlen); | 102 | + |
127 | + data = FIELD_DP32(data, VDATA, VM, a->vm); | 103 | +static inline uint32_t vext_vma(uint32_t desc) |
128 | + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); | 104 | +{ |
129 | + tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), | 105 | + return FIELD_EX32(simd_data(desc), VDATA, VMA); |
130 | + vreg_ofs(s, a->rs1), vreg_ofs(s, a->rs2), | 106 | +} |
131 | + cpu_env, 0, s->vlen / 8, data, fn); | 107 | + |
132 | + } | 108 | +static inline uint32_t vext_vta(uint32_t desc) |
133 | + gen_set_label(over); | 109 | +{ |
134 | + return true; | 110 | + return FIELD_EX32(simd_data(desc), VDATA, VTA); |
135 | +} | 111 | +} |
136 | + | 112 | + |
137 | +/* OPIVV with GVEC IR */ | 113 | +static inline uint32_t vext_vta_all_1s(uint32_t desc) |
138 | +#define GEN_OPIVV_GVEC_TRANS(NAME, SUF) \ | 114 | +{ |
139 | +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ | 115 | + return FIELD_EX32(simd_data(desc), VDATA, VTA_ALL_1S); |
140 | +{ \ | 116 | +} |
141 | + static gen_helper_gvec_4_ptr * const fns[4] = { \ | 117 | + |
142 | + gen_helper_##NAME##_b, gen_helper_##NAME##_h, \ | 118 | +/* |
143 | + gen_helper_##NAME##_w, gen_helper_##NAME##_d, \ | 119 | + * Earlier designs (pre-0.9) had a varying number of bits |
144 | + }; \ | 120 | + * per mask value (MLEN). In the 0.9 design, MLEN=1. |
145 | + return do_opivv_gvec(s, a, tcg_gen_gvec_##SUF, fns[s->sew]); \ | 121 | + * (Section 4.5) |
146 | +} | 122 | + */ |
147 | + | 123 | +static inline int vext_elem_mask(void *v0, int index) |
148 | +GEN_OPIVV_GVEC_TRANS(vadd_vv, add) | 124 | +{ |
149 | +GEN_OPIVV_GVEC_TRANS(vsub_vv, sub) | 125 | + int idx = index / 64; |
150 | + | 126 | + int pos = index % 64; |
151 | +typedef void gen_helper_opivx(TCGv_ptr, TCGv_ptr, TCGv, TCGv_ptr, | 127 | + return (((uint64_t *)v0)[idx] >> pos) & 1; |
152 | + TCGv_env, TCGv_i32); | 128 | +} |
153 | + | 129 | + |
154 | +static bool opivx_trans(uint32_t vd, uint32_t rs1, uint32_t vs2, uint32_t vm, | 130 | +/* |
155 | + gen_helper_opivx *fn, DisasContext *s) | 131 | + * Get number of total elements, including prestart, body and tail elements. |
156 | +{ | 132 | + * Note that when LMUL < 1, the tail includes the elements past VLMAX that |
157 | + TCGv_ptr dest, src2, mask; | 133 | + * are held in the same vector register. |
158 | + TCGv src1; | 134 | + */ |
159 | + TCGv_i32 desc; | 135 | +static inline uint32_t vext_get_total_elems(CPURISCVState *env, uint32_t desc, |
160 | + uint32_t data = 0; | 136 | + uint32_t esz) |
161 | + | 137 | +{ |
162 | + TCGLabel *over = gen_new_label(); | 138 | + uint32_t vlenb = simd_maxsz(desc); |
163 | + tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); | 139 | + uint32_t sew = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW); |
164 | + | 140 | + int8_t emul = ctzl(esz) - ctzl(sew) + vext_lmul(desc) < 0 ? 0 : |
165 | + dest = tcg_temp_new_ptr(); | 141 | + ctzl(esz) - ctzl(sew) + vext_lmul(desc); |
166 | + mask = tcg_temp_new_ptr(); | 142 | + return (vlenb << emul) / esz; |
167 | + src2 = tcg_temp_new_ptr(); | 143 | +} |
168 | + src1 = tcg_temp_new(); | 144 | + |
169 | + gen_get_gpr(src1, rs1); | 145 | +/* set agnostic elements to 1s */ |
170 | + | 146 | +void vext_set_elems_1s(void *base, uint32_t is_agnostic, uint32_t cnt, |
171 | + data = FIELD_DP32(data, VDATA, MLEN, s->mlen); | 147 | + uint32_t tot); |
172 | + data = FIELD_DP32(data, VDATA, VM, vm); | ||
173 | + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); | ||
174 | + desc = tcg_const_i32(simd_desc(0, s->vlen / 8, data)); | ||
175 | + | ||
176 | + tcg_gen_addi_ptr(dest, cpu_env, vreg_ofs(s, vd)); | ||
177 | + tcg_gen_addi_ptr(src2, cpu_env, vreg_ofs(s, vs2)); | ||
178 | + tcg_gen_addi_ptr(mask, cpu_env, vreg_ofs(s, 0)); | ||
179 | + | ||
180 | + fn(dest, mask, src1, src2, cpu_env, desc); | ||
181 | + | ||
182 | + tcg_temp_free_ptr(dest); | ||
183 | + tcg_temp_free_ptr(mask); | ||
184 | + tcg_temp_free_ptr(src2); | ||
185 | + tcg_temp_free(src1); | ||
186 | + tcg_temp_free_i32(desc); | ||
187 | + gen_set_label(over); | ||
188 | + return true; | ||
189 | +} | ||
190 | + | ||
191 | +static bool opivx_check(DisasContext *s, arg_rmrr *a) | ||
192 | +{ | ||
193 | + return (vext_check_isa_ill(s) && | ||
194 | + vext_check_overlap_mask(s, a->rd, a->vm, false) && | ||
195 | + vext_check_reg(s, a->rd, false) && | ||
196 | + vext_check_reg(s, a->rs2, false)); | ||
197 | +} | ||
198 | + | ||
199 | +typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t, TCGv_i64, | ||
200 | + uint32_t, uint32_t); | ||
201 | + | ||
202 | +static inline bool | ||
203 | +do_opivx_gvec(DisasContext *s, arg_rmrr *a, GVecGen2sFn *gvec_fn, | ||
204 | + gen_helper_opivx *fn) | ||
205 | +{ | ||
206 | + if (!opivx_check(s, a)) { | ||
207 | + return false; | ||
208 | + } | ||
209 | + | ||
210 | + if (a->vm && s->vl_eq_vlmax) { | ||
211 | + TCGv_i64 src1 = tcg_temp_new_i64(); | ||
212 | + TCGv tmp = tcg_temp_new(); | ||
213 | + | ||
214 | + gen_get_gpr(tmp, a->rs1); | ||
215 | + tcg_gen_ext_tl_i64(src1, tmp); | ||
216 | + gvec_fn(s->sew, vreg_ofs(s, a->rd), vreg_ofs(s, a->rs2), | ||
217 | + src1, MAXSZ(s), MAXSZ(s)); | ||
218 | + | ||
219 | + tcg_temp_free_i64(src1); | ||
220 | + tcg_temp_free(tmp); | ||
221 | + return true; | ||
222 | + } | ||
223 | + return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fn, s); | ||
224 | +} | ||
225 | + | ||
226 | +/* OPIVX with GVEC IR */ | ||
227 | +#define GEN_OPIVX_GVEC_TRANS(NAME, SUF) \ | ||
228 | +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ | ||
229 | +{ \ | ||
230 | + static gen_helper_opivx * const fns[4] = { \ | ||
231 | + gen_helper_##NAME##_b, gen_helper_##NAME##_h, \ | ||
232 | + gen_helper_##NAME##_w, gen_helper_##NAME##_d, \ | ||
233 | + }; \ | ||
234 | + return do_opivx_gvec(s, a, tcg_gen_gvec_##SUF, fns[s->sew]); \ | ||
235 | +} | ||
236 | + | ||
237 | +GEN_OPIVX_GVEC_TRANS(vadd_vx, adds) | ||
238 | +GEN_OPIVX_GVEC_TRANS(vsub_vx, subs) | ||
239 | + | ||
240 | +static void gen_vec_rsub8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) | ||
241 | +{ | ||
242 | + tcg_gen_vec_sub8_i64(d, b, a); | ||
243 | +} | ||
244 | + | ||
245 | +static void gen_vec_rsub16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) | ||
246 | +{ | ||
247 | + tcg_gen_vec_sub8_i64(d, b, a); | ||
248 | +} | ||
249 | + | ||
250 | +static void gen_rsub_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) | ||
251 | +{ | ||
252 | + tcg_gen_sub_i32(ret, arg2, arg1); | ||
253 | +} | ||
254 | + | ||
255 | +static void gen_rsub_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) | ||
256 | +{ | ||
257 | + tcg_gen_sub_i64(ret, arg2, arg1); | ||
258 | +} | ||
259 | + | ||
260 | +static void gen_rsub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) | ||
261 | +{ | ||
262 | + tcg_gen_sub_vec(vece, r, b, a); | ||
263 | +} | ||
264 | + | ||
265 | +static void tcg_gen_gvec_rsubs(unsigned vece, uint32_t dofs, uint32_t aofs, | ||
266 | + TCGv_i64 c, uint32_t oprsz, uint32_t maxsz) | ||
267 | +{ | ||
268 | + static const GVecGen2s rsub_op[4] = { | ||
269 | + { .fni8 = gen_vec_rsub8_i64, | ||
270 | + .fniv = gen_rsub_vec, | ||
271 | + .fno = gen_helper_vec_rsubs8, | ||
272 | + .vece = MO_8 }, | ||
273 | + { .fni8 = gen_vec_rsub16_i64, | ||
274 | + .fniv = gen_rsub_vec, | ||
275 | + .fno = gen_helper_vec_rsubs16, | ||
276 | + .vece = MO_16 }, | ||
277 | + { .fni4 = gen_rsub_i32, | ||
278 | + .fniv = gen_rsub_vec, | ||
279 | + .fno = gen_helper_vec_rsubs32, | ||
280 | + .vece = MO_32 }, | ||
281 | + { .fni8 = gen_rsub_i64, | ||
282 | + .fniv = gen_rsub_vec, | ||
283 | + .fno = gen_helper_vec_rsubs64, | ||
284 | + .prefer_i64 = TCG_TARGET_REG_BITS == 64, | ||
285 | + .vece = MO_64 }, | ||
286 | + }; | ||
287 | + | ||
288 | + tcg_debug_assert(vece <= MO_64); | ||
289 | + tcg_gen_gvec_2s(dofs, aofs, oprsz, maxsz, c, &rsub_op[vece]); | ||
290 | +} | ||
291 | + | ||
292 | +GEN_OPIVX_GVEC_TRANS(vrsub_vx, rsubs) | ||
293 | + | ||
294 | +static bool opivi_trans(uint32_t vd, uint32_t imm, uint32_t vs2, uint32_t vm, | ||
295 | + gen_helper_opivx *fn, DisasContext *s, int zx) | ||
296 | +{ | ||
297 | + TCGv_ptr dest, src2, mask; | ||
298 | + TCGv src1; | ||
299 | + TCGv_i32 desc; | ||
300 | + uint32_t data = 0; | ||
301 | + | ||
302 | + TCGLabel *over = gen_new_label(); | ||
303 | + tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); | ||
304 | + | ||
305 | + dest = tcg_temp_new_ptr(); | ||
306 | + mask = tcg_temp_new_ptr(); | ||
307 | + src2 = tcg_temp_new_ptr(); | ||
308 | + if (zx) { | ||
309 | + src1 = tcg_const_tl(imm); | ||
310 | + } else { | ||
311 | + src1 = tcg_const_tl(sextract64(imm, 0, 5)); | ||
312 | + } | ||
313 | + data = FIELD_DP32(data, VDATA, MLEN, s->mlen); | ||
314 | + data = FIELD_DP32(data, VDATA, VM, vm); | ||
315 | + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); | ||
316 | + desc = tcg_const_i32(simd_desc(0, s->vlen / 8, data)); | ||
317 | + | ||
318 | + tcg_gen_addi_ptr(dest, cpu_env, vreg_ofs(s, vd)); | ||
319 | + tcg_gen_addi_ptr(src2, cpu_env, vreg_ofs(s, vs2)); | ||
320 | + tcg_gen_addi_ptr(mask, cpu_env, vreg_ofs(s, 0)); | ||
321 | + | ||
322 | + fn(dest, mask, src1, src2, cpu_env, desc); | ||
323 | + | ||
324 | + tcg_temp_free_ptr(dest); | ||
325 | + tcg_temp_free_ptr(mask); | ||
326 | + tcg_temp_free_ptr(src2); | ||
327 | + tcg_temp_free(src1); | ||
328 | + tcg_temp_free_i32(desc); | ||
329 | + gen_set_label(over); | ||
330 | + return true; | ||
331 | +} | ||
332 | + | ||
333 | +typedef void GVecGen2iFn(unsigned, uint32_t, uint32_t, int64_t, | ||
334 | + uint32_t, uint32_t); | ||
335 | + | ||
336 | +static inline bool | ||
337 | +do_opivi_gvec(DisasContext *s, arg_rmrr *a, GVecGen2iFn *gvec_fn, | ||
338 | + gen_helper_opivx *fn, int zx) | ||
339 | +{ | ||
340 | + if (!opivx_check(s, a)) { | ||
341 | + return false; | ||
342 | + } | ||
343 | + | ||
344 | + if (a->vm && s->vl_eq_vlmax) { | ||
345 | + if (zx) { | ||
346 | + gvec_fn(s->sew, vreg_ofs(s, a->rd), vreg_ofs(s, a->rs2), | ||
347 | + extract64(a->rs1, 0, 5), MAXSZ(s), MAXSZ(s)); | ||
348 | + } else { | ||
349 | + gvec_fn(s->sew, vreg_ofs(s, a->rd), vreg_ofs(s, a->rs2), | ||
350 | + sextract64(a->rs1, 0, 5), MAXSZ(s), MAXSZ(s)); | ||
351 | + } | ||
352 | + } else { | ||
353 | + return opivi_trans(a->rd, a->rs1, a->rs2, a->vm, fn, s, zx); | ||
354 | + } | ||
355 | + return true; | ||
356 | +} | ||
357 | + | ||
358 | +/* OPIVI with GVEC IR */ | ||
359 | +#define GEN_OPIVI_GVEC_TRANS(NAME, ZX, OPIVX, SUF) \ | ||
360 | +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ | ||
361 | +{ \ | ||
362 | + static gen_helper_opivx * const fns[4] = { \ | ||
363 | + gen_helper_##OPIVX##_b, gen_helper_##OPIVX##_h, \ | ||
364 | + gen_helper_##OPIVX##_w, gen_helper_##OPIVX##_d, \ | ||
365 | + }; \ | ||
366 | + return do_opivi_gvec(s, a, tcg_gen_gvec_##SUF, \ | ||
367 | + fns[s->sew], ZX); \ | ||
368 | +} | ||
369 | + | ||
370 | +GEN_OPIVI_GVEC_TRANS(vadd_vi, 0, vadd_vx, addi) | ||
371 | + | ||
372 | +static void tcg_gen_gvec_rsubi(unsigned vece, uint32_t dofs, uint32_t aofs, | ||
373 | + int64_t c, uint32_t oprsz, uint32_t maxsz) | ||
374 | +{ | ||
375 | + TCGv_i64 tmp = tcg_const_i64(c); | ||
376 | + tcg_gen_gvec_rsubs(vece, dofs, aofs, tmp, oprsz, maxsz); | ||
377 | + tcg_temp_free_i64(tmp); | ||
378 | +} | ||
379 | + | ||
380 | +GEN_OPIVI_GVEC_TRANS(vrsub_vi, 0, vrsub_vx, rsubi) | ||
381 | diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c | ||
382 | index XXXXXXX..XXXXXXX 100644 | ||
383 | --- a/target/riscv/vector_helper.c | ||
384 | +++ b/target/riscv/vector_helper.c | ||
385 | @@ -XXX,XX +XXX,XX @@ GEN_VEXT_AMO(vamominw_v_w, int32_t, int32_t, idx_w, clearl) | ||
386 | GEN_VEXT_AMO(vamomaxw_v_w, int32_t, int32_t, idx_w, clearl) | ||
387 | GEN_VEXT_AMO(vamominuw_v_w, uint32_t, uint32_t, idx_w, clearl) | ||
388 | GEN_VEXT_AMO(vamomaxuw_v_w, uint32_t, uint32_t, idx_w, clearl) | ||
389 | + | ||
390 | +/* | ||
391 | + *** Vector Integer Arithmetic Instructions | ||
392 | + */ | ||
393 | + | 148 | + |
394 | +/* expand macro args before macro */ | 149 | +/* expand macro args before macro */ |
395 | +#define RVVCALL(macro, ...) macro(__VA_ARGS__) | 150 | +#define RVVCALL(macro, ...) macro(__VA_ARGS__) |
396 | + | 151 | + |
397 | +/* (TD, T1, T2, TX1, TX2) */ | 152 | +/* (TD, T1, T2, TX1, TX2) */ |
398 | +#define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t | 153 | +#define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t |
399 | +#define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t | 154 | +#define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t |
400 | +#define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t | 155 | +#define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t |
401 | +#define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t | 156 | +#define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t |
402 | + | 157 | + |
403 | +/* operation of two vector elements */ | 158 | +/* operation of two vector elements */ |
404 | +typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i); | 159 | +typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i); |
405 | + | 160 | + |
406 | +#define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ | 161 | +#define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ |
407 | +static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \ | 162 | +static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \ |
408 | +{ \ | 163 | +{ \ |
409 | + TX1 s1 = *((T1 *)vs1 + HS1(i)); \ | 164 | + TX1 s1 = *((T1 *)vs1 + HS1(i)); \ |
410 | + TX2 s2 = *((T2 *)vs2 + HS2(i)); \ | 165 | + TX2 s2 = *((T2 *)vs2 + HS2(i)); \ |
411 | + *((TD *)vd + HD(i)) = OP(s2, s1); \ | 166 | + *((TD *)vd + HD(i)) = OP(s2, s1); \ |
412 | +} | 167 | +} |
413 | +#define DO_SUB(N, M) (N - M) | 168 | + |
414 | +#define DO_RSUB(N, M) (M - N) | 169 | +void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2, |
415 | + | 170 | + CPURISCVState *env, uint32_t desc, |
416 | +RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD) | 171 | + opivv2_fn *fn, uint32_t esz); |
417 | +RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD) | ||
418 | +RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD) | ||
419 | +RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD) | ||
420 | +RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB) | ||
421 | +RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB) | ||
422 | +RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB) | ||
423 | +RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB) | ||
424 | + | ||
425 | +static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2, | ||
426 | + CPURISCVState *env, uint32_t desc, | ||
427 | + uint32_t esz, uint32_t dsz, | ||
428 | + opivv2_fn *fn, clear_fn *clearfn) | ||
429 | +{ | ||
430 | + uint32_t vlmax = vext_maxsz(desc) / esz; | ||
431 | + uint32_t mlen = vext_mlen(desc); | ||
432 | + uint32_t vm = vext_vm(desc); | ||
433 | + uint32_t vl = env->vl; | ||
434 | + uint32_t i; | ||
435 | + | ||
436 | + for (i = 0; i < vl; i++) { | ||
437 | + if (!vm && !vext_elem_mask(v0, mlen, i)) { | ||
438 | + continue; | ||
439 | + } | ||
440 | + fn(vd, vs1, vs2, i); | ||
441 | + } | ||
442 | + clearfn(vd, vl, vl * dsz, vlmax * dsz); | ||
443 | +} | ||
444 | + | 172 | + |
445 | +/* generate the helpers for OPIVV */ | 173 | +/* generate the helpers for OPIVV */ |
446 | +#define GEN_VEXT_VV(NAME, ESZ, DSZ, CLEAR_FN) \ | 174 | +#define GEN_VEXT_VV(NAME, ESZ) \ |
447 | +void HELPER(NAME)(void *vd, void *v0, void *vs1, \ | 175 | +void HELPER(NAME)(void *vd, void *v0, void *vs1, \ |
448 | + void *vs2, CPURISCVState *env, \ | 176 | + void *vs2, CPURISCVState *env, \ |
449 | + uint32_t desc) \ | 177 | + uint32_t desc) \ |
450 | +{ \ | 178 | +{ \ |
451 | + do_vext_vv(vd, v0, vs1, vs2, env, desc, ESZ, DSZ, \ | 179 | + do_vext_vv(vd, v0, vs1, vs2, env, desc, \ |
452 | + do_##NAME, CLEAR_FN); \ | 180 | + do_##NAME, ESZ); \ |
453 | +} | 181 | +} |
454 | + | ||
455 | +GEN_VEXT_VV(vadd_vv_b, 1, 1, clearb) | ||
456 | +GEN_VEXT_VV(vadd_vv_h, 2, 2, clearh) | ||
457 | +GEN_VEXT_VV(vadd_vv_w, 4, 4, clearl) | ||
458 | +GEN_VEXT_VV(vadd_vv_d, 8, 8, clearq) | ||
459 | +GEN_VEXT_VV(vsub_vv_b, 1, 1, clearb) | ||
460 | +GEN_VEXT_VV(vsub_vv_h, 2, 2, clearh) | ||
461 | +GEN_VEXT_VV(vsub_vv_w, 4, 4, clearl) | ||
462 | +GEN_VEXT_VV(vsub_vv_d, 8, 8, clearq) | ||
463 | + | 182 | + |
464 | +typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i); | 183 | +typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i); |
465 | + | 184 | + |
466 | +/* | 185 | +/* |
467 | + * (T1)s1 gives the real operator type. | 186 | + * (T1)s1 gives the real operator type. |
... | ... | ||
472 | +{ \ | 191 | +{ \ |
473 | + TX2 s2 = *((T2 *)vs2 + HS2(i)); \ | 192 | + TX2 s2 = *((T2 *)vs2 + HS2(i)); \ |
474 | + *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1); \ | 193 | + *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1); \ |
475 | +} | 194 | +} |
476 | + | 195 | + |
477 | +RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD) | 196 | +void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2, |
478 | +RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD) | 197 | + CPURISCVState *env, uint32_t desc, |
479 | +RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD) | 198 | + opivx2_fn fn, uint32_t esz); |
480 | +RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD) | 199 | + |
481 | +RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB) | 200 | +/* generate the helpers for OPIVX */ |
482 | +RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB) | 201 | +#define GEN_VEXT_VX(NAME, ESZ) \ |
483 | +RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB) | 202 | +void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ |
484 | +RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB) | 203 | + void *vs2, CPURISCVState *env, \ |
485 | +RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB) | 204 | + uint32_t desc) \ |
486 | +RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB) | 205 | +{ \ |
487 | +RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB) | 206 | + do_vext_vx(vd, v0, s1, vs2, env, desc, \ |
488 | +RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB) | 207 | + do_##NAME, ESZ); \ |
489 | + | 208 | +} |
490 | +static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2, | 209 | + |
491 | + CPURISCVState *env, uint32_t desc, | 210 | +#endif /* TARGET_RISCV_VECTOR_INTERNALS_H */ |
492 | + uint32_t esz, uint32_t dsz, | 211 | diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c |
493 | + opivx2_fn fn, clear_fn *clearfn) | 212 | index XXXXXXX..XXXXXXX 100644 |
494 | +{ | 213 | --- a/target/riscv/vector_helper.c |
495 | + uint32_t vlmax = vext_maxsz(desc) / esz; | 214 | +++ b/target/riscv/vector_helper.c |
496 | + uint32_t mlen = vext_mlen(desc); | 215 | @@ -XXX,XX +XXX,XX @@ |
216 | #include "fpu/softfloat.h" | ||
217 | #include "tcg/tcg-gvec-desc.h" | ||
218 | #include "internals.h" | ||
219 | +#include "vector_internals.h" | ||
220 | #include <math.h> | ||
221 | |||
222 | target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1, | ||
223 | @@ -XXX,XX +XXX,XX @@ target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1, | ||
224 | return vl; | ||
225 | } | ||
226 | |||
227 | -/* | ||
228 | - * Note that vector data is stored in host-endian 64-bit chunks, | ||
229 | - * so addressing units smaller than that needs a host-endian fixup. | ||
230 | - */ | ||
231 | -#if HOST_BIG_ENDIAN | ||
232 | -#define H1(x) ((x) ^ 7) | ||
233 | -#define H1_2(x) ((x) ^ 6) | ||
234 | -#define H1_4(x) ((x) ^ 4) | ||
235 | -#define H2(x) ((x) ^ 3) | ||
236 | -#define H4(x) ((x) ^ 1) | ||
237 | -#define H8(x) ((x)) | ||
238 | -#else | ||
239 | -#define H1(x) (x) | ||
240 | -#define H1_2(x) (x) | ||
241 | -#define H1_4(x) (x) | ||
242 | -#define H2(x) (x) | ||
243 | -#define H4(x) (x) | ||
244 | -#define H8(x) (x) | ||
245 | -#endif | ||
246 | - | ||
247 | -static inline uint32_t vext_nf(uint32_t desc) | ||
248 | -{ | ||
249 | - return FIELD_EX32(simd_data(desc), VDATA, NF); | ||
250 | -} | ||
251 | - | ||
252 | -static inline uint32_t vext_vm(uint32_t desc) | ||
253 | -{ | ||
254 | - return FIELD_EX32(simd_data(desc), VDATA, VM); | ||
255 | -} | ||
256 | - | ||
257 | -/* | ||
258 | - * Encode LMUL to lmul as following: | ||
259 | - * LMUL vlmul lmul | ||
260 | - * 1 000 0 | ||
261 | - * 2 001 1 | ||
262 | - * 4 010 2 | ||
263 | - * 8 011 3 | ||
264 | - * - 100 - | ||
265 | - * 1/8 101 -3 | ||
266 | - * 1/4 110 -2 | ||
267 | - * 1/2 111 -1 | ||
268 | - */ | ||
269 | -static inline int32_t vext_lmul(uint32_t desc) | ||
270 | -{ | ||
271 | - return sextract32(FIELD_EX32(simd_data(desc), VDATA, LMUL), 0, 3); | ||
272 | -} | ||
273 | - | ||
274 | -static inline uint32_t vext_vta(uint32_t desc) | ||
275 | -{ | ||
276 | - return FIELD_EX32(simd_data(desc), VDATA, VTA); | ||
277 | -} | ||
278 | - | ||
279 | -static inline uint32_t vext_vma(uint32_t desc) | ||
280 | -{ | ||
281 | - return FIELD_EX32(simd_data(desc), VDATA, VMA); | ||
282 | -} | ||
283 | - | ||
284 | -static inline uint32_t vext_vta_all_1s(uint32_t desc) | ||
285 | -{ | ||
286 | - return FIELD_EX32(simd_data(desc), VDATA, VTA_ALL_1S); | ||
287 | -} | ||
288 | - | ||
289 | /* | ||
290 | * Get the maximum number of elements can be operated. | ||
291 | * | ||
292 | @@ -XXX,XX +XXX,XX @@ static inline uint32_t vext_max_elems(uint32_t desc, uint32_t log2_esz) | ||
293 | return scale < 0 ? vlenb >> -scale : vlenb << scale; | ||
294 | } | ||
295 | |||
296 | -/* | ||
297 | - * Get number of total elements, including prestart, body and tail elements. | ||
298 | - * Note that when LMUL < 1, the tail includes the elements past VLMAX that | ||
299 | - * are held in the same vector register. | ||
300 | - */ | ||
301 | -static inline uint32_t vext_get_total_elems(CPURISCVState *env, uint32_t desc, | ||
302 | - uint32_t esz) | ||
303 | -{ | ||
304 | - uint32_t vlenb = simd_maxsz(desc); | ||
305 | - uint32_t sew = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW); | ||
306 | - int8_t emul = ctzl(esz) - ctzl(sew) + vext_lmul(desc) < 0 ? 0 : | ||
307 | - ctzl(esz) - ctzl(sew) + vext_lmul(desc); | ||
308 | - return (vlenb << emul) / esz; | ||
309 | -} | ||
310 | - | ||
311 | static inline target_ulong adjust_addr(CPURISCVState *env, target_ulong addr) | ||
312 | { | ||
313 | return (addr & ~env->cur_pmmask) | env->cur_pmbase; | ||
314 | @@ -XXX,XX +XXX,XX @@ static void probe_pages(CPURISCVState *env, target_ulong addr, | ||
315 | } | ||
316 | } | ||
317 | |||
318 | -/* set agnostic elements to 1s */ | ||
319 | -static void vext_set_elems_1s(void *base, uint32_t is_agnostic, uint32_t cnt, | ||
320 | - uint32_t tot) | ||
321 | -{ | ||
322 | - if (is_agnostic == 0) { | ||
323 | - /* policy undisturbed */ | ||
324 | - return; | ||
325 | - } | ||
326 | - if (tot - cnt == 0) { | ||
327 | - return; | ||
328 | - } | ||
329 | - memset(base + cnt, -1, tot - cnt); | ||
330 | -} | ||
331 | - | ||
332 | static inline void vext_set_elem_mask(void *v0, int index, | ||
333 | uint8_t value) | ||
334 | { | ||
335 | @@ -XXX,XX +XXX,XX @@ static inline void vext_set_elem_mask(void *v0, int index, | ||
336 | ((uint64_t *)v0)[idx] = deposit64(old, pos, 1, value); | ||
337 | } | ||
338 | |||
339 | -/* | ||
340 | - * Earlier designs (pre-0.9) had a varying number of bits | ||
341 | - * per mask value (MLEN). In the 0.9 design, MLEN=1. | ||
342 | - * (Section 4.5) | ||
343 | - */ | ||
344 | -static inline int vext_elem_mask(void *v0, int index) | ||
345 | -{ | ||
346 | - int idx = index / 64; | ||
347 | - int pos = index % 64; | ||
348 | - return (((uint64_t *)v0)[idx] >> pos) & 1; | ||
349 | -} | ||
350 | - | ||
351 | /* elements operations for load and store */ | ||
352 | typedef void vext_ldst_elem_fn(CPURISCVState *env, abi_ptr addr, | ||
353 | uint32_t idx, void *vd, uintptr_t retaddr); | ||
354 | @@ -XXX,XX +XXX,XX @@ GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b) | ||
355 | * Vector Integer Arithmetic Instructions | ||
356 | */ | ||
357 | |||
358 | -/* expand macro args before macro */ | ||
359 | -#define RVVCALL(macro, ...) macro(__VA_ARGS__) | ||
360 | - | ||
361 | /* (TD, T1, T2, TX1, TX2) */ | ||
362 | #define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t | ||
363 | #define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t | ||
364 | #define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t | ||
365 | #define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t | ||
366 | -#define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t | ||
367 | -#define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t | ||
368 | -#define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t | ||
369 | -#define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t | ||
370 | #define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t | ||
371 | #define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t | ||
372 | #define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t | ||
373 | @@ -XXX,XX +XXX,XX @@ GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b) | ||
374 | #define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t | ||
375 | #define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t | ||
376 | |||
377 | -/* operation of two vector elements */ | ||
378 | -typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i); | ||
379 | - | ||
380 | -#define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ | ||
381 | -static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \ | ||
382 | -{ \ | ||
383 | - TX1 s1 = *((T1 *)vs1 + HS1(i)); \ | ||
384 | - TX2 s2 = *((T2 *)vs2 + HS2(i)); \ | ||
385 | - *((TD *)vd + HD(i)) = OP(s2, s1); \ | ||
386 | -} | ||
387 | #define DO_SUB(N, M) (N - M) | ||
388 | #define DO_RSUB(N, M) (M - N) | ||
389 | |||
390 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB) | ||
391 | RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB) | ||
392 | RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB) | ||
393 | |||
394 | -static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2, | ||
395 | - CPURISCVState *env, uint32_t desc, | ||
396 | - opivv2_fn *fn, uint32_t esz) | ||
397 | -{ | ||
398 | - uint32_t vm = vext_vm(desc); | ||
399 | - uint32_t vl = env->vl; | ||
400 | - uint32_t total_elems = vext_get_total_elems(env, desc, esz); | ||
401 | - uint32_t vta = vext_vta(desc); | ||
402 | - uint32_t vma = vext_vma(desc); | ||
403 | - uint32_t i; | ||
404 | - | ||
405 | - for (i = env->vstart; i < vl; i++) { | ||
406 | - if (!vm && !vext_elem_mask(v0, i)) { | ||
407 | - /* set masked-off elements to 1s */ | ||
408 | - vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); | ||
409 | - continue; | ||
410 | - } | ||
411 | - fn(vd, vs1, vs2, i); | ||
412 | - } | ||
413 | - env->vstart = 0; | ||
414 | - /* set tail elements to 1s */ | ||
415 | - vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); | ||
416 | -} | ||
417 | - | ||
418 | -/* generate the helpers for OPIVV */ | ||
419 | -#define GEN_VEXT_VV(NAME, ESZ) \ | ||
420 | -void HELPER(NAME)(void *vd, void *v0, void *vs1, \ | ||
421 | - void *vs2, CPURISCVState *env, \ | ||
422 | - uint32_t desc) \ | ||
423 | -{ \ | ||
424 | - do_vext_vv(vd, v0, vs1, vs2, env, desc, \ | ||
425 | - do_##NAME, ESZ); \ | ||
426 | -} | ||
427 | - | ||
428 | GEN_VEXT_VV(vadd_vv_b, 1) | ||
429 | GEN_VEXT_VV(vadd_vv_h, 2) | ||
430 | GEN_VEXT_VV(vadd_vv_w, 4) | ||
431 | @@ -XXX,XX +XXX,XX @@ GEN_VEXT_VV(vsub_vv_h, 2) | ||
432 | GEN_VEXT_VV(vsub_vv_w, 4) | ||
433 | GEN_VEXT_VV(vsub_vv_d, 8) | ||
434 | |||
435 | -typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i); | ||
436 | - | ||
437 | -/* | ||
438 | - * (T1)s1 gives the real operator type. | ||
439 | - * (TX1)(T1)s1 expands the operator type of widen or narrow operations. | ||
440 | - */ | ||
441 | -#define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ | ||
442 | -static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \ | ||
443 | -{ \ | ||
444 | - TX2 s2 = *((T2 *)vs2 + HS2(i)); \ | ||
445 | - *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1); \ | ||
446 | -} | ||
447 | |||
448 | RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD) | ||
449 | RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD) | ||
450 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB) | ||
451 | RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB) | ||
452 | RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB) | ||
453 | |||
454 | -static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2, | ||
455 | - CPURISCVState *env, uint32_t desc, | ||
456 | - opivx2_fn fn, uint32_t esz) | ||
457 | -{ | ||
458 | - uint32_t vm = vext_vm(desc); | ||
459 | - uint32_t vl = env->vl; | ||
460 | - uint32_t total_elems = vext_get_total_elems(env, desc, esz); | ||
461 | - uint32_t vta = vext_vta(desc); | ||
462 | - uint32_t vma = vext_vma(desc); | ||
463 | - uint32_t i; | ||
464 | - | ||
465 | - for (i = env->vstart; i < vl; i++) { | ||
466 | - if (!vm && !vext_elem_mask(v0, i)) { | ||
467 | - /* set masked-off elements to 1s */ | ||
468 | - vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); | ||
469 | - continue; | ||
470 | - } | ||
471 | - fn(vd, s1, vs2, i); | ||
472 | - } | ||
473 | - env->vstart = 0; | ||
474 | - /* set tail elements to 1s */ | ||
475 | - vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); | ||
476 | -} | ||
477 | - | ||
478 | -/* generate the helpers for OPIVX */ | ||
479 | -#define GEN_VEXT_VX(NAME, ESZ) \ | ||
480 | -void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ | ||
481 | - void *vs2, CPURISCVState *env, \ | ||
482 | - uint32_t desc) \ | ||
483 | -{ \ | ||
484 | - do_vext_vx(vd, v0, s1, vs2, env, desc, \ | ||
485 | - do_##NAME, ESZ); \ | ||
486 | -} | ||
487 | - | ||
488 | GEN_VEXT_VX(vadd_vx_b, 1) | ||
489 | GEN_VEXT_VX(vadd_vx_h, 2) | ||
490 | GEN_VEXT_VX(vadd_vx_w, 4) | ||
491 | diff --git a/target/riscv/vector_internals.c b/target/riscv/vector_internals.c | ||
492 | new file mode 100644 | ||
493 | index XXXXXXX..XXXXXXX | ||
494 | --- /dev/null | ||
495 | +++ b/target/riscv/vector_internals.c | ||
496 | @@ -XXX,XX +XXX,XX @@ | ||
497 | +/* | ||
498 | + * RISC-V Vector Extension Internals | ||
499 | + * | ||
500 | + * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved. | ||
501 | + * | ||
502 | + * This program is free software; you can redistribute it and/or modify it | ||
503 | + * under the terms and conditions of the GNU General Public License, | ||
504 | + * version 2 or later, as published by the Free Software Foundation. | ||
505 | + * | ||
506 | + * This program is distributed in the hope it will be useful, but WITHOUT | ||
507 | + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
508 | + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
509 | + * more details. | ||
510 | + * | ||
511 | + * You should have received a copy of the GNU General Public License along with | ||
512 | + * this program. If not, see <http://www.gnu.org/licenses/>. | ||
513 | + */ | ||
514 | + | ||
515 | +#include "vector_internals.h" | ||
516 | + | ||
517 | +/* set agnostic elements to 1s */ | ||
518 | +void vext_set_elems_1s(void *base, uint32_t is_agnostic, uint32_t cnt, | ||
519 | + uint32_t tot) | ||
520 | +{ | ||
521 | + if (is_agnostic == 0) { | ||
522 | + /* policy undisturbed */ | ||
523 | + return; | ||
524 | + } | ||
525 | + if (tot - cnt == 0) { | ||
526 | + return ; | ||
527 | + } | ||
528 | + memset(base + cnt, -1, tot - cnt); | ||
529 | +} | ||
530 | + | ||
531 | +void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2, | ||
532 | + CPURISCVState *env, uint32_t desc, | ||
533 | + opivv2_fn *fn, uint32_t esz) | ||
534 | +{ | ||
497 | + uint32_t vm = vext_vm(desc); | 535 | + uint32_t vm = vext_vm(desc); |
498 | + uint32_t vl = env->vl; | 536 | + uint32_t vl = env->vl; |
537 | + uint32_t total_elems = vext_get_total_elems(env, desc, esz); | ||
538 | + uint32_t vta = vext_vta(desc); | ||
539 | + uint32_t vma = vext_vma(desc); | ||
499 | + uint32_t i; | 540 | + uint32_t i; |
500 | + | 541 | + |
501 | + for (i = 0; i < vl; i++) { | 542 | + for (i = env->vstart; i < vl; i++) { |
502 | + if (!vm && !vext_elem_mask(v0, mlen, i)) { | 543 | + if (!vm && !vext_elem_mask(v0, i)) { |
544 | + /* set masked-off elements to 1s */ | ||
545 | + vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); | ||
546 | + continue; | ||
547 | + } | ||
548 | + fn(vd, vs1, vs2, i); | ||
549 | + } | ||
550 | + env->vstart = 0; | ||
551 | + /* set tail elements to 1s */ | ||
552 | + vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); | ||
553 | +} | ||
554 | + | ||
555 | +void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2, | ||
556 | + CPURISCVState *env, uint32_t desc, | ||
557 | + opivx2_fn fn, uint32_t esz) | ||
558 | +{ | ||
559 | + uint32_t vm = vext_vm(desc); | ||
560 | + uint32_t vl = env->vl; | ||
561 | + uint32_t total_elems = vext_get_total_elems(env, desc, esz); | ||
562 | + uint32_t vta = vext_vta(desc); | ||
563 | + uint32_t vma = vext_vma(desc); | ||
564 | + uint32_t i; | ||
565 | + | ||
566 | + for (i = env->vstart; i < vl; i++) { | ||
567 | + if (!vm && !vext_elem_mask(v0, i)) { | ||
568 | + /* set masked-off elements to 1s */ | ||
569 | + vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); | ||
503 | + continue; | 570 | + continue; |
504 | + } | 571 | + } |
505 | + fn(vd, s1, vs2, i); | 572 | + fn(vd, s1, vs2, i); |
506 | + } | 573 | + } |
507 | + clearfn(vd, vl, vl * dsz, vlmax * dsz); | 574 | + env->vstart = 0; |
508 | +} | 575 | + /* set tail elements to 1s */ |
509 | + | 576 | + vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); |
510 | +/* generate the helpers for OPIVX */ | 577 | +} |
511 | +#define GEN_VEXT_VX(NAME, ESZ, DSZ, CLEAR_FN) \ | 578 | diff --git a/target/riscv/meson.build b/target/riscv/meson.build |
512 | +void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ | 579 | index XXXXXXX..XXXXXXX 100644 |
513 | + void *vs2, CPURISCVState *env, \ | 580 | --- a/target/riscv/meson.build |
514 | + uint32_t desc) \ | 581 | +++ b/target/riscv/meson.build |
515 | +{ \ | 582 | @@ -XXX,XX +XXX,XX @@ riscv_ss.add(files( |
516 | + do_vext_vx(vd, v0, s1, vs2, env, desc, ESZ, DSZ, \ | 583 | 'gdbstub.c', |
517 | + do_##NAME, CLEAR_FN); \ | 584 | 'op_helper.c', |
518 | +} | 585 | 'vector_helper.c', |
519 | + | 586 | + 'vector_internals.c', |
520 | +GEN_VEXT_VX(vadd_vx_b, 1, 1, clearb) | 587 | 'bitmanip_helper.c', |
521 | +GEN_VEXT_VX(vadd_vx_h, 2, 2, clearh) | 588 | 'translate.c', |
522 | +GEN_VEXT_VX(vadd_vx_w, 4, 4, clearl) | 589 | 'm128_helper.c', |
523 | +GEN_VEXT_VX(vadd_vx_d, 8, 8, clearq) | ||
524 | +GEN_VEXT_VX(vsub_vx_b, 1, 1, clearb) | ||
525 | +GEN_VEXT_VX(vsub_vx_h, 2, 2, clearh) | ||
526 | +GEN_VEXT_VX(vsub_vx_w, 4, 4, clearl) | ||
527 | +GEN_VEXT_VX(vsub_vx_d, 8, 8, clearq) | ||
528 | +GEN_VEXT_VX(vrsub_vx_b, 1, 1, clearb) | ||
529 | +GEN_VEXT_VX(vrsub_vx_h, 2, 2, clearh) | ||
530 | +GEN_VEXT_VX(vrsub_vx_w, 4, 4, clearl) | ||
531 | +GEN_VEXT_VX(vrsub_vx_d, 8, 8, clearq) | ||
532 | + | ||
533 | +void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc) | ||
534 | +{ | ||
535 | + intptr_t oprsz = simd_oprsz(desc); | ||
536 | + intptr_t i; | ||
537 | + | ||
538 | + for (i = 0; i < oprsz; i += sizeof(uint8_t)) { | ||
539 | + *(uint8_t *)(d + i) = (uint8_t)b - *(uint8_t *)(a + i); | ||
540 | + } | ||
541 | +} | ||
542 | + | ||
543 | +void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc) | ||
544 | +{ | ||
545 | + intptr_t oprsz = simd_oprsz(desc); | ||
546 | + intptr_t i; | ||
547 | + | ||
548 | + for (i = 0; i < oprsz; i += sizeof(uint16_t)) { | ||
549 | + *(uint16_t *)(d + i) = (uint16_t)b - *(uint16_t *)(a + i); | ||
550 | + } | ||
551 | +} | ||
552 | + | ||
553 | +void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t desc) | ||
554 | +{ | ||
555 | + intptr_t oprsz = simd_oprsz(desc); | ||
556 | + intptr_t i; | ||
557 | + | ||
558 | + for (i = 0; i < oprsz; i += sizeof(uint32_t)) { | ||
559 | + *(uint32_t *)(d + i) = (uint32_t)b - *(uint32_t *)(a + i); | ||
560 | + } | ||
561 | +} | ||
562 | + | ||
563 | +void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc) | ||
564 | +{ | ||
565 | + intptr_t oprsz = simd_oprsz(desc); | ||
566 | + intptr_t i; | ||
567 | + | ||
568 | + for (i = 0; i < oprsz; i += sizeof(uint64_t)) { | ||
569 | + *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i); | ||
570 | + } | ||
571 | +} | ||
572 | -- | 590 | -- |
573 | 2.27.0 | 591 | 2.41.0 |
574 | |||
575 | diff view generated by jsdifflib |
1 | From: LIU Zhiwei <zhiwei_liu@c-sky.com> | 1 | From: Kiran Ostrolenk <kiran.ostrolenk@codethink.co.uk> |
---|---|---|---|
2 | 2 | ||
3 | Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> | 3 | Refactor the non-SEW-specific code out of `GEN_OPIVV_TRANS` into |
4 | function `opivv_trans` (similar to `opivi_trans`). `opivv_trans` will be | ||
5 | used in subsequent vector-crypto commits. | ||
6 | |||
7 | Signed-off-by: Kiran Ostrolenk <kiran.ostrolenk@codethink.co.uk> | ||
8 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
4 | Reviewed-by: Alistair Francis <alistair.francis@wdc.com> | 9 | Reviewed-by: Alistair Francis <alistair.francis@wdc.com> |
5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 10 | Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn> |
6 | Message-id: 20200623215920.2594-13-zhiwei_liu@c-sky.com | 11 | Signed-off-by: Max Chou <max.chou@sifive.com> |
12 | Message-ID: <20230711165917.2629866-3-max.chou@sifive.com> | ||
7 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | 13 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> |
8 | --- | 14 | --- |
9 | target/riscv/helper.h | 33 ++++++ | 15 | target/riscv/insn_trans/trans_rvv.c.inc | 62 +++++++++++++------------ |
10 | target/riscv/insn32.decode | 11 ++ | 16 | 1 file changed, 32 insertions(+), 30 deletions(-) |
11 | target/riscv/insn_trans/trans_rvv.inc.c | 113 +++++++++++++++++++ | ||
12 | target/riscv/vector_helper.c | 137 ++++++++++++++++++++++++ | ||
13 | 4 files changed, 294 insertions(+) | ||
14 | 17 | ||
15 | diff --git a/target/riscv/helper.h b/target/riscv/helper.h | 18 | diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc |
16 | index XXXXXXX..XXXXXXX 100644 | 19 | index XXXXXXX..XXXXXXX 100644 |
17 | --- a/target/riscv/helper.h | 20 | --- a/target/riscv/insn_trans/trans_rvv.c.inc |
18 | +++ b/target/riscv/helper.h | 21 | +++ b/target/riscv/insn_trans/trans_rvv.c.inc |
19 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vwadd_wx_w, void, ptr, ptr, tl, ptr, env, i32) | 22 | @@ -XXX,XX +XXX,XX @@ GEN_OPIWX_WIDEN_TRANS(vwadd_wx) |
20 | DEF_HELPER_6(vwsub_wx_b, void, ptr, ptr, tl, ptr, env, i32) | ||
21 | DEF_HELPER_6(vwsub_wx_h, void, ptr, ptr, tl, ptr, env, i32) | ||
22 | DEF_HELPER_6(vwsub_wx_w, void, ptr, ptr, tl, ptr, env, i32) | ||
23 | + | ||
24 | +DEF_HELPER_6(vadc_vvm_b, void, ptr, ptr, ptr, ptr, env, i32) | ||
25 | +DEF_HELPER_6(vadc_vvm_h, void, ptr, ptr, ptr, ptr, env, i32) | ||
26 | +DEF_HELPER_6(vadc_vvm_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
27 | +DEF_HELPER_6(vadc_vvm_d, void, ptr, ptr, ptr, ptr, env, i32) | ||
28 | +DEF_HELPER_6(vsbc_vvm_b, void, ptr, ptr, ptr, ptr, env, i32) | ||
29 | +DEF_HELPER_6(vsbc_vvm_h, void, ptr, ptr, ptr, ptr, env, i32) | ||
30 | +DEF_HELPER_6(vsbc_vvm_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
31 | +DEF_HELPER_6(vsbc_vvm_d, void, ptr, ptr, ptr, ptr, env, i32) | ||
32 | +DEF_HELPER_6(vmadc_vvm_b, void, ptr, ptr, ptr, ptr, env, i32) | ||
33 | +DEF_HELPER_6(vmadc_vvm_h, void, ptr, ptr, ptr, ptr, env, i32) | ||
34 | +DEF_HELPER_6(vmadc_vvm_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
35 | +DEF_HELPER_6(vmadc_vvm_d, void, ptr, ptr, ptr, ptr, env, i32) | ||
36 | +DEF_HELPER_6(vmsbc_vvm_b, void, ptr, ptr, ptr, ptr, env, i32) | ||
37 | +DEF_HELPER_6(vmsbc_vvm_h, void, ptr, ptr, ptr, ptr, env, i32) | ||
38 | +DEF_HELPER_6(vmsbc_vvm_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
39 | +DEF_HELPER_6(vmsbc_vvm_d, void, ptr, ptr, ptr, ptr, env, i32) | ||
40 | +DEF_HELPER_6(vadc_vxm_b, void, ptr, ptr, tl, ptr, env, i32) | ||
41 | +DEF_HELPER_6(vadc_vxm_h, void, ptr, ptr, tl, ptr, env, i32) | ||
42 | +DEF_HELPER_6(vadc_vxm_w, void, ptr, ptr, tl, ptr, env, i32) | ||
43 | +DEF_HELPER_6(vadc_vxm_d, void, ptr, ptr, tl, ptr, env, i32) | ||
44 | +DEF_HELPER_6(vsbc_vxm_b, void, ptr, ptr, tl, ptr, env, i32) | ||
45 | +DEF_HELPER_6(vsbc_vxm_h, void, ptr, ptr, tl, ptr, env, i32) | ||
46 | +DEF_HELPER_6(vsbc_vxm_w, void, ptr, ptr, tl, ptr, env, i32) | ||
47 | +DEF_HELPER_6(vsbc_vxm_d, void, ptr, ptr, tl, ptr, env, i32) | ||
48 | +DEF_HELPER_6(vmadc_vxm_b, void, ptr, ptr, tl, ptr, env, i32) | ||
49 | +DEF_HELPER_6(vmadc_vxm_h, void, ptr, ptr, tl, ptr, env, i32) | ||
50 | +DEF_HELPER_6(vmadc_vxm_w, void, ptr, ptr, tl, ptr, env, i32) | ||
51 | +DEF_HELPER_6(vmadc_vxm_d, void, ptr, ptr, tl, ptr, env, i32) | ||
52 | +DEF_HELPER_6(vmsbc_vxm_b, void, ptr, ptr, tl, ptr, env, i32) | ||
53 | +DEF_HELPER_6(vmsbc_vxm_h, void, ptr, ptr, tl, ptr, env, i32) | ||
54 | +DEF_HELPER_6(vmsbc_vxm_w, void, ptr, ptr, tl, ptr, env, i32) | ||
55 | +DEF_HELPER_6(vmsbc_vxm_d, void, ptr, ptr, tl, ptr, env, i32) | ||
56 | diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode | ||
57 | index XXXXXXX..XXXXXXX 100644 | ||
58 | --- a/target/riscv/insn32.decode | ||
59 | +++ b/target/riscv/insn32.decode | ||
60 | @@ -XXX,XX +XXX,XX @@ | ||
61 | @r2_nfvm ... ... vm:1 ..... ..... ... ..... ....... &r2nfvm %nf %rs1 %rd | ||
62 | @r_nfvm ... ... vm:1 ..... ..... ... ..... ....... &rnfvm %nf %rs2 %rs1 %rd | ||
63 | @r_vm ...... vm:1 ..... ..... ... ..... ....... &rmrr %rs2 %rs1 %rd | ||
64 | +@r_vm_1 ...... . ..... ..... ... ..... ....... &rmrr vm=1 %rs2 %rs1 %rd | ||
65 | @r_wdvm ..... wd:1 vm:1 ..... ..... ... ..... ....... &rwdvm %rs2 %rs1 %rd | ||
66 | @r2_zimm . zimm:11 ..... ... ..... ....... %rs1 %rd | ||
67 | |||
68 | @@ -XXX,XX +XXX,XX @@ vwsubu_wv 110110 . ..... ..... 010 ..... 1010111 @r_vm | ||
69 | vwsubu_wx 110110 . ..... ..... 110 ..... 1010111 @r_vm | ||
70 | vwsub_wv 110111 . ..... ..... 010 ..... 1010111 @r_vm | ||
71 | vwsub_wx 110111 . ..... ..... 110 ..... 1010111 @r_vm | ||
72 | +vadc_vvm 010000 1 ..... ..... 000 ..... 1010111 @r_vm_1 | ||
73 | +vadc_vxm 010000 1 ..... ..... 100 ..... 1010111 @r_vm_1 | ||
74 | +vadc_vim 010000 1 ..... ..... 011 ..... 1010111 @r_vm_1 | ||
75 | +vmadc_vvm 010001 1 ..... ..... 000 ..... 1010111 @r_vm_1 | ||
76 | +vmadc_vxm 010001 1 ..... ..... 100 ..... 1010111 @r_vm_1 | ||
77 | +vmadc_vim 010001 1 ..... ..... 011 ..... 1010111 @r_vm_1 | ||
78 | +vsbc_vvm 010010 1 ..... ..... 000 ..... 1010111 @r_vm_1 | ||
79 | +vsbc_vxm 010010 1 ..... ..... 100 ..... 1010111 @r_vm_1 | ||
80 | +vmsbc_vvm 010011 1 ..... ..... 000 ..... 1010111 @r_vm_1 | ||
81 | +vmsbc_vxm 010011 1 ..... ..... 100 ..... 1010111 @r_vm_1 | ||
82 | |||
83 | vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm | ||
84 | vsetvl 1000000 ..... ..... 111 ..... 1010111 @r | ||
85 | diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c | ||
86 | index XXXXXXX..XXXXXXX 100644 | ||
87 | --- a/target/riscv/insn_trans/trans_rvv.inc.c | ||
88 | +++ b/target/riscv/insn_trans/trans_rvv.inc.c | ||
89 | @@ -XXX,XX +XXX,XX @@ GEN_OPIWX_WIDEN_TRANS(vwaddu_wx) | ||
90 | GEN_OPIWX_WIDEN_TRANS(vwadd_wx) | ||
91 | GEN_OPIWX_WIDEN_TRANS(vwsubu_wx) | 23 | GEN_OPIWX_WIDEN_TRANS(vwsubu_wx) |
92 | GEN_OPIWX_WIDEN_TRANS(vwsub_wx) | 24 | GEN_OPIWX_WIDEN_TRANS(vwsub_wx) |
25 | |||
26 | +static bool opivv_trans(uint32_t vd, uint32_t vs1, uint32_t vs2, uint32_t vm, | ||
27 | + gen_helper_gvec_4_ptr *fn, DisasContext *s) | ||
28 | +{ | ||
29 | + uint32_t data = 0; | ||
30 | + TCGLabel *over = gen_new_label(); | ||
31 | + tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); | ||
32 | + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); | ||
93 | + | 33 | + |
94 | +/* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */ | 34 | + data = FIELD_DP32(data, VDATA, VM, vm); |
95 | +/* OPIVV without GVEC IR */ | 35 | + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); |
96 | +#define GEN_OPIVV_TRANS(NAME, CHECK) \ | 36 | + data = FIELD_DP32(data, VDATA, VTA, s->vta); |
97 | +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ | 37 | + data = FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s); |
98 | +{ \ | 38 | + data = FIELD_DP32(data, VDATA, VMA, s->vma); |
99 | + if (CHECK(s, a)) { \ | 39 | + tcg_gen_gvec_4_ptr(vreg_ofs(s, vd), vreg_ofs(s, 0), vreg_ofs(s, vs1), |
100 | + uint32_t data = 0; \ | 40 | + vreg_ofs(s, vs2), cpu_env, s->cfg_ptr->vlen / 8, |
101 | + static gen_helper_gvec_4_ptr * const fns[4] = { \ | 41 | + s->cfg_ptr->vlen / 8, data, fn); |
102 | + gen_helper_##NAME##_b, gen_helper_##NAME##_h, \ | 42 | + mark_vs_dirty(s); |
103 | + gen_helper_##NAME##_w, gen_helper_##NAME##_d, \ | 43 | + gen_set_label(over); |
104 | + }; \ | 44 | + return true; |
105 | + TCGLabel *over = gen_new_label(); \ | ||
106 | + tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ | ||
107 | + \ | ||
108 | + data = FIELD_DP32(data, VDATA, MLEN, s->mlen); \ | ||
109 | + data = FIELD_DP32(data, VDATA, VM, a->vm); \ | ||
110 | + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ | ||
111 | + tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ | ||
112 | + vreg_ofs(s, a->rs1), \ | ||
113 | + vreg_ofs(s, a->rs2), cpu_env, 0, \ | ||
114 | + s->vlen / 8, data, fns[s->sew]); \ | ||
115 | + gen_set_label(over); \ | ||
116 | + return true; \ | ||
117 | + } \ | ||
118 | + return false; \ | ||
119 | +} | 45 | +} |
120 | + | 46 | + |
121 | +/* | 47 | /* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */ |
122 | + * For vadc and vsbc, an illegal instruction exception is raised if the | 48 | /* OPIVV without GVEC IR */ |
123 | + * destination vector register is v0 and LMUL > 1. (Section 12.3) | 49 | -#define GEN_OPIVV_TRANS(NAME, CHECK) \ |
124 | + */ | 50 | -static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ |
125 | +static bool opivv_vadc_check(DisasContext *s, arg_rmrr *a) | 51 | -{ \ |
126 | +{ | 52 | - if (CHECK(s, a)) { \ |
127 | + return (vext_check_isa_ill(s) && | 53 | - uint32_t data = 0; \ |
128 | + vext_check_reg(s, a->rd, false) && | 54 | - static gen_helper_gvec_4_ptr * const fns[4] = { \ |
129 | + vext_check_reg(s, a->rs2, false) && | 55 | - gen_helper_##NAME##_b, gen_helper_##NAME##_h, \ |
130 | + vext_check_reg(s, a->rs1, false) && | 56 | - gen_helper_##NAME##_w, gen_helper_##NAME##_d, \ |
131 | + ((a->rd != 0) || (s->lmul == 0))); | 57 | - }; \ |
132 | +} | 58 | - TCGLabel *over = gen_new_label(); \ |
133 | + | 59 | - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ |
134 | +GEN_OPIVV_TRANS(vadc_vvm, opivv_vadc_check) | 60 | - tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ |
135 | +GEN_OPIVV_TRANS(vsbc_vvm, opivv_vadc_check) | 61 | - \ |
136 | + | 62 | - data = FIELD_DP32(data, VDATA, VM, a->vm); \ |
137 | +/* | 63 | - data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ |
138 | + * For vmadc and vmsbc, an illegal instruction exception is raised if the | 64 | - data = FIELD_DP32(data, VDATA, VTA, s->vta); \ |
139 | + * destination vector register overlaps a source vector register group. | 65 | - data = \ |
140 | + */ | 66 | - FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s);\ |
141 | +static bool opivv_vmadc_check(DisasContext *s, arg_rmrr *a) | 67 | - data = FIELD_DP32(data, VDATA, VMA, s->vma); \ |
142 | +{ | 68 | - tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ |
143 | + return (vext_check_isa_ill(s) && | 69 | - vreg_ofs(s, a->rs1), \ |
144 | + vext_check_reg(s, a->rs2, false) && | 70 | - vreg_ofs(s, a->rs2), cpu_env, \ |
145 | + vext_check_reg(s, a->rs1, false) && | 71 | - s->cfg_ptr->vlen / 8, \ |
146 | + vext_check_overlap_group(a->rd, 1, a->rs1, 1 << s->lmul) && | 72 | - s->cfg_ptr->vlen / 8, data, \ |
147 | + vext_check_overlap_group(a->rd, 1, a->rs2, 1 << s->lmul)); | 73 | - fns[s->sew]); \ |
148 | +} | 74 | - mark_vs_dirty(s); \ |
149 | + | 75 | - gen_set_label(over); \ |
150 | +GEN_OPIVV_TRANS(vmadc_vvm, opivv_vmadc_check) | 76 | - return true; \ |
151 | +GEN_OPIVV_TRANS(vmsbc_vvm, opivv_vmadc_check) | 77 | - } \ |
152 | + | 78 | - return false; \ |
153 | +static bool opivx_vadc_check(DisasContext *s, arg_rmrr *a) | 79 | +#define GEN_OPIVV_TRANS(NAME, CHECK) \ |
154 | +{ | ||
155 | + return (vext_check_isa_ill(s) && | ||
156 | + vext_check_reg(s, a->rd, false) && | ||
157 | + vext_check_reg(s, a->rs2, false) && | ||
158 | + ((a->rd != 0) || (s->lmul == 0))); | ||
159 | +} | ||
160 | + | ||
161 | +/* OPIVX without GVEC IR */ | ||
162 | +#define GEN_OPIVX_TRANS(NAME, CHECK) \ | ||
163 | +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ | 80 | +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ |
164 | +{ \ | 81 | +{ \ |
165 | + if (CHECK(s, a)) { \ | 82 | + if (CHECK(s, a)) { \ |
166 | + static gen_helper_opivx * const fns[4] = { \ | 83 | + static gen_helper_gvec_4_ptr * const fns[4] = { \ |
167 | + gen_helper_##NAME##_b, gen_helper_##NAME##_h, \ | 84 | + gen_helper_##NAME##_b, gen_helper_##NAME##_h, \ |
168 | + gen_helper_##NAME##_w, gen_helper_##NAME##_d, \ | 85 | + gen_helper_##NAME##_w, gen_helper_##NAME##_d, \ |
169 | + }; \ | 86 | + }; \ |
170 | + \ | 87 | + return opivv_trans(a->rd, a->rs1, a->rs2, a->vm, fns[s->sew], s);\ |
171 | + return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fns[s->sew], s);\ | ||
172 | + } \ | 88 | + } \ |
173 | + return false; \ | 89 | + return false; \ |
174 | +} | ||
175 | + | ||
176 | +GEN_OPIVX_TRANS(vadc_vxm, opivx_vadc_check) | ||
177 | +GEN_OPIVX_TRANS(vsbc_vxm, opivx_vadc_check) | ||
178 | + | ||
179 | +static bool opivx_vmadc_check(DisasContext *s, arg_rmrr *a) | ||
180 | +{ | ||
181 | + return (vext_check_isa_ill(s) && | ||
182 | + vext_check_reg(s, a->rs2, false) && | ||
183 | + vext_check_overlap_group(a->rd, 1, a->rs2, 1 << s->lmul)); | ||
184 | +} | ||
185 | + | ||
186 | +GEN_OPIVX_TRANS(vmadc_vxm, opivx_vmadc_check) | ||
187 | +GEN_OPIVX_TRANS(vmsbc_vxm, opivx_vmadc_check) | ||
188 | + | ||
189 | +/* OPIVI without GVEC IR */ | ||
190 | +#define GEN_OPIVI_TRANS(NAME, ZX, OPIVX, CHECK) \ | ||
191 | +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ | ||
192 | +{ \ | ||
193 | + if (CHECK(s, a)) { \ | ||
194 | + static gen_helper_opivx * const fns[4] = { \ | ||
195 | + gen_helper_##OPIVX##_b, gen_helper_##OPIVX##_h, \ | ||
196 | + gen_helper_##OPIVX##_w, gen_helper_##OPIVX##_d, \ | ||
197 | + }; \ | ||
198 | + return opivi_trans(a->rd, a->rs1, a->rs2, a->vm, \ | ||
199 | + fns[s->sew], s, ZX); \ | ||
200 | + } \ | ||
201 | + return false; \ | ||
202 | +} | ||
203 | + | ||
204 | +GEN_OPIVI_TRANS(vadc_vim, 0, vadc_vxm, opivx_vadc_check) | ||
205 | +GEN_OPIVI_TRANS(vmadc_vim, 0, vmadc_vxm, opivx_vmadc_check) | ||
206 | diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c | ||
207 | index XXXXXXX..XXXXXXX 100644 | ||
208 | --- a/target/riscv/vector_helper.c | ||
209 | +++ b/target/riscv/vector_helper.c | ||
210 | @@ -XXX,XX +XXX,XX @@ static void clearq(void *vd, uint32_t idx, uint32_t cnt, uint32_t tot) | ||
211 | vext_clear(cur, cnt, tot); | ||
212 | } | 90 | } |
213 | 91 | ||
214 | +static inline void vext_set_elem_mask(void *v0, int mlen, int index, | 92 | /* |
215 | + uint8_t value) | ||
216 | +{ | ||
217 | + int idx = (index * mlen) / 64; | ||
218 | + int pos = (index * mlen) % 64; | ||
219 | + uint64_t old = ((uint64_t *)v0)[idx]; | ||
220 | + ((uint64_t *)v0)[idx] = deposit64(old, pos, mlen, value); | ||
221 | +} | ||
222 | |||
223 | static inline int vext_elem_mask(void *v0, int mlen, int index) | ||
224 | { | ||
225 | @@ -XXX,XX +XXX,XX @@ GEN_VEXT_VX(vwadd_wx_w, 4, 8, clearq) | ||
226 | GEN_VEXT_VX(vwsub_wx_b, 1, 2, clearh) | ||
227 | GEN_VEXT_VX(vwsub_wx_h, 2, 4, clearl) | ||
228 | GEN_VEXT_VX(vwsub_wx_w, 4, 8, clearq) | ||
229 | + | ||
230 | +/* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */ | ||
231 | +#define DO_VADC(N, M, C) (N + M + C) | ||
232 | +#define DO_VSBC(N, M, C) (N - M - C) | ||
233 | + | ||
234 | +#define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP, CLEAR_FN) \ | ||
235 | +void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ | ||
236 | + CPURISCVState *env, uint32_t desc) \ | ||
237 | +{ \ | ||
238 | + uint32_t mlen = vext_mlen(desc); \ | ||
239 | + uint32_t vl = env->vl; \ | ||
240 | + uint32_t esz = sizeof(ETYPE); \ | ||
241 | + uint32_t vlmax = vext_maxsz(desc) / esz; \ | ||
242 | + uint32_t i; \ | ||
243 | + \ | ||
244 | + for (i = 0; i < vl; i++) { \ | ||
245 | + ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ | ||
246 | + ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ | ||
247 | + uint8_t carry = vext_elem_mask(v0, mlen, i); \ | ||
248 | + \ | ||
249 | + *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry); \ | ||
250 | + } \ | ||
251 | + CLEAR_FN(vd, vl, vl * esz, vlmax * esz); \ | ||
252 | +} | ||
253 | + | ||
254 | +GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t, H1, DO_VADC, clearb) | ||
255 | +GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC, clearh) | ||
256 | +GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC, clearl) | ||
257 | +GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC, clearq) | ||
258 | + | ||
259 | +GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t, H1, DO_VSBC, clearb) | ||
260 | +GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC, clearh) | ||
261 | +GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC, clearl) | ||
262 | +GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC, clearq) | ||
263 | + | ||
264 | +#define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP, CLEAR_FN) \ | ||
265 | +void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ | ||
266 | + CPURISCVState *env, uint32_t desc) \ | ||
267 | +{ \ | ||
268 | + uint32_t mlen = vext_mlen(desc); \ | ||
269 | + uint32_t vl = env->vl; \ | ||
270 | + uint32_t esz = sizeof(ETYPE); \ | ||
271 | + uint32_t vlmax = vext_maxsz(desc) / esz; \ | ||
272 | + uint32_t i; \ | ||
273 | + \ | ||
274 | + for (i = 0; i < vl; i++) { \ | ||
275 | + ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ | ||
276 | + uint8_t carry = vext_elem_mask(v0, mlen, i); \ | ||
277 | + \ | ||
278 | + *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry);\ | ||
279 | + } \ | ||
280 | + CLEAR_FN(vd, vl, vl * esz, vlmax * esz); \ | ||
281 | +} | ||
282 | + | ||
283 | +GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t, H1, DO_VADC, clearb) | ||
284 | +GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, DO_VADC, clearh) | ||
285 | +GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC, clearl) | ||
286 | +GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC, clearq) | ||
287 | + | ||
288 | +GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t, H1, DO_VSBC, clearb) | ||
289 | +GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC, clearh) | ||
290 | +GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC, clearl) | ||
291 | +GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC, clearq) | ||
292 | + | ||
293 | +#define DO_MADC(N, M, C) (C ? (__typeof(N))(N + M + 1) <= N : \ | ||
294 | + (__typeof(N))(N + M) < N) | ||
295 | +#define DO_MSBC(N, M, C) (C ? N <= M : N < M) | ||
296 | + | ||
297 | +#define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP) \ | ||
298 | +void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ | ||
299 | + CPURISCVState *env, uint32_t desc) \ | ||
300 | +{ \ | ||
301 | + uint32_t mlen = vext_mlen(desc); \ | ||
302 | + uint32_t vl = env->vl; \ | ||
303 | + uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE); \ | ||
304 | + uint32_t i; \ | ||
305 | + \ | ||
306 | + for (i = 0; i < vl; i++) { \ | ||
307 | + ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ | ||
308 | + ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ | ||
309 | + uint8_t carry = vext_elem_mask(v0, mlen, i); \ | ||
310 | + \ | ||
311 | + vext_set_elem_mask(vd, mlen, i, DO_OP(s2, s1, carry));\ | ||
312 | + } \ | ||
313 | + for (; i < vlmax; i++) { \ | ||
314 | + vext_set_elem_mask(vd, mlen, i, 0); \ | ||
315 | + } \ | ||
316 | +} | ||
317 | + | ||
318 | +GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t, H1, DO_MADC) | ||
319 | +GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC) | ||
320 | +GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC) | ||
321 | +GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC) | ||
322 | + | ||
323 | +GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t, H1, DO_MSBC) | ||
324 | +GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC) | ||
325 | +GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC) | ||
326 | +GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC) | ||
327 | + | ||
328 | +#define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP) \ | ||
329 | +void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ | ||
330 | + void *vs2, CPURISCVState *env, uint32_t desc) \ | ||
331 | +{ \ | ||
332 | + uint32_t mlen = vext_mlen(desc); \ | ||
333 | + uint32_t vl = env->vl; \ | ||
334 | + uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE); \ | ||
335 | + uint32_t i; \ | ||
336 | + \ | ||
337 | + for (i = 0; i < vl; i++) { \ | ||
338 | + ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ | ||
339 | + uint8_t carry = vext_elem_mask(v0, mlen, i); \ | ||
340 | + \ | ||
341 | + vext_set_elem_mask(vd, mlen, i, \ | ||
342 | + DO_OP(s2, (ETYPE)(target_long)s1, carry)); \ | ||
343 | + } \ | ||
344 | + for (; i < vlmax; i++) { \ | ||
345 | + vext_set_elem_mask(vd, mlen, i, 0); \ | ||
346 | + } \ | ||
347 | +} | ||
348 | + | ||
349 | +GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t, H1, DO_MADC) | ||
350 | +GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC) | ||
351 | +GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC) | ||
352 | +GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC) | ||
353 | + | ||
354 | +GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t, H1, DO_MSBC) | ||
355 | +GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC) | ||
356 | +GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC) | ||
357 | +GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC) | ||
358 | -- | 93 | -- |
359 | 2.27.0 | 94 | 2.41.0 |
360 | |||
361 | diff view generated by jsdifflib |
1 | From: LIU Zhiwei <zhiwei_liu@c-sky.com> | 1 | From: Nazar Kazakov <nazar.kazakov@codethink.co.uk> |
---|---|---|---|
2 | 2 | ||
3 | Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> | 3 | Remove the redundant "vl == 0" check which is already included within the vstart >= vl check, when vl == 0. |
4 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 4 | |
5 | Message-id: 20200623215920.2594-50-zhiwei_liu@c-sky.com | 5 | Signed-off-by: Nazar Kazakov <nazar.kazakov@codethink.co.uk> |
6 | Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn> | ||
7 | Signed-off-by: Max Chou <max.chou@sifive.com> | ||
8 | Acked-by: Alistair Francis <alistair.francis@wdc.com> | ||
9 | Message-ID: <20230711165917.2629866-4-max.chou@sifive.com> | ||
6 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | 10 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> |
7 | --- | 11 | --- |
8 | target/riscv/helper.h | 9 ++++++ | 12 | target/riscv/insn_trans/trans_rvv.c.inc | 31 +------------------------ |
9 | target/riscv/insn32.decode | 8 +++++ | 13 | 1 file changed, 1 insertion(+), 30 deletions(-) |
10 | target/riscv/insn_trans/trans_rvv.inc.c | 35 ++++++++++++++++++++++ | ||
11 | target/riscv/vector_helper.c | 40 +++++++++++++++++++++++++ | ||
12 | 4 files changed, 92 insertions(+) | ||
13 | 14 | ||
14 | diff --git a/target/riscv/helper.h b/target/riscv/helper.h | 15 | diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc |
15 | index XXXXXXX..XXXXXXX 100644 | 16 | index XXXXXXX..XXXXXXX 100644 |
16 | --- a/target/riscv/helper.h | 17 | --- a/target/riscv/insn_trans/trans_rvv.c.inc |
17 | +++ b/target/riscv/helper.h | 18 | +++ b/target/riscv/insn_trans/trans_rvv.c.inc |
18 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vfredmin_vs_d, void, ptr, ptr, ptr, ptr, env, i32) | 19 | @@ -XXX,XX +XXX,XX @@ static bool ldst_us_trans(uint32_t vd, uint32_t rs1, uint32_t data, |
19 | 20 | TCGv_i32 desc; | |
20 | DEF_HELPER_6(vfwredsum_vs_h, void, ptr, ptr, ptr, ptr, env, i32) | 21 | |
21 | DEF_HELPER_6(vfwredsum_vs_w, void, ptr, ptr, ptr, ptr, env, i32) | 22 | TCGLabel *over = gen_new_label(); |
22 | + | 23 | - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); |
23 | +DEF_HELPER_6(vmand_mm, void, ptr, ptr, ptr, ptr, env, i32) | 24 | tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); |
24 | +DEF_HELPER_6(vmnand_mm, void, ptr, ptr, ptr, ptr, env, i32) | 25 | |
25 | +DEF_HELPER_6(vmandnot_mm, void, ptr, ptr, ptr, ptr, env, i32) | 26 | dest = tcg_temp_new_ptr(); |
26 | +DEF_HELPER_6(vmxor_mm, void, ptr, ptr, ptr, ptr, env, i32) | 27 | @@ -XXX,XX +XXX,XX @@ static bool ldst_stride_trans(uint32_t vd, uint32_t rs1, uint32_t rs2, |
27 | +DEF_HELPER_6(vmor_mm, void, ptr, ptr, ptr, ptr, env, i32) | 28 | TCGv_i32 desc; |
28 | +DEF_HELPER_6(vmnor_mm, void, ptr, ptr, ptr, ptr, env, i32) | 29 | |
29 | +DEF_HELPER_6(vmornot_mm, void, ptr, ptr, ptr, ptr, env, i32) | 30 | TCGLabel *over = gen_new_label(); |
30 | +DEF_HELPER_6(vmxnor_mm, void, ptr, ptr, ptr, ptr, env, i32) | 31 | - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); |
31 | diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode | 32 | tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); |
32 | index XXXXXXX..XXXXXXX 100644 | 33 | |
33 | --- a/target/riscv/insn32.decode | 34 | dest = tcg_temp_new_ptr(); |
34 | +++ b/target/riscv/insn32.decode | 35 | @@ -XXX,XX +XXX,XX @@ static bool ldst_index_trans(uint32_t vd, uint32_t rs1, uint32_t vs2, |
35 | @@ -XXX,XX +XXX,XX @@ vfredmin_vs 000101 . ..... ..... 001 ..... 1010111 @r_vm | 36 | TCGv_i32 desc; |
36 | vfredmax_vs 000111 . ..... ..... 001 ..... 1010111 @r_vm | 37 | |
37 | # Vector widening ordered and unordered float reduction sum | 38 | TCGLabel *over = gen_new_label(); |
38 | vfwredsum_vs 1100-1 . ..... ..... 001 ..... 1010111 @r_vm | 39 | - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); |
39 | +vmand_mm 011001 - ..... ..... 010 ..... 1010111 @r | 40 | tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); |
40 | +vmnand_mm 011101 - ..... ..... 010 ..... 1010111 @r | 41 | |
41 | +vmandnot_mm 011000 - ..... ..... 010 ..... 1010111 @r | 42 | dest = tcg_temp_new_ptr(); |
42 | +vmxor_mm 011011 - ..... ..... 010 ..... 1010111 @r | 43 | @@ -XXX,XX +XXX,XX @@ static bool ldff_trans(uint32_t vd, uint32_t rs1, uint32_t data, |
43 | +vmor_mm 011010 - ..... ..... 010 ..... 1010111 @r | 44 | TCGv_i32 desc; |
44 | +vmnor_mm 011110 - ..... ..... 010 ..... 1010111 @r | 45 | |
45 | +vmornot_mm 011100 - ..... ..... 010 ..... 1010111 @r | 46 | TCGLabel *over = gen_new_label(); |
46 | +vmxnor_mm 011111 - ..... ..... 010 ..... 1010111 @r | 47 | - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); |
47 | 48 | tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); | |
48 | vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm | 49 | |
49 | vsetvl 1000000 ..... ..... 111 ..... 1010111 @r | 50 | dest = tcg_temp_new_ptr(); |
50 | diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c | 51 | @@ -XXX,XX +XXX,XX @@ do_opivv_gvec(DisasContext *s, arg_rmrr *a, GVecGen3Fn *gvec_fn, |
51 | index XXXXXXX..XXXXXXX 100644 | 52 | return false; |
52 | --- a/target/riscv/insn_trans/trans_rvv.inc.c | 53 | } |
53 | +++ b/target/riscv/insn_trans/trans_rvv.inc.c | 54 | |
54 | @@ -XXX,XX +XXX,XX @@ GEN_OPFVV_TRANS(vfredmin_vs, reduction_check) | 55 | - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); |
55 | 56 | tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); | |
56 | /* Vector Widening Floating-Point Reduction Instructions */ | 57 | |
57 | GEN_OPFVV_WIDEN_TRANS(vfwredsum_vs, reduction_check) | 58 | if (a->vm && s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) { |
58 | + | 59 | @@ -XXX,XX +XXX,XX @@ static bool opivx_trans(uint32_t vd, uint32_t rs1, uint32_t vs2, uint32_t vm, |
59 | +/* | 60 | uint32_t data = 0; |
60 | + *** Vector Mask Operations | 61 | |
61 | + */ | 62 | TCGLabel *over = gen_new_label(); |
62 | + | 63 | - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); |
63 | +/* Vector Mask-Register Logical Instructions */ | 64 | tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); |
64 | +#define GEN_MM_TRANS(NAME) \ | 65 | |
65 | +static bool trans_##NAME(DisasContext *s, arg_r *a) \ | 66 | dest = tcg_temp_new_ptr(); |
66 | +{ \ | 67 | @@ -XXX,XX +XXX,XX @@ static bool opivi_trans(uint32_t vd, uint32_t imm, uint32_t vs2, uint32_t vm, |
67 | + if (vext_check_isa_ill(s)) { \ | 68 | uint32_t data = 0; |
68 | + uint32_t data = 0; \ | 69 | |
69 | + gen_helper_gvec_4_ptr *fn = gen_helper_##NAME; \ | 70 | TCGLabel *over = gen_new_label(); |
70 | + TCGLabel *over = gen_new_label(); \ | 71 | - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); |
71 | + tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ | 72 | tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); |
72 | + \ | 73 | |
73 | + data = FIELD_DP32(data, VDATA, MLEN, s->mlen); \ | 74 | dest = tcg_temp_new_ptr(); |
74 | + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ | 75 | @@ -XXX,XX +XXX,XX @@ static bool do_opivv_widen(DisasContext *s, arg_rmrr *a, |
75 | + tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ | 76 | if (checkfn(s, a)) { |
76 | + vreg_ofs(s, a->rs1), \ | 77 | uint32_t data = 0; |
77 | + vreg_ofs(s, a->rs2), cpu_env, 0, \ | 78 | TCGLabel *over = gen_new_label(); |
78 | + s->vlen / 8, data, fn); \ | 79 | - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); |
79 | + gen_set_label(over); \ | 80 | tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); |
80 | + return true; \ | 81 | |
81 | + } \ | 82 | data = FIELD_DP32(data, VDATA, VM, a->vm); |
82 | + return false; \ | 83 | @@ -XXX,XX +XXX,XX @@ static bool do_opiwv_widen(DisasContext *s, arg_rmrr *a, |
83 | +} | 84 | if (opiwv_widen_check(s, a)) { |
84 | + | 85 | uint32_t data = 0; |
85 | +GEN_MM_TRANS(vmand_mm) | 86 | TCGLabel *over = gen_new_label(); |
86 | +GEN_MM_TRANS(vmnand_mm) | 87 | - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); |
87 | +GEN_MM_TRANS(vmandnot_mm) | 88 | tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); |
88 | +GEN_MM_TRANS(vmxor_mm) | 89 | |
89 | +GEN_MM_TRANS(vmor_mm) | 90 | data = FIELD_DP32(data, VDATA, VM, a->vm); |
90 | +GEN_MM_TRANS(vmnor_mm) | 91 | @@ -XXX,XX +XXX,XX @@ static bool opivv_trans(uint32_t vd, uint32_t vs1, uint32_t vs2, uint32_t vm, |
91 | +GEN_MM_TRANS(vmornot_mm) | 92 | { |
92 | +GEN_MM_TRANS(vmxnor_mm) | 93 | uint32_t data = 0; |
93 | diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c | 94 | TCGLabel *over = gen_new_label(); |
94 | index XXXXXXX..XXXXXXX 100644 | 95 | - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); |
95 | --- a/target/riscv/vector_helper.c | 96 | tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); |
96 | +++ b/target/riscv/vector_helper.c | 97 | |
97 | @@ -XXX,XX +XXX,XX @@ void HELPER(vfwredsum_vs_w)(void *vd, void *v0, void *vs1, | 98 | data = FIELD_DP32(data, VDATA, VM, vm); |
98 | *((uint64_t *)vd) = s1; | 99 | @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ |
99 | clearq(vd, 1, sizeof(uint64_t), tot); | 100 | gen_helper_##NAME##_w, \ |
100 | } | 101 | }; \ |
101 | + | 102 | TCGLabel *over = gen_new_label(); \ |
102 | +/* | 103 | - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ |
103 | + *** Vector Mask Operations | 104 | tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ |
104 | + */ | 105 | \ |
105 | +/* Vector Mask-Register Logical Instructions */ | 106 | data = FIELD_DP32(data, VDATA, VM, a->vm); \ |
106 | +#define GEN_VEXT_MASK_VV(NAME, OP) \ | 107 | @@ -XXX,XX +XXX,XX @@ static bool trans_vmv_v_v(DisasContext *s, arg_vmv_v_v *a) |
107 | +void HELPER(NAME)(void *vd, void *v0, void *vs1, \ | 108 | gen_helper_vmv_v_v_w, gen_helper_vmv_v_v_d, |
108 | + void *vs2, CPURISCVState *env, \ | 109 | }; |
109 | + uint32_t desc) \ | 110 | TCGLabel *over = gen_new_label(); |
110 | +{ \ | 111 | - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); |
111 | + uint32_t mlen = vext_mlen(desc); \ | 112 | tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); |
112 | + uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen; \ | 113 | |
113 | + uint32_t vl = env->vl; \ | 114 | tcg_gen_gvec_2_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, a->rs1), |
114 | + uint32_t i; \ | 115 | @@ -XXX,XX +XXX,XX @@ static bool trans_vmv_v_x(DisasContext *s, arg_vmv_v_x *a) |
115 | + int a, b; \ | 116 | vext_check_ss(s, a->rd, 0, 1)) { |
116 | + \ | 117 | TCGv s1; |
117 | + for (i = 0; i < vl; i++) { \ | 118 | TCGLabel *over = gen_new_label(); |
118 | + a = vext_elem_mask(vs1, mlen, i); \ | 119 | - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); |
119 | + b = vext_elem_mask(vs2, mlen, i); \ | 120 | tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); |
120 | + vext_set_elem_mask(vd, mlen, i, OP(b, a)); \ | 121 | |
121 | + } \ | 122 | s1 = get_gpr(s, a->rs1, EXT_SIGN); |
122 | + for (; i < vlmax; i++) { \ | 123 | @@ -XXX,XX +XXX,XX @@ static bool trans_vmv_v_i(DisasContext *s, arg_vmv_v_i *a) |
123 | + vext_set_elem_mask(vd, mlen, i, 0); \ | 124 | gen_helper_vmv_v_x_w, gen_helper_vmv_v_x_d, |
124 | + } \ | 125 | }; |
125 | +} | 126 | TCGLabel *over = gen_new_label(); |
126 | + | 127 | - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); |
127 | +#define DO_NAND(N, M) (!(N & M)) | 128 | tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); |
128 | +#define DO_ANDNOT(N, M) (N & !M) | 129 | |
129 | +#define DO_NOR(N, M) (!(N | M)) | 130 | s1 = tcg_constant_i64(simm); |
130 | +#define DO_ORNOT(N, M) (N | !M) | 131 | @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ |
131 | +#define DO_XNOR(N, M) (!(N ^ M)) | 132 | }; \ |
132 | + | 133 | TCGLabel *over = gen_new_label(); \ |
133 | +GEN_VEXT_MASK_VV(vmand_mm, DO_AND) | 134 | gen_set_rm(s, RISCV_FRM_DYN); \ |
134 | +GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND) | 135 | - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ |
135 | +GEN_VEXT_MASK_VV(vmandnot_mm, DO_ANDNOT) | 136 | tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ |
136 | +GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR) | 137 | \ |
137 | +GEN_VEXT_MASK_VV(vmor_mm, DO_OR) | 138 | data = FIELD_DP32(data, VDATA, VM, a->vm); \ |
138 | +GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR) | 139 | @@ -XXX,XX +XXX,XX @@ static bool opfvf_trans(uint32_t vd, uint32_t rs1, uint32_t vs2, |
139 | +GEN_VEXT_MASK_VV(vmornot_mm, DO_ORNOT) | 140 | TCGv_i64 t1; |
140 | +GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR) | 141 | |
142 | TCGLabel *over = gen_new_label(); | ||
143 | - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); | ||
144 | tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); | ||
145 | |||
146 | dest = tcg_temp_new_ptr(); | ||
147 | @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ | ||
148 | }; \ | ||
149 | TCGLabel *over = gen_new_label(); \ | ||
150 | gen_set_rm(s, RISCV_FRM_DYN); \ | ||
151 | - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ | ||
152 | tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);\ | ||
153 | \ | ||
154 | data = FIELD_DP32(data, VDATA, VM, a->vm); \ | ||
155 | @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ | ||
156 | }; \ | ||
157 | TCGLabel *over = gen_new_label(); \ | ||
158 | gen_set_rm(s, RISCV_FRM_DYN); \ | ||
159 | - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ | ||
160 | tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ | ||
161 | \ | ||
162 | data = FIELD_DP32(data, VDATA, VM, a->vm); \ | ||
163 | @@ -XXX,XX +XXX,XX @@ static bool do_opfv(DisasContext *s, arg_rmr *a, | ||
164 | uint32_t data = 0; | ||
165 | TCGLabel *over = gen_new_label(); | ||
166 | gen_set_rm_chkfrm(s, rm); | ||
167 | - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); | ||
168 | tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); | ||
169 | |||
170 | data = FIELD_DP32(data, VDATA, VM, a->vm); | ||
171 | @@ -XXX,XX +XXX,XX @@ static bool trans_vfmv_v_f(DisasContext *s, arg_vfmv_v_f *a) | ||
172 | gen_helper_vmv_v_x_d, | ||
173 | }; | ||
174 | TCGLabel *over = gen_new_label(); | ||
175 | - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); | ||
176 | tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); | ||
177 | |||
178 | t1 = tcg_temp_new_i64(); | ||
179 | @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ | ||
180 | }; \ | ||
181 | TCGLabel *over = gen_new_label(); \ | ||
182 | gen_set_rm_chkfrm(s, FRM); \ | ||
183 | - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ | ||
184 | tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ | ||
185 | \ | ||
186 | data = FIELD_DP32(data, VDATA, VM, a->vm); \ | ||
187 | @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ | ||
188 | }; \ | ||
189 | TCGLabel *over = gen_new_label(); \ | ||
190 | gen_set_rm(s, RISCV_FRM_DYN); \ | ||
191 | - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ | ||
192 | tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ | ||
193 | \ | ||
194 | data = FIELD_DP32(data, VDATA, VM, a->vm); \ | ||
195 | @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ | ||
196 | }; \ | ||
197 | TCGLabel *over = gen_new_label(); \ | ||
198 | gen_set_rm_chkfrm(s, FRM); \ | ||
199 | - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ | ||
200 | tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ | ||
201 | \ | ||
202 | data = FIELD_DP32(data, VDATA, VM, a->vm); \ | ||
203 | @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ | ||
204 | }; \ | ||
205 | TCGLabel *over = gen_new_label(); \ | ||
206 | gen_set_rm_chkfrm(s, FRM); \ | ||
207 | - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ | ||
208 | tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ | ||
209 | \ | ||
210 | data = FIELD_DP32(data, VDATA, VM, a->vm); \ | ||
211 | @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_r *a) \ | ||
212 | uint32_t data = 0; \ | ||
213 | gen_helper_gvec_4_ptr *fn = gen_helper_##NAME; \ | ||
214 | TCGLabel *over = gen_new_label(); \ | ||
215 | - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ | ||
216 | tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ | ||
217 | \ | ||
218 | data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ | ||
219 | @@ -XXX,XX +XXX,XX @@ static bool trans_vid_v(DisasContext *s, arg_vid_v *a) | ||
220 | require_vm(a->vm, a->rd)) { | ||
221 | uint32_t data = 0; | ||
222 | TCGLabel *over = gen_new_label(); | ||
223 | - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); | ||
224 | tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); | ||
225 | |||
226 | data = FIELD_DP32(data, VDATA, VM, a->vm); | ||
227 | @@ -XXX,XX +XXX,XX @@ static bool trans_vmv_s_x(DisasContext *s, arg_vmv_s_x *a) | ||
228 | TCGv s1; | ||
229 | TCGLabel *over = gen_new_label(); | ||
230 | |||
231 | - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); | ||
232 | tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); | ||
233 | |||
234 | t1 = tcg_temp_new_i64(); | ||
235 | @@ -XXX,XX +XXX,XX @@ static bool trans_vfmv_s_f(DisasContext *s, arg_vfmv_s_f *a) | ||
236 | TCGv_i64 t1; | ||
237 | TCGLabel *over = gen_new_label(); | ||
238 | |||
239 | - /* if vl == 0 or vstart >= vl, skip vector register write back */ | ||
240 | - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); | ||
241 | + /* if vstart >= vl, skip vector register write back */ | ||
242 | tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); | ||
243 | |||
244 | /* NaN-box f[rs1] */ | ||
245 | @@ -XXX,XX +XXX,XX @@ static bool int_ext_op(DisasContext *s, arg_rmr *a, uint8_t seq) | ||
246 | uint32_t data = 0; | ||
247 | gen_helper_gvec_3_ptr *fn; | ||
248 | TCGLabel *over = gen_new_label(); | ||
249 | - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); | ||
250 | tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); | ||
251 | |||
252 | static gen_helper_gvec_3_ptr * const fns[6][4] = { | ||
141 | -- | 253 | -- |
142 | 2.27.0 | 254 | 2.41.0 |
143 | |||
144 | diff view generated by jsdifflib |
1 | From: LIU Zhiwei <zhiwei_liu@c-sky.com> | 1 | From: Lawrence Hunter <lawrence.hunter@codethink.co.uk> |
---|---|---|---|
2 | 2 | ||
3 | vsetvl and vsetvli are two configure instructions for vl, vtype. TB flags | 3 | This commit adds support for the Zvbc vector-crypto extension, which |
4 | should update after configure instructions. The (ill, lmul, sew ) of vtype | 4 | consists of the following instructions: |
5 | and the bit of (VSTART == 0 && VL == VLMAX) will be placed within tb_flags. | 5 | |
6 | 6 | * vclmulh.[vx,vv] | |
7 | Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> | 7 | * vclmul.[vx,vv] |
8 | Reviewed-by: Alistair Francis <alistair.francis@wdc.com> | 8 | |
9 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 9 | Translation functions are defined in |
10 | Message-id: 20200623215920.2594-5-zhiwei_liu@c-sky.com | 10 | `target/riscv/insn_trans/trans_rvvk.c.inc` and helpers are defined in |
11 | `target/riscv/vcrypto_helper.c`. | ||
12 | |||
13 | Co-authored-by: Nazar Kazakov <nazar.kazakov@codethink.co.uk> | ||
14 | Co-authored-by: Max Chou <max.chou@sifive.com> | ||
15 | Signed-off-by: Nazar Kazakov <nazar.kazakov@codethink.co.uk> | ||
16 | Signed-off-by: Lawrence Hunter <lawrence.hunter@codethink.co.uk> | ||
17 | Signed-off-by: Max Chou <max.chou@sifive.com> | ||
18 | [max.chou@sifive.com: Exposed x-zvbc property] | ||
19 | Message-ID: <20230711165917.2629866-5-max.chou@sifive.com> | ||
11 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | 20 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> |
12 | --- | 21 | --- |
13 | target/riscv/cpu.h | 63 +++++++++++++++++--- | 22 | target/riscv/cpu_cfg.h | 1 + |
14 | target/riscv/helper.h | 3 + | 23 | target/riscv/helper.h | 6 +++ |
15 | target/riscv/insn32.decode | 5 ++ | 24 | target/riscv/insn32.decode | 6 +++ |
16 | target/riscv/insn_trans/trans_rvv.inc.c | 79 +++++++++++++++++++++++++ | 25 | target/riscv/cpu.c | 9 ++++ |
17 | target/riscv/translate.c | 17 +++++- | 26 | target/riscv/translate.c | 1 + |
18 | target/riscv/vector_helper.c | 53 +++++++++++++++++ | 27 | target/riscv/vcrypto_helper.c | 59 ++++++++++++++++++++++ |
19 | target/riscv/Makefile.objs | 2 +- | 28 | target/riscv/insn_trans/trans_rvvk.c.inc | 62 ++++++++++++++++++++++++ |
20 | 7 files changed, 210 insertions(+), 12 deletions(-) | 29 | target/riscv/meson.build | 3 +- |
21 | create mode 100644 target/riscv/insn_trans/trans_rvv.inc.c | 30 | 8 files changed, 146 insertions(+), 1 deletion(-) |
22 | create mode 100644 target/riscv/vector_helper.c | 31 | create mode 100644 target/riscv/vcrypto_helper.c |
23 | 32 | create mode 100644 target/riscv/insn_trans/trans_rvvk.c.inc | |
24 | diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h | 33 | |
25 | index XXXXXXX..XXXXXXX 100644 | 34 | diff --git a/target/riscv/cpu_cfg.h b/target/riscv/cpu_cfg.h |
26 | --- a/target/riscv/cpu.h | 35 | index XXXXXXX..XXXXXXX 100644 |
27 | +++ b/target/riscv/cpu.h | 36 | --- a/target/riscv/cpu_cfg.h |
28 | @@ -XXX,XX +XXX,XX @@ | 37 | +++ b/target/riscv/cpu_cfg.h |
29 | #define RISCV_CPU_H | 38 | @@ -XXX,XX +XXX,XX @@ struct RISCVCPUConfig { |
30 | 39 | bool ext_zve32f; | |
31 | #include "hw/core/cpu.h" | 40 | bool ext_zve64f; |
32 | +#include "hw/registerfields.h" | 41 | bool ext_zve64d; |
33 | #include "exec/cpu-defs.h" | 42 | + bool ext_zvbc; |
34 | #include "fpu/softfloat-types.h" | 43 | bool ext_zmmul; |
35 | 44 | bool ext_zvfbfmin; | |
36 | @@ -XXX,XX +XXX,XX @@ typedef struct CPURISCVState CPURISCVState; | 45 | bool ext_zvfbfwma; |
37 | |||
38 | #define RV_VLEN_MAX 512 | ||
39 | |||
40 | +FIELD(VTYPE, VLMUL, 0, 2) | ||
41 | +FIELD(VTYPE, VSEW, 2, 3) | ||
42 | +FIELD(VTYPE, VEDIV, 5, 2) | ||
43 | +FIELD(VTYPE, RESERVED, 7, sizeof(target_ulong) * 8 - 9) | ||
44 | +FIELD(VTYPE, VILL, sizeof(target_ulong) * 8 - 2, 1) | ||
45 | + | ||
46 | struct CPURISCVState { | ||
47 | target_ulong gpr[32]; | ||
48 | uint64_t fpr[32]; /* assume both F and D extensions */ | ||
49 | @@ -XXX,XX +XXX,XX @@ void riscv_cpu_set_fflags(CPURISCVState *env, target_ulong); | ||
50 | #define TB_FLAGS_MMU_MASK 3 | ||
51 | #define TB_FLAGS_MSTATUS_FS MSTATUS_FS | ||
52 | |||
53 | +typedef CPURISCVState CPUArchState; | ||
54 | +typedef RISCVCPU ArchCPU; | ||
55 | +#include "exec/cpu-all.h" | ||
56 | + | ||
57 | +FIELD(TB_FLAGS, VL_EQ_VLMAX, 2, 1) | ||
58 | +FIELD(TB_FLAGS, LMUL, 3, 2) | ||
59 | +FIELD(TB_FLAGS, SEW, 5, 3) | ||
60 | +FIELD(TB_FLAGS, VILL, 8, 1) | ||
61 | + | ||
62 | +/* | ||
63 | + * A simplification for VLMAX | ||
64 | + * = (1 << LMUL) * VLEN / (8 * (1 << SEW)) | ||
65 | + * = (VLEN << LMUL) / (8 << SEW) | ||
66 | + * = (VLEN << LMUL) >> (SEW + 3) | ||
67 | + * = VLEN >> (SEW + 3 - LMUL) | ||
68 | + */ | ||
69 | +static inline uint32_t vext_get_vlmax(RISCVCPU *cpu, target_ulong vtype) | ||
70 | +{ | ||
71 | + uint8_t sew, lmul; | ||
72 | + | ||
73 | + sew = FIELD_EX64(vtype, VTYPE, VSEW); | ||
74 | + lmul = FIELD_EX64(vtype, VTYPE, VLMUL); | ||
75 | + return cpu->cfg.vlen >> (sew + 3 - lmul); | ||
76 | +} | ||
77 | + | ||
78 | static inline void cpu_get_tb_cpu_state(CPURISCVState *env, target_ulong *pc, | ||
79 | - target_ulong *cs_base, uint32_t *flags) | ||
80 | + target_ulong *cs_base, uint32_t *pflags) | ||
81 | { | ||
82 | + uint32_t flags = 0; | ||
83 | + | ||
84 | *pc = env->pc; | ||
85 | *cs_base = 0; | ||
86 | + | ||
87 | + if (riscv_has_ext(env, RVV)) { | ||
88 | + uint32_t vlmax = vext_get_vlmax(env_archcpu(env), env->vtype); | ||
89 | + bool vl_eq_vlmax = (env->vstart == 0) && (vlmax == env->vl); | ||
90 | + flags = FIELD_DP32(flags, TB_FLAGS, VILL, | ||
91 | + FIELD_EX64(env->vtype, VTYPE, VILL)); | ||
92 | + flags = FIELD_DP32(flags, TB_FLAGS, SEW, | ||
93 | + FIELD_EX64(env->vtype, VTYPE, VSEW)); | ||
94 | + flags = FIELD_DP32(flags, TB_FLAGS, LMUL, | ||
95 | + FIELD_EX64(env->vtype, VTYPE, VLMUL)); | ||
96 | + flags = FIELD_DP32(flags, TB_FLAGS, VL_EQ_VLMAX, vl_eq_vlmax); | ||
97 | + } else { | ||
98 | + flags = FIELD_DP32(flags, TB_FLAGS, VILL, 1); | ||
99 | + } | ||
100 | + | ||
101 | #ifdef CONFIG_USER_ONLY | ||
102 | - *flags = TB_FLAGS_MSTATUS_FS; | ||
103 | + flags |= TB_FLAGS_MSTATUS_FS; | ||
104 | #else | ||
105 | - *flags = cpu_mmu_index(env, 0); | ||
106 | + flags |= cpu_mmu_index(env, 0); | ||
107 | if (riscv_cpu_fp_enabled(env)) { | ||
108 | - *flags |= env->mstatus & MSTATUS_FS; | ||
109 | + flags |= env->mstatus & MSTATUS_FS; | ||
110 | } | ||
111 | #endif | ||
112 | + *pflags = flags; | ||
113 | } | ||
114 | |||
115 | int riscv_csrrw(CPURISCVState *env, int csrno, target_ulong *ret_value, | ||
116 | @@ -XXX,XX +XXX,XX @@ void riscv_set_csr_ops(int csrno, riscv_csr_operations *ops); | ||
117 | |||
118 | void riscv_cpu_register_gdb_regs_for_features(CPUState *cs); | ||
119 | |||
120 | -typedef CPURISCVState CPUArchState; | ||
121 | -typedef RISCVCPU ArchCPU; | ||
122 | - | ||
123 | -#include "exec/cpu-all.h" | ||
124 | - | ||
125 | #endif /* RISCV_CPU_H */ | ||
126 | diff --git a/target/riscv/helper.h b/target/riscv/helper.h | 46 | diff --git a/target/riscv/helper.h b/target/riscv/helper.h |
127 | index XXXXXXX..XXXXXXX 100644 | 47 | index XXXXXXX..XXXXXXX 100644 |
128 | --- a/target/riscv/helper.h | 48 | --- a/target/riscv/helper.h |
129 | +++ b/target/riscv/helper.h | 49 | +++ b/target/riscv/helper.h |
130 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_1(tlb_flush, void, env) | 50 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_5(vfwcvtbf16_f_f_v, void, ptr, ptr, ptr, env, i32) |
131 | #ifndef CONFIG_USER_ONLY | 51 | |
132 | DEF_HELPER_1(hyp_tlb_flush, void, env) | 52 | DEF_HELPER_6(vfwmaccbf16_vv, void, ptr, ptr, ptr, ptr, env, i32) |
133 | #endif | 53 | DEF_HELPER_6(vfwmaccbf16_vf, void, ptr, ptr, i64, ptr, env, i32) |
134 | + | 54 | + |
135 | +/* Vector functions */ | 55 | +/* Vector crypto functions */ |
136 | +DEF_HELPER_3(vsetvl, tl, env, tl, tl) | 56 | +DEF_HELPER_6(vclmul_vv, void, ptr, ptr, ptr, ptr, env, i32) |
57 | +DEF_HELPER_6(vclmul_vx, void, ptr, ptr, tl, ptr, env, i32) | ||
58 | +DEF_HELPER_6(vclmulh_vv, void, ptr, ptr, ptr, ptr, env, i32) | ||
59 | +DEF_HELPER_6(vclmulh_vx, void, ptr, ptr, tl, ptr, env, i32) | ||
137 | diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode | 60 | diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode |
138 | index XXXXXXX..XXXXXXX 100644 | 61 | index XXXXXXX..XXXXXXX 100644 |
139 | --- a/target/riscv/insn32.decode | 62 | --- a/target/riscv/insn32.decode |
140 | +++ b/target/riscv/insn32.decode | 63 | +++ b/target/riscv/insn32.decode |
141 | @@ -XXX,XX +XXX,XX @@ | 64 | @@ -XXX,XX +XXX,XX @@ vfwcvtbf16_f_f_v 010010 . ..... 01101 001 ..... 1010111 @r2_vm |
142 | @r_rm ....... ..... ..... ... ..... ....... %rs2 %rs1 %rm %rd | 65 | # *** Zvfbfwma Standard Extension *** |
143 | @r2_rm ....... ..... ..... ... ..... ....... %rs1 %rm %rd | 66 | vfwmaccbf16_vv 111011 . ..... ..... 001 ..... 1010111 @r_vm |
144 | @r2 ....... ..... ..... ... ..... ....... %rs1 %rd | 67 | vfwmaccbf16_vf 111011 . ..... ..... 101 ..... 1010111 @r_vm |
145 | +@r2_zimm . zimm:11 ..... ... ..... ....... %rs1 %rd | 68 | + |
146 | 69 | +# *** Zvbc vector crypto extension *** | |
147 | @hfence_gvma ....... ..... ..... ... ..... ....... %rs2 %rs1 | 70 | +vclmul_vv 001100 . ..... ..... 010 ..... 1010111 @r_vm |
148 | @hfence_vvma ....... ..... ..... ... ..... ....... %rs2 %rs1 | 71 | +vclmul_vx 001100 . ..... ..... 110 ..... 1010111 @r_vm |
149 | @@ -XXX,XX +XXX,XX @@ fcvt_d_wu 1101001 00001 ..... ... ..... 1010011 @r2_rm | 72 | +vclmulh_vv 001101 . ..... ..... 010 ..... 1010111 @r_vm |
150 | # *** RV32H Base Instruction Set *** | 73 | +vclmulh_vx 001101 . ..... ..... 110 ..... 1010111 @r_vm |
151 | hfence_gvma 0110001 ..... ..... 000 00000 1110011 @hfence_gvma | 74 | diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c |
152 | hfence_vvma 0010001 ..... ..... 000 00000 1110011 @hfence_vvma | 75 | index XXXXXXX..XXXXXXX 100644 |
153 | + | 76 | --- a/target/riscv/cpu.c |
154 | +# *** RV32V Extension *** | 77 | +++ b/target/riscv/cpu.c |
155 | +vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm | 78 | @@ -XXX,XX +XXX,XX @@ static const struct isa_ext_data isa_edata_arr[] = { |
156 | +vsetvl 1000000 ..... ..... 111 ..... 1010111 @r | 79 | ISA_EXT_DATA_ENTRY(zksed, PRIV_VERSION_1_12_0, ext_zksed), |
157 | diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c | 80 | ISA_EXT_DATA_ENTRY(zksh, PRIV_VERSION_1_12_0, ext_zksh), |
81 | ISA_EXT_DATA_ENTRY(zkt, PRIV_VERSION_1_12_0, ext_zkt), | ||
82 | + ISA_EXT_DATA_ENTRY(zvbc, PRIV_VERSION_1_12_0, ext_zvbc), | ||
83 | ISA_EXT_DATA_ENTRY(zve32f, PRIV_VERSION_1_10_0, ext_zve32f), | ||
84 | ISA_EXT_DATA_ENTRY(zve64f, PRIV_VERSION_1_10_0, ext_zve64f), | ||
85 | ISA_EXT_DATA_ENTRY(zve64d, PRIV_VERSION_1_10_0, ext_zve64d), | ||
86 | @@ -XXX,XX +XXX,XX @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp) | ||
87 | return; | ||
88 | } | ||
89 | |||
90 | + if (cpu->cfg.ext_zvbc && !cpu->cfg.ext_zve64f) { | ||
91 | + error_setg(errp, "Zvbc extension requires V or Zve64{f,d} extensions"); | ||
92 | + return; | ||
93 | + } | ||
94 | + | ||
95 | if (cpu->cfg.ext_zk) { | ||
96 | cpu->cfg.ext_zkn = true; | ||
97 | cpu->cfg.ext_zkr = true; | ||
98 | @@ -XXX,XX +XXX,XX @@ static Property riscv_cpu_extensions[] = { | ||
99 | DEFINE_PROP_BOOL("x-zvfbfmin", RISCVCPU, cfg.ext_zvfbfmin, false), | ||
100 | DEFINE_PROP_BOOL("x-zvfbfwma", RISCVCPU, cfg.ext_zvfbfwma, false), | ||
101 | |||
102 | + /* Vector cryptography extensions */ | ||
103 | + DEFINE_PROP_BOOL("x-zvbc", RISCVCPU, cfg.ext_zvbc, false), | ||
104 | + | ||
105 | DEFINE_PROP_END_OF_LIST(), | ||
106 | }; | ||
107 | |||
108 | diff --git a/target/riscv/translate.c b/target/riscv/translate.c | ||
109 | index XXXXXXX..XXXXXXX 100644 | ||
110 | --- a/target/riscv/translate.c | ||
111 | +++ b/target/riscv/translate.c | ||
112 | @@ -XXX,XX +XXX,XX @@ static uint32_t opcode_at(DisasContextBase *dcbase, target_ulong pc) | ||
113 | #include "insn_trans/trans_rvzfa.c.inc" | ||
114 | #include "insn_trans/trans_rvzfh.c.inc" | ||
115 | #include "insn_trans/trans_rvk.c.inc" | ||
116 | +#include "insn_trans/trans_rvvk.c.inc" | ||
117 | #include "insn_trans/trans_privileged.c.inc" | ||
118 | #include "insn_trans/trans_svinval.c.inc" | ||
119 | #include "insn_trans/trans_rvbf16.c.inc" | ||
120 | diff --git a/target/riscv/vcrypto_helper.c b/target/riscv/vcrypto_helper.c | ||
158 | new file mode 100644 | 121 | new file mode 100644 |
159 | index XXXXXXX..XXXXXXX | 122 | index XXXXXXX..XXXXXXX |
160 | --- /dev/null | 123 | --- /dev/null |
161 | +++ b/target/riscv/insn_trans/trans_rvv.inc.c | 124 | +++ b/target/riscv/vcrypto_helper.c |
162 | @@ -XXX,XX +XXX,XX @@ | 125 | @@ -XXX,XX +XXX,XX @@ |
163 | +/* | 126 | +/* |
164 | + * RISC-V translation routines for the RVV Standard Extension. | 127 | + * RISC-V Vector Crypto Extension Helpers for QEMU. |
165 | + * | 128 | + * |
166 | + * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved. | 129 | + * Copyright (C) 2023 SiFive, Inc. |
130 | + * Written by Codethink Ltd and SiFive. | ||
167 | + * | 131 | + * |
168 | + * This program is free software; you can redistribute it and/or modify it | 132 | + * This program is free software; you can redistribute it and/or modify it |
169 | + * under the terms and conditions of the GNU General Public License, | 133 | + * under the terms and conditions of the GNU General Public License, |
170 | + * version 2 or later, as published by the Free Software Foundation. | 134 | + * version 2 or later, as published by the Free Software Foundation. |
171 | + * | 135 | + * |
... | ... | ||
176 | + * | 140 | + * |
177 | + * You should have received a copy of the GNU General Public License along with | 141 | + * You should have received a copy of the GNU General Public License along with |
178 | + * this program. If not, see <http://www.gnu.org/licenses/>. | 142 | + * this program. If not, see <http://www.gnu.org/licenses/>. |
179 | + */ | 143 | + */ |
180 | + | 144 | + |
181 | +static bool trans_vsetvl(DisasContext *ctx, arg_vsetvl *a) | 145 | +#include "qemu/osdep.h" |
182 | +{ | 146 | +#include "qemu/host-utils.h" |
183 | + TCGv s1, s2, dst; | 147 | +#include "qemu/bitops.h" |
184 | + | 148 | +#include "cpu.h" |
185 | + if (!has_ext(ctx, RVV)) { | 149 | +#include "exec/memop.h" |
186 | + return false; | 150 | +#include "exec/exec-all.h" |
187 | + } | 151 | +#include "exec/helper-proto.h" |
188 | + | 152 | +#include "internals.h" |
189 | + s2 = tcg_temp_new(); | 153 | +#include "vector_internals.h" |
190 | + dst = tcg_temp_new(); | 154 | + |
191 | + | 155 | +static uint64_t clmul64(uint64_t y, uint64_t x) |
192 | + /* Using x0 as the rs1 register specifier, encodes an infinite AVL */ | 156 | +{ |
193 | + if (a->rs1 == 0) { | 157 | + uint64_t result = 0; |
194 | + /* As the mask is at least one bit, RV_VLEN_MAX is >= VLMAX */ | 158 | + for (int j = 63; j >= 0; j--) { |
195 | + s1 = tcg_const_tl(RV_VLEN_MAX); | 159 | + if ((y >> j) & 1) { |
196 | + } else { | 160 | + result ^= (x << j); |
197 | + s1 = tcg_temp_new(); | 161 | + } |
198 | + gen_get_gpr(s1, a->rs1); | 162 | + } |
199 | + } | 163 | + return result; |
200 | + gen_get_gpr(s2, a->rs2); | 164 | +} |
201 | + gen_helper_vsetvl(dst, cpu_env, s1, s2); | 165 | + |
202 | + gen_set_gpr(a->rd, dst); | 166 | +static uint64_t clmulh64(uint64_t y, uint64_t x) |
203 | + tcg_gen_movi_tl(cpu_pc, ctx->pc_succ_insn); | 167 | +{ |
204 | + lookup_and_goto_ptr(ctx); | 168 | + uint64_t result = 0; |
205 | + ctx->base.is_jmp = DISAS_NORETURN; | 169 | + for (int j = 63; j >= 1; j--) { |
206 | + | 170 | + if ((y >> j) & 1) { |
207 | + tcg_temp_free(s1); | 171 | + result ^= (x >> (64 - j)); |
208 | + tcg_temp_free(s2); | 172 | + } |
209 | + tcg_temp_free(dst); | 173 | + } |
210 | + return true; | 174 | + return result; |
211 | +} | 175 | +} |
212 | + | 176 | + |
213 | +static bool trans_vsetvli(DisasContext *ctx, arg_vsetvli *a) | 177 | +RVVCALL(OPIVV2, vclmul_vv, OP_UUU_D, H8, H8, H8, clmul64) |
214 | +{ | 178 | +GEN_VEXT_VV(vclmul_vv, 8) |
215 | + TCGv s1, s2, dst; | 179 | +RVVCALL(OPIVX2, vclmul_vx, OP_UUU_D, H8, H8, clmul64) |
216 | + | 180 | +GEN_VEXT_VX(vclmul_vx, 8) |
217 | + if (!has_ext(ctx, RVV)) { | 181 | +RVVCALL(OPIVV2, vclmulh_vv, OP_UUU_D, H8, H8, H8, clmulh64) |
218 | + return false; | 182 | +GEN_VEXT_VV(vclmulh_vv, 8) |
219 | + } | 183 | +RVVCALL(OPIVX2, vclmulh_vx, OP_UUU_D, H8, H8, clmulh64) |
220 | + | 184 | +GEN_VEXT_VX(vclmulh_vx, 8) |
221 | + s2 = tcg_const_tl(a->zimm); | 185 | diff --git a/target/riscv/insn_trans/trans_rvvk.c.inc b/target/riscv/insn_trans/trans_rvvk.c.inc |
222 | + dst = tcg_temp_new(); | ||
223 | + | ||
224 | + /* Using x0 as the rs1 register specifier, encodes an infinite AVL */ | ||
225 | + if (a->rs1 == 0) { | ||
226 | + /* As the mask is at least one bit, RV_VLEN_MAX is >= VLMAX */ | ||
227 | + s1 = tcg_const_tl(RV_VLEN_MAX); | ||
228 | + } else { | ||
229 | + s1 = tcg_temp_new(); | ||
230 | + gen_get_gpr(s1, a->rs1); | ||
231 | + } | ||
232 | + gen_helper_vsetvl(dst, cpu_env, s1, s2); | ||
233 | + gen_set_gpr(a->rd, dst); | ||
234 | + gen_goto_tb(ctx, 0, ctx->pc_succ_insn); | ||
235 | + ctx->base.is_jmp = DISAS_NORETURN; | ||
236 | + | ||
237 | + tcg_temp_free(s1); | ||
238 | + tcg_temp_free(s2); | ||
239 | + tcg_temp_free(dst); | ||
240 | + return true; | ||
241 | +} | ||
242 | diff --git a/target/riscv/translate.c b/target/riscv/translate.c | ||
243 | index XXXXXXX..XXXXXXX 100644 | ||
244 | --- a/target/riscv/translate.c | ||
245 | +++ b/target/riscv/translate.c | ||
246 | @@ -XXX,XX +XXX,XX @@ typedef struct DisasContext { | ||
247 | to reset this known value. */ | ||
248 | int frm; | ||
249 | bool ext_ifencei; | ||
250 | + /* vector extension */ | ||
251 | + bool vill; | ||
252 | + uint8_t lmul; | ||
253 | + uint8_t sew; | ||
254 | + uint16_t vlen; | ||
255 | + bool vl_eq_vlmax; | ||
256 | } DisasContext; | ||
257 | |||
258 | #ifdef TARGET_RISCV64 | ||
259 | @@ -XXX,XX +XXX,XX @@ static bool gen_shift(DisasContext *ctx, arg_r *a, | ||
260 | #include "insn_trans/trans_rvf.inc.c" | ||
261 | #include "insn_trans/trans_rvd.inc.c" | ||
262 | #include "insn_trans/trans_rvh.inc.c" | ||
263 | +#include "insn_trans/trans_rvv.inc.c" | ||
264 | #include "insn_trans/trans_privileged.inc.c" | ||
265 | |||
266 | /* Include the auto-generated decoder for 16 bit insn */ | ||
267 | @@ -XXX,XX +XXX,XX @@ static void riscv_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) | ||
268 | DisasContext *ctx = container_of(dcbase, DisasContext, base); | ||
269 | CPURISCVState *env = cs->env_ptr; | ||
270 | RISCVCPU *cpu = RISCV_CPU(cs); | ||
271 | + uint32_t tb_flags = ctx->base.tb->flags; | ||
272 | |||
273 | ctx->pc_succ_insn = ctx->base.pc_first; | ||
274 | - ctx->mem_idx = ctx->base.tb->flags & TB_FLAGS_MMU_MASK; | ||
275 | - ctx->mstatus_fs = ctx->base.tb->flags & TB_FLAGS_MSTATUS_FS; | ||
276 | + ctx->mem_idx = tb_flags & TB_FLAGS_MMU_MASK; | ||
277 | + ctx->mstatus_fs = tb_flags & TB_FLAGS_MSTATUS_FS; | ||
278 | ctx->priv_ver = env->priv_ver; | ||
279 | #if !defined(CONFIG_USER_ONLY) | ||
280 | if (riscv_has_ext(env, RVH)) { | ||
281 | @@ -XXX,XX +XXX,XX @@ static void riscv_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) | ||
282 | ctx->misa = env->misa; | ||
283 | ctx->frm = -1; /* unknown rounding mode */ | ||
284 | ctx->ext_ifencei = cpu->cfg.ext_ifencei; | ||
285 | + ctx->vlen = cpu->cfg.vlen; | ||
286 | + ctx->vill = FIELD_EX32(tb_flags, TB_FLAGS, VILL); | ||
287 | + ctx->sew = FIELD_EX32(tb_flags, TB_FLAGS, SEW); | ||
288 | + ctx->lmul = FIELD_EX32(tb_flags, TB_FLAGS, LMUL); | ||
289 | + ctx->vl_eq_vlmax = FIELD_EX32(tb_flags, TB_FLAGS, VL_EQ_VLMAX); | ||
290 | } | ||
291 | |||
292 | static void riscv_tr_tb_start(DisasContextBase *db, CPUState *cpu) | ||
293 | diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c | ||
294 | new file mode 100644 | 186 | new file mode 100644 |
295 | index XXXXXXX..XXXXXXX | 187 | index XXXXXXX..XXXXXXX |
296 | --- /dev/null | 188 | --- /dev/null |
297 | +++ b/target/riscv/vector_helper.c | 189 | +++ b/target/riscv/insn_trans/trans_rvvk.c.inc |
298 | @@ -XXX,XX +XXX,XX @@ | 190 | @@ -XXX,XX +XXX,XX @@ |
299 | +/* | 191 | +/* |
300 | + * RISC-V Vector Extension Helpers for QEMU. | 192 | + * RISC-V translation routines for the vector crypto extension. |
301 | + * | 193 | + * |
302 | + * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved. | 194 | + * Copyright (C) 2023 SiFive, Inc. |
195 | + * Written by Codethink Ltd and SiFive. | ||
303 | + * | 196 | + * |
304 | + * This program is free software; you can redistribute it and/or modify it | 197 | + * This program is free software; you can redistribute it and/or modify it |
305 | + * under the terms and conditions of the GNU General Public License, | 198 | + * under the terms and conditions of the GNU General Public License, |
306 | + * version 2 or later, as published by the Free Software Foundation. | 199 | + * version 2 or later, as published by the Free Software Foundation. |
307 | + * | 200 | + * |
... | ... | ||
312 | + * | 205 | + * |
313 | + * You should have received a copy of the GNU General Public License along with | 206 | + * You should have received a copy of the GNU General Public License along with |
314 | + * this program. If not, see <http://www.gnu.org/licenses/>. | 207 | + * this program. If not, see <http://www.gnu.org/licenses/>. |
315 | + */ | 208 | + */ |
316 | + | 209 | + |
317 | +#include "qemu/osdep.h" | 210 | +/* |
318 | +#include "cpu.h" | 211 | + * Zvbc |
319 | +#include "exec/exec-all.h" | 212 | + */ |
320 | +#include "exec/helper-proto.h" | 213 | + |
321 | +#include <math.h> | 214 | +#define GEN_VV_MASKED_TRANS(NAME, CHECK) \ |
322 | + | 215 | + static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ |
323 | +target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1, | 216 | + { \ |
324 | + target_ulong s2) | 217 | + if (CHECK(s, a)) { \ |
325 | +{ | 218 | + return opivv_trans(a->rd, a->rs1, a->rs2, a->vm, \ |
326 | + int vlmax, vl; | 219 | + gen_helper_##NAME, s); \ |
327 | + RISCVCPU *cpu = env_archcpu(env); | 220 | + } \ |
328 | + uint16_t sew = 8 << FIELD_EX64(s2, VTYPE, VSEW); | 221 | + return false; \ |
329 | + uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV); | 222 | + } |
330 | + bool vill = FIELD_EX64(s2, VTYPE, VILL); | 223 | + |
331 | + target_ulong reserved = FIELD_EX64(s2, VTYPE, RESERVED); | 224 | +static bool vclmul_vv_check(DisasContext *s, arg_rmrr *a) |
332 | + | 225 | +{ |
333 | + if ((sew > cpu->cfg.elen) || vill || (ediv != 0) || (reserved != 0)) { | 226 | + return opivv_check(s, a) && |
334 | + /* only set vill bit. */ | 227 | + s->cfg_ptr->ext_zvbc == true && |
335 | + env->vtype = FIELD_DP64(0, VTYPE, VILL, 1); | 228 | + s->sew == MO_64; |
336 | + env->vl = 0; | 229 | +} |
337 | + env->vstart = 0; | 230 | + |
338 | + return 0; | 231 | +GEN_VV_MASKED_TRANS(vclmul_vv, vclmul_vv_check) |
339 | + } | 232 | +GEN_VV_MASKED_TRANS(vclmulh_vv, vclmul_vv_check) |
340 | + | 233 | + |
341 | + vlmax = vext_get_vlmax(cpu, s2); | 234 | +#define GEN_VX_MASKED_TRANS(NAME, CHECK) \ |
342 | + if (s1 <= vlmax) { | 235 | + static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ |
343 | + vl = s1; | 236 | + { \ |
344 | + } else { | 237 | + if (CHECK(s, a)) { \ |
345 | + vl = vlmax; | 238 | + return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, \ |
346 | + } | 239 | + gen_helper_##NAME, s); \ |
347 | + env->vl = vl; | 240 | + } \ |
348 | + env->vtype = s2; | 241 | + return false; \ |
349 | + env->vstart = 0; | 242 | + } |
350 | + return vl; | 243 | + |
351 | +} | 244 | +static bool vclmul_vx_check(DisasContext *s, arg_rmrr *a) |
352 | diff --git a/target/riscv/Makefile.objs b/target/riscv/Makefile.objs | 245 | +{ |
353 | index XXXXXXX..XXXXXXX 100644 | 246 | + return opivx_check(s, a) && |
354 | --- a/target/riscv/Makefile.objs | 247 | + s->cfg_ptr->ext_zvbc == true && |
355 | +++ b/target/riscv/Makefile.objs | 248 | + s->sew == MO_64; |
356 | @@ -XXX,XX +XXX,XX @@ | 249 | +} |
357 | -obj-y += translate.o op_helper.o cpu_helper.o cpu.o csr.o fpu_helper.o gdbstub.o | 250 | + |
358 | +obj-y += translate.o op_helper.o cpu_helper.o cpu.o csr.o fpu_helper.o vector_helper.o gdbstub.o | 251 | +GEN_VX_MASKED_TRANS(vclmul_vx, vclmul_vx_check) |
359 | obj-$(CONFIG_SOFTMMU) += pmp.o | 252 | +GEN_VX_MASKED_TRANS(vclmulh_vx, vclmul_vx_check) |
360 | 253 | diff --git a/target/riscv/meson.build b/target/riscv/meson.build | |
361 | ifeq ($(CONFIG_SOFTMMU),y) | 254 | index XXXXXXX..XXXXXXX 100644 |
255 | --- a/target/riscv/meson.build | ||
256 | +++ b/target/riscv/meson.build | ||
257 | @@ -XXX,XX +XXX,XX @@ riscv_ss.add(files( | ||
258 | 'translate.c', | ||
259 | 'm128_helper.c', | ||
260 | 'crypto_helper.c', | ||
261 | - 'zce_helper.c' | ||
262 | + 'zce_helper.c', | ||
263 | + 'vcrypto_helper.c' | ||
264 | )) | ||
265 | riscv_ss.add(when: 'CONFIG_KVM', if_true: files('kvm.c'), if_false: files('kvm-stub.c')) | ||
266 | |||
362 | -- | 267 | -- |
363 | 2.27.0 | 268 | 2.41.0 |
364 | |||
365 | diff view generated by jsdifflib |
1 | From: LIU Zhiwei <zhiwei_liu@c-sky.com> | 1 | From: Nazar Kazakov <nazar.kazakov@codethink.co.uk> |
---|---|---|---|
2 | 2 | ||
3 | Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> | 3 | Move the checks out of `do_opiv{v,x,i}_gvec{,_shift}` functions |
4 | and into the corresponding macros. This enables the functions to be | ||
5 | reused in subsequent commits without check duplication. | ||
6 | |||
7 | Signed-off-by: Nazar Kazakov <nazar.kazakov@codethink.co.uk> | ||
4 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 8 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
5 | Reviewed-by: Alistair Francis <alistair.francis@wdc.com> | 9 | Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn> |
6 | Message-id: 20200623215920.2594-49-zhiwei_liu@c-sky.com | 10 | Signed-off-by: Max Chou <max.chou@sifive.com> |
11 | Message-ID: <20230711165917.2629866-6-max.chou@sifive.com> | ||
7 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | 12 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> |
8 | --- | 13 | --- |
9 | target/riscv/helper.h | 3 ++ | 14 | target/riscv/insn_trans/trans_rvv.c.inc | 28 +++++++++++-------------- |
10 | target/riscv/insn32.decode | 2 ++ | 15 | 1 file changed, 12 insertions(+), 16 deletions(-) |
11 | target/riscv/insn_trans/trans_rvv.inc.c | 3 ++ | ||
12 | target/riscv/vector_helper.c | 46 +++++++++++++++++++++++++ | ||
13 | 4 files changed, 54 insertions(+) | ||
14 | 16 | ||
15 | diff --git a/target/riscv/helper.h b/target/riscv/helper.h | 17 | diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc |
16 | index XXXXXXX..XXXXXXX 100644 | 18 | index XXXXXXX..XXXXXXX 100644 |
17 | --- a/target/riscv/helper.h | 19 | --- a/target/riscv/insn_trans/trans_rvv.c.inc |
18 | +++ b/target/riscv/helper.h | 20 | +++ b/target/riscv/insn_trans/trans_rvv.c.inc |
19 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vfredmax_vs_d, void, ptr, ptr, ptr, ptr, env, i32) | 21 | @@ -XXX,XX +XXX,XX @@ do_opivv_gvec(DisasContext *s, arg_rmrr *a, GVecGen3Fn *gvec_fn, |
20 | DEF_HELPER_6(vfredmin_vs_h, void, ptr, ptr, ptr, ptr, env, i32) | 22 | gen_helper_gvec_4_ptr *fn) |
21 | DEF_HELPER_6(vfredmin_vs_w, void, ptr, ptr, ptr, ptr, env, i32) | 23 | { |
22 | DEF_HELPER_6(vfredmin_vs_d, void, ptr, ptr, ptr, ptr, env, i32) | 24 | TCGLabel *over = gen_new_label(); |
23 | + | 25 | - if (!opivv_check(s, a)) { |
24 | +DEF_HELPER_6(vfwredsum_vs_h, void, ptr, ptr, ptr, ptr, env, i32) | 26 | - return false; |
25 | +DEF_HELPER_6(vfwredsum_vs_w, void, ptr, ptr, ptr, ptr, env, i32) | 27 | - } |
26 | diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode | 28 | |
27 | index XXXXXXX..XXXXXXX 100644 | 29 | tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); |
28 | --- a/target/riscv/insn32.decode | 30 | |
29 | +++ b/target/riscv/insn32.decode | 31 | @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ |
30 | @@ -XXX,XX +XXX,XX @@ vwredsum_vs 110001 . ..... ..... 000 ..... 1010111 @r_vm | 32 | gen_helper_##NAME##_b, gen_helper_##NAME##_h, \ |
31 | vfredsum_vs 0000-1 . ..... ..... 001 ..... 1010111 @r_vm | 33 | gen_helper_##NAME##_w, gen_helper_##NAME##_d, \ |
32 | vfredmin_vs 000101 . ..... ..... 001 ..... 1010111 @r_vm | 34 | }; \ |
33 | vfredmax_vs 000111 . ..... ..... 001 ..... 1010111 @r_vm | 35 | + if (!opivv_check(s, a)) { \ |
34 | +# Vector widening ordered and unordered float reduction sum | 36 | + return false; \ |
35 | +vfwredsum_vs 1100-1 . ..... ..... 001 ..... 1010111 @r_vm | 37 | + } \ |
36 | 38 | return do_opivv_gvec(s, a, tcg_gen_gvec_##SUF, fns[s->sew]); \ | |
37 | vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm | 39 | } |
38 | vsetvl 1000000 ..... ..... 111 ..... 1010111 @r | 40 | |
39 | diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c | 41 | @@ -XXX,XX +XXX,XX @@ static inline bool |
40 | index XXXXXXX..XXXXXXX 100644 | 42 | do_opivx_gvec(DisasContext *s, arg_rmrr *a, GVecGen2sFn *gvec_fn, |
41 | --- a/target/riscv/insn_trans/trans_rvv.inc.c | 43 | gen_helper_opivx *fn) |
42 | +++ b/target/riscv/insn_trans/trans_rvv.inc.c | 44 | { |
43 | @@ -XXX,XX +XXX,XX @@ GEN_OPIVV_WIDEN_TRANS(vwredsumu_vs, reduction_check) | 45 | - if (!opivx_check(s, a)) { |
44 | GEN_OPFVV_TRANS(vfredsum_vs, reduction_check) | 46 | - return false; |
45 | GEN_OPFVV_TRANS(vfredmax_vs, reduction_check) | 47 | - } |
46 | GEN_OPFVV_TRANS(vfredmin_vs, reduction_check) | 48 | - |
47 | + | 49 | if (a->vm && s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) { |
48 | +/* Vector Widening Floating-Point Reduction Instructions */ | 50 | TCGv_i64 src1 = tcg_temp_new_i64(); |
49 | +GEN_OPFVV_WIDEN_TRANS(vfwredsum_vs, reduction_check) | 51 | |
50 | diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c | 52 | @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ |
51 | index XXXXXXX..XXXXXXX 100644 | 53 | gen_helper_##NAME##_b, gen_helper_##NAME##_h, \ |
52 | --- a/target/riscv/vector_helper.c | 54 | gen_helper_##NAME##_w, gen_helper_##NAME##_d, \ |
53 | +++ b/target/riscv/vector_helper.c | 55 | }; \ |
54 | @@ -XXX,XX +XXX,XX @@ GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8, float64_maxnum, clearq) | 56 | + if (!opivx_check(s, a)) { \ |
55 | GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2, float16_minnum, clearh) | 57 | + return false; \ |
56 | GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4, float32_minnum, clearl) | 58 | + } \ |
57 | GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8, float64_minnum, clearq) | 59 | return do_opivx_gvec(s, a, tcg_gen_gvec_##SUF, fns[s->sew]); \ |
58 | + | 60 | } |
59 | +/* Vector Widening Floating-Point Reduction Instructions */ | 61 | |
60 | +/* Unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */ | 62 | @@ -XXX,XX +XXX,XX @@ static inline bool |
61 | +void HELPER(vfwredsum_vs_h)(void *vd, void *v0, void *vs1, | 63 | do_opivi_gvec(DisasContext *s, arg_rmrr *a, GVecGen2iFn *gvec_fn, |
62 | + void *vs2, CPURISCVState *env, uint32_t desc) | 64 | gen_helper_opivx *fn, imm_mode_t imm_mode) |
63 | +{ | 65 | { |
64 | + uint32_t mlen = vext_mlen(desc); | 66 | - if (!opivx_check(s, a)) { |
65 | + uint32_t vm = vext_vm(desc); | 67 | - return false; |
66 | + uint32_t vl = env->vl; | 68 | - } |
67 | + uint32_t i; | 69 | - |
68 | + uint32_t tot = env_archcpu(env)->cfg.vlen / 8; | 70 | if (a->vm && s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) { |
69 | + uint32_t s1 = *((uint32_t *)vs1 + H4(0)); | 71 | gvec_fn(s->sew, vreg_ofs(s, a->rd), vreg_ofs(s, a->rs2), |
70 | + | 72 | extract_imm(s, a->rs1, imm_mode), MAXSZ(s), MAXSZ(s)); |
71 | + for (i = 0; i < vl; i++) { | 73 | @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ |
72 | + uint16_t s2 = *((uint16_t *)vs2 + H2(i)); | 74 | gen_helper_##OPIVX##_b, gen_helper_##OPIVX##_h, \ |
73 | + if (!vm && !vext_elem_mask(v0, mlen, i)) { | 75 | gen_helper_##OPIVX##_w, gen_helper_##OPIVX##_d, \ |
74 | + continue; | 76 | }; \ |
75 | + } | 77 | + if (!opivx_check(s, a)) { \ |
76 | + s1 = float32_add(s1, float16_to_float32(s2, true, &env->fp_status), | 78 | + return false; \ |
77 | + &env->fp_status); | 79 | + } \ |
78 | + } | 80 | return do_opivi_gvec(s, a, tcg_gen_gvec_##SUF, \ |
79 | + *((uint32_t *)vd + H4(0)) = s1; | 81 | fns[s->sew], IMM_MODE); \ |
80 | + clearl(vd, 1, sizeof(uint32_t), tot); | 82 | } |
81 | +} | 83 | @@ -XXX,XX +XXX,XX @@ static inline bool |
82 | + | 84 | do_opivx_gvec_shift(DisasContext *s, arg_rmrr *a, GVecGen2sFn32 *gvec_fn, |
83 | +void HELPER(vfwredsum_vs_w)(void *vd, void *v0, void *vs1, | 85 | gen_helper_opivx *fn) |
84 | + void *vs2, CPURISCVState *env, uint32_t desc) | 86 | { |
85 | +{ | 87 | - if (!opivx_check(s, a)) { |
86 | + uint32_t mlen = vext_mlen(desc); | 88 | - return false; |
87 | + uint32_t vm = vext_vm(desc); | 89 | - } |
88 | + uint32_t vl = env->vl; | 90 | - |
89 | + uint32_t i; | 91 | if (a->vm && s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) { |
90 | + uint32_t tot = env_archcpu(env)->cfg.vlen / 8; | 92 | TCGv_i32 src1 = tcg_temp_new_i32(); |
91 | + uint64_t s1 = *((uint64_t *)vs1); | 93 | |
92 | + | 94 | @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ |
93 | + for (i = 0; i < vl; i++) { | 95 | gen_helper_##NAME##_b, gen_helper_##NAME##_h, \ |
94 | + uint32_t s2 = *((uint32_t *)vs2 + H4(i)); | 96 | gen_helper_##NAME##_w, gen_helper_##NAME##_d, \ |
95 | + if (!vm && !vext_elem_mask(v0, mlen, i)) { | 97 | }; \ |
96 | + continue; | 98 | - \ |
97 | + } | 99 | + if (!opivx_check(s, a)) { \ |
98 | + s1 = float64_add(s1, float32_to_float64(s2, &env->fp_status), | 100 | + return false; \ |
99 | + &env->fp_status); | 101 | + } \ |
100 | + } | 102 | return do_opivx_gvec_shift(s, a, tcg_gen_gvec_##SUF, fns[s->sew]); \ |
101 | + *((uint64_t *)vd) = s1; | 103 | } |
102 | + clearq(vd, 1, sizeof(uint64_t), tot); | 104 | |
103 | +} | ||
104 | -- | 105 | -- |
105 | 2.27.0 | 106 | 2.41.0 |
106 | |||
107 | diff view generated by jsdifflib |
1 | From: LIU Zhiwei <zhiwei_liu@c-sky.com> | 1 | From: Dickon Hood <dickon.hood@codethink.co.uk> |
---|---|---|---|
2 | 2 | ||
3 | Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> | 3 | Zvbb (implemented in a later commit) has a widening instruction, which |
4 | requires an extra check on the enabled extensions. Refactor | ||
5 | GEN_OPIVX_WIDEN_TRANS() to take a check function to avoid reimplementing | ||
6 | it. | ||
7 | |||
8 | Signed-off-by: Dickon Hood <dickon.hood@codethink.co.uk> | ||
4 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 9 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
5 | Reviewed-by: Alistair Francis <alistair.francis@wdc.com> | 10 | Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn> |
6 | Message-id: 20200623215920.2594-15-zhiwei_liu@c-sky.com | 11 | Signed-off-by: Max Chou <max.chou@sifive.com> |
12 | Message-ID: <20230711165917.2629866-7-max.chou@sifive.com> | ||
7 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | 13 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> |
8 | --- | 14 | --- |
9 | target/riscv/helper.h | 25 ++++++++ | 15 | target/riscv/insn_trans/trans_rvv.c.inc | 52 +++++++++++-------------- |
10 | target/riscv/insn32.decode | 9 +++ | 16 | 1 file changed, 23 insertions(+), 29 deletions(-) |
11 | target/riscv/insn_trans/trans_rvv.inc.c | 52 ++++++++++++++++ | ||
12 | target/riscv/vector_helper.c | 79 +++++++++++++++++++++++++ | ||
13 | 4 files changed, 165 insertions(+) | ||
14 | 17 | ||
15 | diff --git a/target/riscv/helper.h b/target/riscv/helper.h | 18 | diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc |
16 | index XXXXXXX..XXXXXXX 100644 | 19 | index XXXXXXX..XXXXXXX 100644 |
17 | --- a/target/riscv/helper.h | 20 | --- a/target/riscv/insn_trans/trans_rvv.c.inc |
18 | +++ b/target/riscv/helper.h | 21 | +++ b/target/riscv/insn_trans/trans_rvv.c.inc |
19 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vxor_vx_b, void, ptr, ptr, tl, ptr, env, i32) | 22 | @@ -XXX,XX +XXX,XX @@ static bool opivx_widen_check(DisasContext *s, arg_rmrr *a) |
20 | DEF_HELPER_6(vxor_vx_h, void, ptr, ptr, tl, ptr, env, i32) | 23 | vext_check_ds(s, a->rd, a->rs2, a->vm); |
21 | DEF_HELPER_6(vxor_vx_w, void, ptr, ptr, tl, ptr, env, i32) | 24 | } |
22 | DEF_HELPER_6(vxor_vx_d, void, ptr, ptr, tl, ptr, env, i32) | 25 | |
23 | + | 26 | -static bool do_opivx_widen(DisasContext *s, arg_rmrr *a, |
24 | +DEF_HELPER_6(vsll_vv_b, void, ptr, ptr, ptr, ptr, env, i32) | 27 | - gen_helper_opivx *fn) |
25 | +DEF_HELPER_6(vsll_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | 28 | -{ |
26 | +DEF_HELPER_6(vsll_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | 29 | - if (opivx_widen_check(s, a)) { |
27 | +DEF_HELPER_6(vsll_vv_d, void, ptr, ptr, ptr, ptr, env, i32) | 30 | - return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fn, s); |
28 | +DEF_HELPER_6(vsrl_vv_b, void, ptr, ptr, ptr, ptr, env, i32) | 31 | - } |
29 | +DEF_HELPER_6(vsrl_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | 32 | - return false; |
30 | +DEF_HELPER_6(vsrl_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | 33 | -} |
31 | +DEF_HELPER_6(vsrl_vv_d, void, ptr, ptr, ptr, ptr, env, i32) | 34 | - |
32 | +DEF_HELPER_6(vsra_vv_b, void, ptr, ptr, ptr, ptr, env, i32) | 35 | -#define GEN_OPIVX_WIDEN_TRANS(NAME) \ |
33 | +DEF_HELPER_6(vsra_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | 36 | -static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ |
34 | +DEF_HELPER_6(vsra_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | 37 | -{ \ |
35 | +DEF_HELPER_6(vsra_vv_d, void, ptr, ptr, ptr, ptr, env, i32) | 38 | - static gen_helper_opivx * const fns[3] = { \ |
36 | +DEF_HELPER_6(vsll_vx_b, void, ptr, ptr, tl, ptr, env, i32) | 39 | - gen_helper_##NAME##_b, \ |
37 | +DEF_HELPER_6(vsll_vx_h, void, ptr, ptr, tl, ptr, env, i32) | 40 | - gen_helper_##NAME##_h, \ |
38 | +DEF_HELPER_6(vsll_vx_w, void, ptr, ptr, tl, ptr, env, i32) | 41 | - gen_helper_##NAME##_w \ |
39 | +DEF_HELPER_6(vsll_vx_d, void, ptr, ptr, tl, ptr, env, i32) | 42 | - }; \ |
40 | +DEF_HELPER_6(vsrl_vx_b, void, ptr, ptr, tl, ptr, env, i32) | 43 | - return do_opivx_widen(s, a, fns[s->sew]); \ |
41 | +DEF_HELPER_6(vsrl_vx_h, void, ptr, ptr, tl, ptr, env, i32) | 44 | +#define GEN_OPIVX_WIDEN_TRANS(NAME, CHECK) \ |
42 | +DEF_HELPER_6(vsrl_vx_w, void, ptr, ptr, tl, ptr, env, i32) | ||
43 | +DEF_HELPER_6(vsrl_vx_d, void, ptr, ptr, tl, ptr, env, i32) | ||
44 | +DEF_HELPER_6(vsra_vx_b, void, ptr, ptr, tl, ptr, env, i32) | ||
45 | +DEF_HELPER_6(vsra_vx_h, void, ptr, ptr, tl, ptr, env, i32) | ||
46 | +DEF_HELPER_6(vsra_vx_w, void, ptr, ptr, tl, ptr, env, i32) | ||
47 | +DEF_HELPER_6(vsra_vx_d, void, ptr, ptr, tl, ptr, env, i32) | ||
48 | diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode | ||
49 | index XXXXXXX..XXXXXXX 100644 | ||
50 | --- a/target/riscv/insn32.decode | ||
51 | +++ b/target/riscv/insn32.decode | ||
52 | @@ -XXX,XX +XXX,XX @@ vor_vi 001010 . ..... ..... 011 ..... 1010111 @r_vm | ||
53 | vxor_vv 001011 . ..... ..... 000 ..... 1010111 @r_vm | ||
54 | vxor_vx 001011 . ..... ..... 100 ..... 1010111 @r_vm | ||
55 | vxor_vi 001011 . ..... ..... 011 ..... 1010111 @r_vm | ||
56 | +vsll_vv 100101 . ..... ..... 000 ..... 1010111 @r_vm | ||
57 | +vsll_vx 100101 . ..... ..... 100 ..... 1010111 @r_vm | ||
58 | +vsll_vi 100101 . ..... ..... 011 ..... 1010111 @r_vm | ||
59 | +vsrl_vv 101000 . ..... ..... 000 ..... 1010111 @r_vm | ||
60 | +vsrl_vx 101000 . ..... ..... 100 ..... 1010111 @r_vm | ||
61 | +vsrl_vi 101000 . ..... ..... 011 ..... 1010111 @r_vm | ||
62 | +vsra_vv 101001 . ..... ..... 000 ..... 1010111 @r_vm | ||
63 | +vsra_vx 101001 . ..... ..... 100 ..... 1010111 @r_vm | ||
64 | +vsra_vi 101001 . ..... ..... 011 ..... 1010111 @r_vm | ||
65 | |||
66 | vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm | ||
67 | vsetvl 1000000 ..... ..... 111 ..... 1010111 @r | ||
68 | diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c | ||
69 | index XXXXXXX..XXXXXXX 100644 | ||
70 | --- a/target/riscv/insn_trans/trans_rvv.inc.c | ||
71 | +++ b/target/riscv/insn_trans/trans_rvv.inc.c | ||
72 | @@ -XXX,XX +XXX,XX @@ GEN_OPIVX_GVEC_TRANS(vxor_vx, xors) | ||
73 | GEN_OPIVI_GVEC_TRANS(vand_vi, 0, vand_vx, andi) | ||
74 | GEN_OPIVI_GVEC_TRANS(vor_vi, 0, vor_vx, ori) | ||
75 | GEN_OPIVI_GVEC_TRANS(vxor_vi, 0, vxor_vx, xori) | ||
76 | + | ||
77 | +/* Vector Single-Width Bit Shift Instructions */ | ||
78 | +GEN_OPIVV_GVEC_TRANS(vsll_vv, shlv) | ||
79 | +GEN_OPIVV_GVEC_TRANS(vsrl_vv, shrv) | ||
80 | +GEN_OPIVV_GVEC_TRANS(vsra_vv, sarv) | ||
81 | + | ||
82 | +typedef void GVecGen2sFn32(unsigned, uint32_t, uint32_t, TCGv_i32, | ||
83 | + uint32_t, uint32_t); | ||
84 | + | ||
85 | +static inline bool | ||
86 | +do_opivx_gvec_shift(DisasContext *s, arg_rmrr *a, GVecGen2sFn32 *gvec_fn, | ||
87 | + gen_helper_opivx *fn) | ||
88 | +{ | ||
89 | + if (!opivx_check(s, a)) { | ||
90 | + return false; | ||
91 | + } | ||
92 | + | ||
93 | + if (a->vm && s->vl_eq_vlmax) { | ||
94 | + TCGv_i32 src1 = tcg_temp_new_i32(); | ||
95 | + TCGv tmp = tcg_temp_new(); | ||
96 | + | ||
97 | + gen_get_gpr(tmp, a->rs1); | ||
98 | + tcg_gen_trunc_tl_i32(src1, tmp); | ||
99 | + tcg_gen_extract_i32(src1, src1, 0, s->sew + 3); | ||
100 | + gvec_fn(s->sew, vreg_ofs(s, a->rd), vreg_ofs(s, a->rs2), | ||
101 | + src1, MAXSZ(s), MAXSZ(s)); | ||
102 | + | ||
103 | + tcg_temp_free_i32(src1); | ||
104 | + tcg_temp_free(tmp); | ||
105 | + return true; | ||
106 | + } | ||
107 | + return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fn, s); | ||
108 | +} | ||
109 | + | ||
110 | +#define GEN_OPIVX_GVEC_SHIFT_TRANS(NAME, SUF) \ | ||
111 | +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ | 45 | +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ |
112 | +{ \ | 46 | +{ \ |
113 | + static gen_helper_opivx * const fns[4] = { \ | 47 | + if (CHECK(s, a)) { \ |
114 | + gen_helper_##NAME##_b, gen_helper_##NAME##_h, \ | 48 | + static gen_helper_opivx * const fns[3] = { \ |
115 | + gen_helper_##NAME##_w, gen_helper_##NAME##_d, \ | 49 | + gen_helper_##NAME##_b, \ |
116 | + }; \ | 50 | + gen_helper_##NAME##_h, \ |
117 | + \ | 51 | + gen_helper_##NAME##_w \ |
118 | + return do_opivx_gvec_shift(s, a, tcg_gen_gvec_##SUF, fns[s->sew]); \ | 52 | + }; \ |
119 | +} | 53 | + return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fns[s->sew], s); \ |
120 | + | ||
121 | +GEN_OPIVX_GVEC_SHIFT_TRANS(vsll_vx, shls) | ||
122 | +GEN_OPIVX_GVEC_SHIFT_TRANS(vsrl_vx, shrs) | ||
123 | +GEN_OPIVX_GVEC_SHIFT_TRANS(vsra_vx, sars) | ||
124 | + | ||
125 | +GEN_OPIVI_GVEC_TRANS(vsll_vi, 1, vsll_vx, shli) | ||
126 | +GEN_OPIVI_GVEC_TRANS(vsrl_vi, 1, vsrl_vx, shri) | ||
127 | +GEN_OPIVI_GVEC_TRANS(vsra_vi, 1, vsra_vx, sari) | ||
128 | diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c | ||
129 | index XXXXXXX..XXXXXXX 100644 | ||
130 | --- a/target/riscv/vector_helper.c | ||
131 | +++ b/target/riscv/vector_helper.c | ||
132 | @@ -XXX,XX +XXX,XX @@ GEN_VEXT_VX(vxor_vx_b, 1, 1, clearb) | ||
133 | GEN_VEXT_VX(vxor_vx_h, 2, 2, clearh) | ||
134 | GEN_VEXT_VX(vxor_vx_w, 4, 4, clearl) | ||
135 | GEN_VEXT_VX(vxor_vx_d, 8, 8, clearq) | ||
136 | + | ||
137 | +/* Vector Single-Width Bit Shift Instructions */ | ||
138 | +#define DO_SLL(N, M) (N << (M)) | ||
139 | +#define DO_SRL(N, M) (N >> (M)) | ||
140 | + | ||
141 | +/* generate the helpers for shift instructions with two vector operators */ | ||
142 | +#define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK, CLEAR_FN) \ | ||
143 | +void HELPER(NAME)(void *vd, void *v0, void *vs1, \ | ||
144 | + void *vs2, CPURISCVState *env, uint32_t desc) \ | ||
145 | +{ \ | ||
146 | + uint32_t mlen = vext_mlen(desc); \ | ||
147 | + uint32_t vm = vext_vm(desc); \ | ||
148 | + uint32_t vl = env->vl; \ | ||
149 | + uint32_t esz = sizeof(TS1); \ | ||
150 | + uint32_t vlmax = vext_maxsz(desc) / esz; \ | ||
151 | + uint32_t i; \ | ||
152 | + \ | ||
153 | + for (i = 0; i < vl; i++) { \ | ||
154 | + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ | ||
155 | + continue; \ | ||
156 | + } \ | ||
157 | + TS1 s1 = *((TS1 *)vs1 + HS1(i)); \ | ||
158 | + TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ | ||
159 | + *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK); \ | ||
160 | + } \ | 54 | + } \ |
161 | + CLEAR_FN(vd, vl, vl * esz, vlmax * esz); \ | 55 | + return false; \ |
162 | +} | 56 | } |
163 | + | 57 | |
164 | +GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t, uint8_t, H1, H1, DO_SLL, 0x7, clearb) | 58 | -GEN_OPIVX_WIDEN_TRANS(vwaddu_vx) |
165 | +GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf, clearh) | 59 | -GEN_OPIVX_WIDEN_TRANS(vwadd_vx) |
166 | +GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f, clearl) | 60 | -GEN_OPIVX_WIDEN_TRANS(vwsubu_vx) |
167 | +GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f, clearq) | 61 | -GEN_OPIVX_WIDEN_TRANS(vwsub_vx) |
168 | + | 62 | +GEN_OPIVX_WIDEN_TRANS(vwaddu_vx, opivx_widen_check) |
169 | +GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7, clearb) | 63 | +GEN_OPIVX_WIDEN_TRANS(vwadd_vx, opivx_widen_check) |
170 | +GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf, clearh) | 64 | +GEN_OPIVX_WIDEN_TRANS(vwsubu_vx, opivx_widen_check) |
171 | +GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f, clearl) | 65 | +GEN_OPIVX_WIDEN_TRANS(vwsub_vx, opivx_widen_check) |
172 | +GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f, clearq) | 66 | |
173 | + | 67 | /* WIDEN OPIVV with WIDEN */ |
174 | +GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t, int8_t, H1, H1, DO_SRL, 0x7, clearb) | 68 | static bool opiwv_widen_check(DisasContext *s, arg_rmrr *a) |
175 | +GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf, clearh) | 69 | @@ -XXX,XX +XXX,XX @@ GEN_OPIVX_TRANS(vrem_vx, opivx_check) |
176 | +GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, DO_SRL, 0x1f, clearl) | 70 | GEN_OPIVV_WIDEN_TRANS(vwmul_vv, opivv_widen_check) |
177 | +GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f, clearq) | 71 | GEN_OPIVV_WIDEN_TRANS(vwmulu_vv, opivv_widen_check) |
178 | + | 72 | GEN_OPIVV_WIDEN_TRANS(vwmulsu_vv, opivv_widen_check) |
179 | +/* generate the helpers for shift instructions with one vector and one scalar */ | 73 | -GEN_OPIVX_WIDEN_TRANS(vwmul_vx) |
180 | +#define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK, CLEAR_FN) \ | 74 | -GEN_OPIVX_WIDEN_TRANS(vwmulu_vx) |
181 | +void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ | 75 | -GEN_OPIVX_WIDEN_TRANS(vwmulsu_vx) |
182 | + void *vs2, CPURISCVState *env, uint32_t desc) \ | 76 | +GEN_OPIVX_WIDEN_TRANS(vwmul_vx, opivx_widen_check) |
183 | +{ \ | 77 | +GEN_OPIVX_WIDEN_TRANS(vwmulu_vx, opivx_widen_check) |
184 | + uint32_t mlen = vext_mlen(desc); \ | 78 | +GEN_OPIVX_WIDEN_TRANS(vwmulsu_vx, opivx_widen_check) |
185 | + uint32_t vm = vext_vm(desc); \ | 79 | |
186 | + uint32_t vl = env->vl; \ | 80 | /* Vector Single-Width Integer Multiply-Add Instructions */ |
187 | + uint32_t esz = sizeof(TD); \ | 81 | GEN_OPIVV_TRANS(vmacc_vv, opivv_check) |
188 | + uint32_t vlmax = vext_maxsz(desc) / esz; \ | 82 | @@ -XXX,XX +XXX,XX @@ GEN_OPIVX_TRANS(vnmsub_vx, opivx_check) |
189 | + uint32_t i; \ | 83 | GEN_OPIVV_WIDEN_TRANS(vwmaccu_vv, opivv_widen_check) |
190 | + \ | 84 | GEN_OPIVV_WIDEN_TRANS(vwmacc_vv, opivv_widen_check) |
191 | + for (i = 0; i < vl; i++) { \ | 85 | GEN_OPIVV_WIDEN_TRANS(vwmaccsu_vv, opivv_widen_check) |
192 | + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ | 86 | -GEN_OPIVX_WIDEN_TRANS(vwmaccu_vx) |
193 | + continue; \ | 87 | -GEN_OPIVX_WIDEN_TRANS(vwmacc_vx) |
194 | + } \ | 88 | -GEN_OPIVX_WIDEN_TRANS(vwmaccsu_vx) |
195 | + TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ | 89 | -GEN_OPIVX_WIDEN_TRANS(vwmaccus_vx) |
196 | + *((TD *)vd + HD(i)) = OP(s2, s1 & MASK); \ | 90 | +GEN_OPIVX_WIDEN_TRANS(vwmaccu_vx, opivx_widen_check) |
197 | + } \ | 91 | +GEN_OPIVX_WIDEN_TRANS(vwmacc_vx, opivx_widen_check) |
198 | + CLEAR_FN(vd, vl, vl * esz, vlmax * esz); \ | 92 | +GEN_OPIVX_WIDEN_TRANS(vwmaccsu_vx, opivx_widen_check) |
199 | +} | 93 | +GEN_OPIVX_WIDEN_TRANS(vwmaccus_vx, opivx_widen_check) |
200 | + | 94 | |
201 | +GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7, clearb) | 95 | /* Vector Integer Merge and Move Instructions */ |
202 | +GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf, clearh) | 96 | static bool trans_vmv_v_v(DisasContext *s, arg_vmv_v_v *a) |
203 | +GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f, clearl) | ||
204 | +GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f, clearq) | ||
205 | + | ||
206 | +GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7, clearb) | ||
207 | +GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf, clearh) | ||
208 | +GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f, clearl) | ||
209 | +GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f, clearq) | ||
210 | + | ||
211 | +GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7, clearb) | ||
212 | +GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf, clearh) | ||
213 | +GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f, clearl) | ||
214 | +GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f, clearq) | ||
215 | -- | 97 | -- |
216 | 2.27.0 | 98 | 2.41.0 |
217 | |||
218 | diff view generated by jsdifflib |
1 | From: LIU Zhiwei <zhiwei_liu@c-sky.com> | 1 | From: Kiran Ostrolenk <kiran.ostrolenk@codethink.co.uk> |
---|---|---|---|
2 | 2 | ||
3 | Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> | 3 | Move some macros out of `vector_helper` and into `vector_internals`. |
4 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 4 | This ensures they can be used by both vector and vector-crypto helpers |
5 | Reviewed-by: Alistair Francis <alistair.francis@wdc.com> | 5 | (the latter is implemented in subsequent commits). |
6 | Message-id: 20200623215920.2594-41-zhiwei_liu@c-sky.com | 6 | |
7 | Signed-off-by: Kiran Ostrolenk <kiran.ostrolenk@codethink.co.uk> | ||
8 | Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn> | ||
9 | Signed-off-by: Max Chou <max.chou@sifive.com> | ||
10 | Message-ID: <20230711165917.2629866-8-max.chou@sifive.com> | ||
7 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | 11 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> |
8 | --- | 12 | --- |
9 | target/riscv/helper.h | 4 ++ | 13 | target/riscv/vector_internals.h | 46 +++++++++++++++++++++++++++++++++ |
10 | target/riscv/internals.h | 5 ++ | 14 | target/riscv/vector_helper.c | 42 ------------------------------ |
11 | target/riscv/insn32.decode | 1 + | 15 | 2 files changed, 46 insertions(+), 42 deletions(-) |
12 | target/riscv/fpu_helper.c | 33 +-------- | ||
13 | target/riscv/insn_trans/trans_rvv.inc.c | 3 + | ||
14 | target/riscv/vector_helper.c | 91 +++++++++++++++++++++++++ | ||
15 | 6 files changed, 107 insertions(+), 30 deletions(-) | ||
16 | 16 | ||
17 | diff --git a/target/riscv/helper.h b/target/riscv/helper.h | 17 | diff --git a/target/riscv/vector_internals.h b/target/riscv/vector_internals.h |
18 | index XXXXXXX..XXXXXXX 100644 | 18 | index XXXXXXX..XXXXXXX 100644 |
19 | --- a/target/riscv/helper.h | 19 | --- a/target/riscv/vector_internals.h |
20 | +++ b/target/riscv/helper.h | 20 | +++ b/target/riscv/vector_internals.h |
21 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vmford_vv_d, void, ptr, ptr, ptr, ptr, env, i32) | 21 | @@ -XXX,XX +XXX,XX @@ void vext_set_elems_1s(void *base, uint32_t is_agnostic, uint32_t cnt, |
22 | DEF_HELPER_6(vmford_vf_h, void, ptr, ptr, i64, ptr, env, i32) | 22 | /* expand macro args before macro */ |
23 | DEF_HELPER_6(vmford_vf_w, void, ptr, ptr, i64, ptr, env, i32) | 23 | #define RVVCALL(macro, ...) macro(__VA_ARGS__) |
24 | DEF_HELPER_6(vmford_vf_d, void, ptr, ptr, i64, ptr, env, i32) | 24 | |
25 | +/* (TD, T2, TX2) */ | ||
26 | +#define OP_UU_B uint8_t, uint8_t, uint8_t | ||
27 | +#define OP_UU_H uint16_t, uint16_t, uint16_t | ||
28 | +#define OP_UU_W uint32_t, uint32_t, uint32_t | ||
29 | +#define OP_UU_D uint64_t, uint64_t, uint64_t | ||
25 | + | 30 | + |
26 | +DEF_HELPER_5(vfclass_v_h, void, ptr, ptr, ptr, env, i32) | 31 | /* (TD, T1, T2, TX1, TX2) */ |
27 | +DEF_HELPER_5(vfclass_v_w, void, ptr, ptr, ptr, env, i32) | 32 | #define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t |
28 | +DEF_HELPER_5(vfclass_v_d, void, ptr, ptr, ptr, env, i32) | 33 | #define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t |
29 | diff --git a/target/riscv/internals.h b/target/riscv/internals.h | 34 | #define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t |
30 | index XXXXXXX..XXXXXXX 100644 | 35 | #define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t |
31 | --- a/target/riscv/internals.h | 36 | |
32 | +++ b/target/riscv/internals.h | ||
33 | @@ -XXX,XX +XXX,XX @@ FIELD(VDATA, VM, 8, 1) | ||
34 | FIELD(VDATA, LMUL, 9, 2) | ||
35 | FIELD(VDATA, NF, 11, 4) | ||
36 | FIELD(VDATA, WD, 11, 1) | ||
37 | + | ||
38 | +/* float point classify helpers */ | ||
39 | +target_ulong fclass_h(uint64_t frs1); | ||
40 | +target_ulong fclass_s(uint64_t frs1); | ||
41 | +target_ulong fclass_d(uint64_t frs1); | ||
42 | #endif | ||
43 | diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode | ||
44 | index XXXXXXX..XXXXXXX 100644 | ||
45 | --- a/target/riscv/insn32.decode | ||
46 | +++ b/target/riscv/insn32.decode | ||
47 | @@ -XXX,XX +XXX,XX @@ vmfgt_vf 011101 . ..... ..... 101 ..... 1010111 @r_vm | ||
48 | vmfge_vf 011111 . ..... ..... 101 ..... 1010111 @r_vm | ||
49 | vmford_vv 011010 . ..... ..... 001 ..... 1010111 @r_vm | ||
50 | vmford_vf 011010 . ..... ..... 101 ..... 1010111 @r_vm | ||
51 | +vfclass_v 100011 . ..... 10000 001 ..... 1010111 @r2_vm | ||
52 | |||
53 | vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm | ||
54 | vsetvl 1000000 ..... ..... 111 ..... 1010111 @r | ||
55 | diff --git a/target/riscv/fpu_helper.c b/target/riscv/fpu_helper.c | ||
56 | index XXXXXXX..XXXXXXX 100644 | ||
57 | --- a/target/riscv/fpu_helper.c | ||
58 | +++ b/target/riscv/fpu_helper.c | ||
59 | @@ -XXX,XX +XXX,XX @@ | ||
60 | #include "exec/exec-all.h" | ||
61 | #include "exec/helper-proto.h" | ||
62 | #include "fpu/softfloat.h" | ||
63 | +#include "internals.h" | ||
64 | |||
65 | target_ulong riscv_cpu_get_fflags(CPURISCVState *env) | ||
66 | { | ||
67 | @@ -XXX,XX +XXX,XX @@ uint64_t helper_fcvt_s_lu(CPURISCVState *env, uint64_t rs1) | ||
68 | |||
69 | target_ulong helper_fclass_s(uint64_t frs1) | ||
70 | { | ||
71 | - float32 f = frs1; | ||
72 | - bool sign = float32_is_neg(f); | ||
73 | - | ||
74 | - if (float32_is_infinity(f)) { | ||
75 | - return sign ? 1 << 0 : 1 << 7; | ||
76 | - } else if (float32_is_zero(f)) { | ||
77 | - return sign ? 1 << 3 : 1 << 4; | ||
78 | - } else if (float32_is_zero_or_denormal(f)) { | ||
79 | - return sign ? 1 << 2 : 1 << 5; | ||
80 | - } else if (float32_is_any_nan(f)) { | ||
81 | - float_status s = { }; /* for snan_bit_is_one */ | ||
82 | - return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; | ||
83 | - } else { | ||
84 | - return sign ? 1 << 1 : 1 << 6; | ||
85 | - } | ||
86 | + return fclass_s(frs1); | ||
87 | } | ||
88 | |||
89 | uint64_t helper_fadd_d(CPURISCVState *env, uint64_t frs1, uint64_t frs2) | ||
90 | @@ -XXX,XX +XXX,XX @@ uint64_t helper_fcvt_d_lu(CPURISCVState *env, uint64_t rs1) | ||
91 | |||
92 | target_ulong helper_fclass_d(uint64_t frs1) | ||
93 | { | ||
94 | - float64 f = frs1; | ||
95 | - bool sign = float64_is_neg(f); | ||
96 | - | ||
97 | - if (float64_is_infinity(f)) { | ||
98 | - return sign ? 1 << 0 : 1 << 7; | ||
99 | - } else if (float64_is_zero(f)) { | ||
100 | - return sign ? 1 << 3 : 1 << 4; | ||
101 | - } else if (float64_is_zero_or_denormal(f)) { | ||
102 | - return sign ? 1 << 2 : 1 << 5; | ||
103 | - } else if (float64_is_any_nan(f)) { | ||
104 | - float_status s = { }; /* for snan_bit_is_one */ | ||
105 | - return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; | ||
106 | - } else { | ||
107 | - return sign ? 1 << 1 : 1 << 6; | ||
108 | - } | ||
109 | + return fclass_d(frs1); | ||
110 | } | ||
111 | diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c | ||
112 | index XXXXXXX..XXXXXXX 100644 | ||
113 | --- a/target/riscv/insn_trans/trans_rvv.inc.c | ||
114 | +++ b/target/riscv/insn_trans/trans_rvv.inc.c | ||
115 | @@ -XXX,XX +XXX,XX @@ GEN_OPFVF_TRANS(vmfle_vf, opfvf_cmp_check) | ||
116 | GEN_OPFVF_TRANS(vmfgt_vf, opfvf_cmp_check) | ||
117 | GEN_OPFVF_TRANS(vmfge_vf, opfvf_cmp_check) | ||
118 | GEN_OPFVF_TRANS(vmford_vf, opfvf_cmp_check) | ||
119 | + | ||
120 | +/* Vector Floating-Point Classify Instruction */ | ||
121 | +GEN_OPFV_TRANS(vfclass_v, opfv_check) | ||
122 | diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c | ||
123 | index XXXXXXX..XXXXXXX 100644 | ||
124 | --- a/target/riscv/vector_helper.c | ||
125 | +++ b/target/riscv/vector_helper.c | ||
126 | @@ -XXX,XX +XXX,XX @@ GEN_VEXT_CMP_VV_ENV(vmford_vv_d, uint64_t, H8, !float64_unordered_quiet) | ||
127 | GEN_VEXT_CMP_VF(vmford_vf_h, uint16_t, H2, !float16_unordered_quiet) | ||
128 | GEN_VEXT_CMP_VF(vmford_vf_w, uint32_t, H4, !float32_unordered_quiet) | ||
129 | GEN_VEXT_CMP_VF(vmford_vf_d, uint64_t, H8, !float64_unordered_quiet) | ||
130 | + | ||
131 | +/* Vector Floating-Point Classify Instruction */ | ||
132 | +#define OPIVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ | 37 | +#define OPIVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ |
133 | +static void do_##NAME(void *vd, void *vs2, int i) \ | 38 | +static void do_##NAME(void *vd, void *vs2, int i) \ |
134 | +{ \ | 39 | +{ \ |
135 | + TX2 s2 = *((T2 *)vs2 + HS2(i)); \ | 40 | + TX2 s2 = *((T2 *)vs2 + HS2(i)); \ |
136 | + *((TD *)vd + HD(i)) = OP(s2); \ | 41 | + *((TD *)vd + HD(i)) = OP(s2); \ |
137 | +} | 42 | +} |
138 | + | 43 | + |
139 | +#define GEN_VEXT_V(NAME, ESZ, DSZ, CLEAR_FN) \ | 44 | +#define GEN_VEXT_V(NAME, ESZ) \ |
140 | +void HELPER(NAME)(void *vd, void *v0, void *vs2, \ | 45 | +void HELPER(NAME)(void *vd, void *v0, void *vs2, \ |
141 | + CPURISCVState *env, uint32_t desc) \ | 46 | + CPURISCVState *env, uint32_t desc) \ |
142 | +{ \ | 47 | +{ \ |
143 | + uint32_t vlmax = vext_maxsz(desc) / ESZ; \ | ||
144 | + uint32_t mlen = vext_mlen(desc); \ | ||
145 | + uint32_t vm = vext_vm(desc); \ | 48 | + uint32_t vm = vext_vm(desc); \ |
146 | + uint32_t vl = env->vl; \ | 49 | + uint32_t vl = env->vl; \ |
50 | + uint32_t total_elems = \ | ||
51 | + vext_get_total_elems(env, desc, ESZ); \ | ||
52 | + uint32_t vta = vext_vta(desc); \ | ||
53 | + uint32_t vma = vext_vma(desc); \ | ||
147 | + uint32_t i; \ | 54 | + uint32_t i; \ |
148 | + \ | 55 | + \ |
149 | + for (i = 0; i < vl; i++) { \ | 56 | + for (i = env->vstart; i < vl; i++) { \ |
150 | + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ | 57 | + if (!vm && !vext_elem_mask(v0, i)) { \ |
58 | + /* set masked-off elements to 1s */ \ | ||
59 | + vext_set_elems_1s(vd, vma, i * ESZ, \ | ||
60 | + (i + 1) * ESZ); \ | ||
151 | + continue; \ | 61 | + continue; \ |
152 | + } \ | 62 | + } \ |
153 | + do_##NAME(vd, vs2, i); \ | 63 | + do_##NAME(vd, vs2, i); \ |
154 | + } \ | 64 | + } \ |
155 | + CLEAR_FN(vd, vl, vl * DSZ, vlmax * DSZ); \ | 65 | + env->vstart = 0; \ |
66 | + /* set tail elements to 1s */ \ | ||
67 | + vext_set_elems_1s(vd, vta, vl * ESZ, \ | ||
68 | + total_elems * ESZ); \ | ||
156 | +} | 69 | +} |
157 | + | 70 | + |
158 | +target_ulong fclass_h(uint64_t frs1) | 71 | /* operation of two vector elements */ |
159 | +{ | 72 | typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i); |
160 | + float16 f = frs1; | 73 | |
161 | + bool sign = float16_is_neg(f); | 74 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ |
75 | do_##NAME, ESZ); \ | ||
76 | } | ||
77 | |||
78 | +/* Three of the widening shortening macros: */ | ||
79 | +/* (TD, T1, T2, TX1, TX2) */ | ||
80 | +#define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t | ||
81 | +#define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t | ||
82 | +#define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t | ||
162 | + | 83 | + |
163 | + if (float16_is_infinity(f)) { | 84 | #endif /* TARGET_RISCV_VECTOR_INTERNALS_H */ |
164 | + return sign ? 1 << 0 : 1 << 7; | 85 | diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c |
165 | + } else if (float16_is_zero(f)) { | 86 | index XXXXXXX..XXXXXXX 100644 |
166 | + return sign ? 1 << 3 : 1 << 4; | 87 | --- a/target/riscv/vector_helper.c |
167 | + } else if (float16_is_zero_or_denormal(f)) { | 88 | +++ b/target/riscv/vector_helper.c |
168 | + return sign ? 1 << 2 : 1 << 5; | 89 | @@ -XXX,XX +XXX,XX @@ GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b) |
169 | + } else if (float16_is_any_nan(f)) { | 90 | #define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t |
170 | + float_status s = { }; /* for snan_bit_is_one */ | 91 | #define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t |
171 | + return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; | 92 | #define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t |
172 | + } else { | 93 | -#define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t |
173 | + return sign ? 1 << 1 : 1 << 6; | 94 | -#define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t |
174 | + } | 95 | -#define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t |
175 | +} | 96 | #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t |
176 | + | 97 | #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t |
177 | +target_ulong fclass_s(uint64_t frs1) | 98 | #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t |
178 | +{ | 99 | @@ -XXX,XX +XXX,XX @@ GEN_VEXT_VF(vfwnmsac_vf_h, 4) |
179 | + float32 f = frs1; | 100 | GEN_VEXT_VF(vfwnmsac_vf_w, 8) |
180 | + bool sign = float32_is_neg(f); | 101 | |
181 | + | 102 | /* Vector Floating-Point Square-Root Instruction */ |
182 | + if (float32_is_infinity(f)) { | 103 | -/* (TD, T2, TX2) */ |
183 | + return sign ? 1 << 0 : 1 << 7; | 104 | -#define OP_UU_H uint16_t, uint16_t, uint16_t |
184 | + } else if (float32_is_zero(f)) { | 105 | -#define OP_UU_W uint32_t, uint32_t, uint32_t |
185 | + return sign ? 1 << 3 : 1 << 4; | 106 | -#define OP_UU_D uint64_t, uint64_t, uint64_t |
186 | + } else if (float32_is_zero_or_denormal(f)) { | 107 | - |
187 | + return sign ? 1 << 2 : 1 << 5; | 108 | #define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ |
188 | + } else if (float32_is_any_nan(f)) { | 109 | static void do_##NAME(void *vd, void *vs2, int i, \ |
189 | + float_status s = { }; /* for snan_bit_is_one */ | 110 | CPURISCVState *env) \ |
190 | + return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; | 111 | @@ -XXX,XX +XXX,XX @@ GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32) |
191 | + } else { | 112 | GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64) |
192 | + return sign ? 1 << 1 : 1 << 6; | 113 | |
193 | + } | 114 | /* Vector Floating-Point Classify Instruction */ |
194 | +} | 115 | -#define OPIVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ |
195 | + | 116 | -static void do_##NAME(void *vd, void *vs2, int i) \ |
196 | +target_ulong fclass_d(uint64_t frs1) | 117 | -{ \ |
197 | +{ | 118 | - TX2 s2 = *((T2 *)vs2 + HS2(i)); \ |
198 | + float64 f = frs1; | 119 | - *((TD *)vd + HD(i)) = OP(s2); \ |
199 | + bool sign = float64_is_neg(f); | 120 | -} |
200 | + | 121 | - |
201 | + if (float64_is_infinity(f)) { | 122 | -#define GEN_VEXT_V(NAME, ESZ) \ |
202 | + return sign ? 1 << 0 : 1 << 7; | 123 | -void HELPER(NAME)(void *vd, void *v0, void *vs2, \ |
203 | + } else if (float64_is_zero(f)) { | 124 | - CPURISCVState *env, uint32_t desc) \ |
204 | + return sign ? 1 << 3 : 1 << 4; | 125 | -{ \ |
205 | + } else if (float64_is_zero_or_denormal(f)) { | 126 | - uint32_t vm = vext_vm(desc); \ |
206 | + return sign ? 1 << 2 : 1 << 5; | 127 | - uint32_t vl = env->vl; \ |
207 | + } else if (float64_is_any_nan(f)) { | 128 | - uint32_t total_elems = \ |
208 | + float_status s = { }; /* for snan_bit_is_one */ | 129 | - vext_get_total_elems(env, desc, ESZ); \ |
209 | + return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; | 130 | - uint32_t vta = vext_vta(desc); \ |
210 | + } else { | 131 | - uint32_t vma = vext_vma(desc); \ |
211 | + return sign ? 1 << 1 : 1 << 6; | 132 | - uint32_t i; \ |
212 | + } | 133 | - \ |
213 | +} | 134 | - for (i = env->vstart; i < vl; i++) { \ |
214 | + | 135 | - if (!vm && !vext_elem_mask(v0, i)) { \ |
215 | +RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h) | 136 | - /* set masked-off elements to 1s */ \ |
216 | +RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s) | 137 | - vext_set_elems_1s(vd, vma, i * ESZ, \ |
217 | +RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d) | 138 | - (i + 1) * ESZ); \ |
218 | +GEN_VEXT_V(vfclass_v_h, 2, 2, clearh) | 139 | - continue; \ |
219 | +GEN_VEXT_V(vfclass_v_w, 4, 4, clearl) | 140 | - } \ |
220 | +GEN_VEXT_V(vfclass_v_d, 8, 8, clearq) | 141 | - do_##NAME(vd, vs2, i); \ |
142 | - } \ | ||
143 | - env->vstart = 0; \ | ||
144 | - /* set tail elements to 1s */ \ | ||
145 | - vext_set_elems_1s(vd, vta, vl * ESZ, \ | ||
146 | - total_elems * ESZ); \ | ||
147 | -} | ||
148 | - | ||
149 | target_ulong fclass_h(uint64_t frs1) | ||
150 | { | ||
151 | float16 f = frs1; | ||
221 | -- | 152 | -- |
222 | 2.27.0 | 153 | 2.41.0 |
223 | |||
224 | diff view generated by jsdifflib |
1 | From: LIU Zhiwei <zhiwei_liu@c-sky.com> | 1 | From: Dickon Hood <dickon.hood@codethink.co.uk> |
---|---|---|---|
2 | 2 | ||
3 | Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> | 3 | This commit adds support for the Zvbb vector-crypto extension, which |
4 | Reviewed-by: Alistair Francis <alistair.francis@wdc.com> | 4 | consists of the following instructions: |
5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 5 | |
6 | Message-id: 20200623215920.2594-24-zhiwei_liu@c-sky.com | 6 | * vrol.[vv,vx] |
7 | * vror.[vv,vx,vi] | ||
8 | * vbrev8.v | ||
9 | * vrev8.v | ||
10 | * vandn.[vv,vx] | ||
11 | * vbrev.v | ||
12 | * vclz.v | ||
13 | * vctz.v | ||
14 | * vcpop.v | ||
15 | * vwsll.[vv,vx,vi] | ||
16 | |||
17 | Translation functions are defined in | ||
18 | `target/riscv/insn_trans/trans_rvvk.c.inc` and helpers are defined in | ||
19 | `target/riscv/vcrypto_helper.c`. | ||
20 | |||
21 | Co-authored-by: Nazar Kazakov <nazar.kazakov@codethink.co.uk> | ||
22 | Co-authored-by: William Salmon <will.salmon@codethink.co.uk> | ||
23 | Co-authored-by: Kiran Ostrolenk <kiran.ostrolenk@codethink.co.uk> | ||
24 | [max.chou@sifive.com: Fix imm mode of vror.vi] | ||
25 | Signed-off-by: Nazar Kazakov <nazar.kazakov@codethink.co.uk> | ||
26 | Signed-off-by: William Salmon <will.salmon@codethink.co.uk> | ||
27 | Signed-off-by: Kiran Ostrolenk <kiran.ostrolenk@codethink.co.uk> | ||
28 | Signed-off-by: Dickon Hood <dickon.hood@codethink.co.uk> | ||
29 | Signed-off-by: Max Chou <max.chou@sifive.com> | ||
30 | Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> | ||
31 | [max.chou@sifive.com: Exposed x-zvbb property] | ||
32 | Message-ID: <20230711165917.2629866-9-max.chou@sifive.com> | ||
7 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | 33 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> |
8 | --- | 34 | --- |
9 | target/riscv/helper.h | 17 ++++ | 35 | target/riscv/cpu_cfg.h | 1 + |
10 | target/riscv/insn32.decode | 7 ++ | 36 | target/riscv/helper.h | 62 +++++++++ |
11 | target/riscv/insn_trans/trans_rvv.inc.c | 113 ++++++++++++++++++++++++ | 37 | target/riscv/insn32.decode | 20 +++ |
12 | target/riscv/vector_helper.c | 88 ++++++++++++++++++ | 38 | target/riscv/cpu.c | 12 ++ |
13 | 4 files changed, 225 insertions(+) | 39 | target/riscv/vcrypto_helper.c | 138 +++++++++++++++++++ |
40 | target/riscv/insn_trans/trans_rvvk.c.inc | 164 +++++++++++++++++++++++ | ||
41 | 6 files changed, 397 insertions(+) | ||
14 | 42 | ||
43 | diff --git a/target/riscv/cpu_cfg.h b/target/riscv/cpu_cfg.h | ||
44 | index XXXXXXX..XXXXXXX 100644 | ||
45 | --- a/target/riscv/cpu_cfg.h | ||
46 | +++ b/target/riscv/cpu_cfg.h | ||
47 | @@ -XXX,XX +XXX,XX @@ struct RISCVCPUConfig { | ||
48 | bool ext_zve32f; | ||
49 | bool ext_zve64f; | ||
50 | bool ext_zve64d; | ||
51 | + bool ext_zvbb; | ||
52 | bool ext_zvbc; | ||
53 | bool ext_zmmul; | ||
54 | bool ext_zvfbfmin; | ||
15 | diff --git a/target/riscv/helper.h b/target/riscv/helper.h | 55 | diff --git a/target/riscv/helper.h b/target/riscv/helper.h |
16 | index XXXXXXX..XXXXXXX 100644 | 56 | index XXXXXXX..XXXXXXX 100644 |
17 | --- a/target/riscv/helper.h | 57 | --- a/target/riscv/helper.h |
18 | +++ b/target/riscv/helper.h | 58 | +++ b/target/riscv/helper.h |
19 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vwmaccsu_vx_w, void, ptr, ptr, tl, ptr, env, i32) | 59 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vclmul_vv, void, ptr, ptr, ptr, ptr, env, i32) |
20 | DEF_HELPER_6(vwmaccus_vx_b, void, ptr, ptr, tl, ptr, env, i32) | 60 | DEF_HELPER_6(vclmul_vx, void, ptr, ptr, tl, ptr, env, i32) |
21 | DEF_HELPER_6(vwmaccus_vx_h, void, ptr, ptr, tl, ptr, env, i32) | 61 | DEF_HELPER_6(vclmulh_vv, void, ptr, ptr, ptr, ptr, env, i32) |
22 | DEF_HELPER_6(vwmaccus_vx_w, void, ptr, ptr, tl, ptr, env, i32) | 62 | DEF_HELPER_6(vclmulh_vx, void, ptr, ptr, tl, ptr, env, i32) |
23 | + | 63 | + |
24 | +DEF_HELPER_6(vmerge_vvm_b, void, ptr, ptr, ptr, ptr, env, i32) | 64 | +DEF_HELPER_6(vror_vv_b, void, ptr, ptr, ptr, ptr, env, i32) |
25 | +DEF_HELPER_6(vmerge_vvm_h, void, ptr, ptr, ptr, ptr, env, i32) | 65 | +DEF_HELPER_6(vror_vv_h, void, ptr, ptr, ptr, ptr, env, i32) |
26 | +DEF_HELPER_6(vmerge_vvm_w, void, ptr, ptr, ptr, ptr, env, i32) | 66 | +DEF_HELPER_6(vror_vv_w, void, ptr, ptr, ptr, ptr, env, i32) |
27 | +DEF_HELPER_6(vmerge_vvm_d, void, ptr, ptr, ptr, ptr, env, i32) | 67 | +DEF_HELPER_6(vror_vv_d, void, ptr, ptr, ptr, ptr, env, i32) |
28 | +DEF_HELPER_6(vmerge_vxm_b, void, ptr, ptr, tl, ptr, env, i32) | 68 | + |
29 | +DEF_HELPER_6(vmerge_vxm_h, void, ptr, ptr, tl, ptr, env, i32) | 69 | +DEF_HELPER_6(vror_vx_b, void, ptr, ptr, tl, ptr, env, i32) |
30 | +DEF_HELPER_6(vmerge_vxm_w, void, ptr, ptr, tl, ptr, env, i32) | 70 | +DEF_HELPER_6(vror_vx_h, void, ptr, ptr, tl, ptr, env, i32) |
31 | +DEF_HELPER_6(vmerge_vxm_d, void, ptr, ptr, tl, ptr, env, i32) | 71 | +DEF_HELPER_6(vror_vx_w, void, ptr, ptr, tl, ptr, env, i32) |
32 | +DEF_HELPER_4(vmv_v_v_b, void, ptr, ptr, env, i32) | 72 | +DEF_HELPER_6(vror_vx_d, void, ptr, ptr, tl, ptr, env, i32) |
33 | +DEF_HELPER_4(vmv_v_v_h, void, ptr, ptr, env, i32) | 73 | + |
34 | +DEF_HELPER_4(vmv_v_v_w, void, ptr, ptr, env, i32) | 74 | +DEF_HELPER_6(vrol_vv_b, void, ptr, ptr, ptr, ptr, env, i32) |
35 | +DEF_HELPER_4(vmv_v_v_d, void, ptr, ptr, env, i32) | 75 | +DEF_HELPER_6(vrol_vv_h, void, ptr, ptr, ptr, ptr, env, i32) |
36 | +DEF_HELPER_4(vmv_v_x_b, void, ptr, i64, env, i32) | 76 | +DEF_HELPER_6(vrol_vv_w, void, ptr, ptr, ptr, ptr, env, i32) |
37 | +DEF_HELPER_4(vmv_v_x_h, void, ptr, i64, env, i32) | 77 | +DEF_HELPER_6(vrol_vv_d, void, ptr, ptr, ptr, ptr, env, i32) |
38 | +DEF_HELPER_4(vmv_v_x_w, void, ptr, i64, env, i32) | 78 | + |
39 | +DEF_HELPER_4(vmv_v_x_d, void, ptr, i64, env, i32) | 79 | +DEF_HELPER_6(vrol_vx_b, void, ptr, ptr, tl, ptr, env, i32) |
80 | +DEF_HELPER_6(vrol_vx_h, void, ptr, ptr, tl, ptr, env, i32) | ||
81 | +DEF_HELPER_6(vrol_vx_w, void, ptr, ptr, tl, ptr, env, i32) | ||
82 | +DEF_HELPER_6(vrol_vx_d, void, ptr, ptr, tl, ptr, env, i32) | ||
83 | + | ||
84 | +DEF_HELPER_5(vrev8_v_b, void, ptr, ptr, ptr, env, i32) | ||
85 | +DEF_HELPER_5(vrev8_v_h, void, ptr, ptr, ptr, env, i32) | ||
86 | +DEF_HELPER_5(vrev8_v_w, void, ptr, ptr, ptr, env, i32) | ||
87 | +DEF_HELPER_5(vrev8_v_d, void, ptr, ptr, ptr, env, i32) | ||
88 | +DEF_HELPER_5(vbrev8_v_b, void, ptr, ptr, ptr, env, i32) | ||
89 | +DEF_HELPER_5(vbrev8_v_h, void, ptr, ptr, ptr, env, i32) | ||
90 | +DEF_HELPER_5(vbrev8_v_w, void, ptr, ptr, ptr, env, i32) | ||
91 | +DEF_HELPER_5(vbrev8_v_d, void, ptr, ptr, ptr, env, i32) | ||
92 | +DEF_HELPER_5(vbrev_v_b, void, ptr, ptr, ptr, env, i32) | ||
93 | +DEF_HELPER_5(vbrev_v_h, void, ptr, ptr, ptr, env, i32) | ||
94 | +DEF_HELPER_5(vbrev_v_w, void, ptr, ptr, ptr, env, i32) | ||
95 | +DEF_HELPER_5(vbrev_v_d, void, ptr, ptr, ptr, env, i32) | ||
96 | + | ||
97 | +DEF_HELPER_5(vclz_v_b, void, ptr, ptr, ptr, env, i32) | ||
98 | +DEF_HELPER_5(vclz_v_h, void, ptr, ptr, ptr, env, i32) | ||
99 | +DEF_HELPER_5(vclz_v_w, void, ptr, ptr, ptr, env, i32) | ||
100 | +DEF_HELPER_5(vclz_v_d, void, ptr, ptr, ptr, env, i32) | ||
101 | +DEF_HELPER_5(vctz_v_b, void, ptr, ptr, ptr, env, i32) | ||
102 | +DEF_HELPER_5(vctz_v_h, void, ptr, ptr, ptr, env, i32) | ||
103 | +DEF_HELPER_5(vctz_v_w, void, ptr, ptr, ptr, env, i32) | ||
104 | +DEF_HELPER_5(vctz_v_d, void, ptr, ptr, ptr, env, i32) | ||
105 | +DEF_HELPER_5(vcpop_v_b, void, ptr, ptr, ptr, env, i32) | ||
106 | +DEF_HELPER_5(vcpop_v_h, void, ptr, ptr, ptr, env, i32) | ||
107 | +DEF_HELPER_5(vcpop_v_w, void, ptr, ptr, ptr, env, i32) | ||
108 | +DEF_HELPER_5(vcpop_v_d, void, ptr, ptr, ptr, env, i32) | ||
109 | + | ||
110 | +DEF_HELPER_6(vwsll_vv_b, void, ptr, ptr, ptr, ptr, env, i32) | ||
111 | +DEF_HELPER_6(vwsll_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | ||
112 | +DEF_HELPER_6(vwsll_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
113 | +DEF_HELPER_6(vwsll_vx_b, void, ptr, ptr, tl, ptr, env, i32) | ||
114 | +DEF_HELPER_6(vwsll_vx_h, void, ptr, ptr, tl, ptr, env, i32) | ||
115 | +DEF_HELPER_6(vwsll_vx_w, void, ptr, ptr, tl, ptr, env, i32) | ||
116 | + | ||
117 | +DEF_HELPER_6(vandn_vv_b, void, ptr, ptr, ptr, ptr, env, i32) | ||
118 | +DEF_HELPER_6(vandn_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | ||
119 | +DEF_HELPER_6(vandn_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
120 | +DEF_HELPER_6(vandn_vv_d, void, ptr, ptr, ptr, ptr, env, i32) | ||
121 | +DEF_HELPER_6(vandn_vx_b, void, ptr, ptr, tl, ptr, env, i32) | ||
122 | +DEF_HELPER_6(vandn_vx_h, void, ptr, ptr, tl, ptr, env, i32) | ||
123 | +DEF_HELPER_6(vandn_vx_w, void, ptr, ptr, tl, ptr, env, i32) | ||
124 | +DEF_HELPER_6(vandn_vx_d, void, ptr, ptr, tl, ptr, env, i32) | ||
40 | diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode | 125 | diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode |
41 | index XXXXXXX..XXXXXXX 100644 | 126 | index XXXXXXX..XXXXXXX 100644 |
42 | --- a/target/riscv/insn32.decode | 127 | --- a/target/riscv/insn32.decode |
43 | +++ b/target/riscv/insn32.decode | 128 | +++ b/target/riscv/insn32.decode |
44 | @@ -XXX,XX +XXX,XX @@ | 129 | @@ -XXX,XX +XXX,XX @@ |
45 | @r_nfvm ... ... vm:1 ..... ..... ... ..... ....... &rnfvm %nf %rs2 %rs1 %rd | 130 | %imm_u 12:s20 !function=ex_shift_12 |
131 | %imm_bs 30:2 !function=ex_shift_3 | ||
132 | %imm_rnum 20:4 | ||
133 | +%imm_z6 26:1 15:5 | ||
134 | |||
135 | # Argument sets: | ||
136 | &empty | ||
137 | @@ -XXX,XX +XXX,XX @@ | ||
46 | @r_vm ...... vm:1 ..... ..... ... ..... ....... &rmrr %rs2 %rs1 %rd | 138 | @r_vm ...... vm:1 ..... ..... ... ..... ....... &rmrr %rs2 %rs1 %rd |
47 | @r_vm_1 ...... . ..... ..... ... ..... ....... &rmrr vm=1 %rs2 %rs1 %rd | 139 | @r_vm_1 ...... . ..... ..... ... ..... ....... &rmrr vm=1 %rs2 %rs1 %rd |
48 | +@r_vm_0 ...... . ..... ..... ... ..... ....... &rmrr vm=0 %rs2 %rs1 %rd | 140 | @r_vm_0 ...... . ..... ..... ... ..... ....... &rmrr vm=0 %rs2 %rs1 %rd |
49 | @r_wdvm ..... wd:1 vm:1 ..... ..... ... ..... ....... &rwdvm %rs2 %rs1 %rd | 141 | +@r2_zimm6 ..... . vm:1 ..... ..... ... ..... ....... &rmrr %rs2 rs1=%imm_z6 %rd |
50 | @r2_zimm . zimm:11 ..... ... ..... ....... %rs1 %rd | 142 | @r2_zimm11 . zimm:11 ..... ... ..... ....... %rs1 %rd |
51 | 143 | @r2_zimm10 .. zimm:10 ..... ... ..... ....... %rs1 %rd | |
52 | @@ -XXX,XX +XXX,XX @@ vwmacc_vx 111101 . ..... ..... 110 ..... 1010111 @r_vm | 144 | @r2_s ....... ..... ..... ... ..... ....... %rs2 %rs1 |
53 | vwmaccsu_vv 111110 . ..... ..... 010 ..... 1010111 @r_vm | 145 | @@ -XXX,XX +XXX,XX @@ vclmul_vv 001100 . ..... ..... 010 ..... 1010111 @r_vm |
54 | vwmaccsu_vx 111110 . ..... ..... 110 ..... 1010111 @r_vm | 146 | vclmul_vx 001100 . ..... ..... 110 ..... 1010111 @r_vm |
55 | vwmaccus_vx 111111 . ..... ..... 110 ..... 1010111 @r_vm | 147 | vclmulh_vv 001101 . ..... ..... 010 ..... 1010111 @r_vm |
56 | +vmv_v_v 010111 1 00000 ..... 000 ..... 1010111 @r2 | 148 | vclmulh_vx 001101 . ..... ..... 110 ..... 1010111 @r_vm |
57 | +vmv_v_x 010111 1 00000 ..... 100 ..... 1010111 @r2 | 149 | + |
58 | +vmv_v_i 010111 1 00000 ..... 011 ..... 1010111 @r2 | 150 | +# *** Zvbb vector crypto extension *** |
59 | +vmerge_vvm 010111 0 ..... ..... 000 ..... 1010111 @r_vm_0 | 151 | +vrol_vv 010101 . ..... ..... 000 ..... 1010111 @r_vm |
60 | +vmerge_vxm 010111 0 ..... ..... 100 ..... 1010111 @r_vm_0 | 152 | +vrol_vx 010101 . ..... ..... 100 ..... 1010111 @r_vm |
61 | +vmerge_vim 010111 0 ..... ..... 011 ..... 1010111 @r_vm_0 | 153 | +vror_vv 010100 . ..... ..... 000 ..... 1010111 @r_vm |
62 | 154 | +vror_vx 010100 . ..... ..... 100 ..... 1010111 @r_vm | |
63 | vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm | 155 | +vror_vi 01010. . ..... ..... 011 ..... 1010111 @r2_zimm6 |
64 | vsetvl 1000000 ..... ..... 111 ..... 1010111 @r | 156 | +vbrev8_v 010010 . ..... 01000 010 ..... 1010111 @r2_vm |
65 | diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c | 157 | +vrev8_v 010010 . ..... 01001 010 ..... 1010111 @r2_vm |
158 | +vandn_vv 000001 . ..... ..... 000 ..... 1010111 @r_vm | ||
159 | +vandn_vx 000001 . ..... ..... 100 ..... 1010111 @r_vm | ||
160 | +vbrev_v 010010 . ..... 01010 010 ..... 1010111 @r2_vm | ||
161 | +vclz_v 010010 . ..... 01100 010 ..... 1010111 @r2_vm | ||
162 | +vctz_v 010010 . ..... 01101 010 ..... 1010111 @r2_vm | ||
163 | +vcpop_v 010010 . ..... 01110 010 ..... 1010111 @r2_vm | ||
164 | +vwsll_vv 110101 . ..... ..... 000 ..... 1010111 @r_vm | ||
165 | +vwsll_vx 110101 . ..... ..... 100 ..... 1010111 @r_vm | ||
166 | +vwsll_vi 110101 . ..... ..... 011 ..... 1010111 @r_vm | ||
167 | diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c | ||
66 | index XXXXXXX..XXXXXXX 100644 | 168 | index XXXXXXX..XXXXXXX 100644 |
67 | --- a/target/riscv/insn_trans/trans_rvv.inc.c | 169 | --- a/target/riscv/cpu.c |
68 | +++ b/target/riscv/insn_trans/trans_rvv.inc.c | 170 | +++ b/target/riscv/cpu.c |
69 | @@ -XXX,XX +XXX,XX @@ GEN_OPIVX_WIDEN_TRANS(vwmaccu_vx) | 171 | @@ -XXX,XX +XXX,XX @@ static const struct isa_ext_data isa_edata_arr[] = { |
70 | GEN_OPIVX_WIDEN_TRANS(vwmacc_vx) | 172 | ISA_EXT_DATA_ENTRY(zksed, PRIV_VERSION_1_12_0, ext_zksed), |
71 | GEN_OPIVX_WIDEN_TRANS(vwmaccsu_vx) | 173 | ISA_EXT_DATA_ENTRY(zksh, PRIV_VERSION_1_12_0, ext_zksh), |
72 | GEN_OPIVX_WIDEN_TRANS(vwmaccus_vx) | 174 | ISA_EXT_DATA_ENTRY(zkt, PRIV_VERSION_1_12_0, ext_zkt), |
73 | + | 175 | + ISA_EXT_DATA_ENTRY(zvbb, PRIV_VERSION_1_12_0, ext_zvbb), |
74 | +/* Vector Integer Merge and Move Instructions */ | 176 | ISA_EXT_DATA_ENTRY(zvbc, PRIV_VERSION_1_12_0, ext_zvbc), |
75 | +static bool trans_vmv_v_v(DisasContext *s, arg_vmv_v_v *a) | 177 | ISA_EXT_DATA_ENTRY(zve32f, PRIV_VERSION_1_10_0, ext_zve32f), |
178 | ISA_EXT_DATA_ENTRY(zve64f, PRIV_VERSION_1_10_0, ext_zve64f), | ||
179 | @@ -XXX,XX +XXX,XX @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp) | ||
180 | return; | ||
181 | } | ||
182 | |||
183 | + /* | ||
184 | + * In principle Zve*x would also suffice here, were they supported | ||
185 | + * in qemu | ||
186 | + */ | ||
187 | + if (cpu->cfg.ext_zvbb && !cpu->cfg.ext_zve32f) { | ||
188 | + error_setg(errp, | ||
189 | + "Vector crypto extensions require V or Zve* extensions"); | ||
190 | + return; | ||
191 | + } | ||
192 | + | ||
193 | if (cpu->cfg.ext_zvbc && !cpu->cfg.ext_zve64f) { | ||
194 | error_setg(errp, "Zvbc extension requires V or Zve64{f,d} extensions"); | ||
195 | return; | ||
196 | @@ -XXX,XX +XXX,XX @@ static Property riscv_cpu_extensions[] = { | ||
197 | DEFINE_PROP_BOOL("x-zvfbfwma", RISCVCPU, cfg.ext_zvfbfwma, false), | ||
198 | |||
199 | /* Vector cryptography extensions */ | ||
200 | + DEFINE_PROP_BOOL("x-zvbb", RISCVCPU, cfg.ext_zvbb, false), | ||
201 | DEFINE_PROP_BOOL("x-zvbc", RISCVCPU, cfg.ext_zvbc, false), | ||
202 | |||
203 | DEFINE_PROP_END_OF_LIST(), | ||
204 | diff --git a/target/riscv/vcrypto_helper.c b/target/riscv/vcrypto_helper.c | ||
205 | index XXXXXXX..XXXXXXX 100644 | ||
206 | --- a/target/riscv/vcrypto_helper.c | ||
207 | +++ b/target/riscv/vcrypto_helper.c | ||
208 | @@ -XXX,XX +XXX,XX @@ | ||
209 | #include "qemu/osdep.h" | ||
210 | #include "qemu/host-utils.h" | ||
211 | #include "qemu/bitops.h" | ||
212 | +#include "qemu/bswap.h" | ||
213 | #include "cpu.h" | ||
214 | #include "exec/memop.h" | ||
215 | #include "exec/exec-all.h" | ||
216 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2, vclmulh_vv, OP_UUU_D, H8, H8, H8, clmulh64) | ||
217 | GEN_VEXT_VV(vclmulh_vv, 8) | ||
218 | RVVCALL(OPIVX2, vclmulh_vx, OP_UUU_D, H8, H8, clmulh64) | ||
219 | GEN_VEXT_VX(vclmulh_vx, 8) | ||
220 | + | ||
221 | +RVVCALL(OPIVV2, vror_vv_b, OP_UUU_B, H1, H1, H1, ror8) | ||
222 | +RVVCALL(OPIVV2, vror_vv_h, OP_UUU_H, H2, H2, H2, ror16) | ||
223 | +RVVCALL(OPIVV2, vror_vv_w, OP_UUU_W, H4, H4, H4, ror32) | ||
224 | +RVVCALL(OPIVV2, vror_vv_d, OP_UUU_D, H8, H8, H8, ror64) | ||
225 | +GEN_VEXT_VV(vror_vv_b, 1) | ||
226 | +GEN_VEXT_VV(vror_vv_h, 2) | ||
227 | +GEN_VEXT_VV(vror_vv_w, 4) | ||
228 | +GEN_VEXT_VV(vror_vv_d, 8) | ||
229 | + | ||
230 | +RVVCALL(OPIVX2, vror_vx_b, OP_UUU_B, H1, H1, ror8) | ||
231 | +RVVCALL(OPIVX2, vror_vx_h, OP_UUU_H, H2, H2, ror16) | ||
232 | +RVVCALL(OPIVX2, vror_vx_w, OP_UUU_W, H4, H4, ror32) | ||
233 | +RVVCALL(OPIVX2, vror_vx_d, OP_UUU_D, H8, H8, ror64) | ||
234 | +GEN_VEXT_VX(vror_vx_b, 1) | ||
235 | +GEN_VEXT_VX(vror_vx_h, 2) | ||
236 | +GEN_VEXT_VX(vror_vx_w, 4) | ||
237 | +GEN_VEXT_VX(vror_vx_d, 8) | ||
238 | + | ||
239 | +RVVCALL(OPIVV2, vrol_vv_b, OP_UUU_B, H1, H1, H1, rol8) | ||
240 | +RVVCALL(OPIVV2, vrol_vv_h, OP_UUU_H, H2, H2, H2, rol16) | ||
241 | +RVVCALL(OPIVV2, vrol_vv_w, OP_UUU_W, H4, H4, H4, rol32) | ||
242 | +RVVCALL(OPIVV2, vrol_vv_d, OP_UUU_D, H8, H8, H8, rol64) | ||
243 | +GEN_VEXT_VV(vrol_vv_b, 1) | ||
244 | +GEN_VEXT_VV(vrol_vv_h, 2) | ||
245 | +GEN_VEXT_VV(vrol_vv_w, 4) | ||
246 | +GEN_VEXT_VV(vrol_vv_d, 8) | ||
247 | + | ||
248 | +RVVCALL(OPIVX2, vrol_vx_b, OP_UUU_B, H1, H1, rol8) | ||
249 | +RVVCALL(OPIVX2, vrol_vx_h, OP_UUU_H, H2, H2, rol16) | ||
250 | +RVVCALL(OPIVX2, vrol_vx_w, OP_UUU_W, H4, H4, rol32) | ||
251 | +RVVCALL(OPIVX2, vrol_vx_d, OP_UUU_D, H8, H8, rol64) | ||
252 | +GEN_VEXT_VX(vrol_vx_b, 1) | ||
253 | +GEN_VEXT_VX(vrol_vx_h, 2) | ||
254 | +GEN_VEXT_VX(vrol_vx_w, 4) | ||
255 | +GEN_VEXT_VX(vrol_vx_d, 8) | ||
256 | + | ||
257 | +static uint64_t brev8(uint64_t val) | ||
76 | +{ | 258 | +{ |
77 | + if (vext_check_isa_ill(s) && | 259 | + val = ((val & 0x5555555555555555ull) << 1) | |
78 | + vext_check_reg(s, a->rd, false) && | 260 | + ((val & 0xAAAAAAAAAAAAAAAAull) >> 1); |
79 | + vext_check_reg(s, a->rs1, false)) { | 261 | + val = ((val & 0x3333333333333333ull) << 2) | |
80 | + | 262 | + ((val & 0xCCCCCCCCCCCCCCCCull) >> 2); |
81 | + if (s->vl_eq_vlmax) { | 263 | + val = ((val & 0x0F0F0F0F0F0F0F0Full) << 4) | |
82 | + tcg_gen_gvec_mov(s->sew, vreg_ofs(s, a->rd), | 264 | + ((val & 0xF0F0F0F0F0F0F0F0ull) >> 4); |
83 | + vreg_ofs(s, a->rs1), | 265 | + |
84 | + MAXSZ(s), MAXSZ(s)); | 266 | + return val; |
85 | + } else { | ||
86 | + uint32_t data = FIELD_DP32(0, VDATA, LMUL, s->lmul); | ||
87 | + static gen_helper_gvec_2_ptr * const fns[4] = { | ||
88 | + gen_helper_vmv_v_v_b, gen_helper_vmv_v_v_h, | ||
89 | + gen_helper_vmv_v_v_w, gen_helper_vmv_v_v_d, | ||
90 | + }; | ||
91 | + TCGLabel *over = gen_new_label(); | ||
92 | + tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); | ||
93 | + | ||
94 | + tcg_gen_gvec_2_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, a->rs1), | ||
95 | + cpu_env, 0, s->vlen / 8, data, fns[s->sew]); | ||
96 | + gen_set_label(over); | ||
97 | + } | ||
98 | + return true; | ||
99 | + } | ||
100 | + return false; | ||
101 | +} | 267 | +} |
102 | + | 268 | + |
103 | +typedef void gen_helper_vmv_vx(TCGv_ptr, TCGv_i64, TCGv_env, TCGv_i32); | 269 | +RVVCALL(OPIVV1, vbrev8_v_b, OP_UU_B, H1, H1, brev8) |
104 | +static bool trans_vmv_v_x(DisasContext *s, arg_vmv_v_x *a) | 270 | +RVVCALL(OPIVV1, vbrev8_v_h, OP_UU_H, H2, H2, brev8) |
271 | +RVVCALL(OPIVV1, vbrev8_v_w, OP_UU_W, H4, H4, brev8) | ||
272 | +RVVCALL(OPIVV1, vbrev8_v_d, OP_UU_D, H8, H8, brev8) | ||
273 | +GEN_VEXT_V(vbrev8_v_b, 1) | ||
274 | +GEN_VEXT_V(vbrev8_v_h, 2) | ||
275 | +GEN_VEXT_V(vbrev8_v_w, 4) | ||
276 | +GEN_VEXT_V(vbrev8_v_d, 8) | ||
277 | + | ||
278 | +#define DO_IDENTITY(a) (a) | ||
279 | +RVVCALL(OPIVV1, vrev8_v_b, OP_UU_B, H1, H1, DO_IDENTITY) | ||
280 | +RVVCALL(OPIVV1, vrev8_v_h, OP_UU_H, H2, H2, bswap16) | ||
281 | +RVVCALL(OPIVV1, vrev8_v_w, OP_UU_W, H4, H4, bswap32) | ||
282 | +RVVCALL(OPIVV1, vrev8_v_d, OP_UU_D, H8, H8, bswap64) | ||
283 | +GEN_VEXT_V(vrev8_v_b, 1) | ||
284 | +GEN_VEXT_V(vrev8_v_h, 2) | ||
285 | +GEN_VEXT_V(vrev8_v_w, 4) | ||
286 | +GEN_VEXT_V(vrev8_v_d, 8) | ||
287 | + | ||
288 | +#define DO_ANDN(a, b) ((a) & ~(b)) | ||
289 | +RVVCALL(OPIVV2, vandn_vv_b, OP_UUU_B, H1, H1, H1, DO_ANDN) | ||
290 | +RVVCALL(OPIVV2, vandn_vv_h, OP_UUU_H, H2, H2, H2, DO_ANDN) | ||
291 | +RVVCALL(OPIVV2, vandn_vv_w, OP_UUU_W, H4, H4, H4, DO_ANDN) | ||
292 | +RVVCALL(OPIVV2, vandn_vv_d, OP_UUU_D, H8, H8, H8, DO_ANDN) | ||
293 | +GEN_VEXT_VV(vandn_vv_b, 1) | ||
294 | +GEN_VEXT_VV(vandn_vv_h, 2) | ||
295 | +GEN_VEXT_VV(vandn_vv_w, 4) | ||
296 | +GEN_VEXT_VV(vandn_vv_d, 8) | ||
297 | + | ||
298 | +RVVCALL(OPIVX2, vandn_vx_b, OP_UUU_B, H1, H1, DO_ANDN) | ||
299 | +RVVCALL(OPIVX2, vandn_vx_h, OP_UUU_H, H2, H2, DO_ANDN) | ||
300 | +RVVCALL(OPIVX2, vandn_vx_w, OP_UUU_W, H4, H4, DO_ANDN) | ||
301 | +RVVCALL(OPIVX2, vandn_vx_d, OP_UUU_D, H8, H8, DO_ANDN) | ||
302 | +GEN_VEXT_VX(vandn_vx_b, 1) | ||
303 | +GEN_VEXT_VX(vandn_vx_h, 2) | ||
304 | +GEN_VEXT_VX(vandn_vx_w, 4) | ||
305 | +GEN_VEXT_VX(vandn_vx_d, 8) | ||
306 | + | ||
307 | +RVVCALL(OPIVV1, vbrev_v_b, OP_UU_B, H1, H1, revbit8) | ||
308 | +RVVCALL(OPIVV1, vbrev_v_h, OP_UU_H, H2, H2, revbit16) | ||
309 | +RVVCALL(OPIVV1, vbrev_v_w, OP_UU_W, H4, H4, revbit32) | ||
310 | +RVVCALL(OPIVV1, vbrev_v_d, OP_UU_D, H8, H8, revbit64) | ||
311 | +GEN_VEXT_V(vbrev_v_b, 1) | ||
312 | +GEN_VEXT_V(vbrev_v_h, 2) | ||
313 | +GEN_VEXT_V(vbrev_v_w, 4) | ||
314 | +GEN_VEXT_V(vbrev_v_d, 8) | ||
315 | + | ||
316 | +RVVCALL(OPIVV1, vclz_v_b, OP_UU_B, H1, H1, clz8) | ||
317 | +RVVCALL(OPIVV1, vclz_v_h, OP_UU_H, H2, H2, clz16) | ||
318 | +RVVCALL(OPIVV1, vclz_v_w, OP_UU_W, H4, H4, clz32) | ||
319 | +RVVCALL(OPIVV1, vclz_v_d, OP_UU_D, H8, H8, clz64) | ||
320 | +GEN_VEXT_V(vclz_v_b, 1) | ||
321 | +GEN_VEXT_V(vclz_v_h, 2) | ||
322 | +GEN_VEXT_V(vclz_v_w, 4) | ||
323 | +GEN_VEXT_V(vclz_v_d, 8) | ||
324 | + | ||
325 | +RVVCALL(OPIVV1, vctz_v_b, OP_UU_B, H1, H1, ctz8) | ||
326 | +RVVCALL(OPIVV1, vctz_v_h, OP_UU_H, H2, H2, ctz16) | ||
327 | +RVVCALL(OPIVV1, vctz_v_w, OP_UU_W, H4, H4, ctz32) | ||
328 | +RVVCALL(OPIVV1, vctz_v_d, OP_UU_D, H8, H8, ctz64) | ||
329 | +GEN_VEXT_V(vctz_v_b, 1) | ||
330 | +GEN_VEXT_V(vctz_v_h, 2) | ||
331 | +GEN_VEXT_V(vctz_v_w, 4) | ||
332 | +GEN_VEXT_V(vctz_v_d, 8) | ||
333 | + | ||
334 | +RVVCALL(OPIVV1, vcpop_v_b, OP_UU_B, H1, H1, ctpop8) | ||
335 | +RVVCALL(OPIVV1, vcpop_v_h, OP_UU_H, H2, H2, ctpop16) | ||
336 | +RVVCALL(OPIVV1, vcpop_v_w, OP_UU_W, H4, H4, ctpop32) | ||
337 | +RVVCALL(OPIVV1, vcpop_v_d, OP_UU_D, H8, H8, ctpop64) | ||
338 | +GEN_VEXT_V(vcpop_v_b, 1) | ||
339 | +GEN_VEXT_V(vcpop_v_h, 2) | ||
340 | +GEN_VEXT_V(vcpop_v_w, 4) | ||
341 | +GEN_VEXT_V(vcpop_v_d, 8) | ||
342 | + | ||
343 | +#define DO_SLL(N, M) (N << (M & (sizeof(N) * 8 - 1))) | ||
344 | +RVVCALL(OPIVV2, vwsll_vv_b, WOP_UUU_B, H2, H1, H1, DO_SLL) | ||
345 | +RVVCALL(OPIVV2, vwsll_vv_h, WOP_UUU_H, H4, H2, H2, DO_SLL) | ||
346 | +RVVCALL(OPIVV2, vwsll_vv_w, WOP_UUU_W, H8, H4, H4, DO_SLL) | ||
347 | +GEN_VEXT_VV(vwsll_vv_b, 2) | ||
348 | +GEN_VEXT_VV(vwsll_vv_h, 4) | ||
349 | +GEN_VEXT_VV(vwsll_vv_w, 8) | ||
350 | + | ||
351 | +RVVCALL(OPIVX2, vwsll_vx_b, WOP_UUU_B, H2, H1, DO_SLL) | ||
352 | +RVVCALL(OPIVX2, vwsll_vx_h, WOP_UUU_H, H4, H2, DO_SLL) | ||
353 | +RVVCALL(OPIVX2, vwsll_vx_w, WOP_UUU_W, H8, H4, DO_SLL) | ||
354 | +GEN_VEXT_VX(vwsll_vx_b, 2) | ||
355 | +GEN_VEXT_VX(vwsll_vx_h, 4) | ||
356 | +GEN_VEXT_VX(vwsll_vx_w, 8) | ||
357 | diff --git a/target/riscv/insn_trans/trans_rvvk.c.inc b/target/riscv/insn_trans/trans_rvvk.c.inc | ||
358 | index XXXXXXX..XXXXXXX 100644 | ||
359 | --- a/target/riscv/insn_trans/trans_rvvk.c.inc | ||
360 | +++ b/target/riscv/insn_trans/trans_rvvk.c.inc | ||
361 | @@ -XXX,XX +XXX,XX @@ static bool vclmul_vx_check(DisasContext *s, arg_rmrr *a) | ||
362 | |||
363 | GEN_VX_MASKED_TRANS(vclmul_vx, vclmul_vx_check) | ||
364 | GEN_VX_MASKED_TRANS(vclmulh_vx, vclmul_vx_check) | ||
365 | + | ||
366 | +/* | ||
367 | + * Zvbb | ||
368 | + */ | ||
369 | + | ||
370 | +#define GEN_OPIVI_GVEC_TRANS_CHECK(NAME, IMM_MODE, OPIVX, SUF, CHECK) \ | ||
371 | + static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ | ||
372 | + { \ | ||
373 | + if (CHECK(s, a)) { \ | ||
374 | + static gen_helper_opivx *const fns[4] = { \ | ||
375 | + gen_helper_##OPIVX##_b, \ | ||
376 | + gen_helper_##OPIVX##_h, \ | ||
377 | + gen_helper_##OPIVX##_w, \ | ||
378 | + gen_helper_##OPIVX##_d, \ | ||
379 | + }; \ | ||
380 | + return do_opivi_gvec(s, a, tcg_gen_gvec_##SUF, fns[s->sew], \ | ||
381 | + IMM_MODE); \ | ||
382 | + } \ | ||
383 | + return false; \ | ||
384 | + } | ||
385 | + | ||
386 | +#define GEN_OPIVV_GVEC_TRANS_CHECK(NAME, SUF, CHECK) \ | ||
387 | + static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ | ||
388 | + { \ | ||
389 | + if (CHECK(s, a)) { \ | ||
390 | + static gen_helper_gvec_4_ptr *const fns[4] = { \ | ||
391 | + gen_helper_##NAME##_b, \ | ||
392 | + gen_helper_##NAME##_h, \ | ||
393 | + gen_helper_##NAME##_w, \ | ||
394 | + gen_helper_##NAME##_d, \ | ||
395 | + }; \ | ||
396 | + return do_opivv_gvec(s, a, tcg_gen_gvec_##SUF, fns[s->sew]); \ | ||
397 | + } \ | ||
398 | + return false; \ | ||
399 | + } | ||
400 | + | ||
401 | +#define GEN_OPIVX_GVEC_SHIFT_TRANS_CHECK(NAME, SUF, CHECK) \ | ||
402 | + static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ | ||
403 | + { \ | ||
404 | + if (CHECK(s, a)) { \ | ||
405 | + static gen_helper_opivx *const fns[4] = { \ | ||
406 | + gen_helper_##NAME##_b, \ | ||
407 | + gen_helper_##NAME##_h, \ | ||
408 | + gen_helper_##NAME##_w, \ | ||
409 | + gen_helper_##NAME##_d, \ | ||
410 | + }; \ | ||
411 | + return do_opivx_gvec_shift(s, a, tcg_gen_gvec_##SUF, \ | ||
412 | + fns[s->sew]); \ | ||
413 | + } \ | ||
414 | + return false; \ | ||
415 | + } | ||
416 | + | ||
417 | +static bool zvbb_vv_check(DisasContext *s, arg_rmrr *a) | ||
105 | +{ | 418 | +{ |
106 | + if (vext_check_isa_ill(s) && | 419 | + return opivv_check(s, a) && s->cfg_ptr->ext_zvbb == true; |
107 | + vext_check_reg(s, a->rd, false)) { | ||
108 | + | ||
109 | + TCGv s1; | ||
110 | + TCGLabel *over = gen_new_label(); | ||
111 | + tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); | ||
112 | + | ||
113 | + s1 = tcg_temp_new(); | ||
114 | + gen_get_gpr(s1, a->rs1); | ||
115 | + | ||
116 | + if (s->vl_eq_vlmax) { | ||
117 | + tcg_gen_gvec_dup_tl(s->sew, vreg_ofs(s, a->rd), | ||
118 | + MAXSZ(s), MAXSZ(s), s1); | ||
119 | + } else { | ||
120 | + TCGv_i32 desc ; | ||
121 | + TCGv_i64 s1_i64 = tcg_temp_new_i64(); | ||
122 | + TCGv_ptr dest = tcg_temp_new_ptr(); | ||
123 | + uint32_t data = FIELD_DP32(0, VDATA, LMUL, s->lmul); | ||
124 | + static gen_helper_vmv_vx * const fns[4] = { | ||
125 | + gen_helper_vmv_v_x_b, gen_helper_vmv_v_x_h, | ||
126 | + gen_helper_vmv_v_x_w, gen_helper_vmv_v_x_d, | ||
127 | + }; | ||
128 | + | ||
129 | + tcg_gen_ext_tl_i64(s1_i64, s1); | ||
130 | + desc = tcg_const_i32(simd_desc(0, s->vlen / 8, data)); | ||
131 | + tcg_gen_addi_ptr(dest, cpu_env, vreg_ofs(s, a->rd)); | ||
132 | + fns[s->sew](dest, s1_i64, cpu_env, desc); | ||
133 | + | ||
134 | + tcg_temp_free_ptr(dest); | ||
135 | + tcg_temp_free_i32(desc); | ||
136 | + tcg_temp_free_i64(s1_i64); | ||
137 | + } | ||
138 | + | ||
139 | + tcg_temp_free(s1); | ||
140 | + gen_set_label(over); | ||
141 | + return true; | ||
142 | + } | ||
143 | + return false; | ||
144 | +} | 420 | +} |
145 | + | 421 | + |
146 | +static bool trans_vmv_v_i(DisasContext *s, arg_vmv_v_i *a) | 422 | +static bool zvbb_vx_check(DisasContext *s, arg_rmrr *a) |
147 | +{ | 423 | +{ |
148 | + if (vext_check_isa_ill(s) && | 424 | + return opivx_check(s, a) && s->cfg_ptr->ext_zvbb == true; |
149 | + vext_check_reg(s, a->rd, false)) { | ||
150 | + | ||
151 | + int64_t simm = sextract64(a->rs1, 0, 5); | ||
152 | + if (s->vl_eq_vlmax) { | ||
153 | + tcg_gen_gvec_dup_imm(s->sew, vreg_ofs(s, a->rd), | ||
154 | + MAXSZ(s), MAXSZ(s), simm); | ||
155 | + } else { | ||
156 | + TCGv_i32 desc; | ||
157 | + TCGv_i64 s1; | ||
158 | + TCGv_ptr dest; | ||
159 | + uint32_t data = FIELD_DP32(0, VDATA, LMUL, s->lmul); | ||
160 | + static gen_helper_vmv_vx * const fns[4] = { | ||
161 | + gen_helper_vmv_v_x_b, gen_helper_vmv_v_x_h, | ||
162 | + gen_helper_vmv_v_x_w, gen_helper_vmv_v_x_d, | ||
163 | + }; | ||
164 | + TCGLabel *over = gen_new_label(); | ||
165 | + tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); | ||
166 | + | ||
167 | + s1 = tcg_const_i64(simm); | ||
168 | + dest = tcg_temp_new_ptr(); | ||
169 | + desc = tcg_const_i32(simd_desc(0, s->vlen / 8, data)); | ||
170 | + tcg_gen_addi_ptr(dest, cpu_env, vreg_ofs(s, a->rd)); | ||
171 | + fns[s->sew](dest, s1, cpu_env, desc); | ||
172 | + | ||
173 | + tcg_temp_free_ptr(dest); | ||
174 | + tcg_temp_free_i32(desc); | ||
175 | + tcg_temp_free_i64(s1); | ||
176 | + gen_set_label(over); | ||
177 | + } | ||
178 | + return true; | ||
179 | + } | ||
180 | + return false; | ||
181 | +} | 425 | +} |
182 | + | 426 | + |
183 | +GEN_OPIVV_TRANS(vmerge_vvm, opivv_vadc_check) | 427 | +/* vrol.v[vx] */ |
184 | +GEN_OPIVX_TRANS(vmerge_vxm, opivx_vadc_check) | 428 | +GEN_OPIVV_GVEC_TRANS_CHECK(vrol_vv, rotlv, zvbb_vv_check) |
185 | +GEN_OPIVI_TRANS(vmerge_vim, 0, vmerge_vxm, opivx_vadc_check) | 429 | +GEN_OPIVX_GVEC_SHIFT_TRANS_CHECK(vrol_vx, rotls, zvbb_vx_check) |
186 | diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c | 430 | + |
187 | index XXXXXXX..XXXXXXX 100644 | 431 | +/* vror.v[vxi] */ |
188 | --- a/target/riscv/vector_helper.c | 432 | +GEN_OPIVV_GVEC_TRANS_CHECK(vror_vv, rotrv, zvbb_vv_check) |
189 | +++ b/target/riscv/vector_helper.c | 433 | +GEN_OPIVX_GVEC_SHIFT_TRANS_CHECK(vror_vx, rotrs, zvbb_vx_check) |
190 | @@ -XXX,XX +XXX,XX @@ GEN_VEXT_VX(vwmaccsu_vx_w, 4, 8, clearq) | 434 | +GEN_OPIVI_GVEC_TRANS_CHECK(vror_vi, IMM_TRUNC_SEW, vror_vx, rotri, zvbb_vx_check) |
191 | GEN_VEXT_VX(vwmaccus_vx_b, 1, 2, clearh) | 435 | + |
192 | GEN_VEXT_VX(vwmaccus_vx_h, 2, 4, clearl) | 436 | +#define GEN_OPIVX_GVEC_TRANS_CHECK(NAME, SUF, CHECK) \ |
193 | GEN_VEXT_VX(vwmaccus_vx_w, 4, 8, clearq) | 437 | + static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ |
194 | + | 438 | + { \ |
195 | +/* Vector Integer Merge and Move Instructions */ | 439 | + if (CHECK(s, a)) { \ |
196 | +#define GEN_VEXT_VMV_VV(NAME, ETYPE, H, CLEAR_FN) \ | 440 | + static gen_helper_opivx *const fns[4] = { \ |
197 | +void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env, \ | 441 | + gen_helper_##NAME##_b, \ |
198 | + uint32_t desc) \ | 442 | + gen_helper_##NAME##_h, \ |
199 | +{ \ | 443 | + gen_helper_##NAME##_w, \ |
200 | + uint32_t vl = env->vl; \ | 444 | + gen_helper_##NAME##_d, \ |
201 | + uint32_t esz = sizeof(ETYPE); \ | 445 | + }; \ |
202 | + uint32_t vlmax = vext_maxsz(desc) / esz; \ | 446 | + return do_opivx_gvec(s, a, tcg_gen_gvec_##SUF, fns[s->sew]); \ |
203 | + uint32_t i; \ | 447 | + } \ |
204 | + \ | 448 | + return false; \ |
205 | + for (i = 0; i < vl; i++) { \ | 449 | + } |
206 | + ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ | 450 | + |
207 | + *((ETYPE *)vd + H(i)) = s1; \ | 451 | +/* vandn.v[vx] */ |
208 | + } \ | 452 | +GEN_OPIVV_GVEC_TRANS_CHECK(vandn_vv, andc, zvbb_vv_check) |
209 | + CLEAR_FN(vd, vl, vl * esz, vlmax * esz); \ | 453 | +GEN_OPIVX_GVEC_TRANS_CHECK(vandn_vx, andcs, zvbb_vx_check) |
454 | + | ||
455 | +#define GEN_OPIV_TRANS(NAME, CHECK) \ | ||
456 | + static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ | ||
457 | + { \ | ||
458 | + if (CHECK(s, a)) { \ | ||
459 | + uint32_t data = 0; \ | ||
460 | + static gen_helper_gvec_3_ptr *const fns[4] = { \ | ||
461 | + gen_helper_##NAME##_b, \ | ||
462 | + gen_helper_##NAME##_h, \ | ||
463 | + gen_helper_##NAME##_w, \ | ||
464 | + gen_helper_##NAME##_d, \ | ||
465 | + }; \ | ||
466 | + TCGLabel *over = gen_new_label(); \ | ||
467 | + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ | ||
468 | + \ | ||
469 | + data = FIELD_DP32(data, VDATA, VM, a->vm); \ | ||
470 | + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ | ||
471 | + data = FIELD_DP32(data, VDATA, VTA, s->vta); \ | ||
472 | + data = FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s); \ | ||
473 | + data = FIELD_DP32(data, VDATA, VMA, s->vma); \ | ||
474 | + tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ | ||
475 | + vreg_ofs(s, a->rs2), cpu_env, \ | ||
476 | + s->cfg_ptr->vlen / 8, s->cfg_ptr->vlen / 8, \ | ||
477 | + data, fns[s->sew]); \ | ||
478 | + mark_vs_dirty(s); \ | ||
479 | + gen_set_label(over); \ | ||
480 | + return true; \ | ||
481 | + } \ | ||
482 | + return false; \ | ||
483 | + } | ||
484 | + | ||
485 | +static bool zvbb_opiv_check(DisasContext *s, arg_rmr *a) | ||
486 | +{ | ||
487 | + return s->cfg_ptr->ext_zvbb == true && | ||
488 | + require_rvv(s) && | ||
489 | + vext_check_isa_ill(s) && | ||
490 | + vext_check_ss(s, a->rd, a->rs2, a->vm); | ||
210 | +} | 491 | +} |
211 | + | 492 | + |
212 | +GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t, H1, clearb) | 493 | +GEN_OPIV_TRANS(vbrev8_v, zvbb_opiv_check) |
213 | +GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2, clearh) | 494 | +GEN_OPIV_TRANS(vrev8_v, zvbb_opiv_check) |
214 | +GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4, clearl) | 495 | +GEN_OPIV_TRANS(vbrev_v, zvbb_opiv_check) |
215 | +GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8, clearq) | 496 | +GEN_OPIV_TRANS(vclz_v, zvbb_opiv_check) |
216 | + | 497 | +GEN_OPIV_TRANS(vctz_v, zvbb_opiv_check) |
217 | +#define GEN_VEXT_VMV_VX(NAME, ETYPE, H, CLEAR_FN) \ | 498 | +GEN_OPIV_TRANS(vcpop_v, zvbb_opiv_check) |
218 | +void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env, \ | 499 | + |
219 | + uint32_t desc) \ | 500 | +static bool vwsll_vv_check(DisasContext *s, arg_rmrr *a) |
220 | +{ \ | 501 | +{ |
221 | + uint32_t vl = env->vl; \ | 502 | + return s->cfg_ptr->ext_zvbb && opivv_widen_check(s, a); |
222 | + uint32_t esz = sizeof(ETYPE); \ | ||
223 | + uint32_t vlmax = vext_maxsz(desc) / esz; \ | ||
224 | + uint32_t i; \ | ||
225 | + \ | ||
226 | + for (i = 0; i < vl; i++) { \ | ||
227 | + *((ETYPE *)vd + H(i)) = (ETYPE)s1; \ | ||
228 | + } \ | ||
229 | + CLEAR_FN(vd, vl, vl * esz, vlmax * esz); \ | ||
230 | +} | 503 | +} |
231 | + | 504 | + |
232 | +GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t, H1, clearb) | 505 | +static bool vwsll_vx_check(DisasContext *s, arg_rmrr *a) |
233 | +GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2, clearh) | 506 | +{ |
234 | +GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4, clearl) | 507 | + return s->cfg_ptr->ext_zvbb && opivx_widen_check(s, a); |
235 | +GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8, clearq) | ||
236 | + | ||
237 | +#define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H, CLEAR_FN) \ | ||
238 | +void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ | ||
239 | + CPURISCVState *env, uint32_t desc) \ | ||
240 | +{ \ | ||
241 | + uint32_t mlen = vext_mlen(desc); \ | ||
242 | + uint32_t vl = env->vl; \ | ||
243 | + uint32_t esz = sizeof(ETYPE); \ | ||
244 | + uint32_t vlmax = vext_maxsz(desc) / esz; \ | ||
245 | + uint32_t i; \ | ||
246 | + \ | ||
247 | + for (i = 0; i < vl; i++) { \ | ||
248 | + ETYPE *vt = (!vext_elem_mask(v0, mlen, i) ? vs2 : vs1); \ | ||
249 | + *((ETYPE *)vd + H(i)) = *(vt + H(i)); \ | ||
250 | + } \ | ||
251 | + CLEAR_FN(vd, vl, vl * esz, vlmax * esz); \ | ||
252 | +} | 508 | +} |
253 | + | 509 | + |
254 | +GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t, H1, clearb) | 510 | +/* OPIVI without GVEC IR */ |
255 | +GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2, clearh) | 511 | +#define GEN_OPIVI_WIDEN_TRANS(NAME, IMM_MODE, OPIVX, CHECK) \ |
256 | +GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4, clearl) | 512 | + static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ |
257 | +GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8, clearq) | 513 | + { \ |
258 | + | 514 | + if (CHECK(s, a)) { \ |
259 | +#define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H, CLEAR_FN) \ | 515 | + static gen_helper_opivx *const fns[3] = { \ |
260 | +void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ | 516 | + gen_helper_##OPIVX##_b, \ |
261 | + void *vs2, CPURISCVState *env, uint32_t desc) \ | 517 | + gen_helper_##OPIVX##_h, \ |
262 | +{ \ | 518 | + gen_helper_##OPIVX##_w, \ |
263 | + uint32_t mlen = vext_mlen(desc); \ | 519 | + }; \ |
264 | + uint32_t vl = env->vl; \ | 520 | + return opivi_trans(a->rd, a->rs1, a->rs2, a->vm, fns[s->sew], s, \ |
265 | + uint32_t esz = sizeof(ETYPE); \ | 521 | + IMM_MODE); \ |
266 | + uint32_t vlmax = vext_maxsz(desc) / esz; \ | 522 | + } \ |
267 | + uint32_t i; \ | 523 | + return false; \ |
268 | + \ | 524 | + } |
269 | + for (i = 0; i < vl; i++) { \ | 525 | + |
270 | + ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ | 526 | +GEN_OPIVV_WIDEN_TRANS(vwsll_vv, vwsll_vv_check) |
271 | + ETYPE d = (!vext_elem_mask(v0, mlen, i) ? s2 : \ | 527 | +GEN_OPIVX_WIDEN_TRANS(vwsll_vx, vwsll_vx_check) |
272 | + (ETYPE)(target_long)s1); \ | 528 | +GEN_OPIVI_WIDEN_TRANS(vwsll_vi, IMM_ZX, vwsll_vx, vwsll_vx_check) |
273 | + *((ETYPE *)vd + H(i)) = d; \ | ||
274 | + } \ | ||
275 | + CLEAR_FN(vd, vl, vl * esz, vlmax * esz); \ | ||
276 | +} | ||
277 | + | ||
278 | +GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t, H1, clearb) | ||
279 | +GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2, clearh) | ||
280 | +GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4, clearl) | ||
281 | +GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8, clearq) | ||
282 | -- | 529 | -- |
283 | 2.27.0 | 530 | 2.41.0 |
284 | |||
285 | diff view generated by jsdifflib |
1 | From: LIU Zhiwei <zhiwei_liu@c-sky.com> | 1 | From: Nazar Kazakov <nazar.kazakov@codethink.co.uk> |
---|---|---|---|
2 | 2 | ||
3 | Vector strided operations access the first memory element at the base address, | 3 | This commit adds support for the Zvkned vector-crypto extension, which |
4 | and then access subsequent elements at address increments given by the byte | 4 | consists of the following instructions: |
5 | offset contained in the x register specified by rs2. | ||
6 | 5 | ||
7 | Vector unit-stride operations access elements stored contiguously in memory | 6 | * vaesef.[vv,vs] |
8 | starting from the base effective address. It can be seen as a special | 7 | * vaesef.[vv,vs] |
9 | case of strided operations. | 8 | * vaesdm.[vv,vs] |
9 | * vaesz.vs | ||
10 | * vaesem.[vv,vs] | ||
11 | * vaeskf1.vi | ||
12 | * vaeskf2.vi | ||
10 | 13 | ||
11 | Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> | 14 | Translation functions are defined in |
12 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 15 | `target/riscv/insn_trans/trans_rvvk.c.inc` and helpers are defined in |
13 | Reviewed-by: Alistair Francis <alistair.francis@wdc.com> | 16 | `target/riscv/vcrypto_helper.c`. |
14 | Message-id: 20200623215920.2594-7-zhiwei_liu@c-sky.com | 17 | |
18 | Co-authored-by: Lawrence Hunter <lawrence.hunter@codethink.co.uk> | ||
19 | Co-authored-by: William Salmon <will.salmon@codethink.co.uk> | ||
20 | [max.chou@sifive.com: Replaced vstart checking by TCG op] | ||
21 | Signed-off-by: Lawrence Hunter <lawrence.hunter@codethink.co.uk> | ||
22 | Signed-off-by: William Salmon <will.salmon@codethink.co.uk> | ||
23 | Signed-off-by: Nazar Kazakov <nazar.kazakov@codethink.co.uk> | ||
24 | Signed-off-by: Max Chou <max.chou@sifive.com> | ||
25 | Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> | ||
26 | [max.chou@sifive.com: Imported aes-round.h and exposed x-zvkned | ||
27 | property] | ||
28 | [max.chou@sifive.com: Fixed endian issues and replaced the vstart & vl | ||
29 | egs checking by helper function] | ||
30 | [max.chou@sifive.com: Replaced bswap32 calls in aes key expanding] | ||
31 | Message-ID: <20230711165917.2629866-10-max.chou@sifive.com> | ||
15 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | 32 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> |
16 | --- | 33 | --- |
17 | target/riscv/helper.h | 105 ++++++ | 34 | target/riscv/cpu_cfg.h | 1 + |
18 | target/riscv/internals.h | 5 + | 35 | target/riscv/helper.h | 14 ++ |
19 | target/riscv/insn32.decode | 32 ++ | 36 | target/riscv/insn32.decode | 14 ++ |
20 | target/riscv/insn_trans/trans_rvv.inc.c | 355 ++++++++++++++++++++ | 37 | target/riscv/cpu.c | 4 +- |
21 | target/riscv/translate.c | 7 + | 38 | target/riscv/vcrypto_helper.c | 202 +++++++++++++++++++++++ |
22 | target/riscv/vector_helper.c | 410 ++++++++++++++++++++++++ | 39 | target/riscv/insn_trans/trans_rvvk.c.inc | 147 +++++++++++++++++ |
23 | 6 files changed, 914 insertions(+) | 40 | 6 files changed, 381 insertions(+), 1 deletion(-) |
24 | 41 | ||
42 | diff --git a/target/riscv/cpu_cfg.h b/target/riscv/cpu_cfg.h | ||
43 | index XXXXXXX..XXXXXXX 100644 | ||
44 | --- a/target/riscv/cpu_cfg.h | ||
45 | +++ b/target/riscv/cpu_cfg.h | ||
46 | @@ -XXX,XX +XXX,XX @@ struct RISCVCPUConfig { | ||
47 | bool ext_zve64d; | ||
48 | bool ext_zvbb; | ||
49 | bool ext_zvbc; | ||
50 | + bool ext_zvkned; | ||
51 | bool ext_zmmul; | ||
52 | bool ext_zvfbfmin; | ||
53 | bool ext_zvfbfwma; | ||
25 | diff --git a/target/riscv/helper.h b/target/riscv/helper.h | 54 | diff --git a/target/riscv/helper.h b/target/riscv/helper.h |
26 | index XXXXXXX..XXXXXXX 100644 | 55 | index XXXXXXX..XXXXXXX 100644 |
27 | --- a/target/riscv/helper.h | 56 | --- a/target/riscv/helper.h |
28 | +++ b/target/riscv/helper.h | 57 | +++ b/target/riscv/helper.h |
29 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_1(hyp_tlb_flush, void, env) | 58 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vandn_vx_b, void, ptr, ptr, tl, ptr, env, i32) |
30 | 59 | DEF_HELPER_6(vandn_vx_h, void, ptr, ptr, tl, ptr, env, i32) | |
31 | /* Vector functions */ | 60 | DEF_HELPER_6(vandn_vx_w, void, ptr, ptr, tl, ptr, env, i32) |
32 | DEF_HELPER_3(vsetvl, tl, env, tl, tl) | 61 | DEF_HELPER_6(vandn_vx_d, void, ptr, ptr, tl, ptr, env, i32) |
33 | +DEF_HELPER_5(vlb_v_b, void, ptr, ptr, tl, env, i32) | 62 | + |
34 | +DEF_HELPER_5(vlb_v_b_mask, void, ptr, ptr, tl, env, i32) | 63 | +DEF_HELPER_2(egs_check, void, i32, env) |
35 | +DEF_HELPER_5(vlb_v_h, void, ptr, ptr, tl, env, i32) | 64 | + |
36 | +DEF_HELPER_5(vlb_v_h_mask, void, ptr, ptr, tl, env, i32) | 65 | +DEF_HELPER_4(vaesef_vv, void, ptr, ptr, env, i32) |
37 | +DEF_HELPER_5(vlb_v_w, void, ptr, ptr, tl, env, i32) | 66 | +DEF_HELPER_4(vaesef_vs, void, ptr, ptr, env, i32) |
38 | +DEF_HELPER_5(vlb_v_w_mask, void, ptr, ptr, tl, env, i32) | 67 | +DEF_HELPER_4(vaesdf_vv, void, ptr, ptr, env, i32) |
39 | +DEF_HELPER_5(vlb_v_d, void, ptr, ptr, tl, env, i32) | 68 | +DEF_HELPER_4(vaesdf_vs, void, ptr, ptr, env, i32) |
40 | +DEF_HELPER_5(vlb_v_d_mask, void, ptr, ptr, tl, env, i32) | 69 | +DEF_HELPER_4(vaesem_vv, void, ptr, ptr, env, i32) |
41 | +DEF_HELPER_5(vlh_v_h, void, ptr, ptr, tl, env, i32) | 70 | +DEF_HELPER_4(vaesem_vs, void, ptr, ptr, env, i32) |
42 | +DEF_HELPER_5(vlh_v_h_mask, void, ptr, ptr, tl, env, i32) | 71 | +DEF_HELPER_4(vaesdm_vv, void, ptr, ptr, env, i32) |
43 | +DEF_HELPER_5(vlh_v_w, void, ptr, ptr, tl, env, i32) | 72 | +DEF_HELPER_4(vaesdm_vs, void, ptr, ptr, env, i32) |
44 | +DEF_HELPER_5(vlh_v_w_mask, void, ptr, ptr, tl, env, i32) | 73 | +DEF_HELPER_4(vaesz_vs, void, ptr, ptr, env, i32) |
45 | +DEF_HELPER_5(vlh_v_d, void, ptr, ptr, tl, env, i32) | 74 | +DEF_HELPER_5(vaeskf1_vi, void, ptr, ptr, i32, env, i32) |
46 | +DEF_HELPER_5(vlh_v_d_mask, void, ptr, ptr, tl, env, i32) | 75 | +DEF_HELPER_5(vaeskf2_vi, void, ptr, ptr, i32, env, i32) |
47 | +DEF_HELPER_5(vlw_v_w, void, ptr, ptr, tl, env, i32) | ||
48 | +DEF_HELPER_5(vlw_v_w_mask, void, ptr, ptr, tl, env, i32) | ||
49 | +DEF_HELPER_5(vlw_v_d, void, ptr, ptr, tl, env, i32) | ||
50 | +DEF_HELPER_5(vlw_v_d_mask, void, ptr, ptr, tl, env, i32) | ||
51 | +DEF_HELPER_5(vle_v_b, void, ptr, ptr, tl, env, i32) | ||
52 | +DEF_HELPER_5(vle_v_b_mask, void, ptr, ptr, tl, env, i32) | ||
53 | +DEF_HELPER_5(vle_v_h, void, ptr, ptr, tl, env, i32) | ||
54 | +DEF_HELPER_5(vle_v_h_mask, void, ptr, ptr, tl, env, i32) | ||
55 | +DEF_HELPER_5(vle_v_w, void, ptr, ptr, tl, env, i32) | ||
56 | +DEF_HELPER_5(vle_v_w_mask, void, ptr, ptr, tl, env, i32) | ||
57 | +DEF_HELPER_5(vle_v_d, void, ptr, ptr, tl, env, i32) | ||
58 | +DEF_HELPER_5(vle_v_d_mask, void, ptr, ptr, tl, env, i32) | ||
59 | +DEF_HELPER_5(vlbu_v_b, void, ptr, ptr, tl, env, i32) | ||
60 | +DEF_HELPER_5(vlbu_v_b_mask, void, ptr, ptr, tl, env, i32) | ||
61 | +DEF_HELPER_5(vlbu_v_h, void, ptr, ptr, tl, env, i32) | ||
62 | +DEF_HELPER_5(vlbu_v_h_mask, void, ptr, ptr, tl, env, i32) | ||
63 | +DEF_HELPER_5(vlbu_v_w, void, ptr, ptr, tl, env, i32) | ||
64 | +DEF_HELPER_5(vlbu_v_w_mask, void, ptr, ptr, tl, env, i32) | ||
65 | +DEF_HELPER_5(vlbu_v_d, void, ptr, ptr, tl, env, i32) | ||
66 | +DEF_HELPER_5(vlbu_v_d_mask, void, ptr, ptr, tl, env, i32) | ||
67 | +DEF_HELPER_5(vlhu_v_h, void, ptr, ptr, tl, env, i32) | ||
68 | +DEF_HELPER_5(vlhu_v_h_mask, void, ptr, ptr, tl, env, i32) | ||
69 | +DEF_HELPER_5(vlhu_v_w, void, ptr, ptr, tl, env, i32) | ||
70 | +DEF_HELPER_5(vlhu_v_w_mask, void, ptr, ptr, tl, env, i32) | ||
71 | +DEF_HELPER_5(vlhu_v_d, void, ptr, ptr, tl, env, i32) | ||
72 | +DEF_HELPER_5(vlhu_v_d_mask, void, ptr, ptr, tl, env, i32) | ||
73 | +DEF_HELPER_5(vlwu_v_w, void, ptr, ptr, tl, env, i32) | ||
74 | +DEF_HELPER_5(vlwu_v_w_mask, void, ptr, ptr, tl, env, i32) | ||
75 | +DEF_HELPER_5(vlwu_v_d, void, ptr, ptr, tl, env, i32) | ||
76 | +DEF_HELPER_5(vlwu_v_d_mask, void, ptr, ptr, tl, env, i32) | ||
77 | +DEF_HELPER_5(vsb_v_b, void, ptr, ptr, tl, env, i32) | ||
78 | +DEF_HELPER_5(vsb_v_b_mask, void, ptr, ptr, tl, env, i32) | ||
79 | +DEF_HELPER_5(vsb_v_h, void, ptr, ptr, tl, env, i32) | ||
80 | +DEF_HELPER_5(vsb_v_h_mask, void, ptr, ptr, tl, env, i32) | ||
81 | +DEF_HELPER_5(vsb_v_w, void, ptr, ptr, tl, env, i32) | ||
82 | +DEF_HELPER_5(vsb_v_w_mask, void, ptr, ptr, tl, env, i32) | ||
83 | +DEF_HELPER_5(vsb_v_d, void, ptr, ptr, tl, env, i32) | ||
84 | +DEF_HELPER_5(vsb_v_d_mask, void, ptr, ptr, tl, env, i32) | ||
85 | +DEF_HELPER_5(vsh_v_h, void, ptr, ptr, tl, env, i32) | ||
86 | +DEF_HELPER_5(vsh_v_h_mask, void, ptr, ptr, tl, env, i32) | ||
87 | +DEF_HELPER_5(vsh_v_w, void, ptr, ptr, tl, env, i32) | ||
88 | +DEF_HELPER_5(vsh_v_w_mask, void, ptr, ptr, tl, env, i32) | ||
89 | +DEF_HELPER_5(vsh_v_d, void, ptr, ptr, tl, env, i32) | ||
90 | +DEF_HELPER_5(vsh_v_d_mask, void, ptr, ptr, tl, env, i32) | ||
91 | +DEF_HELPER_5(vsw_v_w, void, ptr, ptr, tl, env, i32) | ||
92 | +DEF_HELPER_5(vsw_v_w_mask, void, ptr, ptr, tl, env, i32) | ||
93 | +DEF_HELPER_5(vsw_v_d, void, ptr, ptr, tl, env, i32) | ||
94 | +DEF_HELPER_5(vsw_v_d_mask, void, ptr, ptr, tl, env, i32) | ||
95 | +DEF_HELPER_5(vse_v_b, void, ptr, ptr, tl, env, i32) | ||
96 | +DEF_HELPER_5(vse_v_b_mask, void, ptr, ptr, tl, env, i32) | ||
97 | +DEF_HELPER_5(vse_v_h, void, ptr, ptr, tl, env, i32) | ||
98 | +DEF_HELPER_5(vse_v_h_mask, void, ptr, ptr, tl, env, i32) | ||
99 | +DEF_HELPER_5(vse_v_w, void, ptr, ptr, tl, env, i32) | ||
100 | +DEF_HELPER_5(vse_v_w_mask, void, ptr, ptr, tl, env, i32) | ||
101 | +DEF_HELPER_5(vse_v_d, void, ptr, ptr, tl, env, i32) | ||
102 | +DEF_HELPER_5(vse_v_d_mask, void, ptr, ptr, tl, env, i32) | ||
103 | +DEF_HELPER_6(vlsb_v_b, void, ptr, ptr, tl, tl, env, i32) | ||
104 | +DEF_HELPER_6(vlsb_v_h, void, ptr, ptr, tl, tl, env, i32) | ||
105 | +DEF_HELPER_6(vlsb_v_w, void, ptr, ptr, tl, tl, env, i32) | ||
106 | +DEF_HELPER_6(vlsb_v_d, void, ptr, ptr, tl, tl, env, i32) | ||
107 | +DEF_HELPER_6(vlsh_v_h, void, ptr, ptr, tl, tl, env, i32) | ||
108 | +DEF_HELPER_6(vlsh_v_w, void, ptr, ptr, tl, tl, env, i32) | ||
109 | +DEF_HELPER_6(vlsh_v_d, void, ptr, ptr, tl, tl, env, i32) | ||
110 | +DEF_HELPER_6(vlsw_v_w, void, ptr, ptr, tl, tl, env, i32) | ||
111 | +DEF_HELPER_6(vlsw_v_d, void, ptr, ptr, tl, tl, env, i32) | ||
112 | +DEF_HELPER_6(vlse_v_b, void, ptr, ptr, tl, tl, env, i32) | ||
113 | +DEF_HELPER_6(vlse_v_h, void, ptr, ptr, tl, tl, env, i32) | ||
114 | +DEF_HELPER_6(vlse_v_w, void, ptr, ptr, tl, tl, env, i32) | ||
115 | +DEF_HELPER_6(vlse_v_d, void, ptr, ptr, tl, tl, env, i32) | ||
116 | +DEF_HELPER_6(vlsbu_v_b, void, ptr, ptr, tl, tl, env, i32) | ||
117 | +DEF_HELPER_6(vlsbu_v_h, void, ptr, ptr, tl, tl, env, i32) | ||
118 | +DEF_HELPER_6(vlsbu_v_w, void, ptr, ptr, tl, tl, env, i32) | ||
119 | +DEF_HELPER_6(vlsbu_v_d, void, ptr, ptr, tl, tl, env, i32) | ||
120 | +DEF_HELPER_6(vlshu_v_h, void, ptr, ptr, tl, tl, env, i32) | ||
121 | +DEF_HELPER_6(vlshu_v_w, void, ptr, ptr, tl, tl, env, i32) | ||
122 | +DEF_HELPER_6(vlshu_v_d, void, ptr, ptr, tl, tl, env, i32) | ||
123 | +DEF_HELPER_6(vlswu_v_w, void, ptr, ptr, tl, tl, env, i32) | ||
124 | +DEF_HELPER_6(vlswu_v_d, void, ptr, ptr, tl, tl, env, i32) | ||
125 | +DEF_HELPER_6(vssb_v_b, void, ptr, ptr, tl, tl, env, i32) | ||
126 | +DEF_HELPER_6(vssb_v_h, void, ptr, ptr, tl, tl, env, i32) | ||
127 | +DEF_HELPER_6(vssb_v_w, void, ptr, ptr, tl, tl, env, i32) | ||
128 | +DEF_HELPER_6(vssb_v_d, void, ptr, ptr, tl, tl, env, i32) | ||
129 | +DEF_HELPER_6(vssh_v_h, void, ptr, ptr, tl, tl, env, i32) | ||
130 | +DEF_HELPER_6(vssh_v_w, void, ptr, ptr, tl, tl, env, i32) | ||
131 | +DEF_HELPER_6(vssh_v_d, void, ptr, ptr, tl, tl, env, i32) | ||
132 | +DEF_HELPER_6(vssw_v_w, void, ptr, ptr, tl, tl, env, i32) | ||
133 | +DEF_HELPER_6(vssw_v_d, void, ptr, ptr, tl, tl, env, i32) | ||
134 | +DEF_HELPER_6(vsse_v_b, void, ptr, ptr, tl, tl, env, i32) | ||
135 | +DEF_HELPER_6(vsse_v_h, void, ptr, ptr, tl, tl, env, i32) | ||
136 | +DEF_HELPER_6(vsse_v_w, void, ptr, ptr, tl, tl, env, i32) | ||
137 | +DEF_HELPER_6(vsse_v_d, void, ptr, ptr, tl, tl, env, i32) | ||
138 | diff --git a/target/riscv/internals.h b/target/riscv/internals.h | ||
139 | index XXXXXXX..XXXXXXX 100644 | ||
140 | --- a/target/riscv/internals.h | ||
141 | +++ b/target/riscv/internals.h | ||
142 | @@ -XXX,XX +XXX,XX @@ | ||
143 | |||
144 | #include "hw/registerfields.h" | ||
145 | |||
146 | +/* share data between vector helpers and decode code */ | ||
147 | +FIELD(VDATA, MLEN, 0, 8) | ||
148 | +FIELD(VDATA, VM, 8, 1) | ||
149 | +FIELD(VDATA, LMUL, 9, 2) | ||
150 | +FIELD(VDATA, NF, 11, 4) | ||
151 | #endif | ||
152 | diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode | 76 | diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode |
153 | index XXXXXXX..XXXXXXX 100644 | 77 | index XXXXXXX..XXXXXXX 100644 |
154 | --- a/target/riscv/insn32.decode | 78 | --- a/target/riscv/insn32.decode |
155 | +++ b/target/riscv/insn32.decode | 79 | +++ b/target/riscv/insn32.decode |
156 | @@ -XXX,XX +XXX,XX @@ | 80 | @@ -XXX,XX +XXX,XX @@ |
157 | %sh10 20:10 | ||
158 | %csr 20:12 | ||
159 | %rm 12:3 | ||
160 | +%nf 29:3 !function=ex_plus_1 | ||
161 | |||
162 | # immediates: | ||
163 | %imm_i 20:s12 | ||
164 | @@ -XXX,XX +XXX,XX @@ | ||
165 | &u imm rd | ||
166 | &shift shamt rs1 rd | ||
167 | &atomic aq rl rs2 rs1 rd | ||
168 | +&r2nfvm vm rd rs1 nf | ||
169 | +&rnfvm vm rd rs1 rs2 nf | ||
170 | |||
171 | # Formats 32: | ||
172 | @r ....... ..... ..... ... ..... ....... &r %rs2 %rs1 %rd | ||
173 | @@ -XXX,XX +XXX,XX @@ | ||
174 | @r_rm ....... ..... ..... ... ..... ....... %rs2 %rs1 %rm %rd | 81 | @r_rm ....... ..... ..... ... ..... ....... %rs2 %rs1 %rm %rd |
175 | @r2_rm ....... ..... ..... ... ..... ....... %rs1 %rm %rd | 82 | @r2_rm ....... ..... ..... ... ..... ....... %rs1 %rm %rd |
176 | @r2 ....... ..... ..... ... ..... ....... %rs1 %rd | 83 | @r2 ....... ..... ..... ... ..... ....... &r2 %rs1 %rd |
177 | +@r2_nfvm ... ... vm:1 ..... ..... ... ..... ....... &r2nfvm %nf %rs1 %rd | 84 | +@r2_vm_1 ...... . ..... ..... ... ..... ....... &rmr vm=1 %rs2 %rd |
178 | +@r_nfvm ... ... vm:1 ..... ..... ... ..... ....... &rnfvm %nf %rs2 %rs1 %rd | 85 | @r2_nfvm ... ... vm:1 ..... ..... ... ..... ....... &r2nfvm %nf %rs1 %rd |
179 | @r2_zimm . zimm:11 ..... ... ..... ....... %rs1 %rd | 86 | @r2_vm ...... vm:1 ..... ..... ... ..... ....... &rmr %rs2 %rd |
180 | 87 | @r1_vm ...... vm:1 ..... ..... ... ..... ....... %rd | |
181 | @hfence_gvma ....... ..... ..... ... ..... ....... %rs2 %rs1 | 88 | @@ -XXX,XX +XXX,XX @@ vcpop_v 010010 . ..... 01110 010 ..... 1010111 @r2_vm |
182 | @@ -XXX,XX +XXX,XX @@ hfence_gvma 0110001 ..... ..... 000 00000 1110011 @hfence_gvma | 89 | vwsll_vv 110101 . ..... ..... 000 ..... 1010111 @r_vm |
183 | hfence_vvma 0010001 ..... ..... 000 00000 1110011 @hfence_vvma | 90 | vwsll_vx 110101 . ..... ..... 100 ..... 1010111 @r_vm |
184 | 91 | vwsll_vi 110101 . ..... ..... 011 ..... 1010111 @r_vm | |
185 | # *** RV32V Extension *** | 92 | + |
186 | + | 93 | +# *** Zvkned vector crypto extension *** |
187 | +# *** Vector loads and stores are encoded within LOADFP/STORE-FP *** | 94 | +vaesef_vv 101000 1 ..... 00011 010 ..... 1110111 @r2_vm_1 |
188 | +vlb_v ... 100 . 00000 ..... 000 ..... 0000111 @r2_nfvm | 95 | +vaesef_vs 101001 1 ..... 00011 010 ..... 1110111 @r2_vm_1 |
189 | +vlh_v ... 100 . 00000 ..... 101 ..... 0000111 @r2_nfvm | 96 | +vaesdf_vv 101000 1 ..... 00001 010 ..... 1110111 @r2_vm_1 |
190 | +vlw_v ... 100 . 00000 ..... 110 ..... 0000111 @r2_nfvm | 97 | +vaesdf_vs 101001 1 ..... 00001 010 ..... 1110111 @r2_vm_1 |
191 | +vle_v ... 000 . 00000 ..... 111 ..... 0000111 @r2_nfvm | 98 | +vaesem_vv 101000 1 ..... 00010 010 ..... 1110111 @r2_vm_1 |
192 | +vlbu_v ... 000 . 00000 ..... 000 ..... 0000111 @r2_nfvm | 99 | +vaesem_vs 101001 1 ..... 00010 010 ..... 1110111 @r2_vm_1 |
193 | +vlhu_v ... 000 . 00000 ..... 101 ..... 0000111 @r2_nfvm | 100 | +vaesdm_vv 101000 1 ..... 00000 010 ..... 1110111 @r2_vm_1 |
194 | +vlwu_v ... 000 . 00000 ..... 110 ..... 0000111 @r2_nfvm | 101 | +vaesdm_vs 101001 1 ..... 00000 010 ..... 1110111 @r2_vm_1 |
195 | +vsb_v ... 000 . 00000 ..... 000 ..... 0100111 @r2_nfvm | 102 | +vaesz_vs 101001 1 ..... 00111 010 ..... 1110111 @r2_vm_1 |
196 | +vsh_v ... 000 . 00000 ..... 101 ..... 0100111 @r2_nfvm | 103 | +vaeskf1_vi 100010 1 ..... ..... 010 ..... 1110111 @r_vm_1 |
197 | +vsw_v ... 000 . 00000 ..... 110 ..... 0100111 @r2_nfvm | 104 | +vaeskf2_vi 101010 1 ..... ..... 010 ..... 1110111 @r_vm_1 |
198 | +vse_v ... 000 . 00000 ..... 111 ..... 0100111 @r2_nfvm | 105 | diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c |
199 | + | ||
200 | +vlsb_v ... 110 . ..... ..... 000 ..... 0000111 @r_nfvm | ||
201 | +vlsh_v ... 110 . ..... ..... 101 ..... 0000111 @r_nfvm | ||
202 | +vlsw_v ... 110 . ..... ..... 110 ..... 0000111 @r_nfvm | ||
203 | +vlse_v ... 010 . ..... ..... 111 ..... 0000111 @r_nfvm | ||
204 | +vlsbu_v ... 010 . ..... ..... 000 ..... 0000111 @r_nfvm | ||
205 | +vlshu_v ... 010 . ..... ..... 101 ..... 0000111 @r_nfvm | ||
206 | +vlswu_v ... 010 . ..... ..... 110 ..... 0000111 @r_nfvm | ||
207 | +vssb_v ... 010 . ..... ..... 000 ..... 0100111 @r_nfvm | ||
208 | +vssh_v ... 010 . ..... ..... 101 ..... 0100111 @r_nfvm | ||
209 | +vssw_v ... 010 . ..... ..... 110 ..... 0100111 @r_nfvm | ||
210 | +vsse_v ... 010 . ..... ..... 111 ..... 0100111 @r_nfvm | ||
211 | + | ||
212 | +# *** new major opcode OP-V *** | ||
213 | vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm | ||
214 | vsetvl 1000000 ..... ..... 111 ..... 1010111 @r | ||
215 | diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c | ||
216 | index XXXXXXX..XXXXXXX 100644 | 106 | index XXXXXXX..XXXXXXX 100644 |
217 | --- a/target/riscv/insn_trans/trans_rvv.inc.c | 107 | --- a/target/riscv/cpu.c |
218 | +++ b/target/riscv/insn_trans/trans_rvv.inc.c | 108 | +++ b/target/riscv/cpu.c |
109 | @@ -XXX,XX +XXX,XX @@ static const struct isa_ext_data isa_edata_arr[] = { | ||
110 | ISA_EXT_DATA_ENTRY(zvfbfwma, PRIV_VERSION_1_12_0, ext_zvfbfwma), | ||
111 | ISA_EXT_DATA_ENTRY(zvfh, PRIV_VERSION_1_12_0, ext_zvfh), | ||
112 | ISA_EXT_DATA_ENTRY(zvfhmin, PRIV_VERSION_1_12_0, ext_zvfhmin), | ||
113 | + ISA_EXT_DATA_ENTRY(zvkned, PRIV_VERSION_1_12_0, ext_zvkned), | ||
114 | ISA_EXT_DATA_ENTRY(zhinx, PRIV_VERSION_1_12_0, ext_zhinx), | ||
115 | ISA_EXT_DATA_ENTRY(zhinxmin, PRIV_VERSION_1_12_0, ext_zhinxmin), | ||
116 | ISA_EXT_DATA_ENTRY(smaia, PRIV_VERSION_1_12_0, ext_smaia), | ||
117 | @@ -XXX,XX +XXX,XX @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp) | ||
118 | * In principle Zve*x would also suffice here, were they supported | ||
119 | * in qemu | ||
120 | */ | ||
121 | - if (cpu->cfg.ext_zvbb && !cpu->cfg.ext_zve32f) { | ||
122 | + if ((cpu->cfg.ext_zvbb || cpu->cfg.ext_zvkned) && !cpu->cfg.ext_zve32f) { | ||
123 | error_setg(errp, | ||
124 | "Vector crypto extensions require V or Zve* extensions"); | ||
125 | return; | ||
126 | @@ -XXX,XX +XXX,XX @@ static Property riscv_cpu_extensions[] = { | ||
127 | /* Vector cryptography extensions */ | ||
128 | DEFINE_PROP_BOOL("x-zvbb", RISCVCPU, cfg.ext_zvbb, false), | ||
129 | DEFINE_PROP_BOOL("x-zvbc", RISCVCPU, cfg.ext_zvbc, false), | ||
130 | + DEFINE_PROP_BOOL("x-zvkned", RISCVCPU, cfg.ext_zvkned, false), | ||
131 | |||
132 | DEFINE_PROP_END_OF_LIST(), | ||
133 | }; | ||
134 | diff --git a/target/riscv/vcrypto_helper.c b/target/riscv/vcrypto_helper.c | ||
135 | index XXXXXXX..XXXXXXX 100644 | ||
136 | --- a/target/riscv/vcrypto_helper.c | ||
137 | +++ b/target/riscv/vcrypto_helper.c | ||
219 | @@ -XXX,XX +XXX,XX @@ | 138 | @@ -XXX,XX +XXX,XX @@ |
220 | * You should have received a copy of the GNU General Public License along with | 139 | #include "qemu/bitops.h" |
221 | * this program. If not, see <http://www.gnu.org/licenses/>. | 140 | #include "qemu/bswap.h" |
222 | */ | ||
223 | +#include "tcg/tcg-op-gvec.h" | ||
224 | +#include "tcg/tcg-gvec-desc.h" | ||
225 | +#include "internals.h" | ||
226 | |||
227 | static bool trans_vsetvl(DisasContext *ctx, arg_vsetvl *a) | ||
228 | { | ||
229 | @@ -XXX,XX +XXX,XX @@ static bool trans_vsetvli(DisasContext *ctx, arg_vsetvli *a) | ||
230 | tcg_temp_free(dst); | ||
231 | return true; | ||
232 | } | ||
233 | + | ||
234 | +/* vector register offset from env */ | ||
235 | +static uint32_t vreg_ofs(DisasContext *s, int reg) | ||
236 | +{ | ||
237 | + return offsetof(CPURISCVState, vreg) + reg * s->vlen / 8; | ||
238 | +} | ||
239 | + | ||
240 | +/* check functions */ | ||
241 | + | ||
242 | +/* | ||
243 | + * In cpu_get_tb_cpu_state(), set VILL if RVV was not present. | ||
244 | + * So RVV is also checked in this function. | ||
245 | + */ | ||
246 | +static bool vext_check_isa_ill(DisasContext *s) | ||
247 | +{ | ||
248 | + return !s->vill; | ||
249 | +} | ||
250 | + | ||
251 | +/* | ||
252 | + * There are two rules checked here. | ||
253 | + * | ||
254 | + * 1. Vector register numbers are multiples of LMUL. (Section 3.2) | ||
255 | + * | ||
256 | + * 2. For all widening instructions, the destination LMUL value must also be | ||
257 | + * a supported LMUL value. (Section 11.2) | ||
258 | + */ | ||
259 | +static bool vext_check_reg(DisasContext *s, uint32_t reg, bool widen) | ||
260 | +{ | ||
261 | + /* | ||
262 | + * The destination vector register group results are arranged as if both | ||
263 | + * SEW and LMUL were at twice their current settings. (Section 11.2). | ||
264 | + */ | ||
265 | + int legal = widen ? 2 << s->lmul : 1 << s->lmul; | ||
266 | + | ||
267 | + return !((s->lmul == 0x3 && widen) || (reg % legal)); | ||
268 | +} | ||
269 | + | ||
270 | +/* | ||
271 | + * There are two rules checked here. | ||
272 | + * | ||
273 | + * 1. The destination vector register group for a masked vector instruction can | ||
274 | + * only overlap the source mask register (v0) when LMUL=1. (Section 5.3) | ||
275 | + * | ||
276 | + * 2. In widen instructions and some other instructions, like vslideup.vx, | ||
277 | + * there is no need to check whether LMUL=1. | ||
278 | + */ | ||
279 | +static bool vext_check_overlap_mask(DisasContext *s, uint32_t vd, bool vm, | ||
280 | + bool force) | ||
281 | +{ | ||
282 | + return (vm != 0 || vd != 0) || (!force && (s->lmul == 0)); | ||
283 | +} | ||
284 | + | ||
285 | +/* The LMUL setting must be such that LMUL * NFIELDS <= 8. (Section 7.8) */ | ||
286 | +static bool vext_check_nf(DisasContext *s, uint32_t nf) | ||
287 | +{ | ||
288 | + return (1 << s->lmul) * nf <= 8; | ||
289 | +} | ||
290 | + | ||
291 | +/* common translation macro */ | ||
292 | +#define GEN_VEXT_TRANS(NAME, SEQ, ARGTYPE, OP, CHECK) \ | ||
293 | +static bool trans_##NAME(DisasContext *s, arg_##ARGTYPE *a)\ | ||
294 | +{ \ | ||
295 | + if (CHECK(s, a)) { \ | ||
296 | + return OP(s, a, SEQ); \ | ||
297 | + } \ | ||
298 | + return false; \ | ||
299 | +} | ||
300 | + | ||
301 | +/* | ||
302 | + *** unit stride load and store | ||
303 | + */ | ||
304 | +typedef void gen_helper_ldst_us(TCGv_ptr, TCGv_ptr, TCGv, | ||
305 | + TCGv_env, TCGv_i32); | ||
306 | + | ||
307 | +static bool ldst_us_trans(uint32_t vd, uint32_t rs1, uint32_t data, | ||
308 | + gen_helper_ldst_us *fn, DisasContext *s) | ||
309 | +{ | ||
310 | + TCGv_ptr dest, mask; | ||
311 | + TCGv base; | ||
312 | + TCGv_i32 desc; | ||
313 | + | ||
314 | + TCGLabel *over = gen_new_label(); | ||
315 | + tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); | ||
316 | + | ||
317 | + dest = tcg_temp_new_ptr(); | ||
318 | + mask = tcg_temp_new_ptr(); | ||
319 | + base = tcg_temp_new(); | ||
320 | + | ||
321 | + /* | ||
322 | + * As simd_desc supports at most 256 bytes, and in this implementation, | ||
323 | + * the max vector group length is 2048 bytes. So split it into two parts. | ||
324 | + * | ||
325 | + * The first part is vlen in bytes, encoded in maxsz of simd_desc. | ||
326 | + * The second part is lmul, encoded in data of simd_desc. | ||
327 | + */ | ||
328 | + desc = tcg_const_i32(simd_desc(0, s->vlen / 8, data)); | ||
329 | + | ||
330 | + gen_get_gpr(base, rs1); | ||
331 | + tcg_gen_addi_ptr(dest, cpu_env, vreg_ofs(s, vd)); | ||
332 | + tcg_gen_addi_ptr(mask, cpu_env, vreg_ofs(s, 0)); | ||
333 | + | ||
334 | + fn(dest, mask, base, cpu_env, desc); | ||
335 | + | ||
336 | + tcg_temp_free_ptr(dest); | ||
337 | + tcg_temp_free_ptr(mask); | ||
338 | + tcg_temp_free(base); | ||
339 | + tcg_temp_free_i32(desc); | ||
340 | + gen_set_label(over); | ||
341 | + return true; | ||
342 | +} | ||
343 | + | ||
344 | +static bool ld_us_op(DisasContext *s, arg_r2nfvm *a, uint8_t seq) | ||
345 | +{ | ||
346 | + uint32_t data = 0; | ||
347 | + gen_helper_ldst_us *fn; | ||
348 | + static gen_helper_ldst_us * const fns[2][7][4] = { | ||
349 | + /* masked unit stride load */ | ||
350 | + { { gen_helper_vlb_v_b_mask, gen_helper_vlb_v_h_mask, | ||
351 | + gen_helper_vlb_v_w_mask, gen_helper_vlb_v_d_mask }, | ||
352 | + { NULL, gen_helper_vlh_v_h_mask, | ||
353 | + gen_helper_vlh_v_w_mask, gen_helper_vlh_v_d_mask }, | ||
354 | + { NULL, NULL, | ||
355 | + gen_helper_vlw_v_w_mask, gen_helper_vlw_v_d_mask }, | ||
356 | + { gen_helper_vle_v_b_mask, gen_helper_vle_v_h_mask, | ||
357 | + gen_helper_vle_v_w_mask, gen_helper_vle_v_d_mask }, | ||
358 | + { gen_helper_vlbu_v_b_mask, gen_helper_vlbu_v_h_mask, | ||
359 | + gen_helper_vlbu_v_w_mask, gen_helper_vlbu_v_d_mask }, | ||
360 | + { NULL, gen_helper_vlhu_v_h_mask, | ||
361 | + gen_helper_vlhu_v_w_mask, gen_helper_vlhu_v_d_mask }, | ||
362 | + { NULL, NULL, | ||
363 | + gen_helper_vlwu_v_w_mask, gen_helper_vlwu_v_d_mask } }, | ||
364 | + /* unmasked unit stride load */ | ||
365 | + { { gen_helper_vlb_v_b, gen_helper_vlb_v_h, | ||
366 | + gen_helper_vlb_v_w, gen_helper_vlb_v_d }, | ||
367 | + { NULL, gen_helper_vlh_v_h, | ||
368 | + gen_helper_vlh_v_w, gen_helper_vlh_v_d }, | ||
369 | + { NULL, NULL, | ||
370 | + gen_helper_vlw_v_w, gen_helper_vlw_v_d }, | ||
371 | + { gen_helper_vle_v_b, gen_helper_vle_v_h, | ||
372 | + gen_helper_vle_v_w, gen_helper_vle_v_d }, | ||
373 | + { gen_helper_vlbu_v_b, gen_helper_vlbu_v_h, | ||
374 | + gen_helper_vlbu_v_w, gen_helper_vlbu_v_d }, | ||
375 | + { NULL, gen_helper_vlhu_v_h, | ||
376 | + gen_helper_vlhu_v_w, gen_helper_vlhu_v_d }, | ||
377 | + { NULL, NULL, | ||
378 | + gen_helper_vlwu_v_w, gen_helper_vlwu_v_d } } | ||
379 | + }; | ||
380 | + | ||
381 | + fn = fns[a->vm][seq][s->sew]; | ||
382 | + if (fn == NULL) { | ||
383 | + return false; | ||
384 | + } | ||
385 | + | ||
386 | + data = FIELD_DP32(data, VDATA, MLEN, s->mlen); | ||
387 | + data = FIELD_DP32(data, VDATA, VM, a->vm); | ||
388 | + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); | ||
389 | + data = FIELD_DP32(data, VDATA, NF, a->nf); | ||
390 | + return ldst_us_trans(a->rd, a->rs1, data, fn, s); | ||
391 | +} | ||
392 | + | ||
393 | +static bool ld_us_check(DisasContext *s, arg_r2nfvm* a) | ||
394 | +{ | ||
395 | + return (vext_check_isa_ill(s) && | ||
396 | + vext_check_overlap_mask(s, a->rd, a->vm, false) && | ||
397 | + vext_check_reg(s, a->rd, false) && | ||
398 | + vext_check_nf(s, a->nf)); | ||
399 | +} | ||
400 | + | ||
401 | +GEN_VEXT_TRANS(vlb_v, 0, r2nfvm, ld_us_op, ld_us_check) | ||
402 | +GEN_VEXT_TRANS(vlh_v, 1, r2nfvm, ld_us_op, ld_us_check) | ||
403 | +GEN_VEXT_TRANS(vlw_v, 2, r2nfvm, ld_us_op, ld_us_check) | ||
404 | +GEN_VEXT_TRANS(vle_v, 3, r2nfvm, ld_us_op, ld_us_check) | ||
405 | +GEN_VEXT_TRANS(vlbu_v, 4, r2nfvm, ld_us_op, ld_us_check) | ||
406 | +GEN_VEXT_TRANS(vlhu_v, 5, r2nfvm, ld_us_op, ld_us_check) | ||
407 | +GEN_VEXT_TRANS(vlwu_v, 6, r2nfvm, ld_us_op, ld_us_check) | ||
408 | + | ||
409 | +static bool st_us_op(DisasContext *s, arg_r2nfvm *a, uint8_t seq) | ||
410 | +{ | ||
411 | + uint32_t data = 0; | ||
412 | + gen_helper_ldst_us *fn; | ||
413 | + static gen_helper_ldst_us * const fns[2][4][4] = { | ||
414 | + /* masked unit stride store */ | ||
415 | + { { gen_helper_vsb_v_b_mask, gen_helper_vsb_v_h_mask, | ||
416 | + gen_helper_vsb_v_w_mask, gen_helper_vsb_v_d_mask }, | ||
417 | + { NULL, gen_helper_vsh_v_h_mask, | ||
418 | + gen_helper_vsh_v_w_mask, gen_helper_vsh_v_d_mask }, | ||
419 | + { NULL, NULL, | ||
420 | + gen_helper_vsw_v_w_mask, gen_helper_vsw_v_d_mask }, | ||
421 | + { gen_helper_vse_v_b_mask, gen_helper_vse_v_h_mask, | ||
422 | + gen_helper_vse_v_w_mask, gen_helper_vse_v_d_mask } }, | ||
423 | + /* unmasked unit stride store */ | ||
424 | + { { gen_helper_vsb_v_b, gen_helper_vsb_v_h, | ||
425 | + gen_helper_vsb_v_w, gen_helper_vsb_v_d }, | ||
426 | + { NULL, gen_helper_vsh_v_h, | ||
427 | + gen_helper_vsh_v_w, gen_helper_vsh_v_d }, | ||
428 | + { NULL, NULL, | ||
429 | + gen_helper_vsw_v_w, gen_helper_vsw_v_d }, | ||
430 | + { gen_helper_vse_v_b, gen_helper_vse_v_h, | ||
431 | + gen_helper_vse_v_w, gen_helper_vse_v_d } } | ||
432 | + }; | ||
433 | + | ||
434 | + fn = fns[a->vm][seq][s->sew]; | ||
435 | + if (fn == NULL) { | ||
436 | + return false; | ||
437 | + } | ||
438 | + | ||
439 | + data = FIELD_DP32(data, VDATA, MLEN, s->mlen); | ||
440 | + data = FIELD_DP32(data, VDATA, VM, a->vm); | ||
441 | + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); | ||
442 | + data = FIELD_DP32(data, VDATA, NF, a->nf); | ||
443 | + return ldst_us_trans(a->rd, a->rs1, data, fn, s); | ||
444 | +} | ||
445 | + | ||
446 | +static bool st_us_check(DisasContext *s, arg_r2nfvm* a) | ||
447 | +{ | ||
448 | + return (vext_check_isa_ill(s) && | ||
449 | + vext_check_reg(s, a->rd, false) && | ||
450 | + vext_check_nf(s, a->nf)); | ||
451 | +} | ||
452 | + | ||
453 | +GEN_VEXT_TRANS(vsb_v, 0, r2nfvm, st_us_op, st_us_check) | ||
454 | +GEN_VEXT_TRANS(vsh_v, 1, r2nfvm, st_us_op, st_us_check) | ||
455 | +GEN_VEXT_TRANS(vsw_v, 2, r2nfvm, st_us_op, st_us_check) | ||
456 | +GEN_VEXT_TRANS(vse_v, 3, r2nfvm, st_us_op, st_us_check) | ||
457 | + | ||
458 | +/* | ||
459 | + *** stride load and store | ||
460 | + */ | ||
461 | +typedef void gen_helper_ldst_stride(TCGv_ptr, TCGv_ptr, TCGv, | ||
462 | + TCGv, TCGv_env, TCGv_i32); | ||
463 | + | ||
464 | +static bool ldst_stride_trans(uint32_t vd, uint32_t rs1, uint32_t rs2, | ||
465 | + uint32_t data, gen_helper_ldst_stride *fn, | ||
466 | + DisasContext *s) | ||
467 | +{ | ||
468 | + TCGv_ptr dest, mask; | ||
469 | + TCGv base, stride; | ||
470 | + TCGv_i32 desc; | ||
471 | + | ||
472 | + TCGLabel *over = gen_new_label(); | ||
473 | + tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); | ||
474 | + | ||
475 | + dest = tcg_temp_new_ptr(); | ||
476 | + mask = tcg_temp_new_ptr(); | ||
477 | + base = tcg_temp_new(); | ||
478 | + stride = tcg_temp_new(); | ||
479 | + desc = tcg_const_i32(simd_desc(0, s->vlen / 8, data)); | ||
480 | + | ||
481 | + gen_get_gpr(base, rs1); | ||
482 | + gen_get_gpr(stride, rs2); | ||
483 | + tcg_gen_addi_ptr(dest, cpu_env, vreg_ofs(s, vd)); | ||
484 | + tcg_gen_addi_ptr(mask, cpu_env, vreg_ofs(s, 0)); | ||
485 | + | ||
486 | + fn(dest, mask, base, stride, cpu_env, desc); | ||
487 | + | ||
488 | + tcg_temp_free_ptr(dest); | ||
489 | + tcg_temp_free_ptr(mask); | ||
490 | + tcg_temp_free(base); | ||
491 | + tcg_temp_free(stride); | ||
492 | + tcg_temp_free_i32(desc); | ||
493 | + gen_set_label(over); | ||
494 | + return true; | ||
495 | +} | ||
496 | + | ||
497 | +static bool ld_stride_op(DisasContext *s, arg_rnfvm *a, uint8_t seq) | ||
498 | +{ | ||
499 | + uint32_t data = 0; | ||
500 | + gen_helper_ldst_stride *fn; | ||
501 | + static gen_helper_ldst_stride * const fns[7][4] = { | ||
502 | + { gen_helper_vlsb_v_b, gen_helper_vlsb_v_h, | ||
503 | + gen_helper_vlsb_v_w, gen_helper_vlsb_v_d }, | ||
504 | + { NULL, gen_helper_vlsh_v_h, | ||
505 | + gen_helper_vlsh_v_w, gen_helper_vlsh_v_d }, | ||
506 | + { NULL, NULL, | ||
507 | + gen_helper_vlsw_v_w, gen_helper_vlsw_v_d }, | ||
508 | + { gen_helper_vlse_v_b, gen_helper_vlse_v_h, | ||
509 | + gen_helper_vlse_v_w, gen_helper_vlse_v_d }, | ||
510 | + { gen_helper_vlsbu_v_b, gen_helper_vlsbu_v_h, | ||
511 | + gen_helper_vlsbu_v_w, gen_helper_vlsbu_v_d }, | ||
512 | + { NULL, gen_helper_vlshu_v_h, | ||
513 | + gen_helper_vlshu_v_w, gen_helper_vlshu_v_d }, | ||
514 | + { NULL, NULL, | ||
515 | + gen_helper_vlswu_v_w, gen_helper_vlswu_v_d }, | ||
516 | + }; | ||
517 | + | ||
518 | + fn = fns[seq][s->sew]; | ||
519 | + if (fn == NULL) { | ||
520 | + return false; | ||
521 | + } | ||
522 | + | ||
523 | + data = FIELD_DP32(data, VDATA, MLEN, s->mlen); | ||
524 | + data = FIELD_DP32(data, VDATA, VM, a->vm); | ||
525 | + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); | ||
526 | + data = FIELD_DP32(data, VDATA, NF, a->nf); | ||
527 | + return ldst_stride_trans(a->rd, a->rs1, a->rs2, data, fn, s); | ||
528 | +} | ||
529 | + | ||
530 | +static bool ld_stride_check(DisasContext *s, arg_rnfvm* a) | ||
531 | +{ | ||
532 | + return (vext_check_isa_ill(s) && | ||
533 | + vext_check_overlap_mask(s, a->rd, a->vm, false) && | ||
534 | + vext_check_reg(s, a->rd, false) && | ||
535 | + vext_check_nf(s, a->nf)); | ||
536 | +} | ||
537 | + | ||
538 | +GEN_VEXT_TRANS(vlsb_v, 0, rnfvm, ld_stride_op, ld_stride_check) | ||
539 | +GEN_VEXT_TRANS(vlsh_v, 1, rnfvm, ld_stride_op, ld_stride_check) | ||
540 | +GEN_VEXT_TRANS(vlsw_v, 2, rnfvm, ld_stride_op, ld_stride_check) | ||
541 | +GEN_VEXT_TRANS(vlse_v, 3, rnfvm, ld_stride_op, ld_stride_check) | ||
542 | +GEN_VEXT_TRANS(vlsbu_v, 4, rnfvm, ld_stride_op, ld_stride_check) | ||
543 | +GEN_VEXT_TRANS(vlshu_v, 5, rnfvm, ld_stride_op, ld_stride_check) | ||
544 | +GEN_VEXT_TRANS(vlswu_v, 6, rnfvm, ld_stride_op, ld_stride_check) | ||
545 | + | ||
546 | +static bool st_stride_op(DisasContext *s, arg_rnfvm *a, uint8_t seq) | ||
547 | +{ | ||
548 | + uint32_t data = 0; | ||
549 | + gen_helper_ldst_stride *fn; | ||
550 | + static gen_helper_ldst_stride * const fns[4][4] = { | ||
551 | + /* masked stride store */ | ||
552 | + { gen_helper_vssb_v_b, gen_helper_vssb_v_h, | ||
553 | + gen_helper_vssb_v_w, gen_helper_vssb_v_d }, | ||
554 | + { NULL, gen_helper_vssh_v_h, | ||
555 | + gen_helper_vssh_v_w, gen_helper_vssh_v_d }, | ||
556 | + { NULL, NULL, | ||
557 | + gen_helper_vssw_v_w, gen_helper_vssw_v_d }, | ||
558 | + { gen_helper_vsse_v_b, gen_helper_vsse_v_h, | ||
559 | + gen_helper_vsse_v_w, gen_helper_vsse_v_d } | ||
560 | + }; | ||
561 | + | ||
562 | + data = FIELD_DP32(data, VDATA, MLEN, s->mlen); | ||
563 | + data = FIELD_DP32(data, VDATA, VM, a->vm); | ||
564 | + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); | ||
565 | + data = FIELD_DP32(data, VDATA, NF, a->nf); | ||
566 | + fn = fns[seq][s->sew]; | ||
567 | + if (fn == NULL) { | ||
568 | + return false; | ||
569 | + } | ||
570 | + | ||
571 | + return ldst_stride_trans(a->rd, a->rs1, a->rs2, data, fn, s); | ||
572 | +} | ||
573 | + | ||
574 | +static bool st_stride_check(DisasContext *s, arg_rnfvm* a) | ||
575 | +{ | ||
576 | + return (vext_check_isa_ill(s) && | ||
577 | + vext_check_reg(s, a->rd, false) && | ||
578 | + vext_check_nf(s, a->nf)); | ||
579 | +} | ||
580 | + | ||
581 | +GEN_VEXT_TRANS(vssb_v, 0, rnfvm, st_stride_op, st_stride_check) | ||
582 | +GEN_VEXT_TRANS(vssh_v, 1, rnfvm, st_stride_op, st_stride_check) | ||
583 | +GEN_VEXT_TRANS(vssw_v, 2, rnfvm, st_stride_op, st_stride_check) | ||
584 | +GEN_VEXT_TRANS(vsse_v, 3, rnfvm, st_stride_op, st_stride_check) | ||
585 | diff --git a/target/riscv/translate.c b/target/riscv/translate.c | ||
586 | index XXXXXXX..XXXXXXX 100644 | ||
587 | --- a/target/riscv/translate.c | ||
588 | +++ b/target/riscv/translate.c | ||
589 | @@ -XXX,XX +XXX,XX @@ typedef struct DisasContext { | ||
590 | uint8_t lmul; | ||
591 | uint8_t sew; | ||
592 | uint16_t vlen; | ||
593 | + uint16_t mlen; | ||
594 | bool vl_eq_vlmax; | ||
595 | } DisasContext; | ||
596 | |||
597 | @@ -XXX,XX +XXX,XX @@ static void decode_RV32_64C(DisasContext *ctx, uint16_t opcode) | ||
598 | } | ||
599 | } | ||
600 | |||
601 | +static int ex_plus_1(DisasContext *ctx, int nf) | ||
602 | +{ | ||
603 | + return nf + 1; | ||
604 | +} | ||
605 | + | ||
606 | #define EX_SH(amount) \ | ||
607 | static int ex_shift_##amount(DisasContext *ctx, int imm) \ | ||
608 | { \ | ||
609 | @@ -XXX,XX +XXX,XX @@ static void riscv_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) | ||
610 | ctx->vill = FIELD_EX32(tb_flags, TB_FLAGS, VILL); | ||
611 | ctx->sew = FIELD_EX32(tb_flags, TB_FLAGS, SEW); | ||
612 | ctx->lmul = FIELD_EX32(tb_flags, TB_FLAGS, LMUL); | ||
613 | + ctx->mlen = 1 << (ctx->sew + 3 - ctx->lmul); | ||
614 | ctx->vl_eq_vlmax = FIELD_EX32(tb_flags, TB_FLAGS, VL_EQ_VLMAX); | ||
615 | } | ||
616 | |||
617 | diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c | ||
618 | index XXXXXXX..XXXXXXX 100644 | ||
619 | --- a/target/riscv/vector_helper.c | ||
620 | +++ b/target/riscv/vector_helper.c | ||
621 | @@ -XXX,XX +XXX,XX @@ | ||
622 | |||
623 | #include "qemu/osdep.h" | ||
624 | #include "cpu.h" | 141 | #include "cpu.h" |
625 | +#include "exec/memop.h" | 142 | +#include "crypto/aes.h" |
143 | +#include "crypto/aes-round.h" | ||
144 | #include "exec/memop.h" | ||
626 | #include "exec/exec-all.h" | 145 | #include "exec/exec-all.h" |
627 | #include "exec/helper-proto.h" | 146 | #include "exec/helper-proto.h" |
628 | +#include "tcg/tcg-gvec-desc.h" | 147 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVX2, vwsll_vx_w, WOP_UUU_W, H8, H4, DO_SLL) |
629 | +#include "internals.h" | 148 | GEN_VEXT_VX(vwsll_vx_b, 2) |
630 | #include <math.h> | 149 | GEN_VEXT_VX(vwsll_vx_h, 4) |
631 | 150 | GEN_VEXT_VX(vwsll_vx_w, 8) | |
632 | target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1, | 151 | + |
633 | @@ -XXX,XX +XXX,XX @@ target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1, | 152 | +void HELPER(egs_check)(uint32_t egs, CPURISCVState *env) |
634 | env->vstart = 0; | 153 | +{ |
635 | return vl; | 154 | + uint32_t vl = env->vl; |
636 | } | 155 | + uint32_t vstart = env->vstart; |
156 | + | ||
157 | + if (vl % egs != 0 || vstart % egs != 0) { | ||
158 | + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); | ||
159 | + } | ||
160 | +} | ||
161 | + | ||
162 | +static inline void xor_round_key(AESState *round_state, AESState *round_key) | ||
163 | +{ | ||
164 | + round_state->v = round_state->v ^ round_key->v; | ||
165 | +} | ||
166 | + | ||
167 | +#define GEN_ZVKNED_HELPER_VV(NAME, ...) \ | ||
168 | + void HELPER(NAME)(void *vd, void *vs2, CPURISCVState *env, \ | ||
169 | + uint32_t desc) \ | ||
170 | + { \ | ||
171 | + uint32_t vl = env->vl; \ | ||
172 | + uint32_t total_elems = vext_get_total_elems(env, desc, 4); \ | ||
173 | + uint32_t vta = vext_vta(desc); \ | ||
174 | + \ | ||
175 | + for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) { \ | ||
176 | + AESState round_key; \ | ||
177 | + round_key.d[0] = *((uint64_t *)vs2 + H8(i * 2 + 0)); \ | ||
178 | + round_key.d[1] = *((uint64_t *)vs2 + H8(i * 2 + 1)); \ | ||
179 | + AESState round_state; \ | ||
180 | + round_state.d[0] = *((uint64_t *)vd + H8(i * 2 + 0)); \ | ||
181 | + round_state.d[1] = *((uint64_t *)vd + H8(i * 2 + 1)); \ | ||
182 | + __VA_ARGS__; \ | ||
183 | + *((uint64_t *)vd + H8(i * 2 + 0)) = round_state.d[0]; \ | ||
184 | + *((uint64_t *)vd + H8(i * 2 + 1)) = round_state.d[1]; \ | ||
185 | + } \ | ||
186 | + env->vstart = 0; \ | ||
187 | + /* set tail elements to 1s */ \ | ||
188 | + vext_set_elems_1s(vd, vta, vl * 4, total_elems * 4); \ | ||
189 | + } | ||
190 | + | ||
191 | +#define GEN_ZVKNED_HELPER_VS(NAME, ...) \ | ||
192 | + void HELPER(NAME)(void *vd, void *vs2, CPURISCVState *env, \ | ||
193 | + uint32_t desc) \ | ||
194 | + { \ | ||
195 | + uint32_t vl = env->vl; \ | ||
196 | + uint32_t total_elems = vext_get_total_elems(env, desc, 4); \ | ||
197 | + uint32_t vta = vext_vta(desc); \ | ||
198 | + \ | ||
199 | + for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) { \ | ||
200 | + AESState round_key; \ | ||
201 | + round_key.d[0] = *((uint64_t *)vs2 + H8(0)); \ | ||
202 | + round_key.d[1] = *((uint64_t *)vs2 + H8(1)); \ | ||
203 | + AESState round_state; \ | ||
204 | + round_state.d[0] = *((uint64_t *)vd + H8(i * 2 + 0)); \ | ||
205 | + round_state.d[1] = *((uint64_t *)vd + H8(i * 2 + 1)); \ | ||
206 | + __VA_ARGS__; \ | ||
207 | + *((uint64_t *)vd + H8(i * 2 + 0)) = round_state.d[0]; \ | ||
208 | + *((uint64_t *)vd + H8(i * 2 + 1)) = round_state.d[1]; \ | ||
209 | + } \ | ||
210 | + env->vstart = 0; \ | ||
211 | + /* set tail elements to 1s */ \ | ||
212 | + vext_set_elems_1s(vd, vta, vl * 4, total_elems * 4); \ | ||
213 | + } | ||
214 | + | ||
215 | +GEN_ZVKNED_HELPER_VV(vaesef_vv, aesenc_SB_SR_AK(&round_state, | ||
216 | + &round_state, | ||
217 | + &round_key, | ||
218 | + false);) | ||
219 | +GEN_ZVKNED_HELPER_VS(vaesef_vs, aesenc_SB_SR_AK(&round_state, | ||
220 | + &round_state, | ||
221 | + &round_key, | ||
222 | + false);) | ||
223 | +GEN_ZVKNED_HELPER_VV(vaesdf_vv, aesdec_ISB_ISR_AK(&round_state, | ||
224 | + &round_state, | ||
225 | + &round_key, | ||
226 | + false);) | ||
227 | +GEN_ZVKNED_HELPER_VS(vaesdf_vs, aesdec_ISB_ISR_AK(&round_state, | ||
228 | + &round_state, | ||
229 | + &round_key, | ||
230 | + false);) | ||
231 | +GEN_ZVKNED_HELPER_VV(vaesem_vv, aesenc_SB_SR_MC_AK(&round_state, | ||
232 | + &round_state, | ||
233 | + &round_key, | ||
234 | + false);) | ||
235 | +GEN_ZVKNED_HELPER_VS(vaesem_vs, aesenc_SB_SR_MC_AK(&round_state, | ||
236 | + &round_state, | ||
237 | + &round_key, | ||
238 | + false);) | ||
239 | +GEN_ZVKNED_HELPER_VV(vaesdm_vv, aesdec_ISB_ISR_AK_IMC(&round_state, | ||
240 | + &round_state, | ||
241 | + &round_key, | ||
242 | + false);) | ||
243 | +GEN_ZVKNED_HELPER_VS(vaesdm_vs, aesdec_ISB_ISR_AK_IMC(&round_state, | ||
244 | + &round_state, | ||
245 | + &round_key, | ||
246 | + false);) | ||
247 | +GEN_ZVKNED_HELPER_VS(vaesz_vs, xor_round_key(&round_state, &round_key);) | ||
248 | + | ||
249 | +void HELPER(vaeskf1_vi)(void *vd_vptr, void *vs2_vptr, uint32_t uimm, | ||
250 | + CPURISCVState *env, uint32_t desc) | ||
251 | +{ | ||
252 | + uint32_t *vd = vd_vptr; | ||
253 | + uint32_t *vs2 = vs2_vptr; | ||
254 | + uint32_t vl = env->vl; | ||
255 | + uint32_t total_elems = vext_get_total_elems(env, desc, 4); | ||
256 | + uint32_t vta = vext_vta(desc); | ||
257 | + | ||
258 | + uimm &= 0b1111; | ||
259 | + if (uimm > 10 || uimm == 0) { | ||
260 | + uimm ^= 0b1000; | ||
261 | + } | ||
262 | + | ||
263 | + for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) { | ||
264 | + uint32_t rk[8], tmp; | ||
265 | + static const uint32_t rcon[] = { | ||
266 | + 0x00000001, 0x00000002, 0x00000004, 0x00000008, 0x00000010, | ||
267 | + 0x00000020, 0x00000040, 0x00000080, 0x0000001B, 0x00000036, | ||
268 | + }; | ||
269 | + | ||
270 | + rk[0] = vs2[i * 4 + H4(0)]; | ||
271 | + rk[1] = vs2[i * 4 + H4(1)]; | ||
272 | + rk[2] = vs2[i * 4 + H4(2)]; | ||
273 | + rk[3] = vs2[i * 4 + H4(3)]; | ||
274 | + tmp = ror32(rk[3], 8); | ||
275 | + | ||
276 | + rk[4] = rk[0] ^ (((uint32_t)AES_sbox[(tmp >> 24) & 0xff] << 24) | | ||
277 | + ((uint32_t)AES_sbox[(tmp >> 16) & 0xff] << 16) | | ||
278 | + ((uint32_t)AES_sbox[(tmp >> 8) & 0xff] << 8) | | ||
279 | + ((uint32_t)AES_sbox[(tmp >> 0) & 0xff] << 0)) | ||
280 | + ^ rcon[uimm - 1]; | ||
281 | + rk[5] = rk[1] ^ rk[4]; | ||
282 | + rk[6] = rk[2] ^ rk[5]; | ||
283 | + rk[7] = rk[3] ^ rk[6]; | ||
284 | + | ||
285 | + vd[i * 4 + H4(0)] = rk[4]; | ||
286 | + vd[i * 4 + H4(1)] = rk[5]; | ||
287 | + vd[i * 4 + H4(2)] = rk[6]; | ||
288 | + vd[i * 4 + H4(3)] = rk[7]; | ||
289 | + } | ||
290 | + env->vstart = 0; | ||
291 | + /* set tail elements to 1s */ | ||
292 | + vext_set_elems_1s(vd, vta, vl * 4, total_elems * 4); | ||
293 | +} | ||
294 | + | ||
295 | +void HELPER(vaeskf2_vi)(void *vd_vptr, void *vs2_vptr, uint32_t uimm, | ||
296 | + CPURISCVState *env, uint32_t desc) | ||
297 | +{ | ||
298 | + uint32_t *vd = vd_vptr; | ||
299 | + uint32_t *vs2 = vs2_vptr; | ||
300 | + uint32_t vl = env->vl; | ||
301 | + uint32_t total_elems = vext_get_total_elems(env, desc, 4); | ||
302 | + uint32_t vta = vext_vta(desc); | ||
303 | + | ||
304 | + uimm &= 0b1111; | ||
305 | + if (uimm > 14 || uimm < 2) { | ||
306 | + uimm ^= 0b1000; | ||
307 | + } | ||
308 | + | ||
309 | + for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) { | ||
310 | + uint32_t rk[12], tmp; | ||
311 | + static const uint32_t rcon[] = { | ||
312 | + 0x00000001, 0x00000002, 0x00000004, 0x00000008, 0x00000010, | ||
313 | + 0x00000020, 0x00000040, 0x00000080, 0x0000001B, 0x00000036, | ||
314 | + }; | ||
315 | + | ||
316 | + rk[0] = vd[i * 4 + H4(0)]; | ||
317 | + rk[1] = vd[i * 4 + H4(1)]; | ||
318 | + rk[2] = vd[i * 4 + H4(2)]; | ||
319 | + rk[3] = vd[i * 4 + H4(3)]; | ||
320 | + rk[4] = vs2[i * 4 + H4(0)]; | ||
321 | + rk[5] = vs2[i * 4 + H4(1)]; | ||
322 | + rk[6] = vs2[i * 4 + H4(2)]; | ||
323 | + rk[7] = vs2[i * 4 + H4(3)]; | ||
324 | + | ||
325 | + if (uimm % 2 == 0) { | ||
326 | + tmp = ror32(rk[7], 8); | ||
327 | + rk[8] = rk[0] ^ (((uint32_t)AES_sbox[(tmp >> 24) & 0xff] << 24) | | ||
328 | + ((uint32_t)AES_sbox[(tmp >> 16) & 0xff] << 16) | | ||
329 | + ((uint32_t)AES_sbox[(tmp >> 8) & 0xff] << 8) | | ||
330 | + ((uint32_t)AES_sbox[(tmp >> 0) & 0xff] << 0)) | ||
331 | + ^ rcon[(uimm - 1) / 2]; | ||
332 | + } else { | ||
333 | + rk[8] = rk[0] ^ (((uint32_t)AES_sbox[(rk[7] >> 24) & 0xff] << 24) | | ||
334 | + ((uint32_t)AES_sbox[(rk[7] >> 16) & 0xff] << 16) | | ||
335 | + ((uint32_t)AES_sbox[(rk[7] >> 8) & 0xff] << 8) | | ||
336 | + ((uint32_t)AES_sbox[(rk[7] >> 0) & 0xff] << 0)); | ||
337 | + } | ||
338 | + rk[9] = rk[1] ^ rk[8]; | ||
339 | + rk[10] = rk[2] ^ rk[9]; | ||
340 | + rk[11] = rk[3] ^ rk[10]; | ||
341 | + | ||
342 | + vd[i * 4 + H4(0)] = rk[8]; | ||
343 | + vd[i * 4 + H4(1)] = rk[9]; | ||
344 | + vd[i * 4 + H4(2)] = rk[10]; | ||
345 | + vd[i * 4 + H4(3)] = rk[11]; | ||
346 | + } | ||
347 | + env->vstart = 0; | ||
348 | + /* set tail elements to 1s */ | ||
349 | + vext_set_elems_1s(vd, vta, vl * 4, total_elems * 4); | ||
350 | +} | ||
351 | diff --git a/target/riscv/insn_trans/trans_rvvk.c.inc b/target/riscv/insn_trans/trans_rvvk.c.inc | ||
352 | index XXXXXXX..XXXXXXX 100644 | ||
353 | --- a/target/riscv/insn_trans/trans_rvvk.c.inc | ||
354 | +++ b/target/riscv/insn_trans/trans_rvvk.c.inc | ||
355 | @@ -XXX,XX +XXX,XX @@ static bool vwsll_vx_check(DisasContext *s, arg_rmrr *a) | ||
356 | GEN_OPIVV_WIDEN_TRANS(vwsll_vv, vwsll_vv_check) | ||
357 | GEN_OPIVX_WIDEN_TRANS(vwsll_vx, vwsll_vx_check) | ||
358 | GEN_OPIVI_WIDEN_TRANS(vwsll_vi, IMM_ZX, vwsll_vx, vwsll_vx_check) | ||
637 | + | 359 | + |
638 | +/* | 360 | +/* |
639 | + * Note that vector data is stored in host-endian 64-bit chunks, | 361 | + * Zvkned |
640 | + * so addressing units smaller than that needs a host-endian fixup. | ||
641 | + */ | 362 | + */ |
642 | +#ifdef HOST_WORDS_BIGENDIAN | 363 | + |
643 | +#define H1(x) ((x) ^ 7) | 364 | +#define ZVKNED_EGS 4 |
644 | +#define H1_2(x) ((x) ^ 6) | 365 | + |
645 | +#define H1_4(x) ((x) ^ 4) | 366 | +#define GEN_V_UNMASKED_TRANS(NAME, CHECK, EGS) \ |
646 | +#define H2(x) ((x) ^ 3) | 367 | + static bool trans_##NAME(DisasContext *s, arg_##NAME *a) \ |
647 | +#define H4(x) ((x) ^ 1) | 368 | + { \ |
648 | +#define H8(x) ((x)) | 369 | + if (CHECK(s, a)) { \ |
649 | +#else | 370 | + TCGv_ptr rd_v, rs2_v; \ |
650 | +#define H1(x) (x) | 371 | + TCGv_i32 desc, egs; \ |
651 | +#define H1_2(x) (x) | 372 | + uint32_t data = 0; \ |
652 | +#define H1_4(x) (x) | 373 | + TCGLabel *over = gen_new_label(); \ |
653 | +#define H2(x) (x) | 374 | + \ |
654 | +#define H4(x) (x) | 375 | + if (!s->vstart_eq_zero || !s->vl_eq_vlmax) { \ |
655 | +#define H8(x) (x) | 376 | + /* save opcode for unwinding in case we throw an exception */ \ |
656 | +#endif | 377 | + decode_save_opc(s); \ |
657 | + | 378 | + egs = tcg_constant_i32(EGS); \ |
658 | +static inline uint32_t vext_nf(uint32_t desc) | 379 | + gen_helper_egs_check(egs, cpu_env); \ |
659 | +{ | 380 | + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ |
660 | + return FIELD_EX32(simd_data(desc), VDATA, NF); | 381 | + } \ |
661 | +} | 382 | + \ |
662 | + | 383 | + data = FIELD_DP32(data, VDATA, VM, a->vm); \ |
663 | +static inline uint32_t vext_mlen(uint32_t desc) | 384 | + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ |
664 | +{ | 385 | + data = FIELD_DP32(data, VDATA, VTA, s->vta); \ |
665 | + return FIELD_EX32(simd_data(desc), VDATA, MLEN); | 386 | + data = FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s); \ |
666 | +} | 387 | + data = FIELD_DP32(data, VDATA, VMA, s->vma); \ |
667 | + | 388 | + rd_v = tcg_temp_new_ptr(); \ |
668 | +static inline uint32_t vext_vm(uint32_t desc) | 389 | + rs2_v = tcg_temp_new_ptr(); \ |
669 | +{ | 390 | + desc = tcg_constant_i32( \ |
670 | + return FIELD_EX32(simd_data(desc), VDATA, VM); | 391 | + simd_desc(s->cfg_ptr->vlen / 8, s->cfg_ptr->vlen / 8, data)); \ |
671 | +} | 392 | + tcg_gen_addi_ptr(rd_v, cpu_env, vreg_ofs(s, a->rd)); \ |
672 | + | 393 | + tcg_gen_addi_ptr(rs2_v, cpu_env, vreg_ofs(s, a->rs2)); \ |
673 | +static inline uint32_t vext_lmul(uint32_t desc) | 394 | + gen_helper_##NAME(rd_v, rs2_v, cpu_env, desc); \ |
674 | +{ | 395 | + mark_vs_dirty(s); \ |
675 | + return FIELD_EX32(simd_data(desc), VDATA, LMUL); | 396 | + gen_set_label(over); \ |
676 | +} | 397 | + return true; \ |
677 | + | 398 | + } \ |
678 | +/* | 399 | + return false; \ |
679 | + * Get vector group length in bytes. Its range is [64, 2048]. | 400 | + } |
680 | + * | 401 | + |
681 | + * As simd_desc support at most 256, the max vlen is 512 bits. | 402 | +static bool vaes_check_vv(DisasContext *s, arg_rmr *a) |
682 | + * So vlen in bytes is encoded as maxsz. | 403 | +{ |
683 | + */ | 404 | + int egw_bytes = ZVKNED_EGS << s->sew; |
684 | +static inline uint32_t vext_maxsz(uint32_t desc) | 405 | + return s->cfg_ptr->ext_zvkned == true && |
685 | +{ | 406 | + require_rvv(s) && |
686 | + return simd_maxsz(desc) << vext_lmul(desc); | 407 | + vext_check_isa_ill(s) && |
687 | +} | 408 | + MAXSZ(s) >= egw_bytes && |
688 | + | 409 | + require_align(a->rd, s->lmul) && |
689 | +/* | 410 | + require_align(a->rs2, s->lmul) && |
690 | + * This function checks watchpoint before real load operation. | 411 | + s->sew == MO_32; |
691 | + * | 412 | +} |
692 | + * In softmmu mode, the TLB API probe_access is enough for watchpoint check. | 413 | + |
693 | + * In user mode, there is no watchpoint support now. | 414 | +static bool vaes_check_overlap(DisasContext *s, int vd, int vs2) |
694 | + * | 415 | +{ |
695 | + * It will trigger an exception if there is no mapping in TLB | 416 | + int8_t op_size = s->lmul <= 0 ? 1 : 1 << s->lmul; |
696 | + * and page table walk can't fill the TLB entry. Then the guest | 417 | + return !is_overlapped(vd, op_size, vs2, 1); |
697 | + * software can return here after process the exception or never return. | 418 | +} |
698 | + */ | 419 | + |
699 | +static void probe_pages(CPURISCVState *env, target_ulong addr, | 420 | +static bool vaes_check_vs(DisasContext *s, arg_rmr *a) |
700 | + target_ulong len, uintptr_t ra, | 421 | +{ |
701 | + MMUAccessType access_type) | 422 | + int egw_bytes = ZVKNED_EGS << s->sew; |
702 | +{ | 423 | + return vaes_check_overlap(s, a->rd, a->rs2) && |
703 | + target_ulong pagelen = -(addr | TARGET_PAGE_MASK); | 424 | + MAXSZ(s) >= egw_bytes && |
704 | + target_ulong curlen = MIN(pagelen, len); | 425 | + s->cfg_ptr->ext_zvkned == true && |
705 | + | 426 | + require_rvv(s) && |
706 | + probe_access(env, addr, curlen, access_type, | 427 | + vext_check_isa_ill(s) && |
707 | + cpu_mmu_index(env, false), ra); | 428 | + require_align(a->rd, s->lmul) && |
708 | + if (len > curlen) { | 429 | + s->sew == MO_32; |
709 | + addr += curlen; | 430 | +} |
710 | + curlen = len - curlen; | 431 | + |
711 | + probe_access(env, addr, curlen, access_type, | 432 | +GEN_V_UNMASKED_TRANS(vaesef_vv, vaes_check_vv, ZVKNED_EGS) |
712 | + cpu_mmu_index(env, false), ra); | 433 | +GEN_V_UNMASKED_TRANS(vaesef_vs, vaes_check_vs, ZVKNED_EGS) |
713 | + } | 434 | +GEN_V_UNMASKED_TRANS(vaesdf_vv, vaes_check_vv, ZVKNED_EGS) |
714 | +} | 435 | +GEN_V_UNMASKED_TRANS(vaesdf_vs, vaes_check_vs, ZVKNED_EGS) |
715 | + | 436 | +GEN_V_UNMASKED_TRANS(vaesdm_vv, vaes_check_vv, ZVKNED_EGS) |
716 | +#ifdef HOST_WORDS_BIGENDIAN | 437 | +GEN_V_UNMASKED_TRANS(vaesdm_vs, vaes_check_vs, ZVKNED_EGS) |
717 | +static void vext_clear(void *tail, uint32_t cnt, uint32_t tot) | 438 | +GEN_V_UNMASKED_TRANS(vaesz_vs, vaes_check_vs, ZVKNED_EGS) |
718 | +{ | 439 | +GEN_V_UNMASKED_TRANS(vaesem_vv, vaes_check_vv, ZVKNED_EGS) |
719 | + /* | 440 | +GEN_V_UNMASKED_TRANS(vaesem_vs, vaes_check_vs, ZVKNED_EGS) |
720 | + * Split the remaining range to two parts. | 441 | + |
721 | + * The first part is in the last uint64_t unit. | 442 | +#define GEN_VI_UNMASKED_TRANS(NAME, CHECK, EGS) \ |
722 | + * The second part start from the next uint64_t unit. | 443 | + static bool trans_##NAME(DisasContext *s, arg_##NAME *a) \ |
723 | + */ | 444 | + { \ |
724 | + int part1 = 0, part2 = tot - cnt; | 445 | + if (CHECK(s, a)) { \ |
725 | + if (cnt % 8) { | 446 | + TCGv_ptr rd_v, rs2_v; \ |
726 | + part1 = 8 - (cnt % 8); | 447 | + TCGv_i32 uimm_v, desc, egs; \ |
727 | + part2 = tot - cnt - part1; | 448 | + uint32_t data = 0; \ |
728 | + memset(tail & ~(7ULL), 0, part1); | 449 | + TCGLabel *over = gen_new_label(); \ |
729 | + memset((tail + 8) & ~(7ULL), 0, part2); | 450 | + \ |
730 | + } else { | 451 | + if (!s->vstart_eq_zero || !s->vl_eq_vlmax) { \ |
731 | + memset(tail, 0, part2); | 452 | + /* save opcode for unwinding in case we throw an exception */ \ |
732 | + } | 453 | + decode_save_opc(s); \ |
733 | +} | 454 | + egs = tcg_constant_i32(EGS); \ |
734 | +#else | 455 | + gen_helper_egs_check(egs, cpu_env); \ |
735 | +static void vext_clear(void *tail, uint32_t cnt, uint32_t tot) | 456 | + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ |
736 | +{ | 457 | + } \ |
737 | + memset(tail, 0, tot - cnt); | 458 | + \ |
738 | +} | 459 | + data = FIELD_DP32(data, VDATA, VM, a->vm); \ |
739 | +#endif | 460 | + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ |
740 | + | 461 | + data = FIELD_DP32(data, VDATA, VTA, s->vta); \ |
741 | +static void clearb(void *vd, uint32_t idx, uint32_t cnt, uint32_t tot) | 462 | + data = FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s); \ |
742 | +{ | 463 | + data = FIELD_DP32(data, VDATA, VMA, s->vma); \ |
743 | + int8_t *cur = ((int8_t *)vd + H1(idx)); | 464 | + \ |
744 | + vext_clear(cur, cnt, tot); | 465 | + rd_v = tcg_temp_new_ptr(); \ |
745 | +} | 466 | + rs2_v = tcg_temp_new_ptr(); \ |
746 | + | 467 | + uimm_v = tcg_constant_i32(a->rs1); \ |
747 | +static void clearh(void *vd, uint32_t idx, uint32_t cnt, uint32_t tot) | 468 | + desc = tcg_constant_i32( \ |
748 | +{ | 469 | + simd_desc(s->cfg_ptr->vlen / 8, s->cfg_ptr->vlen / 8, data)); \ |
749 | + int16_t *cur = ((int16_t *)vd + H2(idx)); | 470 | + tcg_gen_addi_ptr(rd_v, cpu_env, vreg_ofs(s, a->rd)); \ |
750 | + vext_clear(cur, cnt, tot); | 471 | + tcg_gen_addi_ptr(rs2_v, cpu_env, vreg_ofs(s, a->rs2)); \ |
751 | +} | 472 | + gen_helper_##NAME(rd_v, rs2_v, uimm_v, cpu_env, desc); \ |
752 | + | 473 | + mark_vs_dirty(s); \ |
753 | +static void clearl(void *vd, uint32_t idx, uint32_t cnt, uint32_t tot) | 474 | + gen_set_label(over); \ |
754 | +{ | 475 | + return true; \ |
755 | + int32_t *cur = ((int32_t *)vd + H4(idx)); | 476 | + } \ |
756 | + vext_clear(cur, cnt, tot); | 477 | + return false; \ |
757 | +} | 478 | + } |
758 | + | 479 | + |
759 | +static void clearq(void *vd, uint32_t idx, uint32_t cnt, uint32_t tot) | 480 | +static bool vaeskf1_check(DisasContext *s, arg_vaeskf1_vi *a) |
760 | +{ | 481 | +{ |
761 | + int64_t *cur = (int64_t *)vd + idx; | 482 | + int egw_bytes = ZVKNED_EGS << s->sew; |
762 | + vext_clear(cur, cnt, tot); | 483 | + return s->cfg_ptr->ext_zvkned == true && |
763 | +} | 484 | + require_rvv(s) && |
764 | + | 485 | + vext_check_isa_ill(s) && |
765 | + | 486 | + MAXSZ(s) >= egw_bytes && |
766 | +static inline int vext_elem_mask(void *v0, int mlen, int index) | 487 | + s->sew == MO_32 && |
767 | +{ | 488 | + require_align(a->rd, s->lmul) && |
768 | + int idx = (index * mlen) / 64; | 489 | + require_align(a->rs2, s->lmul); |
769 | + int pos = (index * mlen) % 64; | 490 | +} |
770 | + return (((uint64_t *)v0)[idx] >> pos) & 1; | 491 | + |
771 | +} | 492 | +static bool vaeskf2_check(DisasContext *s, arg_vaeskf2_vi *a) |
772 | + | 493 | +{ |
773 | +/* elements operations for load and store */ | 494 | + int egw_bytes = ZVKNED_EGS << s->sew; |
774 | +typedef void vext_ldst_elem_fn(CPURISCVState *env, target_ulong addr, | 495 | + return s->cfg_ptr->ext_zvkned == true && |
775 | + uint32_t idx, void *vd, uintptr_t retaddr); | 496 | + require_rvv(s) && |
776 | +typedef void clear_fn(void *vd, uint32_t idx, uint32_t cnt, uint32_t tot); | 497 | + vext_check_isa_ill(s) && |
777 | + | 498 | + MAXSZ(s) >= egw_bytes && |
778 | +#define GEN_VEXT_LD_ELEM(NAME, MTYPE, ETYPE, H, LDSUF) \ | 499 | + s->sew == MO_32 && |
779 | +static void NAME(CPURISCVState *env, abi_ptr addr, \ | 500 | + require_align(a->rd, s->lmul) && |
780 | + uint32_t idx, void *vd, uintptr_t retaddr)\ | 501 | + require_align(a->rs2, s->lmul); |
781 | +{ \ | 502 | +} |
782 | + MTYPE data; \ | 503 | + |
783 | + ETYPE *cur = ((ETYPE *)vd + H(idx)); \ | 504 | +GEN_VI_UNMASKED_TRANS(vaeskf1_vi, vaeskf1_check, ZVKNED_EGS) |
784 | + data = cpu_##LDSUF##_data_ra(env, addr, retaddr); \ | 505 | +GEN_VI_UNMASKED_TRANS(vaeskf2_vi, vaeskf2_check, ZVKNED_EGS) |
785 | + *cur = data; \ | ||
786 | +} \ | ||
787 | + | ||
788 | +GEN_VEXT_LD_ELEM(ldb_b, int8_t, int8_t, H1, ldsb) | ||
789 | +GEN_VEXT_LD_ELEM(ldb_h, int8_t, int16_t, H2, ldsb) | ||
790 | +GEN_VEXT_LD_ELEM(ldb_w, int8_t, int32_t, H4, ldsb) | ||
791 | +GEN_VEXT_LD_ELEM(ldb_d, int8_t, int64_t, H8, ldsb) | ||
792 | +GEN_VEXT_LD_ELEM(ldh_h, int16_t, int16_t, H2, ldsw) | ||
793 | +GEN_VEXT_LD_ELEM(ldh_w, int16_t, int32_t, H4, ldsw) | ||
794 | +GEN_VEXT_LD_ELEM(ldh_d, int16_t, int64_t, H8, ldsw) | ||
795 | +GEN_VEXT_LD_ELEM(ldw_w, int32_t, int32_t, H4, ldl) | ||
796 | +GEN_VEXT_LD_ELEM(ldw_d, int32_t, int64_t, H8, ldl) | ||
797 | +GEN_VEXT_LD_ELEM(lde_b, int8_t, int8_t, H1, ldsb) | ||
798 | +GEN_VEXT_LD_ELEM(lde_h, int16_t, int16_t, H2, ldsw) | ||
799 | +GEN_VEXT_LD_ELEM(lde_w, int32_t, int32_t, H4, ldl) | ||
800 | +GEN_VEXT_LD_ELEM(lde_d, int64_t, int64_t, H8, ldq) | ||
801 | +GEN_VEXT_LD_ELEM(ldbu_b, uint8_t, uint8_t, H1, ldub) | ||
802 | +GEN_VEXT_LD_ELEM(ldbu_h, uint8_t, uint16_t, H2, ldub) | ||
803 | +GEN_VEXT_LD_ELEM(ldbu_w, uint8_t, uint32_t, H4, ldub) | ||
804 | +GEN_VEXT_LD_ELEM(ldbu_d, uint8_t, uint64_t, H8, ldub) | ||
805 | +GEN_VEXT_LD_ELEM(ldhu_h, uint16_t, uint16_t, H2, lduw) | ||
806 | +GEN_VEXT_LD_ELEM(ldhu_w, uint16_t, uint32_t, H4, lduw) | ||
807 | +GEN_VEXT_LD_ELEM(ldhu_d, uint16_t, uint64_t, H8, lduw) | ||
808 | +GEN_VEXT_LD_ELEM(ldwu_w, uint32_t, uint32_t, H4, ldl) | ||
809 | +GEN_VEXT_LD_ELEM(ldwu_d, uint32_t, uint64_t, H8, ldl) | ||
810 | + | ||
811 | +#define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF) \ | ||
812 | +static void NAME(CPURISCVState *env, abi_ptr addr, \ | ||
813 | + uint32_t idx, void *vd, uintptr_t retaddr)\ | ||
814 | +{ \ | ||
815 | + ETYPE data = *((ETYPE *)vd + H(idx)); \ | ||
816 | + cpu_##STSUF##_data_ra(env, addr, data, retaddr); \ | ||
817 | +} | ||
818 | + | ||
819 | +GEN_VEXT_ST_ELEM(stb_b, int8_t, H1, stb) | ||
820 | +GEN_VEXT_ST_ELEM(stb_h, int16_t, H2, stb) | ||
821 | +GEN_VEXT_ST_ELEM(stb_w, int32_t, H4, stb) | ||
822 | +GEN_VEXT_ST_ELEM(stb_d, int64_t, H8, stb) | ||
823 | +GEN_VEXT_ST_ELEM(sth_h, int16_t, H2, stw) | ||
824 | +GEN_VEXT_ST_ELEM(sth_w, int32_t, H4, stw) | ||
825 | +GEN_VEXT_ST_ELEM(sth_d, int64_t, H8, stw) | ||
826 | +GEN_VEXT_ST_ELEM(stw_w, int32_t, H4, stl) | ||
827 | +GEN_VEXT_ST_ELEM(stw_d, int64_t, H8, stl) | ||
828 | +GEN_VEXT_ST_ELEM(ste_b, int8_t, H1, stb) | ||
829 | +GEN_VEXT_ST_ELEM(ste_h, int16_t, H2, stw) | ||
830 | +GEN_VEXT_ST_ELEM(ste_w, int32_t, H4, stl) | ||
831 | +GEN_VEXT_ST_ELEM(ste_d, int64_t, H8, stq) | ||
832 | + | ||
833 | +/* | ||
834 | + *** stride: access vector element from strided memory | ||
835 | + */ | ||
836 | +static void | ||
837 | +vext_ldst_stride(void *vd, void *v0, target_ulong base, | ||
838 | + target_ulong stride, CPURISCVState *env, | ||
839 | + uint32_t desc, uint32_t vm, | ||
840 | + vext_ldst_elem_fn *ldst_elem, clear_fn *clear_elem, | ||
841 | + uint32_t esz, uint32_t msz, uintptr_t ra, | ||
842 | + MMUAccessType access_type) | ||
843 | +{ | ||
844 | + uint32_t i, k; | ||
845 | + uint32_t nf = vext_nf(desc); | ||
846 | + uint32_t mlen = vext_mlen(desc); | ||
847 | + uint32_t vlmax = vext_maxsz(desc) / esz; | ||
848 | + | ||
849 | + /* probe every access*/ | ||
850 | + for (i = 0; i < env->vl; i++) { | ||
851 | + if (!vm && !vext_elem_mask(v0, mlen, i)) { | ||
852 | + continue; | ||
853 | + } | ||
854 | + probe_pages(env, base + stride * i, nf * msz, ra, access_type); | ||
855 | + } | ||
856 | + /* do real access */ | ||
857 | + for (i = 0; i < env->vl; i++) { | ||
858 | + k = 0; | ||
859 | + if (!vm && !vext_elem_mask(v0, mlen, i)) { | ||
860 | + continue; | ||
861 | + } | ||
862 | + while (k < nf) { | ||
863 | + target_ulong addr = base + stride * i + k * msz; | ||
864 | + ldst_elem(env, addr, i + k * vlmax, vd, ra); | ||
865 | + k++; | ||
866 | + } | ||
867 | + } | ||
868 | + /* clear tail elements */ | ||
869 | + if (clear_elem) { | ||
870 | + for (k = 0; k < nf; k++) { | ||
871 | + clear_elem(vd, env->vl + k * vlmax, env->vl * esz, vlmax * esz); | ||
872 | + } | ||
873 | + } | ||
874 | +} | ||
875 | + | ||
876 | +#define GEN_VEXT_LD_STRIDE(NAME, MTYPE, ETYPE, LOAD_FN, CLEAR_FN) \ | ||
877 | +void HELPER(NAME)(void *vd, void * v0, target_ulong base, \ | ||
878 | + target_ulong stride, CPURISCVState *env, \ | ||
879 | + uint32_t desc) \ | ||
880 | +{ \ | ||
881 | + uint32_t vm = vext_vm(desc); \ | ||
882 | + vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN, \ | ||
883 | + CLEAR_FN, sizeof(ETYPE), sizeof(MTYPE), \ | ||
884 | + GETPC(), MMU_DATA_LOAD); \ | ||
885 | +} | ||
886 | + | ||
887 | +GEN_VEXT_LD_STRIDE(vlsb_v_b, int8_t, int8_t, ldb_b, clearb) | ||
888 | +GEN_VEXT_LD_STRIDE(vlsb_v_h, int8_t, int16_t, ldb_h, clearh) | ||
889 | +GEN_VEXT_LD_STRIDE(vlsb_v_w, int8_t, int32_t, ldb_w, clearl) | ||
890 | +GEN_VEXT_LD_STRIDE(vlsb_v_d, int8_t, int64_t, ldb_d, clearq) | ||
891 | +GEN_VEXT_LD_STRIDE(vlsh_v_h, int16_t, int16_t, ldh_h, clearh) | ||
892 | +GEN_VEXT_LD_STRIDE(vlsh_v_w, int16_t, int32_t, ldh_w, clearl) | ||
893 | +GEN_VEXT_LD_STRIDE(vlsh_v_d, int16_t, int64_t, ldh_d, clearq) | ||
894 | +GEN_VEXT_LD_STRIDE(vlsw_v_w, int32_t, int32_t, ldw_w, clearl) | ||
895 | +GEN_VEXT_LD_STRIDE(vlsw_v_d, int32_t, int64_t, ldw_d, clearq) | ||
896 | +GEN_VEXT_LD_STRIDE(vlse_v_b, int8_t, int8_t, lde_b, clearb) | ||
897 | +GEN_VEXT_LD_STRIDE(vlse_v_h, int16_t, int16_t, lde_h, clearh) | ||
898 | +GEN_VEXT_LD_STRIDE(vlse_v_w, int32_t, int32_t, lde_w, clearl) | ||
899 | +GEN_VEXT_LD_STRIDE(vlse_v_d, int64_t, int64_t, lde_d, clearq) | ||
900 | +GEN_VEXT_LD_STRIDE(vlsbu_v_b, uint8_t, uint8_t, ldbu_b, clearb) | ||
901 | +GEN_VEXT_LD_STRIDE(vlsbu_v_h, uint8_t, uint16_t, ldbu_h, clearh) | ||
902 | +GEN_VEXT_LD_STRIDE(vlsbu_v_w, uint8_t, uint32_t, ldbu_w, clearl) | ||
903 | +GEN_VEXT_LD_STRIDE(vlsbu_v_d, uint8_t, uint64_t, ldbu_d, clearq) | ||
904 | +GEN_VEXT_LD_STRIDE(vlshu_v_h, uint16_t, uint16_t, ldhu_h, clearh) | ||
905 | +GEN_VEXT_LD_STRIDE(vlshu_v_w, uint16_t, uint32_t, ldhu_w, clearl) | ||
906 | +GEN_VEXT_LD_STRIDE(vlshu_v_d, uint16_t, uint64_t, ldhu_d, clearq) | ||
907 | +GEN_VEXT_LD_STRIDE(vlswu_v_w, uint32_t, uint32_t, ldwu_w, clearl) | ||
908 | +GEN_VEXT_LD_STRIDE(vlswu_v_d, uint32_t, uint64_t, ldwu_d, clearq) | ||
909 | + | ||
910 | +#define GEN_VEXT_ST_STRIDE(NAME, MTYPE, ETYPE, STORE_FN) \ | ||
911 | +void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ | ||
912 | + target_ulong stride, CPURISCVState *env, \ | ||
913 | + uint32_t desc) \ | ||
914 | +{ \ | ||
915 | + uint32_t vm = vext_vm(desc); \ | ||
916 | + vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN, \ | ||
917 | + NULL, sizeof(ETYPE), sizeof(MTYPE), \ | ||
918 | + GETPC(), MMU_DATA_STORE); \ | ||
919 | +} | ||
920 | + | ||
921 | +GEN_VEXT_ST_STRIDE(vssb_v_b, int8_t, int8_t, stb_b) | ||
922 | +GEN_VEXT_ST_STRIDE(vssb_v_h, int8_t, int16_t, stb_h) | ||
923 | +GEN_VEXT_ST_STRIDE(vssb_v_w, int8_t, int32_t, stb_w) | ||
924 | +GEN_VEXT_ST_STRIDE(vssb_v_d, int8_t, int64_t, stb_d) | ||
925 | +GEN_VEXT_ST_STRIDE(vssh_v_h, int16_t, int16_t, sth_h) | ||
926 | +GEN_VEXT_ST_STRIDE(vssh_v_w, int16_t, int32_t, sth_w) | ||
927 | +GEN_VEXT_ST_STRIDE(vssh_v_d, int16_t, int64_t, sth_d) | ||
928 | +GEN_VEXT_ST_STRIDE(vssw_v_w, int32_t, int32_t, stw_w) | ||
929 | +GEN_VEXT_ST_STRIDE(vssw_v_d, int32_t, int64_t, stw_d) | ||
930 | +GEN_VEXT_ST_STRIDE(vsse_v_b, int8_t, int8_t, ste_b) | ||
931 | +GEN_VEXT_ST_STRIDE(vsse_v_h, int16_t, int16_t, ste_h) | ||
932 | +GEN_VEXT_ST_STRIDE(vsse_v_w, int32_t, int32_t, ste_w) | ||
933 | +GEN_VEXT_ST_STRIDE(vsse_v_d, int64_t, int64_t, ste_d) | ||
934 | + | ||
935 | +/* | ||
936 | + *** unit-stride: access elements stored contiguously in memory | ||
937 | + */ | ||
938 | + | ||
939 | +/* unmasked unit-stride load and store operation*/ | ||
940 | +static void | ||
941 | +vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, | ||
942 | + vext_ldst_elem_fn *ldst_elem, clear_fn *clear_elem, | ||
943 | + uint32_t esz, uint32_t msz, uintptr_t ra, | ||
944 | + MMUAccessType access_type) | ||
945 | +{ | ||
946 | + uint32_t i, k; | ||
947 | + uint32_t nf = vext_nf(desc); | ||
948 | + uint32_t vlmax = vext_maxsz(desc) / esz; | ||
949 | + | ||
950 | + /* probe every access */ | ||
951 | + probe_pages(env, base, env->vl * nf * msz, ra, access_type); | ||
952 | + /* load bytes from guest memory */ | ||
953 | + for (i = 0; i < env->vl; i++) { | ||
954 | + k = 0; | ||
955 | + while (k < nf) { | ||
956 | + target_ulong addr = base + (i * nf + k) * msz; | ||
957 | + ldst_elem(env, addr, i + k * vlmax, vd, ra); | ||
958 | + k++; | ||
959 | + } | ||
960 | + } | ||
961 | + /* clear tail elements */ | ||
962 | + if (clear_elem) { | ||
963 | + for (k = 0; k < nf; k++) { | ||
964 | + clear_elem(vd, env->vl + k * vlmax, env->vl * esz, vlmax * esz); | ||
965 | + } | ||
966 | + } | ||
967 | +} | ||
968 | + | ||
969 | +/* | ||
970 | + * masked unit-stride load and store operations are a special case of strided access, | ||
971 | + * with stride = NF * sizeof (MTYPE) | ||
972 | + */ | ||
973 | + | ||
974 | +#define GEN_VEXT_LD_US(NAME, MTYPE, ETYPE, LOAD_FN, CLEAR_FN) \ | ||
975 | +void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \ | ||
976 | + CPURISCVState *env, uint32_t desc) \ | ||
977 | +{ \ | ||
978 | + uint32_t stride = vext_nf(desc) * sizeof(MTYPE); \ | ||
979 | + vext_ldst_stride(vd, v0, base, stride, env, desc, false, LOAD_FN, \ | ||
980 | + CLEAR_FN, sizeof(ETYPE), sizeof(MTYPE), \ | ||
981 | + GETPC(), MMU_DATA_LOAD); \ | ||
982 | +} \ | ||
983 | + \ | ||
984 | +void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ | ||
985 | + CPURISCVState *env, uint32_t desc) \ | ||
986 | +{ \ | ||
987 | + vext_ldst_us(vd, base, env, desc, LOAD_FN, CLEAR_FN, \ | ||
988 | + sizeof(ETYPE), sizeof(MTYPE), GETPC(), MMU_DATA_LOAD); \ | ||
989 | +} | ||
990 | + | ||
991 | +GEN_VEXT_LD_US(vlb_v_b, int8_t, int8_t, ldb_b, clearb) | ||
992 | +GEN_VEXT_LD_US(vlb_v_h, int8_t, int16_t, ldb_h, clearh) | ||
993 | +GEN_VEXT_LD_US(vlb_v_w, int8_t, int32_t, ldb_w, clearl) | ||
994 | +GEN_VEXT_LD_US(vlb_v_d, int8_t, int64_t, ldb_d, clearq) | ||
995 | +GEN_VEXT_LD_US(vlh_v_h, int16_t, int16_t, ldh_h, clearh) | ||
996 | +GEN_VEXT_LD_US(vlh_v_w, int16_t, int32_t, ldh_w, clearl) | ||
997 | +GEN_VEXT_LD_US(vlh_v_d, int16_t, int64_t, ldh_d, clearq) | ||
998 | +GEN_VEXT_LD_US(vlw_v_w, int32_t, int32_t, ldw_w, clearl) | ||
999 | +GEN_VEXT_LD_US(vlw_v_d, int32_t, int64_t, ldw_d, clearq) | ||
1000 | +GEN_VEXT_LD_US(vle_v_b, int8_t, int8_t, lde_b, clearb) | ||
1001 | +GEN_VEXT_LD_US(vle_v_h, int16_t, int16_t, lde_h, clearh) | ||
1002 | +GEN_VEXT_LD_US(vle_v_w, int32_t, int32_t, lde_w, clearl) | ||
1003 | +GEN_VEXT_LD_US(vle_v_d, int64_t, int64_t, lde_d, clearq) | ||
1004 | +GEN_VEXT_LD_US(vlbu_v_b, uint8_t, uint8_t, ldbu_b, clearb) | ||
1005 | +GEN_VEXT_LD_US(vlbu_v_h, uint8_t, uint16_t, ldbu_h, clearh) | ||
1006 | +GEN_VEXT_LD_US(vlbu_v_w, uint8_t, uint32_t, ldbu_w, clearl) | ||
1007 | +GEN_VEXT_LD_US(vlbu_v_d, uint8_t, uint64_t, ldbu_d, clearq) | ||
1008 | +GEN_VEXT_LD_US(vlhu_v_h, uint16_t, uint16_t, ldhu_h, clearh) | ||
1009 | +GEN_VEXT_LD_US(vlhu_v_w, uint16_t, uint32_t, ldhu_w, clearl) | ||
1010 | +GEN_VEXT_LD_US(vlhu_v_d, uint16_t, uint64_t, ldhu_d, clearq) | ||
1011 | +GEN_VEXT_LD_US(vlwu_v_w, uint32_t, uint32_t, ldwu_w, clearl) | ||
1012 | +GEN_VEXT_LD_US(vlwu_v_d, uint32_t, uint64_t, ldwu_d, clearq) | ||
1013 | + | ||
1014 | +#define GEN_VEXT_ST_US(NAME, MTYPE, ETYPE, STORE_FN) \ | ||
1015 | +void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \ | ||
1016 | + CPURISCVState *env, uint32_t desc) \ | ||
1017 | +{ \ | ||
1018 | + uint32_t stride = vext_nf(desc) * sizeof(MTYPE); \ | ||
1019 | + vext_ldst_stride(vd, v0, base, stride, env, desc, false, STORE_FN, \ | ||
1020 | + NULL, sizeof(ETYPE), sizeof(MTYPE), \ | ||
1021 | + GETPC(), MMU_DATA_STORE); \ | ||
1022 | +} \ | ||
1023 | + \ | ||
1024 | +void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ | ||
1025 | + CPURISCVState *env, uint32_t desc) \ | ||
1026 | +{ \ | ||
1027 | + vext_ldst_us(vd, base, env, desc, STORE_FN, NULL, \ | ||
1028 | + sizeof(ETYPE), sizeof(MTYPE), GETPC(), MMU_DATA_STORE);\ | ||
1029 | +} | ||
1030 | + | ||
1031 | +GEN_VEXT_ST_US(vsb_v_b, int8_t, int8_t , stb_b) | ||
1032 | +GEN_VEXT_ST_US(vsb_v_h, int8_t, int16_t, stb_h) | ||
1033 | +GEN_VEXT_ST_US(vsb_v_w, int8_t, int32_t, stb_w) | ||
1034 | +GEN_VEXT_ST_US(vsb_v_d, int8_t, int64_t, stb_d) | ||
1035 | +GEN_VEXT_ST_US(vsh_v_h, int16_t, int16_t, sth_h) | ||
1036 | +GEN_VEXT_ST_US(vsh_v_w, int16_t, int32_t, sth_w) | ||
1037 | +GEN_VEXT_ST_US(vsh_v_d, int16_t, int64_t, sth_d) | ||
1038 | +GEN_VEXT_ST_US(vsw_v_w, int32_t, int32_t, stw_w) | ||
1039 | +GEN_VEXT_ST_US(vsw_v_d, int32_t, int64_t, stw_d) | ||
1040 | +GEN_VEXT_ST_US(vse_v_b, int8_t, int8_t , ste_b) | ||
1041 | +GEN_VEXT_ST_US(vse_v_h, int16_t, int16_t, ste_h) | ||
1042 | +GEN_VEXT_ST_US(vse_v_w, int32_t, int32_t, ste_w) | ||
1043 | +GEN_VEXT_ST_US(vse_v_d, int64_t, int64_t, ste_d) | ||
1044 | -- | 506 | -- |
1045 | 2.27.0 | 507 | 2.41.0 |
1046 | |||
1047 | diff view generated by jsdifflib |
1 | From: LIU Zhiwei <zhiwei_liu@c-sky.com> | 1 | From: Kiran Ostrolenk <kiran.ostrolenk@codethink.co.uk> |
---|---|---|---|
2 | 2 | ||
3 | Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> | 3 | This commit adds support for the Zvknh vector-crypto extension, which |
4 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 4 | consists of the following instructions: |
5 | Reviewed-by: Alistair Francis <alistair.francis@wdc.com> | 5 | |
6 | Message-id: 20200623215920.2594-12-zhiwei_liu@c-sky.com | 6 | * vsha2ms.vv |
7 | * vsha2c[hl].vv | ||
8 | |||
9 | Translation functions are defined in | ||
10 | `target/riscv/insn_trans/trans_rvvk.c.inc` and helpers are defined in | ||
11 | `target/riscv/vcrypto_helper.c`. | ||
12 | |||
13 | Co-authored-by: Nazar Kazakov <nazar.kazakov@codethink.co.uk> | ||
14 | Co-authored-by: Lawrence Hunter <lawrence.hunter@codethink.co.uk> | ||
15 | [max.chou@sifive.com: Replaced vstart checking by TCG op] | ||
16 | Signed-off-by: Nazar Kazakov <nazar.kazakov@codethink.co.uk> | ||
17 | Signed-off-by: Lawrence Hunter <lawrence.hunter@codethink.co.uk> | ||
18 | Signed-off-by: Kiran Ostrolenk <kiran.ostrolenk@codethink.co.uk> | ||
19 | Signed-off-by: Max Chou <max.chou@sifive.com> | ||
20 | Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> | ||
21 | [max.chou@sifive.com: Exposed x-zvknha & x-zvknhb properties] | ||
22 | [max.chou@sifive.com: Replaced SEW selection to happen during | ||
23 | translation] | ||
24 | Message-ID: <20230711165917.2629866-11-max.chou@sifive.com> | ||
7 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | 25 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> |
8 | --- | 26 | --- |
9 | target/riscv/helper.h | 49 +++++++ | 27 | target/riscv/cpu_cfg.h | 2 + |
10 | target/riscv/insn32.decode | 16 ++ | 28 | target/riscv/helper.h | 6 + |
11 | target/riscv/insn_trans/trans_rvv.inc.c | 186 ++++++++++++++++++++++++ | 29 | target/riscv/insn32.decode | 5 + |
12 | target/riscv/vector_helper.c | 111 ++++++++++++++ | 30 | target/riscv/cpu.c | 13 +- |
13 | 4 files changed, 362 insertions(+) | 31 | target/riscv/vcrypto_helper.c | 238 +++++++++++++++++++++++ |
32 | target/riscv/insn_trans/trans_rvvk.c.inc | 129 ++++++++++++ | ||
33 | 6 files changed, 390 insertions(+), 3 deletions(-) | ||
14 | 34 | ||
35 | diff --git a/target/riscv/cpu_cfg.h b/target/riscv/cpu_cfg.h | ||
36 | index XXXXXXX..XXXXXXX 100644 | ||
37 | --- a/target/riscv/cpu_cfg.h | ||
38 | +++ b/target/riscv/cpu_cfg.h | ||
39 | @@ -XXX,XX +XXX,XX @@ struct RISCVCPUConfig { | ||
40 | bool ext_zvbb; | ||
41 | bool ext_zvbc; | ||
42 | bool ext_zvkned; | ||
43 | + bool ext_zvknha; | ||
44 | + bool ext_zvknhb; | ||
45 | bool ext_zmmul; | ||
46 | bool ext_zvfbfmin; | ||
47 | bool ext_zvfbfwma; | ||
15 | diff --git a/target/riscv/helper.h b/target/riscv/helper.h | 48 | diff --git a/target/riscv/helper.h b/target/riscv/helper.h |
16 | index XXXXXXX..XXXXXXX 100644 | 49 | index XXXXXXX..XXXXXXX 100644 |
17 | --- a/target/riscv/helper.h | 50 | --- a/target/riscv/helper.h |
18 | +++ b/target/riscv/helper.h | 51 | +++ b/target/riscv/helper.h |
19 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(vec_rsubs8, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | 52 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_4(vaesdm_vs, void, ptr, ptr, env, i32) |
20 | DEF_HELPER_FLAGS_4(vec_rsubs16, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | 53 | DEF_HELPER_4(vaesz_vs, void, ptr, ptr, env, i32) |
21 | DEF_HELPER_FLAGS_4(vec_rsubs32, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | 54 | DEF_HELPER_5(vaeskf1_vi, void, ptr, ptr, i32, env, i32) |
22 | DEF_HELPER_FLAGS_4(vec_rsubs64, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | 55 | DEF_HELPER_5(vaeskf2_vi, void, ptr, ptr, i32, env, i32) |
23 | + | 56 | + |
24 | +DEF_HELPER_6(vwaddu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) | 57 | +DEF_HELPER_5(vsha2ms_vv, void, ptr, ptr, ptr, env, i32) |
25 | +DEF_HELPER_6(vwaddu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | 58 | +DEF_HELPER_5(vsha2ch32_vv, void, ptr, ptr, ptr, env, i32) |
26 | +DEF_HELPER_6(vwaddu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | 59 | +DEF_HELPER_5(vsha2ch64_vv, void, ptr, ptr, ptr, env, i32) |
27 | +DEF_HELPER_6(vwsubu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) | 60 | +DEF_HELPER_5(vsha2cl32_vv, void, ptr, ptr, ptr, env, i32) |
28 | +DEF_HELPER_6(vwsubu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | 61 | +DEF_HELPER_5(vsha2cl64_vv, void, ptr, ptr, ptr, env, i32) |
29 | +DEF_HELPER_6(vwsubu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
30 | +DEF_HELPER_6(vwadd_vv_b, void, ptr, ptr, ptr, ptr, env, i32) | ||
31 | +DEF_HELPER_6(vwadd_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | ||
32 | +DEF_HELPER_6(vwadd_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
33 | +DEF_HELPER_6(vwsub_vv_b, void, ptr, ptr, ptr, ptr, env, i32) | ||
34 | +DEF_HELPER_6(vwsub_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | ||
35 | +DEF_HELPER_6(vwsub_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
36 | +DEF_HELPER_6(vwaddu_vx_b, void, ptr, ptr, tl, ptr, env, i32) | ||
37 | +DEF_HELPER_6(vwaddu_vx_h, void, ptr, ptr, tl, ptr, env, i32) | ||
38 | +DEF_HELPER_6(vwaddu_vx_w, void, ptr, ptr, tl, ptr, env, i32) | ||
39 | +DEF_HELPER_6(vwsubu_vx_b, void, ptr, ptr, tl, ptr, env, i32) | ||
40 | +DEF_HELPER_6(vwsubu_vx_h, void, ptr, ptr, tl, ptr, env, i32) | ||
41 | +DEF_HELPER_6(vwsubu_vx_w, void, ptr, ptr, tl, ptr, env, i32) | ||
42 | +DEF_HELPER_6(vwadd_vx_b, void, ptr, ptr, tl, ptr, env, i32) | ||
43 | +DEF_HELPER_6(vwadd_vx_h, void, ptr, ptr, tl, ptr, env, i32) | ||
44 | +DEF_HELPER_6(vwadd_vx_w, void, ptr, ptr, tl, ptr, env, i32) | ||
45 | +DEF_HELPER_6(vwsub_vx_b, void, ptr, ptr, tl, ptr, env, i32) | ||
46 | +DEF_HELPER_6(vwsub_vx_h, void, ptr, ptr, tl, ptr, env, i32) | ||
47 | +DEF_HELPER_6(vwsub_vx_w, void, ptr, ptr, tl, ptr, env, i32) | ||
48 | +DEF_HELPER_6(vwaddu_wv_b, void, ptr, ptr, ptr, ptr, env, i32) | ||
49 | +DEF_HELPER_6(vwaddu_wv_h, void, ptr, ptr, ptr, ptr, env, i32) | ||
50 | +DEF_HELPER_6(vwaddu_wv_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
51 | +DEF_HELPER_6(vwsubu_wv_b, void, ptr, ptr, ptr, ptr, env, i32) | ||
52 | +DEF_HELPER_6(vwsubu_wv_h, void, ptr, ptr, ptr, ptr, env, i32) | ||
53 | +DEF_HELPER_6(vwsubu_wv_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
54 | +DEF_HELPER_6(vwadd_wv_b, void, ptr, ptr, ptr, ptr, env, i32) | ||
55 | +DEF_HELPER_6(vwadd_wv_h, void, ptr, ptr, ptr, ptr, env, i32) | ||
56 | +DEF_HELPER_6(vwadd_wv_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
57 | +DEF_HELPER_6(vwsub_wv_b, void, ptr, ptr, ptr, ptr, env, i32) | ||
58 | +DEF_HELPER_6(vwsub_wv_h, void, ptr, ptr, ptr, ptr, env, i32) | ||
59 | +DEF_HELPER_6(vwsub_wv_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
60 | +DEF_HELPER_6(vwaddu_wx_b, void, ptr, ptr, tl, ptr, env, i32) | ||
61 | +DEF_HELPER_6(vwaddu_wx_h, void, ptr, ptr, tl, ptr, env, i32) | ||
62 | +DEF_HELPER_6(vwaddu_wx_w, void, ptr, ptr, tl, ptr, env, i32) | ||
63 | +DEF_HELPER_6(vwsubu_wx_b, void, ptr, ptr, tl, ptr, env, i32) | ||
64 | +DEF_HELPER_6(vwsubu_wx_h, void, ptr, ptr, tl, ptr, env, i32) | ||
65 | +DEF_HELPER_6(vwsubu_wx_w, void, ptr, ptr, tl, ptr, env, i32) | ||
66 | +DEF_HELPER_6(vwadd_wx_b, void, ptr, ptr, tl, ptr, env, i32) | ||
67 | +DEF_HELPER_6(vwadd_wx_h, void, ptr, ptr, tl, ptr, env, i32) | ||
68 | +DEF_HELPER_6(vwadd_wx_w, void, ptr, ptr, tl, ptr, env, i32) | ||
69 | +DEF_HELPER_6(vwsub_wx_b, void, ptr, ptr, tl, ptr, env, i32) | ||
70 | +DEF_HELPER_6(vwsub_wx_h, void, ptr, ptr, tl, ptr, env, i32) | ||
71 | +DEF_HELPER_6(vwsub_wx_w, void, ptr, ptr, tl, ptr, env, i32) | ||
72 | diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode | 62 | diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode |
73 | index XXXXXXX..XXXXXXX 100644 | 63 | index XXXXXXX..XXXXXXX 100644 |
74 | --- a/target/riscv/insn32.decode | 64 | --- a/target/riscv/insn32.decode |
75 | +++ b/target/riscv/insn32.decode | 65 | +++ b/target/riscv/insn32.decode |
76 | @@ -XXX,XX +XXX,XX @@ vsub_vv 000010 . ..... ..... 000 ..... 1010111 @r_vm | 66 | @@ -XXX,XX +XXX,XX @@ vaesdm_vs 101001 1 ..... 00000 010 ..... 1110111 @r2_vm_1 |
77 | vsub_vx 000010 . ..... ..... 100 ..... 1010111 @r_vm | 67 | vaesz_vs 101001 1 ..... 00111 010 ..... 1110111 @r2_vm_1 |
78 | vrsub_vx 000011 . ..... ..... 100 ..... 1010111 @r_vm | 68 | vaeskf1_vi 100010 1 ..... ..... 010 ..... 1110111 @r_vm_1 |
79 | vrsub_vi 000011 . ..... ..... 011 ..... 1010111 @r_vm | 69 | vaeskf2_vi 101010 1 ..... ..... 010 ..... 1110111 @r_vm_1 |
80 | +vwaddu_vv 110000 . ..... ..... 010 ..... 1010111 @r_vm | 70 | + |
81 | +vwaddu_vx 110000 . ..... ..... 110 ..... 1010111 @r_vm | 71 | +# *** Zvknh vector crypto extension *** |
82 | +vwadd_vv 110001 . ..... ..... 010 ..... 1010111 @r_vm | 72 | +vsha2ms_vv 101101 1 ..... ..... 010 ..... 1110111 @r_vm_1 |
83 | +vwadd_vx 110001 . ..... ..... 110 ..... 1010111 @r_vm | 73 | +vsha2ch_vv 101110 1 ..... ..... 010 ..... 1110111 @r_vm_1 |
84 | +vwsubu_vv 110010 . ..... ..... 010 ..... 1010111 @r_vm | 74 | +vsha2cl_vv 101111 1 ..... ..... 010 ..... 1110111 @r_vm_1 |
85 | +vwsubu_vx 110010 . ..... ..... 110 ..... 1010111 @r_vm | 75 | diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c |
86 | +vwsub_vv 110011 . ..... ..... 010 ..... 1010111 @r_vm | ||
87 | +vwsub_vx 110011 . ..... ..... 110 ..... 1010111 @r_vm | ||
88 | +vwaddu_wv 110100 . ..... ..... 010 ..... 1010111 @r_vm | ||
89 | +vwaddu_wx 110100 . ..... ..... 110 ..... 1010111 @r_vm | ||
90 | +vwadd_wv 110101 . ..... ..... 010 ..... 1010111 @r_vm | ||
91 | +vwadd_wx 110101 . ..... ..... 110 ..... 1010111 @r_vm | ||
92 | +vwsubu_wv 110110 . ..... ..... 010 ..... 1010111 @r_vm | ||
93 | +vwsubu_wx 110110 . ..... ..... 110 ..... 1010111 @r_vm | ||
94 | +vwsub_wv 110111 . ..... ..... 010 ..... 1010111 @r_vm | ||
95 | +vwsub_wx 110111 . ..... ..... 110 ..... 1010111 @r_vm | ||
96 | |||
97 | vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm | ||
98 | vsetvl 1000000 ..... ..... 111 ..... 1010111 @r | ||
99 | diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c | ||
100 | index XXXXXXX..XXXXXXX 100644 | 76 | index XXXXXXX..XXXXXXX 100644 |
101 | --- a/target/riscv/insn_trans/trans_rvv.inc.c | 77 | --- a/target/riscv/cpu.c |
102 | +++ b/target/riscv/insn_trans/trans_rvv.inc.c | 78 | +++ b/target/riscv/cpu.c |
103 | @@ -XXX,XX +XXX,XX @@ static bool vext_check_nf(DisasContext *s, uint32_t nf) | 79 | @@ -XXX,XX +XXX,XX @@ static const struct isa_ext_data isa_edata_arr[] = { |
104 | return (1 << s->lmul) * nf <= 8; | 80 | ISA_EXT_DATA_ENTRY(zvfh, PRIV_VERSION_1_12_0, ext_zvfh), |
81 | ISA_EXT_DATA_ENTRY(zvfhmin, PRIV_VERSION_1_12_0, ext_zvfhmin), | ||
82 | ISA_EXT_DATA_ENTRY(zvkned, PRIV_VERSION_1_12_0, ext_zvkned), | ||
83 | + ISA_EXT_DATA_ENTRY(zvknha, PRIV_VERSION_1_12_0, ext_zvknha), | ||
84 | + ISA_EXT_DATA_ENTRY(zvknhb, PRIV_VERSION_1_12_0, ext_zvknhb), | ||
85 | ISA_EXT_DATA_ENTRY(zhinx, PRIV_VERSION_1_12_0, ext_zhinx), | ||
86 | ISA_EXT_DATA_ENTRY(zhinxmin, PRIV_VERSION_1_12_0, ext_zhinxmin), | ||
87 | ISA_EXT_DATA_ENTRY(smaia, PRIV_VERSION_1_12_0, ext_smaia), | ||
88 | @@ -XXX,XX +XXX,XX @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp) | ||
89 | * In principle Zve*x would also suffice here, were they supported | ||
90 | * in qemu | ||
91 | */ | ||
92 | - if ((cpu->cfg.ext_zvbb || cpu->cfg.ext_zvkned) && !cpu->cfg.ext_zve32f) { | ||
93 | + if ((cpu->cfg.ext_zvbb || cpu->cfg.ext_zvkned || cpu->cfg.ext_zvknha) && | ||
94 | + !cpu->cfg.ext_zve32f) { | ||
95 | error_setg(errp, | ||
96 | "Vector crypto extensions require V or Zve* extensions"); | ||
97 | return; | ||
98 | } | ||
99 | |||
100 | - if (cpu->cfg.ext_zvbc && !cpu->cfg.ext_zve64f) { | ||
101 | - error_setg(errp, "Zvbc extension requires V or Zve64{f,d} extensions"); | ||
102 | + if ((cpu->cfg.ext_zvbc || cpu->cfg.ext_zvknhb) && !cpu->cfg.ext_zve64f) { | ||
103 | + error_setg( | ||
104 | + errp, | ||
105 | + "Zvbc and Zvknhb extensions require V or Zve64{f,d} extensions"); | ||
106 | return; | ||
107 | } | ||
108 | |||
109 | @@ -XXX,XX +XXX,XX @@ static Property riscv_cpu_extensions[] = { | ||
110 | DEFINE_PROP_BOOL("x-zvbb", RISCVCPU, cfg.ext_zvbb, false), | ||
111 | DEFINE_PROP_BOOL("x-zvbc", RISCVCPU, cfg.ext_zvbc, false), | ||
112 | DEFINE_PROP_BOOL("x-zvkned", RISCVCPU, cfg.ext_zvkned, false), | ||
113 | + DEFINE_PROP_BOOL("x-zvknha", RISCVCPU, cfg.ext_zvknha, false), | ||
114 | + DEFINE_PROP_BOOL("x-zvknhb", RISCVCPU, cfg.ext_zvknhb, false), | ||
115 | |||
116 | DEFINE_PROP_END_OF_LIST(), | ||
117 | }; | ||
118 | diff --git a/target/riscv/vcrypto_helper.c b/target/riscv/vcrypto_helper.c | ||
119 | index XXXXXXX..XXXXXXX 100644 | ||
120 | --- a/target/riscv/vcrypto_helper.c | ||
121 | +++ b/target/riscv/vcrypto_helper.c | ||
122 | @@ -XXX,XX +XXX,XX @@ void HELPER(vaeskf2_vi)(void *vd_vptr, void *vs2_vptr, uint32_t uimm, | ||
123 | /* set tail elements to 1s */ | ||
124 | vext_set_elems_1s(vd, vta, vl * 4, total_elems * 4); | ||
105 | } | 125 | } |
106 | 126 | + | |
127 | +static inline uint32_t sig0_sha256(uint32_t x) | ||
128 | +{ | ||
129 | + return ror32(x, 7) ^ ror32(x, 18) ^ (x >> 3); | ||
130 | +} | ||
131 | + | ||
132 | +static inline uint32_t sig1_sha256(uint32_t x) | ||
133 | +{ | ||
134 | + return ror32(x, 17) ^ ror32(x, 19) ^ (x >> 10); | ||
135 | +} | ||
136 | + | ||
137 | +static inline uint64_t sig0_sha512(uint64_t x) | ||
138 | +{ | ||
139 | + return ror64(x, 1) ^ ror64(x, 8) ^ (x >> 7); | ||
140 | +} | ||
141 | + | ||
142 | +static inline uint64_t sig1_sha512(uint64_t x) | ||
143 | +{ | ||
144 | + return ror64(x, 19) ^ ror64(x, 61) ^ (x >> 6); | ||
145 | +} | ||
146 | + | ||
147 | +static inline void vsha2ms_e32(uint32_t *vd, uint32_t *vs1, uint32_t *vs2) | ||
148 | +{ | ||
149 | + uint32_t res[4]; | ||
150 | + res[0] = sig1_sha256(vs1[H4(2)]) + vs2[H4(1)] + sig0_sha256(vd[H4(1)]) + | ||
151 | + vd[H4(0)]; | ||
152 | + res[1] = sig1_sha256(vs1[H4(3)]) + vs2[H4(2)] + sig0_sha256(vd[H4(2)]) + | ||
153 | + vd[H4(1)]; | ||
154 | + res[2] = | ||
155 | + sig1_sha256(res[0]) + vs2[H4(3)] + sig0_sha256(vd[H4(3)]) + vd[H4(2)]; | ||
156 | + res[3] = | ||
157 | + sig1_sha256(res[1]) + vs1[H4(0)] + sig0_sha256(vs2[H4(0)]) + vd[H4(3)]; | ||
158 | + vd[H4(3)] = res[3]; | ||
159 | + vd[H4(2)] = res[2]; | ||
160 | + vd[H4(1)] = res[1]; | ||
161 | + vd[H4(0)] = res[0]; | ||
162 | +} | ||
163 | + | ||
164 | +static inline void vsha2ms_e64(uint64_t *vd, uint64_t *vs1, uint64_t *vs2) | ||
165 | +{ | ||
166 | + uint64_t res[4]; | ||
167 | + res[0] = sig1_sha512(vs1[2]) + vs2[1] + sig0_sha512(vd[1]) + vd[0]; | ||
168 | + res[1] = sig1_sha512(vs1[3]) + vs2[2] + sig0_sha512(vd[2]) + vd[1]; | ||
169 | + res[2] = sig1_sha512(res[0]) + vs2[3] + sig0_sha512(vd[3]) + vd[2]; | ||
170 | + res[3] = sig1_sha512(res[1]) + vs1[0] + sig0_sha512(vs2[0]) + vd[3]; | ||
171 | + vd[3] = res[3]; | ||
172 | + vd[2] = res[2]; | ||
173 | + vd[1] = res[1]; | ||
174 | + vd[0] = res[0]; | ||
175 | +} | ||
176 | + | ||
177 | +void HELPER(vsha2ms_vv)(void *vd, void *vs1, void *vs2, CPURISCVState *env, | ||
178 | + uint32_t desc) | ||
179 | +{ | ||
180 | + uint32_t sew = FIELD_EX64(env->vtype, VTYPE, VSEW); | ||
181 | + uint32_t esz = sew == MO_32 ? 4 : 8; | ||
182 | + uint32_t total_elems; | ||
183 | + uint32_t vta = vext_vta(desc); | ||
184 | + | ||
185 | + for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) { | ||
186 | + if (sew == MO_32) { | ||
187 | + vsha2ms_e32(((uint32_t *)vd) + i * 4, ((uint32_t *)vs1) + i * 4, | ||
188 | + ((uint32_t *)vs2) + i * 4); | ||
189 | + } else { | ||
190 | + /* If not 32 then SEW should be 64 */ | ||
191 | + vsha2ms_e64(((uint64_t *)vd) + i * 4, ((uint64_t *)vs1) + i * 4, | ||
192 | + ((uint64_t *)vs2) + i * 4); | ||
193 | + } | ||
194 | + } | ||
195 | + /* set tail elements to 1s */ | ||
196 | + total_elems = vext_get_total_elems(env, desc, esz); | ||
197 | + vext_set_elems_1s(vd, vta, env->vl * esz, total_elems * esz); | ||
198 | + env->vstart = 0; | ||
199 | +} | ||
200 | + | ||
201 | +static inline uint64_t sum0_64(uint64_t x) | ||
202 | +{ | ||
203 | + return ror64(x, 28) ^ ror64(x, 34) ^ ror64(x, 39); | ||
204 | +} | ||
205 | + | ||
206 | +static inline uint32_t sum0_32(uint32_t x) | ||
207 | +{ | ||
208 | + return ror32(x, 2) ^ ror32(x, 13) ^ ror32(x, 22); | ||
209 | +} | ||
210 | + | ||
211 | +static inline uint64_t sum1_64(uint64_t x) | ||
212 | +{ | ||
213 | + return ror64(x, 14) ^ ror64(x, 18) ^ ror64(x, 41); | ||
214 | +} | ||
215 | + | ||
216 | +static inline uint32_t sum1_32(uint32_t x) | ||
217 | +{ | ||
218 | + return ror32(x, 6) ^ ror32(x, 11) ^ ror32(x, 25); | ||
219 | +} | ||
220 | + | ||
221 | +#define ch(x, y, z) ((x & y) ^ ((~x) & z)) | ||
222 | + | ||
223 | +#define maj(x, y, z) ((x & y) ^ (x & z) ^ (y & z)) | ||
224 | + | ||
225 | +static void vsha2c_64(uint64_t *vs2, uint64_t *vd, uint64_t *vs1) | ||
226 | +{ | ||
227 | + uint64_t a = vs2[3], b = vs2[2], e = vs2[1], f = vs2[0]; | ||
228 | + uint64_t c = vd[3], d = vd[2], g = vd[1], h = vd[0]; | ||
229 | + uint64_t W0 = vs1[0], W1 = vs1[1]; | ||
230 | + uint64_t T1 = h + sum1_64(e) + ch(e, f, g) + W0; | ||
231 | + uint64_t T2 = sum0_64(a) + maj(a, b, c); | ||
232 | + | ||
233 | + h = g; | ||
234 | + g = f; | ||
235 | + f = e; | ||
236 | + e = d + T1; | ||
237 | + d = c; | ||
238 | + c = b; | ||
239 | + b = a; | ||
240 | + a = T1 + T2; | ||
241 | + | ||
242 | + T1 = h + sum1_64(e) + ch(e, f, g) + W1; | ||
243 | + T2 = sum0_64(a) + maj(a, b, c); | ||
244 | + h = g; | ||
245 | + g = f; | ||
246 | + f = e; | ||
247 | + e = d + T1; | ||
248 | + d = c; | ||
249 | + c = b; | ||
250 | + b = a; | ||
251 | + a = T1 + T2; | ||
252 | + | ||
253 | + vd[0] = f; | ||
254 | + vd[1] = e; | ||
255 | + vd[2] = b; | ||
256 | + vd[3] = a; | ||
257 | +} | ||
258 | + | ||
259 | +static void vsha2c_32(uint32_t *vs2, uint32_t *vd, uint32_t *vs1) | ||
260 | +{ | ||
261 | + uint32_t a = vs2[H4(3)], b = vs2[H4(2)], e = vs2[H4(1)], f = vs2[H4(0)]; | ||
262 | + uint32_t c = vd[H4(3)], d = vd[H4(2)], g = vd[H4(1)], h = vd[H4(0)]; | ||
263 | + uint32_t W0 = vs1[H4(0)], W1 = vs1[H4(1)]; | ||
264 | + uint32_t T1 = h + sum1_32(e) + ch(e, f, g) + W0; | ||
265 | + uint32_t T2 = sum0_32(a) + maj(a, b, c); | ||
266 | + | ||
267 | + h = g; | ||
268 | + g = f; | ||
269 | + f = e; | ||
270 | + e = d + T1; | ||
271 | + d = c; | ||
272 | + c = b; | ||
273 | + b = a; | ||
274 | + a = T1 + T2; | ||
275 | + | ||
276 | + T1 = h + sum1_32(e) + ch(e, f, g) + W1; | ||
277 | + T2 = sum0_32(a) + maj(a, b, c); | ||
278 | + h = g; | ||
279 | + g = f; | ||
280 | + f = e; | ||
281 | + e = d + T1; | ||
282 | + d = c; | ||
283 | + c = b; | ||
284 | + b = a; | ||
285 | + a = T1 + T2; | ||
286 | + | ||
287 | + vd[H4(0)] = f; | ||
288 | + vd[H4(1)] = e; | ||
289 | + vd[H4(2)] = b; | ||
290 | + vd[H4(3)] = a; | ||
291 | +} | ||
292 | + | ||
293 | +void HELPER(vsha2ch32_vv)(void *vd, void *vs1, void *vs2, CPURISCVState *env, | ||
294 | + uint32_t desc) | ||
295 | +{ | ||
296 | + const uint32_t esz = 4; | ||
297 | + uint32_t total_elems; | ||
298 | + uint32_t vta = vext_vta(desc); | ||
299 | + | ||
300 | + for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) { | ||
301 | + vsha2c_32(((uint32_t *)vs2) + 4 * i, ((uint32_t *)vd) + 4 * i, | ||
302 | + ((uint32_t *)vs1) + 4 * i + 2); | ||
303 | + } | ||
304 | + | ||
305 | + /* set tail elements to 1s */ | ||
306 | + total_elems = vext_get_total_elems(env, desc, esz); | ||
307 | + vext_set_elems_1s(vd, vta, env->vl * esz, total_elems * esz); | ||
308 | + env->vstart = 0; | ||
309 | +} | ||
310 | + | ||
311 | +void HELPER(vsha2ch64_vv)(void *vd, void *vs1, void *vs2, CPURISCVState *env, | ||
312 | + uint32_t desc) | ||
313 | +{ | ||
314 | + const uint32_t esz = 8; | ||
315 | + uint32_t total_elems; | ||
316 | + uint32_t vta = vext_vta(desc); | ||
317 | + | ||
318 | + for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) { | ||
319 | + vsha2c_64(((uint64_t *)vs2) + 4 * i, ((uint64_t *)vd) + 4 * i, | ||
320 | + ((uint64_t *)vs1) + 4 * i + 2); | ||
321 | + } | ||
322 | + | ||
323 | + /* set tail elements to 1s */ | ||
324 | + total_elems = vext_get_total_elems(env, desc, esz); | ||
325 | + vext_set_elems_1s(vd, vta, env->vl * esz, total_elems * esz); | ||
326 | + env->vstart = 0; | ||
327 | +} | ||
328 | + | ||
329 | +void HELPER(vsha2cl32_vv)(void *vd, void *vs1, void *vs2, CPURISCVState *env, | ||
330 | + uint32_t desc) | ||
331 | +{ | ||
332 | + const uint32_t esz = 4; | ||
333 | + uint32_t total_elems; | ||
334 | + uint32_t vta = vext_vta(desc); | ||
335 | + | ||
336 | + for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) { | ||
337 | + vsha2c_32(((uint32_t *)vs2) + 4 * i, ((uint32_t *)vd) + 4 * i, | ||
338 | + (((uint32_t *)vs1) + 4 * i)); | ||
339 | + } | ||
340 | + | ||
341 | + /* set tail elements to 1s */ | ||
342 | + total_elems = vext_get_total_elems(env, desc, esz); | ||
343 | + vext_set_elems_1s(vd, vta, env->vl * esz, total_elems * esz); | ||
344 | + env->vstart = 0; | ||
345 | +} | ||
346 | + | ||
347 | +void HELPER(vsha2cl64_vv)(void *vd, void *vs1, void *vs2, CPURISCVState *env, | ||
348 | + uint32_t desc) | ||
349 | +{ | ||
350 | + uint32_t esz = 8; | ||
351 | + uint32_t total_elems; | ||
352 | + uint32_t vta = vext_vta(desc); | ||
353 | + | ||
354 | + for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) { | ||
355 | + vsha2c_64(((uint64_t *)vs2) + 4 * i, ((uint64_t *)vd) + 4 * i, | ||
356 | + (((uint64_t *)vs1) + 4 * i)); | ||
357 | + } | ||
358 | + | ||
359 | + /* set tail elements to 1s */ | ||
360 | + total_elems = vext_get_total_elems(env, desc, esz); | ||
361 | + vext_set_elems_1s(vd, vta, env->vl * esz, total_elems * esz); | ||
362 | + env->vstart = 0; | ||
363 | +} | ||
364 | diff --git a/target/riscv/insn_trans/trans_rvvk.c.inc b/target/riscv/insn_trans/trans_rvvk.c.inc | ||
365 | index XXXXXXX..XXXXXXX 100644 | ||
366 | --- a/target/riscv/insn_trans/trans_rvvk.c.inc | ||
367 | +++ b/target/riscv/insn_trans/trans_rvvk.c.inc | ||
368 | @@ -XXX,XX +XXX,XX @@ static bool vaeskf2_check(DisasContext *s, arg_vaeskf2_vi *a) | ||
369 | |||
370 | GEN_VI_UNMASKED_TRANS(vaeskf1_vi, vaeskf1_check, ZVKNED_EGS) | ||
371 | GEN_VI_UNMASKED_TRANS(vaeskf2_vi, vaeskf2_check, ZVKNED_EGS) | ||
372 | + | ||
107 | +/* | 373 | +/* |
108 | + * The destination vector register group cannot overlap a source vector register | 374 | + * Zvknh |
109 | + * group of a different element width. (Section 11.2) | ||
110 | + */ | 375 | + */ |
111 | +static inline bool vext_check_overlap_group(int rd, int dlen, int rs, int slen) | 376 | + |
112 | +{ | 377 | +#define ZVKNH_EGS 4 |
113 | + return ((rd >= rs + slen) || (rs >= rd + dlen)); | 378 | + |
114 | +} | 379 | +#define GEN_VV_UNMASKED_TRANS(NAME, CHECK, EGS) \ |
115 | /* common translation macro */ | 380 | + static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ |
116 | #define GEN_VEXT_TRANS(NAME, SEQ, ARGTYPE, OP, CHECK) \ | 381 | + { \ |
117 | static bool trans_##NAME(DisasContext *s, arg_##ARGTYPE *a)\ | 382 | + if (CHECK(s, a)) { \ |
118 | @@ -XXX,XX +XXX,XX @@ static void tcg_gen_gvec_rsubi(unsigned vece, uint32_t dofs, uint32_t aofs, | 383 | + uint32_t data = 0; \ |
119 | } | 384 | + TCGLabel *over = gen_new_label(); \ |
120 | 385 | + TCGv_i32 egs; \ | |
121 | GEN_OPIVI_GVEC_TRANS(vrsub_vi, 0, vrsub_vx, rsubi) | 386 | + \ |
122 | + | 387 | + if (!s->vstart_eq_zero || !s->vl_eq_vlmax) { \ |
123 | +/* Vector Widening Integer Add/Subtract */ | 388 | + /* save opcode for unwinding in case we throw an exception */ \ |
124 | + | 389 | + decode_save_opc(s); \ |
125 | +/* OPIVV with WIDEN */ | 390 | + egs = tcg_constant_i32(EGS); \ |
126 | +static bool opivv_widen_check(DisasContext *s, arg_rmrr *a) | 391 | + gen_helper_egs_check(egs, cpu_env); \ |
127 | +{ | 392 | + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ |
128 | + return (vext_check_isa_ill(s) && | 393 | + } \ |
129 | + vext_check_overlap_mask(s, a->rd, a->vm, true) && | 394 | + \ |
130 | + vext_check_reg(s, a->rd, true) && | 395 | + data = FIELD_DP32(data, VDATA, VM, a->vm); \ |
131 | + vext_check_reg(s, a->rs2, false) && | 396 | + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ |
132 | + vext_check_reg(s, a->rs1, false) && | 397 | + data = FIELD_DP32(data, VDATA, VTA, s->vta); \ |
133 | + vext_check_overlap_group(a->rd, 2 << s->lmul, a->rs2, | 398 | + data = FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s); \ |
134 | + 1 << s->lmul) && | 399 | + data = FIELD_DP32(data, VDATA, VMA, s->vma); \ |
135 | + vext_check_overlap_group(a->rd, 2 << s->lmul, a->rs1, | 400 | + \ |
136 | + 1 << s->lmul) && | 401 | + tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, a->rs1), \ |
137 | + (s->lmul < 0x3) && (s->sew < 0x3)); | 402 | + vreg_ofs(s, a->rs2), cpu_env, \ |
138 | +} | 403 | + s->cfg_ptr->vlen / 8, s->cfg_ptr->vlen / 8, \ |
139 | + | 404 | + data, gen_helper_##NAME); \ |
140 | +static bool do_opivv_widen(DisasContext *s, arg_rmrr *a, | 405 | + \ |
141 | + gen_helper_gvec_4_ptr *fn, | 406 | + mark_vs_dirty(s); \ |
142 | + bool (*checkfn)(DisasContext *, arg_rmrr *)) | 407 | + gen_set_label(over); \ |
143 | +{ | 408 | + return true; \ |
144 | + if (checkfn(s, a)) { | 409 | + } \ |
410 | + return false; \ | ||
411 | + } | ||
412 | + | ||
413 | +static bool vsha_check_sew(DisasContext *s) | ||
414 | +{ | ||
415 | + return (s->cfg_ptr->ext_zvknha == true && s->sew == MO_32) || | ||
416 | + (s->cfg_ptr->ext_zvknhb == true && | ||
417 | + (s->sew == MO_32 || s->sew == MO_64)); | ||
418 | +} | ||
419 | + | ||
420 | +static bool vsha_check(DisasContext *s, arg_rmrr *a) | ||
421 | +{ | ||
422 | + int egw_bytes = ZVKNH_EGS << s->sew; | ||
423 | + int mult = 1 << MAX(s->lmul, 0); | ||
424 | + return opivv_check(s, a) && | ||
425 | + vsha_check_sew(s) && | ||
426 | + MAXSZ(s) >= egw_bytes && | ||
427 | + !is_overlapped(a->rd, mult, a->rs1, mult) && | ||
428 | + !is_overlapped(a->rd, mult, a->rs2, mult) && | ||
429 | + s->lmul >= 0; | ||
430 | +} | ||
431 | + | ||
432 | +GEN_VV_UNMASKED_TRANS(vsha2ms_vv, vsha_check, ZVKNH_EGS) | ||
433 | + | ||
434 | +static bool trans_vsha2cl_vv(DisasContext *s, arg_rmrr *a) | ||
435 | +{ | ||
436 | + if (vsha_check(s, a)) { | ||
145 | + uint32_t data = 0; | 437 | + uint32_t data = 0; |
146 | + TCGLabel *over = gen_new_label(); | 438 | + TCGLabel *over = gen_new_label(); |
147 | + tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); | 439 | + TCGv_i32 egs; |
148 | + | 440 | + |
149 | + data = FIELD_DP32(data, VDATA, MLEN, s->mlen); | 441 | + if (!s->vstart_eq_zero || !s->vl_eq_vlmax) { |
442 | + /* save opcode for unwinding in case we throw an exception */ | ||
443 | + decode_save_opc(s); | ||
444 | + egs = tcg_constant_i32(ZVKNH_EGS); | ||
445 | + gen_helper_egs_check(egs, cpu_env); | ||
446 | + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); | ||
447 | + } | ||
448 | + | ||
150 | + data = FIELD_DP32(data, VDATA, VM, a->vm); | 449 | + data = FIELD_DP32(data, VDATA, VM, a->vm); |
151 | + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); | 450 | + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); |
152 | + tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), | 451 | + data = FIELD_DP32(data, VDATA, VTA, s->vta); |
153 | + vreg_ofs(s, a->rs1), | 452 | + data = FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s); |
154 | + vreg_ofs(s, a->rs2), | 453 | + data = FIELD_DP32(data, VDATA, VMA, s->vma); |
155 | + cpu_env, 0, s->vlen / 8, | 454 | + |
156 | + data, fn); | 455 | + tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, a->rs1), |
456 | + vreg_ofs(s, a->rs2), cpu_env, s->cfg_ptr->vlen / 8, | ||
457 | + s->cfg_ptr->vlen / 8, data, | ||
458 | + s->sew == MO_32 ? | ||
459 | + gen_helper_vsha2cl32_vv : gen_helper_vsha2cl64_vv); | ||
460 | + | ||
461 | + mark_vs_dirty(s); | ||
157 | + gen_set_label(over); | 462 | + gen_set_label(over); |
158 | + return true; | 463 | + return true; |
159 | + } | 464 | + } |
160 | + return false; | 465 | + return false; |
161 | +} | 466 | +} |
162 | + | 467 | + |
163 | +#define GEN_OPIVV_WIDEN_TRANS(NAME, CHECK) \ | 468 | +static bool trans_vsha2ch_vv(DisasContext *s, arg_rmrr *a) |
164 | +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ | 469 | +{ |
165 | +{ \ | 470 | + if (vsha_check(s, a)) { |
166 | + static gen_helper_gvec_4_ptr * const fns[3] = { \ | ||
167 | + gen_helper_##NAME##_b, \ | ||
168 | + gen_helper_##NAME##_h, \ | ||
169 | + gen_helper_##NAME##_w \ | ||
170 | + }; \ | ||
171 | + return do_opivv_widen(s, a, fns[s->sew], CHECK); \ | ||
172 | +} | ||
173 | + | ||
174 | +GEN_OPIVV_WIDEN_TRANS(vwaddu_vv, opivv_widen_check) | ||
175 | +GEN_OPIVV_WIDEN_TRANS(vwadd_vv, opivv_widen_check) | ||
176 | +GEN_OPIVV_WIDEN_TRANS(vwsubu_vv, opivv_widen_check) | ||
177 | +GEN_OPIVV_WIDEN_TRANS(vwsub_vv, opivv_widen_check) | ||
178 | + | ||
179 | +/* OPIVX with WIDEN */ | ||
180 | +static bool opivx_widen_check(DisasContext *s, arg_rmrr *a) | ||
181 | +{ | ||
182 | + return (vext_check_isa_ill(s) && | ||
183 | + vext_check_overlap_mask(s, a->rd, a->vm, true) && | ||
184 | + vext_check_reg(s, a->rd, true) && | ||
185 | + vext_check_reg(s, a->rs2, false) && | ||
186 | + vext_check_overlap_group(a->rd, 2 << s->lmul, a->rs2, | ||
187 | + 1 << s->lmul) && | ||
188 | + (s->lmul < 0x3) && (s->sew < 0x3)); | ||
189 | +} | ||
190 | + | ||
191 | +static bool do_opivx_widen(DisasContext *s, arg_rmrr *a, | ||
192 | + gen_helper_opivx *fn) | ||
193 | +{ | ||
194 | + if (opivx_widen_check(s, a)) { | ||
195 | + return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fn, s); | ||
196 | + } | ||
197 | + return true; | ||
198 | +} | ||
199 | + | ||
200 | +#define GEN_OPIVX_WIDEN_TRANS(NAME) \ | ||
201 | +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ | ||
202 | +{ \ | ||
203 | + static gen_helper_opivx * const fns[3] = { \ | ||
204 | + gen_helper_##NAME##_b, \ | ||
205 | + gen_helper_##NAME##_h, \ | ||
206 | + gen_helper_##NAME##_w \ | ||
207 | + }; \ | ||
208 | + return do_opivx_widen(s, a, fns[s->sew]); \ | ||
209 | +} | ||
210 | + | ||
211 | +GEN_OPIVX_WIDEN_TRANS(vwaddu_vx) | ||
212 | +GEN_OPIVX_WIDEN_TRANS(vwadd_vx) | ||
213 | +GEN_OPIVX_WIDEN_TRANS(vwsubu_vx) | ||
214 | +GEN_OPIVX_WIDEN_TRANS(vwsub_vx) | ||
215 | + | ||
216 | +/* WIDEN OPIVV with WIDEN */ | ||
217 | +static bool opiwv_widen_check(DisasContext *s, arg_rmrr *a) | ||
218 | +{ | ||
219 | + return (vext_check_isa_ill(s) && | ||
220 | + vext_check_overlap_mask(s, a->rd, a->vm, true) && | ||
221 | + vext_check_reg(s, a->rd, true) && | ||
222 | + vext_check_reg(s, a->rs2, true) && | ||
223 | + vext_check_reg(s, a->rs1, false) && | ||
224 | + vext_check_overlap_group(a->rd, 2 << s->lmul, a->rs1, | ||
225 | + 1 << s->lmul) && | ||
226 | + (s->lmul < 0x3) && (s->sew < 0x3)); | ||
227 | +} | ||
228 | + | ||
229 | +static bool do_opiwv_widen(DisasContext *s, arg_rmrr *a, | ||
230 | + gen_helper_gvec_4_ptr *fn) | ||
231 | +{ | ||
232 | + if (opiwv_widen_check(s, a)) { | ||
233 | + uint32_t data = 0; | 471 | + uint32_t data = 0; |
234 | + TCGLabel *over = gen_new_label(); | 472 | + TCGLabel *over = gen_new_label(); |
235 | + tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); | 473 | + TCGv_i32 egs; |
236 | + | 474 | + |
237 | + data = FIELD_DP32(data, VDATA, MLEN, s->mlen); | 475 | + if (!s->vstart_eq_zero || !s->vl_eq_vlmax) { |
476 | + /* save opcode for unwinding in case we throw an exception */ | ||
477 | + decode_save_opc(s); | ||
478 | + egs = tcg_constant_i32(ZVKNH_EGS); | ||
479 | + gen_helper_egs_check(egs, cpu_env); | ||
480 | + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); | ||
481 | + } | ||
482 | + | ||
238 | + data = FIELD_DP32(data, VDATA, VM, a->vm); | 483 | + data = FIELD_DP32(data, VDATA, VM, a->vm); |
239 | + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); | 484 | + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); |
240 | + tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), | 485 | + data = FIELD_DP32(data, VDATA, VTA, s->vta); |
241 | + vreg_ofs(s, a->rs1), | 486 | + data = FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s); |
242 | + vreg_ofs(s, a->rs2), | 487 | + data = FIELD_DP32(data, VDATA, VMA, s->vma); |
243 | + cpu_env, 0, s->vlen / 8, data, fn); | 488 | + |
489 | + tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, a->rs1), | ||
490 | + vreg_ofs(s, a->rs2), cpu_env, s->cfg_ptr->vlen / 8, | ||
491 | + s->cfg_ptr->vlen / 8, data, | ||
492 | + s->sew == MO_32 ? | ||
493 | + gen_helper_vsha2ch32_vv : gen_helper_vsha2ch64_vv); | ||
494 | + | ||
495 | + mark_vs_dirty(s); | ||
244 | + gen_set_label(over); | 496 | + gen_set_label(over); |
245 | + return true; | 497 | + return true; |
246 | + } | 498 | + } |
247 | + return false; | 499 | + return false; |
248 | +} | 500 | +} |
249 | + | ||
250 | +#define GEN_OPIWV_WIDEN_TRANS(NAME) \ | ||
251 | +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ | ||
252 | +{ \ | ||
253 | + static gen_helper_gvec_4_ptr * const fns[3] = { \ | ||
254 | + gen_helper_##NAME##_b, \ | ||
255 | + gen_helper_##NAME##_h, \ | ||
256 | + gen_helper_##NAME##_w \ | ||
257 | + }; \ | ||
258 | + return do_opiwv_widen(s, a, fns[s->sew]); \ | ||
259 | +} | ||
260 | + | ||
261 | +GEN_OPIWV_WIDEN_TRANS(vwaddu_wv) | ||
262 | +GEN_OPIWV_WIDEN_TRANS(vwadd_wv) | ||
263 | +GEN_OPIWV_WIDEN_TRANS(vwsubu_wv) | ||
264 | +GEN_OPIWV_WIDEN_TRANS(vwsub_wv) | ||
265 | + | ||
266 | +/* WIDEN OPIVX with WIDEN */ | ||
267 | +static bool opiwx_widen_check(DisasContext *s, arg_rmrr *a) | ||
268 | +{ | ||
269 | + return (vext_check_isa_ill(s) && | ||
270 | + vext_check_overlap_mask(s, a->rd, a->vm, true) && | ||
271 | + vext_check_reg(s, a->rd, true) && | ||
272 | + vext_check_reg(s, a->rs2, true) && | ||
273 | + (s->lmul < 0x3) && (s->sew < 0x3)); | ||
274 | +} | ||
275 | + | ||
276 | +static bool do_opiwx_widen(DisasContext *s, arg_rmrr *a, | ||
277 | + gen_helper_opivx *fn) | ||
278 | +{ | ||
279 | + if (opiwx_widen_check(s, a)) { | ||
280 | + return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fn, s); | ||
281 | + } | ||
282 | + return false; | ||
283 | +} | ||
284 | + | ||
285 | +#define GEN_OPIWX_WIDEN_TRANS(NAME) \ | ||
286 | +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ | ||
287 | +{ \ | ||
288 | + static gen_helper_opivx * const fns[3] = { \ | ||
289 | + gen_helper_##NAME##_b, \ | ||
290 | + gen_helper_##NAME##_h, \ | ||
291 | + gen_helper_##NAME##_w \ | ||
292 | + }; \ | ||
293 | + return do_opiwx_widen(s, a, fns[s->sew]); \ | ||
294 | +} | ||
295 | + | ||
296 | +GEN_OPIWX_WIDEN_TRANS(vwaddu_wx) | ||
297 | +GEN_OPIWX_WIDEN_TRANS(vwadd_wx) | ||
298 | +GEN_OPIWX_WIDEN_TRANS(vwsubu_wx) | ||
299 | +GEN_OPIWX_WIDEN_TRANS(vwsub_wx) | ||
300 | diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c | ||
301 | index XXXXXXX..XXXXXXX 100644 | ||
302 | --- a/target/riscv/vector_helper.c | ||
303 | +++ b/target/riscv/vector_helper.c | ||
304 | @@ -XXX,XX +XXX,XX @@ void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc) | ||
305 | *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i); | ||
306 | } | ||
307 | } | ||
308 | + | ||
309 | +/* Vector Widening Integer Add/Subtract */ | ||
310 | +#define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t | ||
311 | +#define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t | ||
312 | +#define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t | ||
313 | +#define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t | ||
314 | +#define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t | ||
315 | +#define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t | ||
316 | +#define WOP_WUUU_B uint16_t, uint8_t, uint16_t, uint16_t, uint16_t | ||
317 | +#define WOP_WUUU_H uint32_t, uint16_t, uint32_t, uint32_t, uint32_t | ||
318 | +#define WOP_WUUU_W uint64_t, uint32_t, uint64_t, uint64_t, uint64_t | ||
319 | +#define WOP_WSSS_B int16_t, int8_t, int16_t, int16_t, int16_t | ||
320 | +#define WOP_WSSS_H int32_t, int16_t, int32_t, int32_t, int32_t | ||
321 | +#define WOP_WSSS_W int64_t, int32_t, int64_t, int64_t, int64_t | ||
322 | +RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD) | ||
323 | +RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD) | ||
324 | +RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD) | ||
325 | +RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB) | ||
326 | +RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB) | ||
327 | +RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB) | ||
328 | +RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD) | ||
329 | +RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD) | ||
330 | +RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD) | ||
331 | +RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB) | ||
332 | +RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB) | ||
333 | +RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB) | ||
334 | +RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD) | ||
335 | +RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_ADD) | ||
336 | +RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD) | ||
337 | +RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB) | ||
338 | +RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB) | ||
339 | +RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB) | ||
340 | +RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD) | ||
341 | +RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD) | ||
342 | +RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD) | ||
343 | +RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB) | ||
344 | +RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB) | ||
345 | +RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB) | ||
346 | +GEN_VEXT_VV(vwaddu_vv_b, 1, 2, clearh) | ||
347 | +GEN_VEXT_VV(vwaddu_vv_h, 2, 4, clearl) | ||
348 | +GEN_VEXT_VV(vwaddu_vv_w, 4, 8, clearq) | ||
349 | +GEN_VEXT_VV(vwsubu_vv_b, 1, 2, clearh) | ||
350 | +GEN_VEXT_VV(vwsubu_vv_h, 2, 4, clearl) | ||
351 | +GEN_VEXT_VV(vwsubu_vv_w, 4, 8, clearq) | ||
352 | +GEN_VEXT_VV(vwadd_vv_b, 1, 2, clearh) | ||
353 | +GEN_VEXT_VV(vwadd_vv_h, 2, 4, clearl) | ||
354 | +GEN_VEXT_VV(vwadd_vv_w, 4, 8, clearq) | ||
355 | +GEN_VEXT_VV(vwsub_vv_b, 1, 2, clearh) | ||
356 | +GEN_VEXT_VV(vwsub_vv_h, 2, 4, clearl) | ||
357 | +GEN_VEXT_VV(vwsub_vv_w, 4, 8, clearq) | ||
358 | +GEN_VEXT_VV(vwaddu_wv_b, 1, 2, clearh) | ||
359 | +GEN_VEXT_VV(vwaddu_wv_h, 2, 4, clearl) | ||
360 | +GEN_VEXT_VV(vwaddu_wv_w, 4, 8, clearq) | ||
361 | +GEN_VEXT_VV(vwsubu_wv_b, 1, 2, clearh) | ||
362 | +GEN_VEXT_VV(vwsubu_wv_h, 2, 4, clearl) | ||
363 | +GEN_VEXT_VV(vwsubu_wv_w, 4, 8, clearq) | ||
364 | +GEN_VEXT_VV(vwadd_wv_b, 1, 2, clearh) | ||
365 | +GEN_VEXT_VV(vwadd_wv_h, 2, 4, clearl) | ||
366 | +GEN_VEXT_VV(vwadd_wv_w, 4, 8, clearq) | ||
367 | +GEN_VEXT_VV(vwsub_wv_b, 1, 2, clearh) | ||
368 | +GEN_VEXT_VV(vwsub_wv_h, 2, 4, clearl) | ||
369 | +GEN_VEXT_VV(vwsub_wv_w, 4, 8, clearq) | ||
370 | + | ||
371 | +RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD) | ||
372 | +RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD) | ||
373 | +RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD) | ||
374 | +RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB) | ||
375 | +RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB) | ||
376 | +RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB) | ||
377 | +RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD) | ||
378 | +RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD) | ||
379 | +RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD) | ||
380 | +RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB) | ||
381 | +RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB) | ||
382 | +RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB) | ||
383 | +RVVCALL(OPIVX2, vwaddu_wx_b, WOP_WUUU_B, H2, H1, DO_ADD) | ||
384 | +RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD) | ||
385 | +RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD) | ||
386 | +RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB) | ||
387 | +RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB) | ||
388 | +RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB) | ||
389 | +RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD) | ||
390 | +RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD) | ||
391 | +RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD) | ||
392 | +RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB) | ||
393 | +RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB) | ||
394 | +RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB) | ||
395 | +GEN_VEXT_VX(vwaddu_vx_b, 1, 2, clearh) | ||
396 | +GEN_VEXT_VX(vwaddu_vx_h, 2, 4, clearl) | ||
397 | +GEN_VEXT_VX(vwaddu_vx_w, 4, 8, clearq) | ||
398 | +GEN_VEXT_VX(vwsubu_vx_b, 1, 2, clearh) | ||
399 | +GEN_VEXT_VX(vwsubu_vx_h, 2, 4, clearl) | ||
400 | +GEN_VEXT_VX(vwsubu_vx_w, 4, 8, clearq) | ||
401 | +GEN_VEXT_VX(vwadd_vx_b, 1, 2, clearh) | ||
402 | +GEN_VEXT_VX(vwadd_vx_h, 2, 4, clearl) | ||
403 | +GEN_VEXT_VX(vwadd_vx_w, 4, 8, clearq) | ||
404 | +GEN_VEXT_VX(vwsub_vx_b, 1, 2, clearh) | ||
405 | +GEN_VEXT_VX(vwsub_vx_h, 2, 4, clearl) | ||
406 | +GEN_VEXT_VX(vwsub_vx_w, 4, 8, clearq) | ||
407 | +GEN_VEXT_VX(vwaddu_wx_b, 1, 2, clearh) | ||
408 | +GEN_VEXT_VX(vwaddu_wx_h, 2, 4, clearl) | ||
409 | +GEN_VEXT_VX(vwaddu_wx_w, 4, 8, clearq) | ||
410 | +GEN_VEXT_VX(vwsubu_wx_b, 1, 2, clearh) | ||
411 | +GEN_VEXT_VX(vwsubu_wx_h, 2, 4, clearl) | ||
412 | +GEN_VEXT_VX(vwsubu_wx_w, 4, 8, clearq) | ||
413 | +GEN_VEXT_VX(vwadd_wx_b, 1, 2, clearh) | ||
414 | +GEN_VEXT_VX(vwadd_wx_h, 2, 4, clearl) | ||
415 | +GEN_VEXT_VX(vwadd_wx_w, 4, 8, clearq) | ||
416 | +GEN_VEXT_VX(vwsub_wx_b, 1, 2, clearh) | ||
417 | +GEN_VEXT_VX(vwsub_wx_h, 2, 4, clearl) | ||
418 | +GEN_VEXT_VX(vwsub_wx_w, 4, 8, clearq) | ||
419 | -- | 501 | -- |
420 | 2.27.0 | 502 | 2.41.0 |
421 | |||
422 | diff view generated by jsdifflib |
1 | From: LIU Zhiwei <zhiwei_liu@c-sky.com> | 1 | From: Lawrence Hunter <lawrence.hunter@codethink.co.uk> |
---|---|---|---|
2 | 2 | ||
3 | Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> | 3 | This commit adds support for the Zvksh vector-crypto extension, which |
4 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 4 | consists of the following instructions: |
5 | Reviewed-by: Alistair Francis <alistair.francis@wdc.com> | 5 | |
6 | Message-id: 20200623215920.2594-43-zhiwei_liu@c-sky.com | 6 | * vsm3me.vv |
7 | * vsm3c.vi | ||
8 | |||
9 | Translation functions are defined in | ||
10 | `target/riscv/insn_trans/trans_rvvk.c.inc` and helpers are defined in | ||
11 | `target/riscv/vcrypto_helper.c`. | ||
12 | |||
13 | Co-authored-by: Kiran Ostrolenk <kiran.ostrolenk@codethink.co.uk> | ||
14 | [max.chou@sifive.com: Replaced vstart checking by TCG op] | ||
15 | Signed-off-by: Kiran Ostrolenk <kiran.ostrolenk@codethink.co.uk> | ||
16 | Signed-off-by: Lawrence Hunter <lawrence.hunter@codethink.co.uk> | ||
17 | Signed-off-by: Max Chou <max.chou@sifive.com> | ||
18 | Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> | ||
19 | [max.chou@sifive.com: Exposed x-zvksh property] | ||
20 | Message-ID: <20230711165917.2629866-12-max.chou@sifive.com> | ||
7 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | 21 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> |
8 | --- | 22 | --- |
9 | target/riscv/helper.h | 13 ++++++++++ | 23 | target/riscv/cpu_cfg.h | 1 + |
10 | target/riscv/insn32.decode | 4 +++ | 24 | target/riscv/helper.h | 3 + |
11 | target/riscv/insn_trans/trans_rvv.inc.c | 6 +++++ | 25 | target/riscv/insn32.decode | 4 + |
12 | target/riscv/vector_helper.c | 33 +++++++++++++++++++++++++ | 26 | target/riscv/cpu.c | 6 +- |
13 | 4 files changed, 56 insertions(+) | 27 | target/riscv/vcrypto_helper.c | 134 +++++++++++++++++++++++ |
14 | 28 | target/riscv/insn_trans/trans_rvvk.c.inc | 31 ++++++ | |
29 | 6 files changed, 177 insertions(+), 2 deletions(-) | ||
30 | |||
31 | diff --git a/target/riscv/cpu_cfg.h b/target/riscv/cpu_cfg.h | ||
32 | index XXXXXXX..XXXXXXX 100644 | ||
33 | --- a/target/riscv/cpu_cfg.h | ||
34 | +++ b/target/riscv/cpu_cfg.h | ||
35 | @@ -XXX,XX +XXX,XX @@ struct RISCVCPUConfig { | ||
36 | bool ext_zvkned; | ||
37 | bool ext_zvknha; | ||
38 | bool ext_zvknhb; | ||
39 | + bool ext_zvksh; | ||
40 | bool ext_zmmul; | ||
41 | bool ext_zvfbfmin; | ||
42 | bool ext_zvfbfwma; | ||
15 | diff --git a/target/riscv/helper.h b/target/riscv/helper.h | 43 | diff --git a/target/riscv/helper.h b/target/riscv/helper.h |
16 | index XXXXXXX..XXXXXXX 100644 | 44 | index XXXXXXX..XXXXXXX 100644 |
17 | --- a/target/riscv/helper.h | 45 | --- a/target/riscv/helper.h |
18 | +++ b/target/riscv/helper.h | 46 | +++ b/target/riscv/helper.h |
19 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_5(vfclass_v_d, void, ptr, ptr, ptr, env, i32) | 47 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_5(vsha2ch32_vv, void, ptr, ptr, ptr, env, i32) |
20 | DEF_HELPER_6(vfmerge_vfm_h, void, ptr, ptr, i64, ptr, env, i32) | 48 | DEF_HELPER_5(vsha2ch64_vv, void, ptr, ptr, ptr, env, i32) |
21 | DEF_HELPER_6(vfmerge_vfm_w, void, ptr, ptr, i64, ptr, env, i32) | 49 | DEF_HELPER_5(vsha2cl32_vv, void, ptr, ptr, ptr, env, i32) |
22 | DEF_HELPER_6(vfmerge_vfm_d, void, ptr, ptr, i64, ptr, env, i32) | 50 | DEF_HELPER_5(vsha2cl64_vv, void, ptr, ptr, ptr, env, i32) |
23 | + | 51 | + |
24 | +DEF_HELPER_5(vfcvt_xu_f_v_h, void, ptr, ptr, ptr, env, i32) | 52 | +DEF_HELPER_5(vsm3me_vv, void, ptr, ptr, ptr, env, i32) |
25 | +DEF_HELPER_5(vfcvt_xu_f_v_w, void, ptr, ptr, ptr, env, i32) | 53 | +DEF_HELPER_5(vsm3c_vi, void, ptr, ptr, i32, env, i32) |
26 | +DEF_HELPER_5(vfcvt_xu_f_v_d, void, ptr, ptr, ptr, env, i32) | ||
27 | +DEF_HELPER_5(vfcvt_x_f_v_h, void, ptr, ptr, ptr, env, i32) | ||
28 | +DEF_HELPER_5(vfcvt_x_f_v_w, void, ptr, ptr, ptr, env, i32) | ||
29 | +DEF_HELPER_5(vfcvt_x_f_v_d, void, ptr, ptr, ptr, env, i32) | ||
30 | +DEF_HELPER_5(vfcvt_f_xu_v_h, void, ptr, ptr, ptr, env, i32) | ||
31 | +DEF_HELPER_5(vfcvt_f_xu_v_w, void, ptr, ptr, ptr, env, i32) | ||
32 | +DEF_HELPER_5(vfcvt_f_xu_v_d, void, ptr, ptr, ptr, env, i32) | ||
33 | +DEF_HELPER_5(vfcvt_f_x_v_h, void, ptr, ptr, ptr, env, i32) | ||
34 | +DEF_HELPER_5(vfcvt_f_x_v_w, void, ptr, ptr, ptr, env, i32) | ||
35 | +DEF_HELPER_5(vfcvt_f_x_v_d, void, ptr, ptr, ptr, env, i32) | ||
36 | diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode | 54 | diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode |
37 | index XXXXXXX..XXXXXXX 100644 | 55 | index XXXXXXX..XXXXXXX 100644 |
38 | --- a/target/riscv/insn32.decode | 56 | --- a/target/riscv/insn32.decode |
39 | +++ b/target/riscv/insn32.decode | 57 | +++ b/target/riscv/insn32.decode |
40 | @@ -XXX,XX +XXX,XX @@ vmford_vf 011010 . ..... ..... 101 ..... 1010111 @r_vm | 58 | @@ -XXX,XX +XXX,XX @@ vaeskf2_vi 101010 1 ..... ..... 010 ..... 1110111 @r_vm_1 |
41 | vfclass_v 100011 . ..... 10000 001 ..... 1010111 @r2_vm | 59 | vsha2ms_vv 101101 1 ..... ..... 010 ..... 1110111 @r_vm_1 |
42 | vfmerge_vfm 010111 0 ..... ..... 101 ..... 1010111 @r_vm_0 | 60 | vsha2ch_vv 101110 1 ..... ..... 010 ..... 1110111 @r_vm_1 |
43 | vfmv_v_f 010111 1 00000 ..... 101 ..... 1010111 @r2 | 61 | vsha2cl_vv 101111 1 ..... ..... 010 ..... 1110111 @r_vm_1 |
44 | +vfcvt_xu_f_v 100010 . ..... 00000 001 ..... 1010111 @r2_vm | 62 | + |
45 | +vfcvt_x_f_v 100010 . ..... 00001 001 ..... 1010111 @r2_vm | 63 | +# *** Zvksh vector crypto extension *** |
46 | +vfcvt_f_xu_v 100010 . ..... 00010 001 ..... 1010111 @r2_vm | 64 | +vsm3me_vv 100000 1 ..... ..... 010 ..... 1110111 @r_vm_1 |
47 | +vfcvt_f_x_v 100010 . ..... 00011 001 ..... 1010111 @r2_vm | 65 | +vsm3c_vi 101011 1 ..... ..... 010 ..... 1110111 @r_vm_1 |
48 | 66 | diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c | |
49 | vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm | 67 | index XXXXXXX..XXXXXXX 100644 |
50 | vsetvl 1000000 ..... ..... 111 ..... 1010111 @r | 68 | --- a/target/riscv/cpu.c |
51 | diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c | 69 | +++ b/target/riscv/cpu.c |
52 | index XXXXXXX..XXXXXXX 100644 | 70 | @@ -XXX,XX +XXX,XX @@ static const struct isa_ext_data isa_edata_arr[] = { |
53 | --- a/target/riscv/insn_trans/trans_rvv.inc.c | 71 | ISA_EXT_DATA_ENTRY(zvkned, PRIV_VERSION_1_12_0, ext_zvkned), |
54 | +++ b/target/riscv/insn_trans/trans_rvv.inc.c | 72 | ISA_EXT_DATA_ENTRY(zvknha, PRIV_VERSION_1_12_0, ext_zvknha), |
55 | @@ -XXX,XX +XXX,XX @@ static bool trans_vfmv_v_f(DisasContext *s, arg_vfmv_v_f *a) | 73 | ISA_EXT_DATA_ENTRY(zvknhb, PRIV_VERSION_1_12_0, ext_zvknhb), |
74 | + ISA_EXT_DATA_ENTRY(zvksh, PRIV_VERSION_1_12_0, ext_zvksh), | ||
75 | ISA_EXT_DATA_ENTRY(zhinx, PRIV_VERSION_1_12_0, ext_zhinx), | ||
76 | ISA_EXT_DATA_ENTRY(zhinxmin, PRIV_VERSION_1_12_0, ext_zhinxmin), | ||
77 | ISA_EXT_DATA_ENTRY(smaia, PRIV_VERSION_1_12_0, ext_smaia), | ||
78 | @@ -XXX,XX +XXX,XX @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp) | ||
79 | * In principle Zve*x would also suffice here, were they supported | ||
80 | * in qemu | ||
81 | */ | ||
82 | - if ((cpu->cfg.ext_zvbb || cpu->cfg.ext_zvkned || cpu->cfg.ext_zvknha) && | ||
83 | - !cpu->cfg.ext_zve32f) { | ||
84 | + if ((cpu->cfg.ext_zvbb || cpu->cfg.ext_zvkned || cpu->cfg.ext_zvknha || | ||
85 | + cpu->cfg.ext_zvksh) && !cpu->cfg.ext_zve32f) { | ||
86 | error_setg(errp, | ||
87 | "Vector crypto extensions require V or Zve* extensions"); | ||
88 | return; | ||
89 | @@ -XXX,XX +XXX,XX @@ static Property riscv_cpu_extensions[] = { | ||
90 | DEFINE_PROP_BOOL("x-zvkned", RISCVCPU, cfg.ext_zvkned, false), | ||
91 | DEFINE_PROP_BOOL("x-zvknha", RISCVCPU, cfg.ext_zvknha, false), | ||
92 | DEFINE_PROP_BOOL("x-zvknhb", RISCVCPU, cfg.ext_zvknhb, false), | ||
93 | + DEFINE_PROP_BOOL("x-zvksh", RISCVCPU, cfg.ext_zvksh, false), | ||
94 | |||
95 | DEFINE_PROP_END_OF_LIST(), | ||
96 | }; | ||
97 | diff --git a/target/riscv/vcrypto_helper.c b/target/riscv/vcrypto_helper.c | ||
98 | index XXXXXXX..XXXXXXX 100644 | ||
99 | --- a/target/riscv/vcrypto_helper.c | ||
100 | +++ b/target/riscv/vcrypto_helper.c | ||
101 | @@ -XXX,XX +XXX,XX @@ void HELPER(vsha2cl64_vv)(void *vd, void *vs1, void *vs2, CPURISCVState *env, | ||
102 | vext_set_elems_1s(vd, vta, env->vl * esz, total_elems * esz); | ||
103 | env->vstart = 0; | ||
104 | } | ||
105 | + | ||
106 | +static inline uint32_t p1(uint32_t x) | ||
107 | +{ | ||
108 | + return x ^ rol32(x, 15) ^ rol32(x, 23); | ||
109 | +} | ||
110 | + | ||
111 | +static inline uint32_t zvksh_w(uint32_t m16, uint32_t m9, uint32_t m3, | ||
112 | + uint32_t m13, uint32_t m6) | ||
113 | +{ | ||
114 | + return p1(m16 ^ m9 ^ rol32(m3, 15)) ^ rol32(m13, 7) ^ m6; | ||
115 | +} | ||
116 | + | ||
117 | +void HELPER(vsm3me_vv)(void *vd_vptr, void *vs1_vptr, void *vs2_vptr, | ||
118 | + CPURISCVState *env, uint32_t desc) | ||
119 | +{ | ||
120 | + uint32_t esz = memop_size(FIELD_EX64(env->vtype, VTYPE, VSEW)); | ||
121 | + uint32_t total_elems = vext_get_total_elems(env, desc, esz); | ||
122 | + uint32_t vta = vext_vta(desc); | ||
123 | + uint32_t *vd = vd_vptr; | ||
124 | + uint32_t *vs1 = vs1_vptr; | ||
125 | + uint32_t *vs2 = vs2_vptr; | ||
126 | + | ||
127 | + for (int i = env->vstart / 8; i < env->vl / 8; i++) { | ||
128 | + uint32_t w[24]; | ||
129 | + for (int j = 0; j < 8; j++) { | ||
130 | + w[j] = bswap32(vs1[H4((i * 8) + j)]); | ||
131 | + w[j + 8] = bswap32(vs2[H4((i * 8) + j)]); | ||
132 | + } | ||
133 | + for (int j = 0; j < 8; j++) { | ||
134 | + w[j + 16] = | ||
135 | + zvksh_w(w[j], w[j + 7], w[j + 13], w[j + 3], w[j + 10]); | ||
136 | + } | ||
137 | + for (int j = 0; j < 8; j++) { | ||
138 | + vd[(i * 8) + j] = bswap32(w[H4(j + 16)]); | ||
139 | + } | ||
140 | + } | ||
141 | + vext_set_elems_1s(vd_vptr, vta, env->vl * esz, total_elems * esz); | ||
142 | + env->vstart = 0; | ||
143 | +} | ||
144 | + | ||
145 | +static inline uint32_t ff1(uint32_t x, uint32_t y, uint32_t z) | ||
146 | +{ | ||
147 | + return x ^ y ^ z; | ||
148 | +} | ||
149 | + | ||
150 | +static inline uint32_t ff2(uint32_t x, uint32_t y, uint32_t z) | ||
151 | +{ | ||
152 | + return (x & y) | (x & z) | (y & z); | ||
153 | +} | ||
154 | + | ||
155 | +static inline uint32_t ff_j(uint32_t x, uint32_t y, uint32_t z, uint32_t j) | ||
156 | +{ | ||
157 | + return (j <= 15) ? ff1(x, y, z) : ff2(x, y, z); | ||
158 | +} | ||
159 | + | ||
160 | +static inline uint32_t gg1(uint32_t x, uint32_t y, uint32_t z) | ||
161 | +{ | ||
162 | + return x ^ y ^ z; | ||
163 | +} | ||
164 | + | ||
165 | +static inline uint32_t gg2(uint32_t x, uint32_t y, uint32_t z) | ||
166 | +{ | ||
167 | + return (x & y) | (~x & z); | ||
168 | +} | ||
169 | + | ||
170 | +static inline uint32_t gg_j(uint32_t x, uint32_t y, uint32_t z, uint32_t j) | ||
171 | +{ | ||
172 | + return (j <= 15) ? gg1(x, y, z) : gg2(x, y, z); | ||
173 | +} | ||
174 | + | ||
175 | +static inline uint32_t t_j(uint32_t j) | ||
176 | +{ | ||
177 | + return (j <= 15) ? 0x79cc4519 : 0x7a879d8a; | ||
178 | +} | ||
179 | + | ||
180 | +static inline uint32_t p_0(uint32_t x) | ||
181 | +{ | ||
182 | + return x ^ rol32(x, 9) ^ rol32(x, 17); | ||
183 | +} | ||
184 | + | ||
185 | +static void sm3c(uint32_t *vd, uint32_t *vs1, uint32_t *vs2, uint32_t uimm) | ||
186 | +{ | ||
187 | + uint32_t x0, x1; | ||
188 | + uint32_t j; | ||
189 | + uint32_t ss1, ss2, tt1, tt2; | ||
190 | + x0 = vs2[0] ^ vs2[4]; | ||
191 | + x1 = vs2[1] ^ vs2[5]; | ||
192 | + j = 2 * uimm; | ||
193 | + ss1 = rol32(rol32(vs1[0], 12) + vs1[4] + rol32(t_j(j), j % 32), 7); | ||
194 | + ss2 = ss1 ^ rol32(vs1[0], 12); | ||
195 | + tt1 = ff_j(vs1[0], vs1[1], vs1[2], j) + vs1[3] + ss2 + x0; | ||
196 | + tt2 = gg_j(vs1[4], vs1[5], vs1[6], j) + vs1[7] + ss1 + vs2[0]; | ||
197 | + vs1[3] = vs1[2]; | ||
198 | + vd[3] = rol32(vs1[1], 9); | ||
199 | + vs1[1] = vs1[0]; | ||
200 | + vd[1] = tt1; | ||
201 | + vs1[7] = vs1[6]; | ||
202 | + vd[7] = rol32(vs1[5], 19); | ||
203 | + vs1[5] = vs1[4]; | ||
204 | + vd[5] = p_0(tt2); | ||
205 | + j = 2 * uimm + 1; | ||
206 | + ss1 = rol32(rol32(vd[1], 12) + vd[5] + rol32(t_j(j), j % 32), 7); | ||
207 | + ss2 = ss1 ^ rol32(vd[1], 12); | ||
208 | + tt1 = ff_j(vd[1], vs1[1], vd[3], j) + vs1[3] + ss2 + x1; | ||
209 | + tt2 = gg_j(vd[5], vs1[5], vd[7], j) + vs1[7] + ss1 + vs2[1]; | ||
210 | + vd[2] = rol32(vs1[1], 9); | ||
211 | + vd[0] = tt1; | ||
212 | + vd[6] = rol32(vs1[5], 19); | ||
213 | + vd[4] = p_0(tt2); | ||
214 | +} | ||
215 | + | ||
216 | +void HELPER(vsm3c_vi)(void *vd_vptr, void *vs2_vptr, uint32_t uimm, | ||
217 | + CPURISCVState *env, uint32_t desc) | ||
218 | +{ | ||
219 | + uint32_t esz = memop_size(FIELD_EX64(env->vtype, VTYPE, VSEW)); | ||
220 | + uint32_t total_elems = vext_get_total_elems(env, desc, esz); | ||
221 | + uint32_t vta = vext_vta(desc); | ||
222 | + uint32_t *vd = vd_vptr; | ||
223 | + uint32_t *vs2 = vs2_vptr; | ||
224 | + uint32_t v1[8], v2[8], v3[8]; | ||
225 | + | ||
226 | + for (int i = env->vstart / 8; i < env->vl / 8; i++) { | ||
227 | + for (int k = 0; k < 8; k++) { | ||
228 | + v2[k] = bswap32(vd[H4(i * 8 + k)]); | ||
229 | + v3[k] = bswap32(vs2[H4(i * 8 + k)]); | ||
230 | + } | ||
231 | + sm3c(v1, v2, v3, uimm); | ||
232 | + for (int k = 0; k < 8; k++) { | ||
233 | + vd[i * 8 + k] = bswap32(v1[H4(k)]); | ||
234 | + } | ||
235 | + } | ||
236 | + vext_set_elems_1s(vd_vptr, vta, env->vl * esz, total_elems * esz); | ||
237 | + env->vstart = 0; | ||
238 | +} | ||
239 | diff --git a/target/riscv/insn_trans/trans_rvvk.c.inc b/target/riscv/insn_trans/trans_rvvk.c.inc | ||
240 | index XXXXXXX..XXXXXXX 100644 | ||
241 | --- a/target/riscv/insn_trans/trans_rvvk.c.inc | ||
242 | +++ b/target/riscv/insn_trans/trans_rvvk.c.inc | ||
243 | @@ -XXX,XX +XXX,XX @@ static bool trans_vsha2ch_vv(DisasContext *s, arg_rmrr *a) | ||
56 | } | 244 | } |
57 | return false; | 245 | return false; |
58 | } | 246 | } |
59 | + | 247 | + |
60 | +/* Single-Width Floating-Point/Integer Type-Convert Instructions */ | 248 | +/* |
61 | +GEN_OPFV_TRANS(vfcvt_xu_f_v, opfv_check) | 249 | + * Zvksh |
62 | +GEN_OPFV_TRANS(vfcvt_x_f_v, opfv_check) | 250 | + */ |
63 | +GEN_OPFV_TRANS(vfcvt_f_xu_v, opfv_check) | 251 | + |
64 | +GEN_OPFV_TRANS(vfcvt_f_x_v, opfv_check) | 252 | +#define ZVKSH_EGS 8 |
65 | diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c | 253 | + |
66 | index XXXXXXX..XXXXXXX 100644 | 254 | +static inline bool vsm3_check(DisasContext *s, arg_rmrr *a) |
67 | --- a/target/riscv/vector_helper.c | 255 | +{ |
68 | +++ b/target/riscv/vector_helper.c | 256 | + int egw_bytes = ZVKSH_EGS << s->sew; |
69 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ | 257 | + int mult = 1 << MAX(s->lmul, 0); |
70 | GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2, clearh) | 258 | + return s->cfg_ptr->ext_zvksh == true && |
71 | GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4, clearl) | 259 | + require_rvv(s) && |
72 | GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8, clearq) | 260 | + vext_check_isa_ill(s) && |
73 | + | 261 | + !is_overlapped(a->rd, mult, a->rs2, mult) && |
74 | +/* Single-Width Floating-Point/Integer Type-Convert Instructions */ | 262 | + MAXSZ(s) >= egw_bytes && |
75 | +/* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */ | 263 | + s->sew == MO_32; |
76 | +RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16) | 264 | +} |
77 | +RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32) | 265 | + |
78 | +RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64) | 266 | +static inline bool vsm3me_check(DisasContext *s, arg_rmrr *a) |
79 | +GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2, 2, clearh) | 267 | +{ |
80 | +GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4, 4, clearl) | 268 | + return vsm3_check(s, a) && vext_check_sss(s, a->rd, a->rs1, a->rs2, a->vm); |
81 | +GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8, 8, clearq) | 269 | +} |
82 | + | 270 | + |
83 | +/* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */ | 271 | +static inline bool vsm3c_check(DisasContext *s, arg_rmrr *a) |
84 | +RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16) | 272 | +{ |
85 | +RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32) | 273 | + return vsm3_check(s, a) && vext_check_ss(s, a->rd, a->rs2, a->vm); |
86 | +RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64) | 274 | +} |
87 | +GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2, 2, clearh) | 275 | + |
88 | +GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4, 4, clearl) | 276 | +GEN_VV_UNMASKED_TRANS(vsm3me_vv, vsm3me_check, ZVKSH_EGS) |
89 | +GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8, 8, clearq) | 277 | +GEN_VI_UNMASKED_TRANS(vsm3c_vi, vsm3c_check, ZVKSH_EGS) |
90 | + | ||
91 | +/* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */ | ||
92 | +RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16) | ||
93 | +RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32) | ||
94 | +RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64) | ||
95 | +GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2, 2, clearh) | ||
96 | +GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4, 4, clearl) | ||
97 | +GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8, 8, clearq) | ||
98 | + | ||
99 | +/* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */ | ||
100 | +RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16) | ||
101 | +RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32) | ||
102 | +RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64) | ||
103 | +GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2, 2, clearh) | ||
104 | +GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4, 4, clearl) | ||
105 | +GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8, 8, clearq) | ||
106 | -- | 278 | -- |
107 | 2.27.0 | 279 | 2.41.0 |
108 | |||
109 | diff view generated by jsdifflib |
1 | From: LIU Zhiwei <zhiwei_liu@c-sky.com> | 1 | From: Nazar Kazakov <nazar.kazakov@codethink.co.uk> |
---|---|---|---|
2 | 2 | ||
3 | Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> | 3 | This commit adds support for the Zvkg vector-crypto extension, which |
4 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 4 | consists of the following instructions: |
5 | Message-id: 20200623215920.2594-59-zhiwei_liu@c-sky.com | 5 | |
6 | * vgmul.vv | ||
7 | * vghsh.vv | ||
8 | |||
9 | Translation functions are defined in | ||
10 | `target/riscv/insn_trans/trans_rvvk.c.inc` and helpers are defined in | ||
11 | `target/riscv/vcrypto_helper.c`. | ||
12 | |||
13 | Co-authored-by: Lawrence Hunter <lawrence.hunter@codethink.co.uk> | ||
14 | [max.chou@sifive.com: Replaced vstart checking by TCG op] | ||
15 | Signed-off-by: Lawrence Hunter <lawrence.hunter@codethink.co.uk> | ||
16 | Signed-off-by: Nazar Kazakov <nazar.kazakov@codethink.co.uk> | ||
17 | Signed-off-by: Max Chou <max.chou@sifive.com> | ||
18 | Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> | ||
19 | [max.chou@sifive.com: Exposed x-zvkg property] | ||
20 | [max.chou@sifive.com: Replaced uint by int for cross win32 build] | ||
21 | Message-ID: <20230711165917.2629866-13-max.chou@sifive.com> | ||
6 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | 22 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> |
7 | --- | 23 | --- |
8 | target/riscv/helper.h | 17 ++++ | 24 | target/riscv/cpu_cfg.h | 1 + |
9 | target/riscv/insn32.decode | 6 ++ | 25 | target/riscv/helper.h | 3 + |
10 | target/riscv/insn_trans/trans_rvv.inc.c | 18 ++++ | 26 | target/riscv/insn32.decode | 4 ++ |
11 | target/riscv/vector_helper.c | 114 ++++++++++++++++++++++++ | 27 | target/riscv/cpu.c | 6 +- |
12 | 4 files changed, 155 insertions(+) | 28 | target/riscv/vcrypto_helper.c | 72 ++++++++++++++++++++++++ |
13 | 29 | target/riscv/insn_trans/trans_rvvk.c.inc | 30 ++++++++++ | |
30 | 6 files changed, 114 insertions(+), 2 deletions(-) | ||
31 | |||
32 | diff --git a/target/riscv/cpu_cfg.h b/target/riscv/cpu_cfg.h | ||
33 | index XXXXXXX..XXXXXXX 100644 | ||
34 | --- a/target/riscv/cpu_cfg.h | ||
35 | +++ b/target/riscv/cpu_cfg.h | ||
36 | @@ -XXX,XX +XXX,XX @@ struct RISCVCPUConfig { | ||
37 | bool ext_zve64d; | ||
38 | bool ext_zvbb; | ||
39 | bool ext_zvbc; | ||
40 | + bool ext_zvkg; | ||
41 | bool ext_zvkned; | ||
42 | bool ext_zvknha; | ||
43 | bool ext_zvknhb; | ||
14 | diff --git a/target/riscv/helper.h b/target/riscv/helper.h | 44 | diff --git a/target/riscv/helper.h b/target/riscv/helper.h |
15 | index XXXXXXX..XXXXXXX 100644 | 45 | index XXXXXXX..XXXXXXX 100644 |
16 | --- a/target/riscv/helper.h | 46 | --- a/target/riscv/helper.h |
17 | +++ b/target/riscv/helper.h | 47 | +++ b/target/riscv/helper.h |
18 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_4(vid_v_b, void, ptr, ptr, env, i32) | 48 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_5(vsha2cl64_vv, void, ptr, ptr, ptr, env, i32) |
19 | DEF_HELPER_4(vid_v_h, void, ptr, ptr, env, i32) | 49 | |
20 | DEF_HELPER_4(vid_v_w, void, ptr, ptr, env, i32) | 50 | DEF_HELPER_5(vsm3me_vv, void, ptr, ptr, ptr, env, i32) |
21 | DEF_HELPER_4(vid_v_d, void, ptr, ptr, env, i32) | 51 | DEF_HELPER_5(vsm3c_vi, void, ptr, ptr, i32, env, i32) |
22 | + | 52 | + |
23 | +DEF_HELPER_6(vslideup_vx_b, void, ptr, ptr, tl, ptr, env, i32) | 53 | +DEF_HELPER_5(vghsh_vv, void, ptr, ptr, ptr, env, i32) |
24 | +DEF_HELPER_6(vslideup_vx_h, void, ptr, ptr, tl, ptr, env, i32) | 54 | +DEF_HELPER_4(vgmul_vv, void, ptr, ptr, env, i32) |
25 | +DEF_HELPER_6(vslideup_vx_w, void, ptr, ptr, tl, ptr, env, i32) | ||
26 | +DEF_HELPER_6(vslideup_vx_d, void, ptr, ptr, tl, ptr, env, i32) | ||
27 | +DEF_HELPER_6(vslidedown_vx_b, void, ptr, ptr, tl, ptr, env, i32) | ||
28 | +DEF_HELPER_6(vslidedown_vx_h, void, ptr, ptr, tl, ptr, env, i32) | ||
29 | +DEF_HELPER_6(vslidedown_vx_w, void, ptr, ptr, tl, ptr, env, i32) | ||
30 | +DEF_HELPER_6(vslidedown_vx_d, void, ptr, ptr, tl, ptr, env, i32) | ||
31 | +DEF_HELPER_6(vslide1up_vx_b, void, ptr, ptr, tl, ptr, env, i32) | ||
32 | +DEF_HELPER_6(vslide1up_vx_h, void, ptr, ptr, tl, ptr, env, i32) | ||
33 | +DEF_HELPER_6(vslide1up_vx_w, void, ptr, ptr, tl, ptr, env, i32) | ||
34 | +DEF_HELPER_6(vslide1up_vx_d, void, ptr, ptr, tl, ptr, env, i32) | ||
35 | +DEF_HELPER_6(vslide1down_vx_b, void, ptr, ptr, tl, ptr, env, i32) | ||
36 | +DEF_HELPER_6(vslide1down_vx_h, void, ptr, ptr, tl, ptr, env, i32) | ||
37 | +DEF_HELPER_6(vslide1down_vx_w, void, ptr, ptr, tl, ptr, env, i32) | ||
38 | +DEF_HELPER_6(vslide1down_vx_d, void, ptr, ptr, tl, ptr, env, i32) | ||
39 | diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode | 55 | diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode |
40 | index XXXXXXX..XXXXXXX 100644 | 56 | index XXXXXXX..XXXXXXX 100644 |
41 | --- a/target/riscv/insn32.decode | 57 | --- a/target/riscv/insn32.decode |
42 | +++ b/target/riscv/insn32.decode | 58 | +++ b/target/riscv/insn32.decode |
43 | @@ -XXX,XX +XXX,XX @@ vext_x_v 001100 1 ..... ..... 010 ..... 1010111 @r | 59 | @@ -XXX,XX +XXX,XX @@ vsha2cl_vv 101111 1 ..... ..... 010 ..... 1110111 @r_vm_1 |
44 | vmv_s_x 001101 1 00000 ..... 110 ..... 1010111 @r2 | 60 | # *** Zvksh vector crypto extension *** |
45 | vfmv_f_s 001100 1 ..... 00000 001 ..... 1010111 @r2rd | 61 | vsm3me_vv 100000 1 ..... ..... 010 ..... 1110111 @r_vm_1 |
46 | vfmv_s_f 001101 1 00000 ..... 101 ..... 1010111 @r2 | 62 | vsm3c_vi 101011 1 ..... ..... 010 ..... 1110111 @r_vm_1 |
47 | +vslideup_vx 001110 . ..... ..... 100 ..... 1010111 @r_vm | 63 | + |
48 | +vslideup_vi 001110 . ..... ..... 011 ..... 1010111 @r_vm | 64 | +# *** Zvkg vector crypto extension *** |
49 | +vslide1up_vx 001110 . ..... ..... 110 ..... 1010111 @r_vm | 65 | +vghsh_vv 101100 1 ..... ..... 010 ..... 1110111 @r_vm_1 |
50 | +vslidedown_vx 001111 . ..... ..... 100 ..... 1010111 @r_vm | 66 | +vgmul_vv 101000 1 ..... 10001 010 ..... 1110111 @r2_vm_1 |
51 | +vslidedown_vi 001111 . ..... ..... 011 ..... 1010111 @r_vm | 67 | diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c |
52 | +vslide1down_vx 001111 . ..... ..... 110 ..... 1010111 @r_vm | 68 | index XXXXXXX..XXXXXXX 100644 |
53 | 69 | --- a/target/riscv/cpu.c | |
54 | vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm | 70 | +++ b/target/riscv/cpu.c |
55 | vsetvl 1000000 ..... ..... 111 ..... 1010111 @r | 71 | @@ -XXX,XX +XXX,XX @@ static const struct isa_ext_data isa_edata_arr[] = { |
56 | diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c | 72 | ISA_EXT_DATA_ENTRY(zvfbfwma, PRIV_VERSION_1_12_0, ext_zvfbfwma), |
57 | index XXXXXXX..XXXXXXX 100644 | 73 | ISA_EXT_DATA_ENTRY(zvfh, PRIV_VERSION_1_12_0, ext_zvfh), |
58 | --- a/target/riscv/insn_trans/trans_rvv.inc.c | 74 | ISA_EXT_DATA_ENTRY(zvfhmin, PRIV_VERSION_1_12_0, ext_zvfhmin), |
59 | +++ b/target/riscv/insn_trans/trans_rvv.inc.c | 75 | + ISA_EXT_DATA_ENTRY(zvkg, PRIV_VERSION_1_12_0, ext_zvkg), |
60 | @@ -XXX,XX +XXX,XX @@ static bool trans_vfmv_s_f(DisasContext *s, arg_vfmv_s_f *a) | 76 | ISA_EXT_DATA_ENTRY(zvkned, PRIV_VERSION_1_12_0, ext_zvkned), |
61 | } | 77 | ISA_EXT_DATA_ENTRY(zvknha, PRIV_VERSION_1_12_0, ext_zvknha), |
62 | return false; | 78 | ISA_EXT_DATA_ENTRY(zvknhb, PRIV_VERSION_1_12_0, ext_zvknhb), |
79 | @@ -XXX,XX +XXX,XX @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp) | ||
80 | * In principle Zve*x would also suffice here, were they supported | ||
81 | * in qemu | ||
82 | */ | ||
83 | - if ((cpu->cfg.ext_zvbb || cpu->cfg.ext_zvkned || cpu->cfg.ext_zvknha || | ||
84 | - cpu->cfg.ext_zvksh) && !cpu->cfg.ext_zve32f) { | ||
85 | + if ((cpu->cfg.ext_zvbb || cpu->cfg.ext_zvkg || cpu->cfg.ext_zvkned || | ||
86 | + cpu->cfg.ext_zvknha || cpu->cfg.ext_zvksh) && !cpu->cfg.ext_zve32f) { | ||
87 | error_setg(errp, | ||
88 | "Vector crypto extensions require V or Zve* extensions"); | ||
89 | return; | ||
90 | @@ -XXX,XX +XXX,XX @@ static Property riscv_cpu_extensions[] = { | ||
91 | /* Vector cryptography extensions */ | ||
92 | DEFINE_PROP_BOOL("x-zvbb", RISCVCPU, cfg.ext_zvbb, false), | ||
93 | DEFINE_PROP_BOOL("x-zvbc", RISCVCPU, cfg.ext_zvbc, false), | ||
94 | + DEFINE_PROP_BOOL("x-zvkg", RISCVCPU, cfg.ext_zvkg, false), | ||
95 | DEFINE_PROP_BOOL("x-zvkned", RISCVCPU, cfg.ext_zvkned, false), | ||
96 | DEFINE_PROP_BOOL("x-zvknha", RISCVCPU, cfg.ext_zvknha, false), | ||
97 | DEFINE_PROP_BOOL("x-zvknhb", RISCVCPU, cfg.ext_zvknhb, false), | ||
98 | diff --git a/target/riscv/vcrypto_helper.c b/target/riscv/vcrypto_helper.c | ||
99 | index XXXXXXX..XXXXXXX 100644 | ||
100 | --- a/target/riscv/vcrypto_helper.c | ||
101 | +++ b/target/riscv/vcrypto_helper.c | ||
102 | @@ -XXX,XX +XXX,XX @@ void HELPER(vsm3c_vi)(void *vd_vptr, void *vs2_vptr, uint32_t uimm, | ||
103 | vext_set_elems_1s(vd_vptr, vta, env->vl * esz, total_elems * esz); | ||
104 | env->vstart = 0; | ||
63 | } | 105 | } |
64 | + | 106 | + |
65 | +/* Vector Slide Instructions */ | 107 | +void HELPER(vghsh_vv)(void *vd_vptr, void *vs1_vptr, void *vs2_vptr, |
66 | +static bool slideup_check(DisasContext *s, arg_rmrr *a) | 108 | + CPURISCVState *env, uint32_t desc) |
67 | +{ | 109 | +{ |
68 | + return (vext_check_isa_ill(s) && | 110 | + uint64_t *vd = vd_vptr; |
69 | + vext_check_overlap_mask(s, a->rd, a->vm, true) && | 111 | + uint64_t *vs1 = vs1_vptr; |
70 | + vext_check_reg(s, a->rd, false) && | 112 | + uint64_t *vs2 = vs2_vptr; |
71 | + vext_check_reg(s, a->rs2, false) && | 113 | + uint32_t vta = vext_vta(desc); |
72 | + (a->rd != a->rs2)); | 114 | + uint32_t total_elems = vext_get_total_elems(env, desc, 4); |
73 | +} | 115 | + |
74 | + | 116 | + for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) { |
75 | +GEN_OPIVX_TRANS(vslideup_vx, slideup_check) | 117 | + uint64_t Y[2] = {vd[i * 2 + 0], vd[i * 2 + 1]}; |
76 | +GEN_OPIVX_TRANS(vslide1up_vx, slideup_check) | 118 | + uint64_t H[2] = {brev8(vs2[i * 2 + 0]), brev8(vs2[i * 2 + 1])}; |
77 | +GEN_OPIVI_TRANS(vslideup_vi, 1, vslideup_vx, slideup_check) | 119 | + uint64_t X[2] = {vs1[i * 2 + 0], vs1[i * 2 + 1]}; |
78 | + | 120 | + uint64_t Z[2] = {0, 0}; |
79 | +GEN_OPIVX_TRANS(vslidedown_vx, opivx_check) | 121 | + |
80 | +GEN_OPIVX_TRANS(vslide1down_vx, opivx_check) | 122 | + uint64_t S[2] = {brev8(Y[0] ^ X[0]), brev8(Y[1] ^ X[1])}; |
81 | +GEN_OPIVI_TRANS(vslidedown_vi, 1, vslidedown_vx, opivx_check) | 123 | + |
82 | diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c | 124 | + for (int j = 0; j < 128; j++) { |
83 | index XXXXXXX..XXXXXXX 100644 | 125 | + if ((S[j / 64] >> (j % 64)) & 1) { |
84 | --- a/target/riscv/vector_helper.c | 126 | + Z[0] ^= H[0]; |
85 | +++ b/target/riscv/vector_helper.c | 127 | + Z[1] ^= H[1]; |
86 | @@ -XXX,XX +XXX,XX @@ GEN_VEXT_VID_V(vid_v_b, uint8_t, H1, clearb) | 128 | + } |
87 | GEN_VEXT_VID_V(vid_v_h, uint16_t, H2, clearh) | 129 | + bool reduce = ((H[1] >> 63) & 1); |
88 | GEN_VEXT_VID_V(vid_v_w, uint32_t, H4, clearl) | 130 | + H[1] = H[1] << 1 | H[0] >> 63; |
89 | GEN_VEXT_VID_V(vid_v_d, uint64_t, H8, clearq) | 131 | + H[0] = H[0] << 1; |
132 | + if (reduce) { | ||
133 | + H[0] ^= 0x87; | ||
134 | + } | ||
135 | + } | ||
136 | + | ||
137 | + vd[i * 2 + 0] = brev8(Z[0]); | ||
138 | + vd[i * 2 + 1] = brev8(Z[1]); | ||
139 | + } | ||
140 | + /* set tail elements to 1s */ | ||
141 | + vext_set_elems_1s(vd, vta, env->vl * 4, total_elems * 4); | ||
142 | + env->vstart = 0; | ||
143 | +} | ||
144 | + | ||
145 | +void HELPER(vgmul_vv)(void *vd_vptr, void *vs2_vptr, CPURISCVState *env, | ||
146 | + uint32_t desc) | ||
147 | +{ | ||
148 | + uint64_t *vd = vd_vptr; | ||
149 | + uint64_t *vs2 = vs2_vptr; | ||
150 | + uint32_t vta = vext_vta(desc); | ||
151 | + uint32_t total_elems = vext_get_total_elems(env, desc, 4); | ||
152 | + | ||
153 | + for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) { | ||
154 | + uint64_t Y[2] = {brev8(vd[i * 2 + 0]), brev8(vd[i * 2 + 1])}; | ||
155 | + uint64_t H[2] = {brev8(vs2[i * 2 + 0]), brev8(vs2[i * 2 + 1])}; | ||
156 | + uint64_t Z[2] = {0, 0}; | ||
157 | + | ||
158 | + for (int j = 0; j < 128; j++) { | ||
159 | + if ((Y[j / 64] >> (j % 64)) & 1) { | ||
160 | + Z[0] ^= H[0]; | ||
161 | + Z[1] ^= H[1]; | ||
162 | + } | ||
163 | + bool reduce = ((H[1] >> 63) & 1); | ||
164 | + H[1] = H[1] << 1 | H[0] >> 63; | ||
165 | + H[0] = H[0] << 1; | ||
166 | + if (reduce) { | ||
167 | + H[0] ^= 0x87; | ||
168 | + } | ||
169 | + } | ||
170 | + | ||
171 | + vd[i * 2 + 0] = brev8(Z[0]); | ||
172 | + vd[i * 2 + 1] = brev8(Z[1]); | ||
173 | + } | ||
174 | + /* set tail elements to 1s */ | ||
175 | + vext_set_elems_1s(vd, vta, env->vl * 4, total_elems * 4); | ||
176 | + env->vstart = 0; | ||
177 | +} | ||
178 | diff --git a/target/riscv/insn_trans/trans_rvvk.c.inc b/target/riscv/insn_trans/trans_rvvk.c.inc | ||
179 | index XXXXXXX..XXXXXXX 100644 | ||
180 | --- a/target/riscv/insn_trans/trans_rvvk.c.inc | ||
181 | +++ b/target/riscv/insn_trans/trans_rvvk.c.inc | ||
182 | @@ -XXX,XX +XXX,XX @@ static inline bool vsm3c_check(DisasContext *s, arg_rmrr *a) | ||
183 | |||
184 | GEN_VV_UNMASKED_TRANS(vsm3me_vv, vsm3me_check, ZVKSH_EGS) | ||
185 | GEN_VI_UNMASKED_TRANS(vsm3c_vi, vsm3c_check, ZVKSH_EGS) | ||
90 | + | 186 | + |
91 | +/* | 187 | +/* |
92 | + *** Vector Permutation Instructions | 188 | + * Zvkg |
93 | + */ | 189 | + */ |
94 | + | 190 | + |
95 | +/* Vector Slide Instructions */ | 191 | +#define ZVKG_EGS 4 |
96 | +#define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H, CLEAR_FN) \ | 192 | + |
97 | +void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ | 193 | +static bool vgmul_check(DisasContext *s, arg_rmr *a) |
98 | + CPURISCVState *env, uint32_t desc) \ | 194 | +{ |
99 | +{ \ | 195 | + int egw_bytes = ZVKG_EGS << s->sew; |
100 | + uint32_t mlen = vext_mlen(desc); \ | 196 | + return s->cfg_ptr->ext_zvkg == true && |
101 | + uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen; \ | 197 | + vext_check_isa_ill(s) && |
102 | + uint32_t vm = vext_vm(desc); \ | 198 | + require_rvv(s) && |
103 | + uint32_t vl = env->vl; \ | 199 | + MAXSZ(s) >= egw_bytes && |
104 | + target_ulong offset = s1, i; \ | 200 | + vext_check_ss(s, a->rd, a->rs2, a->vm) && |
105 | + \ | 201 | + s->sew == MO_32; |
106 | + for (i = offset; i < vl; i++) { \ | 202 | +} |
107 | + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ | 203 | + |
108 | + continue; \ | 204 | +GEN_V_UNMASKED_TRANS(vgmul_vv, vgmul_check, ZVKG_EGS) |
109 | + } \ | 205 | + |
110 | + *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset)); \ | 206 | +static bool vghsh_check(DisasContext *s, arg_rmrr *a) |
111 | + } \ | 207 | +{ |
112 | + CLEAR_FN(vd, vl, vl * sizeof(ETYPE), vlmax * sizeof(ETYPE)); \ | 208 | + int egw_bytes = ZVKG_EGS << s->sew; |
113 | +} | 209 | + return s->cfg_ptr->ext_zvkg == true && |
114 | + | 210 | + opivv_check(s, a) && |
115 | +/* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */ | 211 | + MAXSZ(s) >= egw_bytes && |
116 | +GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t, H1, clearb) | 212 | + s->sew == MO_32; |
117 | +GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2, clearh) | 213 | +} |
118 | +GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4, clearl) | 214 | + |
119 | +GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8, clearq) | 215 | +GEN_VV_UNMASKED_TRANS(vghsh_vv, vghsh_check, ZVKG_EGS) |
120 | + | ||
121 | +#define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H, CLEAR_FN) \ | ||
122 | +void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ | ||
123 | + CPURISCVState *env, uint32_t desc) \ | ||
124 | +{ \ | ||
125 | + uint32_t mlen = vext_mlen(desc); \ | ||
126 | + uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen; \ | ||
127 | + uint32_t vm = vext_vm(desc); \ | ||
128 | + uint32_t vl = env->vl; \ | ||
129 | + target_ulong offset = s1, i; \ | ||
130 | + \ | ||
131 | + for (i = 0; i < vl; ++i) { \ | ||
132 | + target_ulong j = i + offset; \ | ||
133 | + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ | ||
134 | + continue; \ | ||
135 | + } \ | ||
136 | + *((ETYPE *)vd + H(i)) = j >= vlmax ? 0 : *((ETYPE *)vs2 + H(j)); \ | ||
137 | + } \ | ||
138 | + CLEAR_FN(vd, vl, vl * sizeof(ETYPE), vlmax * sizeof(ETYPE)); \ | ||
139 | +} | ||
140 | + | ||
141 | +/* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */ | ||
142 | +GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t, H1, clearb) | ||
143 | +GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2, clearh) | ||
144 | +GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4, clearl) | ||
145 | +GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8, clearq) | ||
146 | + | ||
147 | +#define GEN_VEXT_VSLIDE1UP_VX(NAME, ETYPE, H, CLEAR_FN) \ | ||
148 | +void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ | ||
149 | + CPURISCVState *env, uint32_t desc) \ | ||
150 | +{ \ | ||
151 | + uint32_t mlen = vext_mlen(desc); \ | ||
152 | + uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen; \ | ||
153 | + uint32_t vm = vext_vm(desc); \ | ||
154 | + uint32_t vl = env->vl; \ | ||
155 | + uint32_t i; \ | ||
156 | + \ | ||
157 | + for (i = 0; i < vl; i++) { \ | ||
158 | + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ | ||
159 | + continue; \ | ||
160 | + } \ | ||
161 | + if (i == 0) { \ | ||
162 | + *((ETYPE *)vd + H(i)) = s1; \ | ||
163 | + } else { \ | ||
164 | + *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1)); \ | ||
165 | + } \ | ||
166 | + } \ | ||
167 | + CLEAR_FN(vd, vl, vl * sizeof(ETYPE), vlmax * sizeof(ETYPE)); \ | ||
168 | +} | ||
169 | + | ||
170 | +/* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */ | ||
171 | +GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, uint8_t, H1, clearb) | ||
172 | +GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, uint16_t, H2, clearh) | ||
173 | +GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, uint32_t, H4, clearl) | ||
174 | +GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, uint64_t, H8, clearq) | ||
175 | + | ||
176 | +#define GEN_VEXT_VSLIDE1DOWN_VX(NAME, ETYPE, H, CLEAR_FN) \ | ||
177 | +void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ | ||
178 | + CPURISCVState *env, uint32_t desc) \ | ||
179 | +{ \ | ||
180 | + uint32_t mlen = vext_mlen(desc); \ | ||
181 | + uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen; \ | ||
182 | + uint32_t vm = vext_vm(desc); \ | ||
183 | + uint32_t vl = env->vl; \ | ||
184 | + uint32_t i; \ | ||
185 | + \ | ||
186 | + for (i = 0; i < vl; i++) { \ | ||
187 | + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ | ||
188 | + continue; \ | ||
189 | + } \ | ||
190 | + if (i == vl - 1) { \ | ||
191 | + *((ETYPE *)vd + H(i)) = s1; \ | ||
192 | + } else { \ | ||
193 | + *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1)); \ | ||
194 | + } \ | ||
195 | + } \ | ||
196 | + CLEAR_FN(vd, vl, vl * sizeof(ETYPE), vlmax * sizeof(ETYPE)); \ | ||
197 | +} | ||
198 | + | ||
199 | +/* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */ | ||
200 | +GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, uint8_t, H1, clearb) | ||
201 | +GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, uint16_t, H2, clearh) | ||
202 | +GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, uint32_t, H4, clearl) | ||
203 | +GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, uint64_t, H8, clearq) | ||
204 | -- | 216 | -- |
205 | 2.27.0 | 217 | 2.41.0 |
206 | |||
207 | diff view generated by jsdifflib |
1 | From: LIU Zhiwei <zhiwei_liu@c-sky.com> | 1 | From: Max Chou <max.chou@sifive.com> |
---|---|---|---|
2 | 2 | ||
3 | Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> | 3 | Allows sharing of sm4_subword between different targets. |
4 | |||
5 | Signed-off-by: Max Chou <max.chou@sifive.com> | ||
6 | Reviewed-by: Frank Chang <frank.chang@sifive.com> | ||
4 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 7 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
5 | Reviewed-by: Alistair Francis <alistair.francis@wdc.com> | 8 | Signed-off-by: Max Chou <max.chou@sifive.com> |
6 | Message-id: 20200623215920.2594-48-zhiwei_liu@c-sky.com | 9 | Message-ID: <20230711165917.2629866-14-max.chou@sifive.com> |
7 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | 10 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> |
8 | --- | 11 | --- |
9 | target/riscv/helper.h | 10 +++++++ | 12 | include/crypto/sm4.h | 8 ++++++++ |
10 | target/riscv/insn32.decode | 4 +++ | 13 | target/arm/tcg/crypto_helper.c | 10 ++-------- |
11 | target/riscv/insn_trans/trans_rvv.inc.c | 5 ++++ | 14 | 2 files changed, 10 insertions(+), 8 deletions(-) |
12 | target/riscv/vector_helper.c | 39 +++++++++++++++++++++++++ | ||
13 | 4 files changed, 58 insertions(+) | ||
14 | 15 | ||
15 | diff --git a/target/riscv/helper.h b/target/riscv/helper.h | 16 | diff --git a/include/crypto/sm4.h b/include/crypto/sm4.h |
16 | index XXXXXXX..XXXXXXX 100644 | 17 | index XXXXXXX..XXXXXXX 100644 |
17 | --- a/target/riscv/helper.h | 18 | --- a/include/crypto/sm4.h |
18 | +++ b/target/riscv/helper.h | 19 | +++ b/include/crypto/sm4.h |
19 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vwredsumu_vs_w, void, ptr, ptr, ptr, ptr, env, i32) | 20 | @@ -XXX,XX +XXX,XX @@ |
20 | DEF_HELPER_6(vwredsum_vs_b, void, ptr, ptr, ptr, ptr, env, i32) | 21 | |
21 | DEF_HELPER_6(vwredsum_vs_h, void, ptr, ptr, ptr, ptr, env, i32) | 22 | extern const uint8_t sm4_sbox[256]; |
22 | DEF_HELPER_6(vwredsum_vs_w, void, ptr, ptr, ptr, ptr, env, i32) | 23 | |
23 | + | 24 | +static inline uint32_t sm4_subword(uint32_t word) |
24 | +DEF_HELPER_6(vfredsum_vs_h, void, ptr, ptr, ptr, ptr, env, i32) | 25 | +{ |
25 | +DEF_HELPER_6(vfredsum_vs_w, void, ptr, ptr, ptr, ptr, env, i32) | 26 | + return sm4_sbox[word & 0xff] | |
26 | +DEF_HELPER_6(vfredsum_vs_d, void, ptr, ptr, ptr, ptr, env, i32) | 27 | + sm4_sbox[(word >> 8) & 0xff] << 8 | |
27 | +DEF_HELPER_6(vfredmax_vs_h, void, ptr, ptr, ptr, ptr, env, i32) | 28 | + sm4_sbox[(word >> 16) & 0xff] << 16 | |
28 | +DEF_HELPER_6(vfredmax_vs_w, void, ptr, ptr, ptr, ptr, env, i32) | 29 | + sm4_sbox[(word >> 24) & 0xff] << 24; |
29 | +DEF_HELPER_6(vfredmax_vs_d, void, ptr, ptr, ptr, ptr, env, i32) | ||
30 | +DEF_HELPER_6(vfredmin_vs_h, void, ptr, ptr, ptr, ptr, env, i32) | ||
31 | +DEF_HELPER_6(vfredmin_vs_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
32 | +DEF_HELPER_6(vfredmin_vs_d, void, ptr, ptr, ptr, ptr, env, i32) | ||
33 | diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode | ||
34 | index XXXXXXX..XXXXXXX 100644 | ||
35 | --- a/target/riscv/insn32.decode | ||
36 | +++ b/target/riscv/insn32.decode | ||
37 | @@ -XXX,XX +XXX,XX @@ vredmaxu_vs 000110 . ..... ..... 010 ..... 1010111 @r_vm | ||
38 | vredmax_vs 000111 . ..... ..... 010 ..... 1010111 @r_vm | ||
39 | vwredsumu_vs 110000 . ..... ..... 000 ..... 1010111 @r_vm | ||
40 | vwredsum_vs 110001 . ..... ..... 000 ..... 1010111 @r_vm | ||
41 | +# Vector ordered and unordered reduction sum | ||
42 | +vfredsum_vs 0000-1 . ..... ..... 001 ..... 1010111 @r_vm | ||
43 | +vfredmin_vs 000101 . ..... ..... 001 ..... 1010111 @r_vm | ||
44 | +vfredmax_vs 000111 . ..... ..... 001 ..... 1010111 @r_vm | ||
45 | |||
46 | vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm | ||
47 | vsetvl 1000000 ..... ..... 111 ..... 1010111 @r | ||
48 | diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c | ||
49 | index XXXXXXX..XXXXXXX 100644 | ||
50 | --- a/target/riscv/insn_trans/trans_rvv.inc.c | ||
51 | +++ b/target/riscv/insn_trans/trans_rvv.inc.c | ||
52 | @@ -XXX,XX +XXX,XX @@ GEN_OPIVV_TRANS(vredxor_vs, reduction_check) | ||
53 | /* Vector Widening Integer Reduction Instructions */ | ||
54 | GEN_OPIVV_WIDEN_TRANS(vwredsum_vs, reduction_check) | ||
55 | GEN_OPIVV_WIDEN_TRANS(vwredsumu_vs, reduction_check) | ||
56 | + | ||
57 | +/* Vector Single-Width Floating-Point Reduction Instructions */ | ||
58 | +GEN_OPFVV_TRANS(vfredsum_vs, reduction_check) | ||
59 | +GEN_OPFVV_TRANS(vfredmax_vs, reduction_check) | ||
60 | +GEN_OPFVV_TRANS(vfredmin_vs, reduction_check) | ||
61 | diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c | ||
62 | index XXXXXXX..XXXXXXX 100644 | ||
63 | --- a/target/riscv/vector_helper.c | ||
64 | +++ b/target/riscv/vector_helper.c | ||
65 | @@ -XXX,XX +XXX,XX @@ GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD, clearq) | ||
66 | GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t, H2, H1, DO_ADD, clearh) | ||
67 | GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD, clearl) | ||
68 | GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD, clearq) | ||
69 | + | ||
70 | +/* Vector Single-Width Floating-Point Reduction Instructions */ | ||
71 | +#define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP, CLEAR_FN)\ | ||
72 | +void HELPER(NAME)(void *vd, void *v0, void *vs1, \ | ||
73 | + void *vs2, CPURISCVState *env, \ | ||
74 | + uint32_t desc) \ | ||
75 | +{ \ | ||
76 | + uint32_t mlen = vext_mlen(desc); \ | ||
77 | + uint32_t vm = vext_vm(desc); \ | ||
78 | + uint32_t vl = env->vl; \ | ||
79 | + uint32_t i; \ | ||
80 | + uint32_t tot = env_archcpu(env)->cfg.vlen / 8; \ | ||
81 | + TD s1 = *((TD *)vs1 + HD(0)); \ | ||
82 | + \ | ||
83 | + for (i = 0; i < vl; i++) { \ | ||
84 | + TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ | ||
85 | + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ | ||
86 | + continue; \ | ||
87 | + } \ | ||
88 | + s1 = OP(s1, (TD)s2, &env->fp_status); \ | ||
89 | + } \ | ||
90 | + *((TD *)vd + HD(0)) = s1; \ | ||
91 | + CLEAR_FN(vd, 1, sizeof(TD), tot); \ | ||
92 | +} | 30 | +} |
93 | + | 31 | + |
94 | +/* Unordered sum */ | 32 | #endif |
95 | +GEN_VEXT_FRED(vfredsum_vs_h, uint16_t, uint16_t, H2, H2, float16_add, clearh) | 33 | diff --git a/target/arm/tcg/crypto_helper.c b/target/arm/tcg/crypto_helper.c |
96 | +GEN_VEXT_FRED(vfredsum_vs_w, uint32_t, uint32_t, H4, H4, float32_add, clearl) | 34 | index XXXXXXX..XXXXXXX 100644 |
97 | +GEN_VEXT_FRED(vfredsum_vs_d, uint64_t, uint64_t, H8, H8, float64_add, clearq) | 35 | --- a/target/arm/tcg/crypto_helper.c |
98 | + | 36 | +++ b/target/arm/tcg/crypto_helper.c |
99 | +/* Maximum value */ | 37 | @@ -XXX,XX +XXX,XX @@ static void do_crypto_sm4e(uint64_t *rd, uint64_t *rn, uint64_t *rm) |
100 | +GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2, float16_maxnum, clearh) | 38 | CR_ST_WORD(d, (i + 3) % 4) ^ |
101 | +GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4, float32_maxnum, clearl) | 39 | CR_ST_WORD(n, i); |
102 | +GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8, float64_maxnum, clearq) | 40 | |
103 | + | 41 | - t = sm4_sbox[t & 0xff] | |
104 | +/* Minimum value */ | 42 | - sm4_sbox[(t >> 8) & 0xff] << 8 | |
105 | +GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2, float16_minnum, clearh) | 43 | - sm4_sbox[(t >> 16) & 0xff] << 16 | |
106 | +GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4, float32_minnum, clearl) | 44 | - sm4_sbox[(t >> 24) & 0xff] << 24; |
107 | +GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8, float64_minnum, clearq) | 45 | + t = sm4_subword(t); |
46 | |||
47 | CR_ST_WORD(d, i) ^= t ^ rol32(t, 2) ^ rol32(t, 10) ^ rol32(t, 18) ^ | ||
48 | rol32(t, 24); | ||
49 | @@ -XXX,XX +XXX,XX @@ static void do_crypto_sm4ekey(uint64_t *rd, uint64_t *rn, uint64_t *rm) | ||
50 | CR_ST_WORD(d, (i + 3) % 4) ^ | ||
51 | CR_ST_WORD(m, i); | ||
52 | |||
53 | - t = sm4_sbox[t & 0xff] | | ||
54 | - sm4_sbox[(t >> 8) & 0xff] << 8 | | ||
55 | - sm4_sbox[(t >> 16) & 0xff] << 16 | | ||
56 | - sm4_sbox[(t >> 24) & 0xff] << 24; | ||
57 | + t = sm4_subword(t); | ||
58 | |||
59 | CR_ST_WORD(d, i) ^= t ^ rol32(t, 13) ^ rol32(t, 23); | ||
60 | } | ||
108 | -- | 61 | -- |
109 | 2.27.0 | 62 | 2.41.0 |
110 | |||
111 | diff view generated by jsdifflib |
1 | From: LIU Zhiwei <zhiwei_liu@c-sky.com> | 1 | From: Max Chou <max.chou@sifive.com> |
---|---|---|---|
2 | 2 | ||
3 | Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> | 3 | Adds sm4_ck constant for use in sm4 cryptography across different targets. |
4 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 4 | |
5 | Reviewed-by: Alistair Francis <alistair.francis@wdc.com> | 5 | Signed-off-by: Max Chou <max.chou@sifive.com> |
6 | Message-id: 20200623215920.2594-47-zhiwei_liu@c-sky.com | 6 | Reviewed-by: Frank Chang <frank.chang@sifive.com> |
7 | Signed-off-by: Max Chou <max.chou@sifive.com> | ||
8 | Message-ID: <20230711165917.2629866-15-max.chou@sifive.com> | ||
7 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | 9 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> |
8 | --- | 10 | --- |
9 | target/riscv/helper.h | 7 +++++++ | 11 | include/crypto/sm4.h | 1 + |
10 | target/riscv/insn32.decode | 2 ++ | 12 | crypto/sm4.c | 10 ++++++++++ |
11 | target/riscv/insn_trans/trans_rvv.inc.c | 4 ++++ | 13 | 2 files changed, 11 insertions(+) |
12 | target/riscv/vector_helper.c | 11 +++++++++++ | ||
13 | 4 files changed, 24 insertions(+) | ||
14 | 14 | ||
15 | diff --git a/target/riscv/helper.h b/target/riscv/helper.h | 15 | diff --git a/include/crypto/sm4.h b/include/crypto/sm4.h |
16 | index XXXXXXX..XXXXXXX 100644 | 16 | index XXXXXXX..XXXXXXX 100644 |
17 | --- a/target/riscv/helper.h | 17 | --- a/include/crypto/sm4.h |
18 | +++ b/target/riscv/helper.h | 18 | +++ b/include/crypto/sm4.h |
19 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vredxor_vs_b, void, ptr, ptr, ptr, ptr, env, i32) | 19 | @@ -XXX,XX +XXX,XX @@ |
20 | DEF_HELPER_6(vredxor_vs_h, void, ptr, ptr, ptr, ptr, env, i32) | 20 | #define QEMU_SM4_H |
21 | DEF_HELPER_6(vredxor_vs_w, void, ptr, ptr, ptr, ptr, env, i32) | 21 | |
22 | DEF_HELPER_6(vredxor_vs_d, void, ptr, ptr, ptr, ptr, env, i32) | 22 | extern const uint8_t sm4_sbox[256]; |
23 | + | 23 | +extern const uint32_t sm4_ck[32]; |
24 | +DEF_HELPER_6(vwredsumu_vs_b, void, ptr, ptr, ptr, ptr, env, i32) | 24 | |
25 | +DEF_HELPER_6(vwredsumu_vs_h, void, ptr, ptr, ptr, ptr, env, i32) | 25 | static inline uint32_t sm4_subword(uint32_t word) |
26 | +DEF_HELPER_6(vwredsumu_vs_w, void, ptr, ptr, ptr, ptr, env, i32) | 26 | { |
27 | +DEF_HELPER_6(vwredsum_vs_b, void, ptr, ptr, ptr, ptr, env, i32) | 27 | diff --git a/crypto/sm4.c b/crypto/sm4.c |
28 | +DEF_HELPER_6(vwredsum_vs_h, void, ptr, ptr, ptr, ptr, env, i32) | ||
29 | +DEF_HELPER_6(vwredsum_vs_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
30 | diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode | ||
31 | index XXXXXXX..XXXXXXX 100644 | 28 | index XXXXXXX..XXXXXXX 100644 |
32 | --- a/target/riscv/insn32.decode | 29 | --- a/crypto/sm4.c |
33 | +++ b/target/riscv/insn32.decode | 30 | +++ b/crypto/sm4.c |
34 | @@ -XXX,XX +XXX,XX @@ vredminu_vs 000100 . ..... ..... 010 ..... 1010111 @r_vm | 31 | @@ -XXX,XX +XXX,XX @@ uint8_t const sm4_sbox[] = { |
35 | vredmin_vs 000101 . ..... ..... 010 ..... 1010111 @r_vm | 32 | 0x79, 0xee, 0x5f, 0x3e, 0xd7, 0xcb, 0x39, 0x48, |
36 | vredmaxu_vs 000110 . ..... ..... 010 ..... 1010111 @r_vm | 33 | }; |
37 | vredmax_vs 000111 . ..... ..... 010 ..... 1010111 @r_vm | 34 | |
38 | +vwredsumu_vs 110000 . ..... ..... 000 ..... 1010111 @r_vm | 35 | +uint32_t const sm4_ck[] = { |
39 | +vwredsum_vs 110001 . ..... ..... 000 ..... 1010111 @r_vm | 36 | + 0x00070e15, 0x1c232a31, 0x383f464d, 0x545b6269, |
40 | 37 | + 0x70777e85, 0x8c939aa1, 0xa8afb6bd, 0xc4cbd2d9, | |
41 | vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm | 38 | + 0xe0e7eef5, 0xfc030a11, 0x181f262d, 0x343b4249, |
42 | vsetvl 1000000 ..... ..... 111 ..... 1010111 @r | 39 | + 0x50575e65, 0x6c737a81, 0x888f969d, 0xa4abb2b9, |
43 | diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c | 40 | + 0xc0c7ced5, 0xdce3eaf1, 0xf8ff060d, 0x141b2229, |
44 | index XXXXXXX..XXXXXXX 100644 | 41 | + 0x30373e45, 0x4c535a61, 0x686f767d, 0x848b9299, |
45 | --- a/target/riscv/insn_trans/trans_rvv.inc.c | 42 | + 0xa0a7aeb5, 0xbcc3cad1, 0xd8dfe6ed, 0xf4fb0209, |
46 | +++ b/target/riscv/insn_trans/trans_rvv.inc.c | 43 | + 0x10171e25, 0x2c333a41, 0x484f565d, 0x646b7279 |
47 | @@ -XXX,XX +XXX,XX @@ GEN_OPIVV_TRANS(vredmin_vs, reduction_check) | 44 | +}; |
48 | GEN_OPIVV_TRANS(vredand_vs, reduction_check) | ||
49 | GEN_OPIVV_TRANS(vredor_vs, reduction_check) | ||
50 | GEN_OPIVV_TRANS(vredxor_vs, reduction_check) | ||
51 | + | ||
52 | +/* Vector Widening Integer Reduction Instructions */ | ||
53 | +GEN_OPIVV_WIDEN_TRANS(vwredsum_vs, reduction_check) | ||
54 | +GEN_OPIVV_WIDEN_TRANS(vwredsumu_vs, reduction_check) | ||
55 | diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c | ||
56 | index XXXXXXX..XXXXXXX 100644 | ||
57 | --- a/target/riscv/vector_helper.c | ||
58 | +++ b/target/riscv/vector_helper.c | ||
59 | @@ -XXX,XX +XXX,XX @@ GEN_VEXT_RED(vredxor_vs_b, int8_t, int8_t, H1, H1, DO_XOR, clearb) | ||
60 | GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR, clearh) | ||
61 | GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR, clearl) | ||
62 | GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR, clearq) | ||
63 | + | ||
64 | +/* Vector Widening Integer Reduction Instructions */ | ||
65 | +/* signed sum reduction into double-width accumulator */ | ||
66 | +GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t, H2, H1, DO_ADD, clearh) | ||
67 | +GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD, clearl) | ||
68 | +GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD, clearq) | ||
69 | + | ||
70 | +/* Unsigned sum reduction into double-width accumulator */ | ||
71 | +GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t, H2, H1, DO_ADD, clearh) | ||
72 | +GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD, clearl) | ||
73 | +GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD, clearq) | ||
74 | -- | 45 | -- |
75 | 2.27.0 | 46 | 2.41.0 |
76 | |||
77 | diff view generated by jsdifflib |
1 | From: LIU Zhiwei <zhiwei_liu@c-sky.com> | 1 | From: Max Chou <max.chou@sifive.com> |
---|---|---|---|
2 | 2 | ||
3 | Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> | 3 | This commit adds support for the Zvksed vector-crypto extension, which |
4 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 4 | consists of the following instructions: |
5 | Reviewed-by: Alistair Francis <alistair.francis@wdc.com> | 5 | |
6 | Message-id: 20200623215920.2594-31-zhiwei_liu@c-sky.com | 6 | * vsm4k.vi |
7 | * vsm4r.[vv,vs] | ||
8 | |||
9 | Translation functions are defined in | ||
10 | `target/riscv/insn_trans/trans_rvvk.c.inc` and helpers are defined in | ||
11 | `target/riscv/vcrypto_helper.c`. | ||
12 | |||
13 | Signed-off-by: Max Chou <max.chou@sifive.com> | ||
14 | Reviewed-by: Frank Chang <frank.chang@sifive.com> | ||
15 | [lawrence.hunter@codethink.co.uk: Moved SM4 functions from | ||
16 | crypto_helper.c to vcrypto_helper.c] | ||
17 | [nazar.kazakov@codethink.co.uk: Added alignment checks, refactored code to | ||
18 | use macros, and minor style changes] | ||
19 | Signed-off-by: Max Chou <max.chou@sifive.com> | ||
20 | Message-ID: <20230711165917.2629866-16-max.chou@sifive.com> | ||
7 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | 21 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> |
8 | --- | 22 | --- |
9 | target/riscv/helper.h | 16 ++++ | 23 | target/riscv/cpu_cfg.h | 1 + |
10 | target/riscv/insn32.decode | 5 + | 24 | target/riscv/helper.h | 4 + |
11 | target/riscv/insn_trans/trans_rvv.inc.c | 118 ++++++++++++++++++++++++ | 25 | target/riscv/insn32.decode | 5 + |
12 | target/riscv/vector_helper.c | 111 ++++++++++++++++++++++ | 26 | target/riscv/cpu.c | 5 +- |
13 | 4 files changed, 250 insertions(+) | 27 | target/riscv/vcrypto_helper.c | 127 +++++++++++++++++++++++ |
14 | 28 | target/riscv/insn_trans/trans_rvvk.c.inc | 43 ++++++++ | |
29 | 6 files changed, 184 insertions(+), 1 deletion(-) | ||
30 | |||
31 | diff --git a/target/riscv/cpu_cfg.h b/target/riscv/cpu_cfg.h | ||
32 | index XXXXXXX..XXXXXXX 100644 | ||
33 | --- a/target/riscv/cpu_cfg.h | ||
34 | +++ b/target/riscv/cpu_cfg.h | ||
35 | @@ -XXX,XX +XXX,XX @@ struct RISCVCPUConfig { | ||
36 | bool ext_zvkned; | ||
37 | bool ext_zvknha; | ||
38 | bool ext_zvknhb; | ||
39 | + bool ext_zvksed; | ||
40 | bool ext_zvksh; | ||
41 | bool ext_zmmul; | ||
42 | bool ext_zvfbfmin; | ||
15 | diff --git a/target/riscv/helper.h b/target/riscv/helper.h | 43 | diff --git a/target/riscv/helper.h b/target/riscv/helper.h |
16 | index XXXXXXX..XXXXXXX 100644 | 44 | index XXXXXXX..XXXXXXX 100644 |
17 | --- a/target/riscv/helper.h | 45 | --- a/target/riscv/helper.h |
18 | +++ b/target/riscv/helper.h | 46 | +++ b/target/riscv/helper.h |
19 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vnclipu_vx_w, void, ptr, ptr, tl, ptr, env, i32) | 47 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_5(vsm3c_vi, void, ptr, ptr, i32, env, i32) |
20 | DEF_HELPER_6(vnclip_vx_b, void, ptr, ptr, tl, ptr, env, i32) | 48 | |
21 | DEF_HELPER_6(vnclip_vx_h, void, ptr, ptr, tl, ptr, env, i32) | 49 | DEF_HELPER_5(vghsh_vv, void, ptr, ptr, ptr, env, i32) |
22 | DEF_HELPER_6(vnclip_vx_w, void, ptr, ptr, tl, ptr, env, i32) | 50 | DEF_HELPER_4(vgmul_vv, void, ptr, ptr, env, i32) |
23 | + | 51 | + |
24 | +DEF_HELPER_6(vfadd_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | 52 | +DEF_HELPER_5(vsm4k_vi, void, ptr, ptr, i32, env, i32) |
25 | +DEF_HELPER_6(vfadd_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | 53 | +DEF_HELPER_4(vsm4r_vv, void, ptr, ptr, env, i32) |
26 | +DEF_HELPER_6(vfadd_vv_d, void, ptr, ptr, ptr, ptr, env, i32) | 54 | +DEF_HELPER_4(vsm4r_vs, void, ptr, ptr, env, i32) |
27 | +DEF_HELPER_6(vfsub_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | ||
28 | +DEF_HELPER_6(vfsub_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
29 | +DEF_HELPER_6(vfsub_vv_d, void, ptr, ptr, ptr, ptr, env, i32) | ||
30 | +DEF_HELPER_6(vfadd_vf_h, void, ptr, ptr, i64, ptr, env, i32) | ||
31 | +DEF_HELPER_6(vfadd_vf_w, void, ptr, ptr, i64, ptr, env, i32) | ||
32 | +DEF_HELPER_6(vfadd_vf_d, void, ptr, ptr, i64, ptr, env, i32) | ||
33 | +DEF_HELPER_6(vfsub_vf_h, void, ptr, ptr, i64, ptr, env, i32) | ||
34 | +DEF_HELPER_6(vfsub_vf_w, void, ptr, ptr, i64, ptr, env, i32) | ||
35 | +DEF_HELPER_6(vfsub_vf_d, void, ptr, ptr, i64, ptr, env, i32) | ||
36 | +DEF_HELPER_6(vfrsub_vf_h, void, ptr, ptr, i64, ptr, env, i32) | ||
37 | +DEF_HELPER_6(vfrsub_vf_w, void, ptr, ptr, i64, ptr, env, i32) | ||
38 | +DEF_HELPER_6(vfrsub_vf_d, void, ptr, ptr, i64, ptr, env, i32) | ||
39 | diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode | 55 | diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode |
40 | index XXXXXXX..XXXXXXX 100644 | 56 | index XXXXXXX..XXXXXXX 100644 |
41 | --- a/target/riscv/insn32.decode | 57 | --- a/target/riscv/insn32.decode |
42 | +++ b/target/riscv/insn32.decode | 58 | +++ b/target/riscv/insn32.decode |
43 | @@ -XXX,XX +XXX,XX @@ vnclipu_vi 101110 . ..... ..... 011 ..... 1010111 @r_vm | 59 | @@ -XXX,XX +XXX,XX @@ vsm3c_vi 101011 1 ..... ..... 010 ..... 1110111 @r_vm_1 |
44 | vnclip_vv 101111 . ..... ..... 000 ..... 1010111 @r_vm | 60 | # *** Zvkg vector crypto extension *** |
45 | vnclip_vx 101111 . ..... ..... 100 ..... 1010111 @r_vm | 61 | vghsh_vv 101100 1 ..... ..... 010 ..... 1110111 @r_vm_1 |
46 | vnclip_vi 101111 . ..... ..... 011 ..... 1010111 @r_vm | 62 | vgmul_vv 101000 1 ..... 10001 010 ..... 1110111 @r2_vm_1 |
47 | +vfadd_vv 000000 . ..... ..... 001 ..... 1010111 @r_vm | 63 | + |
48 | +vfadd_vf 000000 . ..... ..... 101 ..... 1010111 @r_vm | 64 | +# *** Zvksed vector crypto extension *** |
49 | +vfsub_vv 000010 . ..... ..... 001 ..... 1010111 @r_vm | 65 | +vsm4k_vi 100001 1 ..... ..... 010 ..... 1110111 @r_vm_1 |
50 | +vfsub_vf 000010 . ..... ..... 101 ..... 1010111 @r_vm | 66 | +vsm4r_vv 101000 1 ..... 10000 010 ..... 1110111 @r2_vm_1 |
51 | +vfrsub_vf 100111 . ..... ..... 101 ..... 1010111 @r_vm | 67 | +vsm4r_vs 101001 1 ..... 10000 010 ..... 1110111 @r2_vm_1 |
52 | 68 | diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c | |
53 | vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm | 69 | index XXXXXXX..XXXXXXX 100644 |
54 | vsetvl 1000000 ..... ..... 111 ..... 1010111 @r | 70 | --- a/target/riscv/cpu.c |
55 | diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c | 71 | +++ b/target/riscv/cpu.c |
56 | index XXXXXXX..XXXXXXX 100644 | 72 | @@ -XXX,XX +XXX,XX @@ static const struct isa_ext_data isa_edata_arr[] = { |
57 | --- a/target/riscv/insn_trans/trans_rvv.inc.c | 73 | ISA_EXT_DATA_ENTRY(zvkned, PRIV_VERSION_1_12_0, ext_zvkned), |
58 | +++ b/target/riscv/insn_trans/trans_rvv.inc.c | 74 | ISA_EXT_DATA_ENTRY(zvknha, PRIV_VERSION_1_12_0, ext_zvknha), |
59 | @@ -XXX,XX +XXX,XX @@ GEN_OPIVX_NARROW_TRANS(vnclipu_vx) | 75 | ISA_EXT_DATA_ENTRY(zvknhb, PRIV_VERSION_1_12_0, ext_zvknhb), |
60 | GEN_OPIVX_NARROW_TRANS(vnclip_vx) | 76 | + ISA_EXT_DATA_ENTRY(zvksed, PRIV_VERSION_1_12_0, ext_zvksed), |
61 | GEN_OPIVI_NARROW_TRANS(vnclipu_vi, 1, vnclipu_vx) | 77 | ISA_EXT_DATA_ENTRY(zvksh, PRIV_VERSION_1_12_0, ext_zvksh), |
62 | GEN_OPIVI_NARROW_TRANS(vnclip_vi, 1, vnclip_vx) | 78 | ISA_EXT_DATA_ENTRY(zhinx, PRIV_VERSION_1_12_0, ext_zhinx), |
63 | + | 79 | ISA_EXT_DATA_ENTRY(zhinxmin, PRIV_VERSION_1_12_0, ext_zhinxmin), |
64 | +/* | 80 | @@ -XXX,XX +XXX,XX @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp) |
65 | + *** Vector Float Point Arithmetic Instructions | 81 | * in qemu |
66 | + */ | 82 | */ |
67 | +/* Vector Single-Width Floating-Point Add/Subtract Instructions */ | 83 | if ((cpu->cfg.ext_zvbb || cpu->cfg.ext_zvkg || cpu->cfg.ext_zvkned || |
68 | + | 84 | - cpu->cfg.ext_zvknha || cpu->cfg.ext_zvksh) && !cpu->cfg.ext_zve32f) { |
69 | +/* | 85 | + cpu->cfg.ext_zvknha || cpu->cfg.ext_zvksed || cpu->cfg.ext_zvksh) && |
70 | + * If the current SEW does not correspond to a supported IEEE floating-point | 86 | + !cpu->cfg.ext_zve32f) { |
71 | + * type, an illegal instruction exception is raised. | 87 | error_setg(errp, |
72 | + */ | 88 | "Vector crypto extensions require V or Zve* extensions"); |
73 | +static bool opfvv_check(DisasContext *s, arg_rmrr *a) | 89 | return; |
74 | +{ | 90 | @@ -XXX,XX +XXX,XX @@ static Property riscv_cpu_extensions[] = { |
75 | + return (vext_check_isa_ill(s) && | 91 | DEFINE_PROP_BOOL("x-zvkned", RISCVCPU, cfg.ext_zvkned, false), |
76 | + vext_check_overlap_mask(s, a->rd, a->vm, false) && | 92 | DEFINE_PROP_BOOL("x-zvknha", RISCVCPU, cfg.ext_zvknha, false), |
77 | + vext_check_reg(s, a->rd, false) && | 93 | DEFINE_PROP_BOOL("x-zvknhb", RISCVCPU, cfg.ext_zvknhb, false), |
78 | + vext_check_reg(s, a->rs2, false) && | 94 | + DEFINE_PROP_BOOL("x-zvksed", RISCVCPU, cfg.ext_zvksed, false), |
79 | + vext_check_reg(s, a->rs1, false) && | 95 | DEFINE_PROP_BOOL("x-zvksh", RISCVCPU, cfg.ext_zvksh, false), |
80 | + (s->sew != 0)); | 96 | |
81 | +} | 97 | DEFINE_PROP_END_OF_LIST(), |
82 | + | 98 | diff --git a/target/riscv/vcrypto_helper.c b/target/riscv/vcrypto_helper.c |
83 | +/* OPFVV without GVEC IR */ | 99 | index XXXXXXX..XXXXXXX 100644 |
84 | +#define GEN_OPFVV_TRANS(NAME, CHECK) \ | 100 | --- a/target/riscv/vcrypto_helper.c |
85 | +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ | 101 | +++ b/target/riscv/vcrypto_helper.c |
86 | +{ \ | ||
87 | + if (CHECK(s, a)) { \ | ||
88 | + uint32_t data = 0; \ | ||
89 | + static gen_helper_gvec_4_ptr * const fns[3] = { \ | ||
90 | + gen_helper_##NAME##_h, \ | ||
91 | + gen_helper_##NAME##_w, \ | ||
92 | + gen_helper_##NAME##_d, \ | ||
93 | + }; \ | ||
94 | + TCGLabel *over = gen_new_label(); \ | ||
95 | + gen_set_rm(s, 7); \ | ||
96 | + tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ | ||
97 | + \ | ||
98 | + data = FIELD_DP32(data, VDATA, MLEN, s->mlen); \ | ||
99 | + data = FIELD_DP32(data, VDATA, VM, a->vm); \ | ||
100 | + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ | ||
101 | + tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ | ||
102 | + vreg_ofs(s, a->rs1), \ | ||
103 | + vreg_ofs(s, a->rs2), cpu_env, 0, \ | ||
104 | + s->vlen / 8, data, fns[s->sew - 1]); \ | ||
105 | + gen_set_label(over); \ | ||
106 | + return true; \ | ||
107 | + } \ | ||
108 | + return false; \ | ||
109 | +} | ||
110 | +GEN_OPFVV_TRANS(vfadd_vv, opfvv_check) | ||
111 | +GEN_OPFVV_TRANS(vfsub_vv, opfvv_check) | ||
112 | + | ||
113 | +typedef void gen_helper_opfvf(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_ptr, | ||
114 | + TCGv_env, TCGv_i32); | ||
115 | + | ||
116 | +static bool opfvf_trans(uint32_t vd, uint32_t rs1, uint32_t vs2, | ||
117 | + uint32_t data, gen_helper_opfvf *fn, DisasContext *s) | ||
118 | +{ | ||
119 | + TCGv_ptr dest, src2, mask; | ||
120 | + TCGv_i32 desc; | ||
121 | + | ||
122 | + TCGLabel *over = gen_new_label(); | ||
123 | + tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); | ||
124 | + | ||
125 | + dest = tcg_temp_new_ptr(); | ||
126 | + mask = tcg_temp_new_ptr(); | ||
127 | + src2 = tcg_temp_new_ptr(); | ||
128 | + desc = tcg_const_i32(simd_desc(0, s->vlen / 8, data)); | ||
129 | + | ||
130 | + tcg_gen_addi_ptr(dest, cpu_env, vreg_ofs(s, vd)); | ||
131 | + tcg_gen_addi_ptr(src2, cpu_env, vreg_ofs(s, vs2)); | ||
132 | + tcg_gen_addi_ptr(mask, cpu_env, vreg_ofs(s, 0)); | ||
133 | + | ||
134 | + fn(dest, mask, cpu_fpr[rs1], src2, cpu_env, desc); | ||
135 | + | ||
136 | + tcg_temp_free_ptr(dest); | ||
137 | + tcg_temp_free_ptr(mask); | ||
138 | + tcg_temp_free_ptr(src2); | ||
139 | + tcg_temp_free_i32(desc); | ||
140 | + gen_set_label(over); | ||
141 | + return true; | ||
142 | +} | ||
143 | + | ||
144 | +static bool opfvf_check(DisasContext *s, arg_rmrr *a) | ||
145 | +{ | ||
146 | +/* | ||
147 | + * If the current SEW does not correspond to a supported IEEE floating-point | ||
148 | + * type, an illegal instruction exception is raised | ||
149 | + */ | ||
150 | + return (vext_check_isa_ill(s) && | ||
151 | + vext_check_overlap_mask(s, a->rd, a->vm, false) && | ||
152 | + vext_check_reg(s, a->rd, false) && | ||
153 | + vext_check_reg(s, a->rs2, false) && | ||
154 | + (s->sew != 0)); | ||
155 | +} | ||
156 | + | ||
157 | +/* OPFVF without GVEC IR */ | ||
158 | +#define GEN_OPFVF_TRANS(NAME, CHECK) \ | ||
159 | +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ | ||
160 | +{ \ | ||
161 | + if (CHECK(s, a)) { \ | ||
162 | + uint32_t data = 0; \ | ||
163 | + static gen_helper_opfvf *const fns[3] = { \ | ||
164 | + gen_helper_##NAME##_h, \ | ||
165 | + gen_helper_##NAME##_w, \ | ||
166 | + gen_helper_##NAME##_d, \ | ||
167 | + }; \ | ||
168 | + gen_set_rm(s, 7); \ | ||
169 | + data = FIELD_DP32(data, VDATA, MLEN, s->mlen); \ | ||
170 | + data = FIELD_DP32(data, VDATA, VM, a->vm); \ | ||
171 | + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ | ||
172 | + return opfvf_trans(a->rd, a->rs1, a->rs2, data, \ | ||
173 | + fns[s->sew - 1], s); \ | ||
174 | + } \ | ||
175 | + return false; \ | ||
176 | +} | ||
177 | + | ||
178 | +GEN_OPFVF_TRANS(vfadd_vf, opfvf_check) | ||
179 | +GEN_OPFVF_TRANS(vfsub_vf, opfvf_check) | ||
180 | +GEN_OPFVF_TRANS(vfrsub_vf, opfvf_check) | ||
181 | diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c | ||
182 | index XXXXXXX..XXXXXXX 100644 | ||
183 | --- a/target/riscv/vector_helper.c | ||
184 | +++ b/target/riscv/vector_helper.c | ||
185 | @@ -XXX,XX +XXX,XX @@ | 102 | @@ -XXX,XX +XXX,XX @@ |
103 | #include "cpu.h" | ||
104 | #include "crypto/aes.h" | ||
105 | #include "crypto/aes-round.h" | ||
106 | +#include "crypto/sm4.h" | ||
186 | #include "exec/memop.h" | 107 | #include "exec/memop.h" |
187 | #include "exec/exec-all.h" | 108 | #include "exec/exec-all.h" |
188 | #include "exec/helper-proto.h" | 109 | #include "exec/helper-proto.h" |
189 | +#include "fpu/softfloat.h" | 110 | @@ -XXX,XX +XXX,XX @@ void HELPER(vgmul_vv)(void *vd_vptr, void *vs2_vptr, CPURISCVState *env, |
190 | #include "tcg/tcg-gvec-desc.h" | 111 | vext_set_elems_1s(vd, vta, env->vl * 4, total_elems * 4); |
191 | #include "internals.h" | 112 | env->vstart = 0; |
192 | #include <math.h> | 113 | } |
193 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVX2_RM, vnclipu_vx_w, NOP_UUU_W, H4, H8, vnclipu32) | 114 | + |
194 | GEN_VEXT_VX_RM(vnclipu_vx_b, 1, 1, clearb) | 115 | +void HELPER(vsm4k_vi)(void *vd, void *vs2, uint32_t uimm5, CPURISCVState *env, |
195 | GEN_VEXT_VX_RM(vnclipu_vx_h, 2, 2, clearh) | 116 | + uint32_t desc) |
196 | GEN_VEXT_VX_RM(vnclipu_vx_w, 4, 4, clearl) | 117 | +{ |
118 | + const uint32_t egs = 4; | ||
119 | + uint32_t rnd = uimm5 & 0x7; | ||
120 | + uint32_t group_start = env->vstart / egs; | ||
121 | + uint32_t group_end = env->vl / egs; | ||
122 | + uint32_t esz = sizeof(uint32_t); | ||
123 | + uint32_t total_elems = vext_get_total_elems(env, desc, esz); | ||
124 | + | ||
125 | + for (uint32_t i = group_start; i < group_end; ++i) { | ||
126 | + uint32_t vstart = i * egs; | ||
127 | + uint32_t vend = (i + 1) * egs; | ||
128 | + uint32_t rk[4] = {0}; | ||
129 | + uint32_t tmp[8] = {0}; | ||
130 | + | ||
131 | + for (uint32_t j = vstart; j < vend; ++j) { | ||
132 | + rk[j - vstart] = *((uint32_t *)vs2 + H4(j)); | ||
133 | + } | ||
134 | + | ||
135 | + for (uint32_t j = 0; j < egs; ++j) { | ||
136 | + tmp[j] = rk[j]; | ||
137 | + } | ||
138 | + | ||
139 | + for (uint32_t j = 0; j < egs; ++j) { | ||
140 | + uint32_t b, s; | ||
141 | + b = tmp[j + 1] ^ tmp[j + 2] ^ tmp[j + 3] ^ sm4_ck[rnd * 4 + j]; | ||
142 | + | ||
143 | + s = sm4_subword(b); | ||
144 | + | ||
145 | + tmp[j + 4] = tmp[j] ^ (s ^ rol32(s, 13) ^ rol32(s, 23)); | ||
146 | + } | ||
147 | + | ||
148 | + for (uint32_t j = vstart; j < vend; ++j) { | ||
149 | + *((uint32_t *)vd + H4(j)) = tmp[egs + (j - vstart)]; | ||
150 | + } | ||
151 | + } | ||
152 | + | ||
153 | + env->vstart = 0; | ||
154 | + /* set tail elements to 1s */ | ||
155 | + vext_set_elems_1s(vd, vext_vta(desc), env->vl * esz, total_elems * esz); | ||
156 | +} | ||
157 | + | ||
158 | +static void do_sm4_round(uint32_t *rk, uint32_t *buf) | ||
159 | +{ | ||
160 | + const uint32_t egs = 4; | ||
161 | + uint32_t s, b; | ||
162 | + | ||
163 | + for (uint32_t j = egs; j < egs * 2; ++j) { | ||
164 | + b = buf[j - 3] ^ buf[j - 2] ^ buf[j - 1] ^ rk[j - 4]; | ||
165 | + | ||
166 | + s = sm4_subword(b); | ||
167 | + | ||
168 | + buf[j] = buf[j - 4] ^ (s ^ rol32(s, 2) ^ rol32(s, 10) ^ rol32(s, 18) ^ | ||
169 | + rol32(s, 24)); | ||
170 | + } | ||
171 | +} | ||
172 | + | ||
173 | +void HELPER(vsm4r_vv)(void *vd, void *vs2, CPURISCVState *env, uint32_t desc) | ||
174 | +{ | ||
175 | + const uint32_t egs = 4; | ||
176 | + uint32_t group_start = env->vstart / egs; | ||
177 | + uint32_t group_end = env->vl / egs; | ||
178 | + uint32_t esz = sizeof(uint32_t); | ||
179 | + uint32_t total_elems = vext_get_total_elems(env, desc, esz); | ||
180 | + | ||
181 | + for (uint32_t i = group_start; i < group_end; ++i) { | ||
182 | + uint32_t vstart = i * egs; | ||
183 | + uint32_t vend = (i + 1) * egs; | ||
184 | + uint32_t rk[4] = {0}; | ||
185 | + uint32_t tmp[8] = {0}; | ||
186 | + | ||
187 | + for (uint32_t j = vstart; j < vend; ++j) { | ||
188 | + rk[j - vstart] = *((uint32_t *)vs2 + H4(j)); | ||
189 | + } | ||
190 | + | ||
191 | + for (uint32_t j = vstart; j < vend; ++j) { | ||
192 | + tmp[j - vstart] = *((uint32_t *)vd + H4(j)); | ||
193 | + } | ||
194 | + | ||
195 | + do_sm4_round(rk, tmp); | ||
196 | + | ||
197 | + for (uint32_t j = vstart; j < vend; ++j) { | ||
198 | + *((uint32_t *)vd + H4(j)) = tmp[egs + (j - vstart)]; | ||
199 | + } | ||
200 | + } | ||
201 | + | ||
202 | + env->vstart = 0; | ||
203 | + /* set tail elements to 1s */ | ||
204 | + vext_set_elems_1s(vd, vext_vta(desc), env->vl * esz, total_elems * esz); | ||
205 | +} | ||
206 | + | ||
207 | +void HELPER(vsm4r_vs)(void *vd, void *vs2, CPURISCVState *env, uint32_t desc) | ||
208 | +{ | ||
209 | + const uint32_t egs = 4; | ||
210 | + uint32_t group_start = env->vstart / egs; | ||
211 | + uint32_t group_end = env->vl / egs; | ||
212 | + uint32_t esz = sizeof(uint32_t); | ||
213 | + uint32_t total_elems = vext_get_total_elems(env, desc, esz); | ||
214 | + | ||
215 | + for (uint32_t i = group_start; i < group_end; ++i) { | ||
216 | + uint32_t vstart = i * egs; | ||
217 | + uint32_t vend = (i + 1) * egs; | ||
218 | + uint32_t rk[4] = {0}; | ||
219 | + uint32_t tmp[8] = {0}; | ||
220 | + | ||
221 | + for (uint32_t j = 0; j < egs; ++j) { | ||
222 | + rk[j] = *((uint32_t *)vs2 + H4(j)); | ||
223 | + } | ||
224 | + | ||
225 | + for (uint32_t j = vstart; j < vend; ++j) { | ||
226 | + tmp[j - vstart] = *((uint32_t *)vd + H4(j)); | ||
227 | + } | ||
228 | + | ||
229 | + do_sm4_round(rk, tmp); | ||
230 | + | ||
231 | + for (uint32_t j = vstart; j < vend; ++j) { | ||
232 | + *((uint32_t *)vd + H4(j)) = tmp[egs + (j - vstart)]; | ||
233 | + } | ||
234 | + } | ||
235 | + | ||
236 | + env->vstart = 0; | ||
237 | + /* set tail elements to 1s */ | ||
238 | + vext_set_elems_1s(vd, vext_vta(desc), env->vl * esz, total_elems * esz); | ||
239 | +} | ||
240 | diff --git a/target/riscv/insn_trans/trans_rvvk.c.inc b/target/riscv/insn_trans/trans_rvvk.c.inc | ||
241 | index XXXXXXX..XXXXXXX 100644 | ||
242 | --- a/target/riscv/insn_trans/trans_rvvk.c.inc | ||
243 | +++ b/target/riscv/insn_trans/trans_rvvk.c.inc | ||
244 | @@ -XXX,XX +XXX,XX @@ static bool vghsh_check(DisasContext *s, arg_rmrr *a) | ||
245 | } | ||
246 | |||
247 | GEN_VV_UNMASKED_TRANS(vghsh_vv, vghsh_check, ZVKG_EGS) | ||
197 | + | 248 | + |
198 | +/* | 249 | +/* |
199 | + *** Vector Float Point Arithmetic Instructions | 250 | + * Zvksed |
200 | + */ | 251 | + */ |
201 | +/* Vector Single-Width Floating-Point Add/Subtract Instructions */ | 252 | + |
202 | +#define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ | 253 | +#define ZVKSED_EGS 4 |
203 | +static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ | 254 | + |
204 | + CPURISCVState *env) \ | 255 | +static bool zvksed_check(DisasContext *s) |
205 | +{ \ | 256 | +{ |
206 | + TX1 s1 = *((T1 *)vs1 + HS1(i)); \ | 257 | + int egw_bytes = ZVKSED_EGS << s->sew; |
207 | + TX2 s2 = *((T2 *)vs2 + HS2(i)); \ | 258 | + return s->cfg_ptr->ext_zvksed == true && |
208 | + *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status); \ | 259 | + require_rvv(s) && |
209 | +} | 260 | + vext_check_isa_ill(s) && |
210 | + | 261 | + MAXSZ(s) >= egw_bytes && |
211 | +#define GEN_VEXT_VV_ENV(NAME, ESZ, DSZ, CLEAR_FN) \ | 262 | + s->sew == MO_32; |
212 | +void HELPER(NAME)(void *vd, void *v0, void *vs1, \ | 263 | +} |
213 | + void *vs2, CPURISCVState *env, \ | 264 | + |
214 | + uint32_t desc) \ | 265 | +static bool vsm4k_vi_check(DisasContext *s, arg_rmrr *a) |
215 | +{ \ | 266 | +{ |
216 | + uint32_t vlmax = vext_maxsz(desc) / ESZ; \ | 267 | + return zvksed_check(s) && |
217 | + uint32_t mlen = vext_mlen(desc); \ | 268 | + require_align(a->rd, s->lmul) && |
218 | + uint32_t vm = vext_vm(desc); \ | 269 | + require_align(a->rs2, s->lmul); |
219 | + uint32_t vl = env->vl; \ | 270 | +} |
220 | + uint32_t i; \ | 271 | + |
221 | + \ | 272 | +GEN_VI_UNMASKED_TRANS(vsm4k_vi, vsm4k_vi_check, ZVKSED_EGS) |
222 | + for (i = 0; i < vl; i++) { \ | 273 | + |
223 | + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ | 274 | +static bool vsm4r_vv_check(DisasContext *s, arg_rmr *a) |
224 | + continue; \ | 275 | +{ |
225 | + } \ | 276 | + return zvksed_check(s) && |
226 | + do_##NAME(vd, vs1, vs2, i, env); \ | 277 | + require_align(a->rd, s->lmul) && |
227 | + } \ | 278 | + require_align(a->rs2, s->lmul); |
228 | + CLEAR_FN(vd, vl, vl * DSZ, vlmax * DSZ); \ | 279 | +} |
229 | +} | 280 | + |
230 | + | 281 | +GEN_V_UNMASKED_TRANS(vsm4r_vv, vsm4r_vv_check, ZVKSED_EGS) |
231 | +RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add) | 282 | + |
232 | +RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add) | 283 | +static bool vsm4r_vs_check(DisasContext *s, arg_rmr *a) |
233 | +RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add) | 284 | +{ |
234 | +GEN_VEXT_VV_ENV(vfadd_vv_h, 2, 2, clearh) | 285 | + return zvksed_check(s) && |
235 | +GEN_VEXT_VV_ENV(vfadd_vv_w, 4, 4, clearl) | 286 | + !is_overlapped(a->rd, 1 << MAX(s->lmul, 0), a->rs2, 1) && |
236 | +GEN_VEXT_VV_ENV(vfadd_vv_d, 8, 8, clearq) | 287 | + require_align(a->rd, s->lmul); |
237 | + | 288 | +} |
238 | +#define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ | 289 | + |
239 | +static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ | 290 | +GEN_V_UNMASKED_TRANS(vsm4r_vs, vsm4r_vs_check, ZVKSED_EGS) |
240 | + CPURISCVState *env) \ | ||
241 | +{ \ | ||
242 | + TX2 s2 = *((T2 *)vs2 + HS2(i)); \ | ||
243 | + *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, &env->fp_status);\ | ||
244 | +} | ||
245 | + | ||
246 | +#define GEN_VEXT_VF(NAME, ESZ, DSZ, CLEAR_FN) \ | ||
247 | +void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \ | ||
248 | + void *vs2, CPURISCVState *env, \ | ||
249 | + uint32_t desc) \ | ||
250 | +{ \ | ||
251 | + uint32_t vlmax = vext_maxsz(desc) / ESZ; \ | ||
252 | + uint32_t mlen = vext_mlen(desc); \ | ||
253 | + uint32_t vm = vext_vm(desc); \ | ||
254 | + uint32_t vl = env->vl; \ | ||
255 | + uint32_t i; \ | ||
256 | + \ | ||
257 | + for (i = 0; i < vl; i++) { \ | ||
258 | + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ | ||
259 | + continue; \ | ||
260 | + } \ | ||
261 | + do_##NAME(vd, s1, vs2, i, env); \ | ||
262 | + } \ | ||
263 | + CLEAR_FN(vd, vl, vl * DSZ, vlmax * DSZ); \ | ||
264 | +} | ||
265 | + | ||
266 | +RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add) | ||
267 | +RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add) | ||
268 | +RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add) | ||
269 | +GEN_VEXT_VF(vfadd_vf_h, 2, 2, clearh) | ||
270 | +GEN_VEXT_VF(vfadd_vf_w, 4, 4, clearl) | ||
271 | +GEN_VEXT_VF(vfadd_vf_d, 8, 8, clearq) | ||
272 | + | ||
273 | +RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub) | ||
274 | +RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub) | ||
275 | +RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub) | ||
276 | +GEN_VEXT_VV_ENV(vfsub_vv_h, 2, 2, clearh) | ||
277 | +GEN_VEXT_VV_ENV(vfsub_vv_w, 4, 4, clearl) | ||
278 | +GEN_VEXT_VV_ENV(vfsub_vv_d, 8, 8, clearq) | ||
279 | +RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub) | ||
280 | +RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub) | ||
281 | +RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub) | ||
282 | +GEN_VEXT_VF(vfsub_vf_h, 2, 2, clearh) | ||
283 | +GEN_VEXT_VF(vfsub_vf_w, 4, 4, clearl) | ||
284 | +GEN_VEXT_VF(vfsub_vf_d, 8, 8, clearq) | ||
285 | + | ||
286 | +static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s) | ||
287 | +{ | ||
288 | + return float16_sub(b, a, s); | ||
289 | +} | ||
290 | + | ||
291 | +static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s) | ||
292 | +{ | ||
293 | + return float32_sub(b, a, s); | ||
294 | +} | ||
295 | + | ||
296 | +static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s) | ||
297 | +{ | ||
298 | + return float64_sub(b, a, s); | ||
299 | +} | ||
300 | + | ||
301 | +RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub) | ||
302 | +RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub) | ||
303 | +RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub) | ||
304 | +GEN_VEXT_VF(vfrsub_vf_h, 2, 2, clearh) | ||
305 | +GEN_VEXT_VF(vfrsub_vf_w, 4, 4, clearl) | ||
306 | +GEN_VEXT_VF(vfrsub_vf_d, 8, 8, clearq) | ||
307 | -- | 291 | -- |
308 | 2.27.0 | 292 | 2.41.0 |
309 | |||
310 | diff view generated by jsdifflib |
1 | From: LIU Zhiwei <zhiwei_liu@c-sky.com> | 1 | From: Rob Bradford <rbradford@rivosinc.com> |
---|---|---|---|
2 | 2 | ||
3 | Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> | 3 | These are WARL fields - zero out the bits for unavailable counters and |
4 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 4 | special case the TM bit in mcountinhibit which is hardwired to zero. |
5 | Reviewed-by: Alistair Francis <alistair.francis@wdc.com> | 5 | This patch achieves this by modifying the value written so that any use |
6 | Message-id: 20200623215920.2594-46-zhiwei_liu@c-sky.com | 6 | of the field will see the correctly masked bits. |
7 | |||
8 | Tested by modifying OpenSBI to write max value to these CSRs and upon | ||
9 | subsequent read the appropriate number of bits for number of PMUs is | ||
10 | enabled and the TM bit is zero in mcountinhibit. | ||
11 | |||
12 | Signed-off-by: Rob Bradford <rbradford@rivosinc.com> | ||
13 | Acked-by: Alistair Francis <alistair.francis@wdc.com> | ||
14 | Reviewed-by: Atish Patra <atishp@rivosinc.com> | ||
15 | Message-ID: <20230802124906.24197-1-rbradford@rivosinc.com> | ||
7 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | 16 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> |
8 | --- | 17 | --- |
9 | target/riscv/helper.h | 33 +++++++++++ | 18 | target/riscv/csr.c | 11 +++++++++-- |
10 | target/riscv/insn32.decode | 8 +++ | 19 | 1 file changed, 9 insertions(+), 2 deletions(-) |
11 | target/riscv/insn_trans/trans_rvv.inc.c | 18 ++++++ | ||
12 | target/riscv/vector_helper.c | 74 +++++++++++++++++++++++++ | ||
13 | 4 files changed, 133 insertions(+) | ||
14 | 20 | ||
15 | diff --git a/target/riscv/helper.h b/target/riscv/helper.h | 21 | diff --git a/target/riscv/csr.c b/target/riscv/csr.c |
16 | index XXXXXXX..XXXXXXX 100644 | 22 | index XXXXXXX..XXXXXXX 100644 |
17 | --- a/target/riscv/helper.h | 23 | --- a/target/riscv/csr.c |
18 | +++ b/target/riscv/helper.h | 24 | +++ b/target/riscv/csr.c |
19 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_5(vfncvt_f_x_v_h, void, ptr, ptr, ptr, env, i32) | 25 | @@ -XXX,XX +XXX,XX @@ static RISCVException write_mcountinhibit(CPURISCVState *env, int csrno, |
20 | DEF_HELPER_5(vfncvt_f_x_v_w, void, ptr, ptr, ptr, env, i32) | 26 | { |
21 | DEF_HELPER_5(vfncvt_f_f_v_h, void, ptr, ptr, ptr, env, i32) | 27 | int cidx; |
22 | DEF_HELPER_5(vfncvt_f_f_v_w, void, ptr, ptr, ptr, env, i32) | 28 | PMUCTRState *counter; |
29 | + RISCVCPU *cpu = env_archcpu(env); | ||
30 | |||
31 | - env->mcountinhibit = val; | ||
32 | + /* WARL register - disable unavailable counters; TM bit is always 0 */ | ||
33 | + env->mcountinhibit = | ||
34 | + val & (cpu->pmu_avail_ctrs | COUNTEREN_CY | COUNTEREN_IR); | ||
35 | |||
36 | /* Check if any other counter is also monitoring cycles/instructions */ | ||
37 | for (cidx = 0; cidx < RV_MAX_MHPMCOUNTERS; cidx++) { | ||
38 | @@ -XXX,XX +XXX,XX @@ static RISCVException read_mcounteren(CPURISCVState *env, int csrno, | ||
39 | static RISCVException write_mcounteren(CPURISCVState *env, int csrno, | ||
40 | target_ulong val) | ||
41 | { | ||
42 | - env->mcounteren = val; | ||
43 | + RISCVCPU *cpu = env_archcpu(env); | ||
23 | + | 44 | + |
24 | +DEF_HELPER_6(vredsum_vs_b, void, ptr, ptr, ptr, ptr, env, i32) | 45 | + /* WARL register - disable unavailable counters */ |
25 | +DEF_HELPER_6(vredsum_vs_h, void, ptr, ptr, ptr, ptr, env, i32) | 46 | + env->mcounteren = val & (cpu->pmu_avail_ctrs | COUNTEREN_CY | COUNTEREN_TM | |
26 | +DEF_HELPER_6(vredsum_vs_w, void, ptr, ptr, ptr, ptr, env, i32) | 47 | + COUNTEREN_IR); |
27 | +DEF_HELPER_6(vredsum_vs_d, void, ptr, ptr, ptr, ptr, env, i32) | 48 | return RISCV_EXCP_NONE; |
28 | +DEF_HELPER_6(vredmaxu_vs_b, void, ptr, ptr, ptr, ptr, env, i32) | 49 | } |
29 | +DEF_HELPER_6(vredmaxu_vs_h, void, ptr, ptr, ptr, ptr, env, i32) | 50 | |
30 | +DEF_HELPER_6(vredmaxu_vs_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
31 | +DEF_HELPER_6(vredmaxu_vs_d, void, ptr, ptr, ptr, ptr, env, i32) | ||
32 | +DEF_HELPER_6(vredmax_vs_b, void, ptr, ptr, ptr, ptr, env, i32) | ||
33 | +DEF_HELPER_6(vredmax_vs_h, void, ptr, ptr, ptr, ptr, env, i32) | ||
34 | +DEF_HELPER_6(vredmax_vs_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
35 | +DEF_HELPER_6(vredmax_vs_d, void, ptr, ptr, ptr, ptr, env, i32) | ||
36 | +DEF_HELPER_6(vredminu_vs_b, void, ptr, ptr, ptr, ptr, env, i32) | ||
37 | +DEF_HELPER_6(vredminu_vs_h, void, ptr, ptr, ptr, ptr, env, i32) | ||
38 | +DEF_HELPER_6(vredminu_vs_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
39 | +DEF_HELPER_6(vredminu_vs_d, void, ptr, ptr, ptr, ptr, env, i32) | ||
40 | +DEF_HELPER_6(vredmin_vs_b, void, ptr, ptr, ptr, ptr, env, i32) | ||
41 | +DEF_HELPER_6(vredmin_vs_h, void, ptr, ptr, ptr, ptr, env, i32) | ||
42 | +DEF_HELPER_6(vredmin_vs_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
43 | +DEF_HELPER_6(vredmin_vs_d, void, ptr, ptr, ptr, ptr, env, i32) | ||
44 | +DEF_HELPER_6(vredand_vs_b, void, ptr, ptr, ptr, ptr, env, i32) | ||
45 | +DEF_HELPER_6(vredand_vs_h, void, ptr, ptr, ptr, ptr, env, i32) | ||
46 | +DEF_HELPER_6(vredand_vs_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
47 | +DEF_HELPER_6(vredand_vs_d, void, ptr, ptr, ptr, ptr, env, i32) | ||
48 | +DEF_HELPER_6(vredor_vs_b, void, ptr, ptr, ptr, ptr, env, i32) | ||
49 | +DEF_HELPER_6(vredor_vs_h, void, ptr, ptr, ptr, ptr, env, i32) | ||
50 | +DEF_HELPER_6(vredor_vs_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
51 | +DEF_HELPER_6(vredor_vs_d, void, ptr, ptr, ptr, ptr, env, i32) | ||
52 | +DEF_HELPER_6(vredxor_vs_b, void, ptr, ptr, ptr, ptr, env, i32) | ||
53 | +DEF_HELPER_6(vredxor_vs_h, void, ptr, ptr, ptr, ptr, env, i32) | ||
54 | +DEF_HELPER_6(vredxor_vs_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
55 | +DEF_HELPER_6(vredxor_vs_d, void, ptr, ptr, ptr, ptr, env, i32) | ||
56 | diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode | ||
57 | index XXXXXXX..XXXXXXX 100644 | ||
58 | --- a/target/riscv/insn32.decode | ||
59 | +++ b/target/riscv/insn32.decode | ||
60 | @@ -XXX,XX +XXX,XX @@ vfncvt_x_f_v 100010 . ..... 10001 001 ..... 1010111 @r2_vm | ||
61 | vfncvt_f_xu_v 100010 . ..... 10010 001 ..... 1010111 @r2_vm | ||
62 | vfncvt_f_x_v 100010 . ..... 10011 001 ..... 1010111 @r2_vm | ||
63 | vfncvt_f_f_v 100010 . ..... 10100 001 ..... 1010111 @r2_vm | ||
64 | +vredsum_vs 000000 . ..... ..... 010 ..... 1010111 @r_vm | ||
65 | +vredand_vs 000001 . ..... ..... 010 ..... 1010111 @r_vm | ||
66 | +vredor_vs 000010 . ..... ..... 010 ..... 1010111 @r_vm | ||
67 | +vredxor_vs 000011 . ..... ..... 010 ..... 1010111 @r_vm | ||
68 | +vredminu_vs 000100 . ..... ..... 010 ..... 1010111 @r_vm | ||
69 | +vredmin_vs 000101 . ..... ..... 010 ..... 1010111 @r_vm | ||
70 | +vredmaxu_vs 000110 . ..... ..... 010 ..... 1010111 @r_vm | ||
71 | +vredmax_vs 000111 . ..... ..... 010 ..... 1010111 @r_vm | ||
72 | |||
73 | vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm | ||
74 | vsetvl 1000000 ..... ..... 111 ..... 1010111 @r | ||
75 | diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c | ||
76 | index XXXXXXX..XXXXXXX 100644 | ||
77 | --- a/target/riscv/insn_trans/trans_rvv.inc.c | ||
78 | +++ b/target/riscv/insn_trans/trans_rvv.inc.c | ||
79 | @@ -XXX,XX +XXX,XX @@ GEN_OPFV_NARROW_TRANS(vfncvt_x_f_v) | ||
80 | GEN_OPFV_NARROW_TRANS(vfncvt_f_xu_v) | ||
81 | GEN_OPFV_NARROW_TRANS(vfncvt_f_x_v) | ||
82 | GEN_OPFV_NARROW_TRANS(vfncvt_f_f_v) | ||
83 | + | ||
84 | +/* | ||
85 | + *** Vector Reduction Operations | ||
86 | + */ | ||
87 | +/* Vector Single-Width Integer Reduction Instructions */ | ||
88 | +static bool reduction_check(DisasContext *s, arg_rmrr *a) | ||
89 | +{ | ||
90 | + return vext_check_isa_ill(s) && vext_check_reg(s, a->rs2, false); | ||
91 | +} | ||
92 | + | ||
93 | +GEN_OPIVV_TRANS(vredsum_vs, reduction_check) | ||
94 | +GEN_OPIVV_TRANS(vredmaxu_vs, reduction_check) | ||
95 | +GEN_OPIVV_TRANS(vredmax_vs, reduction_check) | ||
96 | +GEN_OPIVV_TRANS(vredminu_vs, reduction_check) | ||
97 | +GEN_OPIVV_TRANS(vredmin_vs, reduction_check) | ||
98 | +GEN_OPIVV_TRANS(vredand_vs, reduction_check) | ||
99 | +GEN_OPIVV_TRANS(vredor_vs, reduction_check) | ||
100 | +GEN_OPIVV_TRANS(vredxor_vs, reduction_check) | ||
101 | diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c | ||
102 | index XXXXXXX..XXXXXXX 100644 | ||
103 | --- a/target/riscv/vector_helper.c | ||
104 | +++ b/target/riscv/vector_helper.c | ||
105 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPFVV1, vfncvt_f_f_v_h, NOP_UU_H, H2, H4, vfncvtffv16) | ||
106 | RVVCALL(OPFVV1, vfncvt_f_f_v_w, NOP_UU_W, H4, H8, float64_to_float32) | ||
107 | GEN_VEXT_V_ENV(vfncvt_f_f_v_h, 2, 2, clearh) | ||
108 | GEN_VEXT_V_ENV(vfncvt_f_f_v_w, 4, 4, clearl) | ||
109 | + | ||
110 | +/* | ||
111 | + *** Vector Reduction Operations | ||
112 | + */ | ||
113 | +/* Vector Single-Width Integer Reduction Instructions */ | ||
114 | +#define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP, CLEAR_FN)\ | ||
115 | +void HELPER(NAME)(void *vd, void *v0, void *vs1, \ | ||
116 | + void *vs2, CPURISCVState *env, uint32_t desc) \ | ||
117 | +{ \ | ||
118 | + uint32_t mlen = vext_mlen(desc); \ | ||
119 | + uint32_t vm = vext_vm(desc); \ | ||
120 | + uint32_t vl = env->vl; \ | ||
121 | + uint32_t i; \ | ||
122 | + uint32_t tot = env_archcpu(env)->cfg.vlen / 8; \ | ||
123 | + TD s1 = *((TD *)vs1 + HD(0)); \ | ||
124 | + \ | ||
125 | + for (i = 0; i < vl; i++) { \ | ||
126 | + TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ | ||
127 | + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ | ||
128 | + continue; \ | ||
129 | + } \ | ||
130 | + s1 = OP(s1, (TD)s2); \ | ||
131 | + } \ | ||
132 | + *((TD *)vd + HD(0)) = s1; \ | ||
133 | + CLEAR_FN(vd, 1, sizeof(TD), tot); \ | ||
134 | +} | ||
135 | + | ||
136 | +/* vd[0] = sum(vs1[0], vs2[*]) */ | ||
137 | +GEN_VEXT_RED(vredsum_vs_b, int8_t, int8_t, H1, H1, DO_ADD, clearb) | ||
138 | +GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD, clearh) | ||
139 | +GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD, clearl) | ||
140 | +GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD, clearq) | ||
141 | + | ||
142 | +/* vd[0] = maxu(vs1[0], vs2[*]) */ | ||
143 | +GEN_VEXT_RED(vredmaxu_vs_b, uint8_t, uint8_t, H1, H1, DO_MAX, clearb) | ||
144 | +GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX, clearh) | ||
145 | +GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX, clearl) | ||
146 | +GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX, clearq) | ||
147 | + | ||
148 | +/* vd[0] = max(vs1[0], vs2[*]) */ | ||
149 | +GEN_VEXT_RED(vredmax_vs_b, int8_t, int8_t, H1, H1, DO_MAX, clearb) | ||
150 | +GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX, clearh) | ||
151 | +GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX, clearl) | ||
152 | +GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX, clearq) | ||
153 | + | ||
154 | +/* vd[0] = minu(vs1[0], vs2[*]) */ | ||
155 | +GEN_VEXT_RED(vredminu_vs_b, uint8_t, uint8_t, H1, H1, DO_MIN, clearb) | ||
156 | +GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN, clearh) | ||
157 | +GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN, clearl) | ||
158 | +GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN, clearq) | ||
159 | + | ||
160 | +/* vd[0] = min(vs1[0], vs2[*]) */ | ||
161 | +GEN_VEXT_RED(vredmin_vs_b, int8_t, int8_t, H1, H1, DO_MIN, clearb) | ||
162 | +GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN, clearh) | ||
163 | +GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN, clearl) | ||
164 | +GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN, clearq) | ||
165 | + | ||
166 | +/* vd[0] = and(vs1[0], vs2[*]) */ | ||
167 | +GEN_VEXT_RED(vredand_vs_b, int8_t, int8_t, H1, H1, DO_AND, clearb) | ||
168 | +GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND, clearh) | ||
169 | +GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND, clearl) | ||
170 | +GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND, clearq) | ||
171 | + | ||
172 | +/* vd[0] = or(vs1[0], vs2[*]) */ | ||
173 | +GEN_VEXT_RED(vredor_vs_b, int8_t, int8_t, H1, H1, DO_OR, clearb) | ||
174 | +GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR, clearh) | ||
175 | +GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR, clearl) | ||
176 | +GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR, clearq) | ||
177 | + | ||
178 | +/* vd[0] = xor(vs1[0], vs2[*]) */ | ||
179 | +GEN_VEXT_RED(vredxor_vs_b, int8_t, int8_t, H1, H1, DO_XOR, clearb) | ||
180 | +GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR, clearh) | ||
181 | +GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR, clearl) | ||
182 | +GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR, clearq) | ||
183 | -- | 51 | -- |
184 | 2.27.0 | 52 | 2.41.0 |
185 | |||
186 | diff view generated by jsdifflib |
1 | From: LIU Zhiwei <zhiwei_liu@c-sky.com> | 1 | From: Jason Chien <jason.chien@sifive.com> |
---|---|---|---|
2 | 2 | ||
3 | Vector extension is default off. The only way to use vector extension is | 3 | RVA23 Profiles states: |
4 | 1. use cpu rv32 or rv64 | 4 | The RVA23 profiles are intended to be used for 64-bit application |
5 | 2. turn on it by command line | 5 | processors that will run rich OS stacks from standard binary OS |
6 | "-cpu rv64,x-v=true,vlen=128,elen=64,vext_spec=v0.7.1". | 6 | distributions and with a substantial number of third-party binary user |
7 | applications that will be supported over a considerable length of time | ||
8 | in the field. | ||
7 | 9 | ||
8 | vlen is the vector register length, default value is 128 bit. | 10 | Chapter 4 of the unprivileged spec introduces the Zihintntl extension |
9 | elen is the max operator size in bits, default value is 64 bit. | 11 | and Zihintntl is a mandatory extension presented in RVA23 Profiles, whose |
10 | vext_spec is the vector specification version, default value is v0.7.1. | 12 | purpose is to enable application and operating system portability across |
11 | These properties can be specified with other values. | 13 | different implementations. Thus the DTS should contain the Zihintntl ISA |
14 | string in order to pass to software. | ||
12 | 15 | ||
13 | Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> | 16 | The unprivileged spec states: |
17 | Like any HINTs, these instructions may be freely ignored. Hence, although | ||
18 | they are described in terms of cache-based memory hierarchies, they do not | ||
19 | mandate the provision of caches. | ||
20 | |||
21 | These instructions are encoded with otherwise unused opcodes, e.g. ADD x0, x0, x2, |
22 | which QEMU already supports, and QEMU does not emulate cache. Therefore | ||
23 | these instructions can be considered as a no-op, and we only need to add | ||
24 | a new property for the Zihintntl extension. | ||
25 | |||
26 | Reviewed-by: Frank Chang <frank.chang@sifive.com> | ||
14 | Reviewed-by: Alistair Francis <alistair.francis@wdc.com> | 27 | Reviewed-by: Alistair Francis <alistair.francis@wdc.com> |
15 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 28 | Signed-off-by: Jason Chien <jason.chien@sifive.com> |
16 | Message-id: 20200623215920.2594-62-zhiwei_liu@c-sky.com | 29 | Message-ID: <20230726074049.19505-2-jason.chien@sifive.com> |
17 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | 30 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> |
18 | --- | 31 | --- |
19 | target/riscv/cpu.h | 4 +++- | 32 | target/riscv/cpu_cfg.h | 1 + |
20 | target/riscv/cpu.c | 43 +++++++++++++++++++++++++++++++++++++++++++ | 33 | target/riscv/cpu.c | 2 ++ |
21 | 2 files changed, 46 insertions(+), 1 deletion(-) | 34 | 2 files changed, 3 insertions(+) |
22 | 35 | ||
23 | diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h | 36 | diff --git a/target/riscv/cpu_cfg.h b/target/riscv/cpu_cfg.h |
24 | index XXXXXXX..XXXXXXX 100644 | 37 | index XXXXXXX..XXXXXXX 100644 |
25 | --- a/target/riscv/cpu.h | 38 | --- a/target/riscv/cpu_cfg.h |
26 | +++ b/target/riscv/cpu.h | 39 | +++ b/target/riscv/cpu_cfg.h |
27 | @@ -XXX,XX +XXX,XX @@ typedef struct CPURISCVState CPURISCVState; | 40 | @@ -XXX,XX +XXX,XX @@ struct RISCVCPUConfig { |
28 | 41 | bool ext_icbom; | |
29 | #include "pmp.h" | 42 | bool ext_icboz; |
30 | 43 | bool ext_zicond; | |
31 | -#define RV_VLEN_MAX 512 | 44 | + bool ext_zihintntl; |
32 | +#define RV_VLEN_MAX 256 | 45 | bool ext_zihintpause; |
33 | 46 | bool ext_smstateen; | |
34 | FIELD(VTYPE, VLMUL, 0, 2) | 47 | bool ext_sstc; |
35 | FIELD(VTYPE, VSEW, 2, 3) | ||
36 | @@ -XXX,XX +XXX,XX @@ typedef struct RISCVCPU { | ||
37 | bool ext_s; | ||
38 | bool ext_u; | ||
39 | bool ext_h; | ||
40 | + bool ext_v; | ||
41 | bool ext_counters; | ||
42 | bool ext_ifencei; | ||
43 | bool ext_icsr; | ||
44 | |||
45 | char *priv_spec; | ||
46 | char *user_spec; | ||
47 | + char *vext_spec; | ||
48 | uint16_t vlen; | ||
49 | uint16_t elen; | ||
50 | bool mmu; | ||
51 | diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c | 48 | diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c |
52 | index XXXXXXX..XXXXXXX 100644 | 49 | index XXXXXXX..XXXXXXX 100644 |
53 | --- a/target/riscv/cpu.c | 50 | --- a/target/riscv/cpu.c |
54 | +++ b/target/riscv/cpu.c | 51 | +++ b/target/riscv/cpu.c |
55 | @@ -XXX,XX +XXX,XX @@ static void riscv_cpu_realize(DeviceState *dev, Error **errp) | 52 | @@ -XXX,XX +XXX,XX @@ static const struct isa_ext_data isa_edata_arr[] = { |
56 | if (cpu->cfg.ext_h) { | 53 | ISA_EXT_DATA_ENTRY(zicond, PRIV_VERSION_1_12_0, ext_zicond), |
57 | target_misa |= RVH; | 54 | ISA_EXT_DATA_ENTRY(zicsr, PRIV_VERSION_1_10_0, ext_icsr), |
58 | } | 55 | ISA_EXT_DATA_ENTRY(zifencei, PRIV_VERSION_1_10_0, ext_ifencei), |
59 | + if (cpu->cfg.ext_v) { | 56 | + ISA_EXT_DATA_ENTRY(zihintntl, PRIV_VERSION_1_10_0, ext_zihintntl), |
60 | + target_misa |= RVV; | 57 | ISA_EXT_DATA_ENTRY(zihintpause, PRIV_VERSION_1_10_0, ext_zihintpause), |
61 | + if (!is_power_of_2(cpu->cfg.vlen)) { | 58 | ISA_EXT_DATA_ENTRY(zmmul, PRIV_VERSION_1_12_0, ext_zmmul), |
62 | + error_setg(errp, | 59 | ISA_EXT_DATA_ENTRY(zawrs, PRIV_VERSION_1_12_0, ext_zawrs), |
63 | + "Vector extension VLEN must be power of 2"); | 60 | @@ -XXX,XX +XXX,XX @@ static Property riscv_cpu_extensions[] = { |
64 | + return; | 61 | DEFINE_PROP_BOOL("sscofpmf", RISCVCPU, cfg.ext_sscofpmf, false), |
65 | + } | ||
66 | + if (cpu->cfg.vlen > RV_VLEN_MAX || cpu->cfg.vlen < 128) { | ||
67 | + error_setg(errp, | ||
68 | + "Vector extension implementation only supports VLEN " | ||
69 | + "in the range [128, %d]", RV_VLEN_MAX); | ||
70 | + return; | ||
71 | + } | ||
72 | + if (!is_power_of_2(cpu->cfg.elen)) { | ||
73 | + error_setg(errp, | ||
74 | + "Vector extension ELEN must be power of 2"); | ||
75 | + return; | ||
76 | + } | ||
77 | + if (cpu->cfg.elen > 64 || cpu->cfg.vlen < 8) { | ||
78 | + error_setg(errp, | ||
79 | + "Vector extension implementation only supports ELEN " | ||
80 | + "in the range [8, 64]"); | ||
81 | + return; | ||
82 | + } | ||
83 | + if (cpu->cfg.vext_spec) { | ||
84 | + if (!g_strcmp0(cpu->cfg.vext_spec, "v0.7.1")) { | ||
85 | + vext_version = VEXT_VERSION_0_07_1; | ||
86 | + } else { | ||
87 | + error_setg(errp, | ||
88 | + "Unsupported vector spec version '%s'", | ||
89 | + cpu->cfg.vext_spec); | ||
90 | + return; | ||
91 | + } | ||
92 | + } else { | ||
93 | + qemu_log("vector verison is not specified, " | ||
94 | + "use the default value v0.7.1\n"); | ||
95 | + } | ||
96 | + set_vext_version(env, vext_version); | ||
97 | + } | ||
98 | |||
99 | set_misa(env, RVXLEN | target_misa); | ||
100 | } | ||
101 | @@ -XXX,XX +XXX,XX @@ static Property riscv_cpu_properties[] = { | ||
102 | DEFINE_PROP_BOOL("u", RISCVCPU, cfg.ext_u, true), | ||
103 | /* This is experimental so mark with 'x-' */ | ||
104 | DEFINE_PROP_BOOL("x-h", RISCVCPU, cfg.ext_h, false), | ||
105 | + DEFINE_PROP_BOOL("x-v", RISCVCPU, cfg.ext_v, false), | ||
106 | DEFINE_PROP_BOOL("Counters", RISCVCPU, cfg.ext_counters, true), | ||
107 | DEFINE_PROP_BOOL("Zifencei", RISCVCPU, cfg.ext_ifencei, true), | 62 | DEFINE_PROP_BOOL("Zifencei", RISCVCPU, cfg.ext_ifencei, true), |
108 | DEFINE_PROP_BOOL("Zicsr", RISCVCPU, cfg.ext_icsr, true), | 63 | DEFINE_PROP_BOOL("Zicsr", RISCVCPU, cfg.ext_icsr, true), |
109 | DEFINE_PROP_STRING("priv_spec", RISCVCPU, cfg.priv_spec), | 64 | + DEFINE_PROP_BOOL("Zihintntl", RISCVCPU, cfg.ext_zihintntl, true), |
110 | + DEFINE_PROP_STRING("vext_spec", RISCVCPU, cfg.vext_spec), | 65 | DEFINE_PROP_BOOL("Zihintpause", RISCVCPU, cfg.ext_zihintpause, true), |
111 | + DEFINE_PROP_UINT16("vlen", RISCVCPU, cfg.vlen, 128), | 66 | DEFINE_PROP_BOOL("Zawrs", RISCVCPU, cfg.ext_zawrs, true), |
112 | + DEFINE_PROP_UINT16("elen", RISCVCPU, cfg.elen, 64), | 67 | DEFINE_PROP_BOOL("Zfa", RISCVCPU, cfg.ext_zfa, true), |
113 | DEFINE_PROP_BOOL("mmu", RISCVCPU, cfg.mmu, true), | ||
114 | DEFINE_PROP_BOOL("pmp", RISCVCPU, cfg.pmp, true), | ||
115 | DEFINE_PROP_END_OF_LIST(), | ||
116 | -- | 68 | -- |
117 | 2.27.0 | 69 | 2.41.0 |
118 | |||
119 | diff view generated by jsdifflib |
1 | From: LIU Zhiwei <zhiwei_liu@c-sky.com> | 1 | From: LIU Zhiwei <zhiwei_liu@linux.alibaba.com> |
---|---|---|---|
2 | 2 | ||
3 | Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> | 3 | Commit a47842d ("riscv: Add support for the Zfa extension") implemented the zfa extension. |
4 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 4 | However, it has some typos for fleq.d and fltq.d. Both of them misused the fltq.s |
5 | Message-id: 20200623215920.2594-61-zhiwei_liu@c-sky.com | 5 | helper function. |
6 | |||
7 | Fixes: a47842d ("riscv: Add support for the Zfa extension") | ||
8 | Signed-off-by: LIU Zhiwei <zhiwei_liu@linux.alibaba.com> | ||
9 | Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> | ||
10 | Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn> | ||
11 | Message-ID: <20230728003906.768-1-zhiwei_liu@linux.alibaba.com> | ||
6 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | 12 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> |
7 | --- | 13 | --- |
8 | target/riscv/helper.h | 5 ++++ | 14 | target/riscv/insn_trans/trans_rvzfa.c.inc | 4 ++-- |
9 | target/riscv/insn32.decode | 1 + | 15 | 1 file changed, 2 insertions(+), 2 deletions(-) |
10 | target/riscv/insn_trans/trans_rvv.inc.c | 32 +++++++++++++++++++++++++ | ||
11 | target/riscv/vector_helper.c | 26 ++++++++++++++++++++ | ||
12 | 4 files changed, 64 insertions(+) | ||
13 | 16 | ||
14 | diff --git a/target/riscv/helper.h b/target/riscv/helper.h | 17 | diff --git a/target/riscv/insn_trans/trans_rvzfa.c.inc b/target/riscv/insn_trans/trans_rvzfa.c.inc |
15 | index XXXXXXX..XXXXXXX 100644 | 18 | index XXXXXXX..XXXXXXX 100644 |
16 | --- a/target/riscv/helper.h | 19 | --- a/target/riscv/insn_trans/trans_rvzfa.c.inc |
17 | +++ b/target/riscv/helper.h | 20 | +++ b/target/riscv/insn_trans/trans_rvzfa.c.inc |
18 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vrgather_vx_b, void, ptr, ptr, tl, ptr, env, i32) | 21 | @@ -XXX,XX +XXX,XX @@ bool trans_fleq_d(DisasContext *ctx, arg_fleq_d *a) |
19 | DEF_HELPER_6(vrgather_vx_h, void, ptr, ptr, tl, ptr, env, i32) | 22 | TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); |
20 | DEF_HELPER_6(vrgather_vx_w, void, ptr, ptr, tl, ptr, env, i32) | 23 | TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2); |
21 | DEF_HELPER_6(vrgather_vx_d, void, ptr, ptr, tl, ptr, env, i32) | 24 | |
22 | + | 25 | - gen_helper_fltq_s(dest, cpu_env, src1, src2); |
23 | +DEF_HELPER_6(vcompress_vm_b, void, ptr, ptr, ptr, ptr, env, i32) | 26 | + gen_helper_fleq_d(dest, cpu_env, src1, src2); |
24 | +DEF_HELPER_6(vcompress_vm_h, void, ptr, ptr, ptr, ptr, env, i32) | 27 | gen_set_gpr(ctx, a->rd, dest); |
25 | +DEF_HELPER_6(vcompress_vm_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
26 | +DEF_HELPER_6(vcompress_vm_d, void, ptr, ptr, ptr, ptr, env, i32) | ||
27 | diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode | ||
28 | index XXXXXXX..XXXXXXX 100644 | ||
29 | --- a/target/riscv/insn32.decode | ||
30 | +++ b/target/riscv/insn32.decode | ||
31 | @@ -XXX,XX +XXX,XX @@ vslide1down_vx 001111 . ..... ..... 110 ..... 1010111 @r_vm | ||
32 | vrgather_vv 001100 . ..... ..... 000 ..... 1010111 @r_vm | ||
33 | vrgather_vx 001100 . ..... ..... 100 ..... 1010111 @r_vm | ||
34 | vrgather_vi 001100 . ..... ..... 011 ..... 1010111 @r_vm | ||
35 | +vcompress_vm 010111 - ..... ..... 010 ..... 1010111 @r | ||
36 | |||
37 | vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm | ||
38 | vsetvl 1000000 ..... ..... 111 ..... 1010111 @r | ||
39 | diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c | ||
40 | index XXXXXXX..XXXXXXX 100644 | ||
41 | --- a/target/riscv/insn_trans/trans_rvv.inc.c | ||
42 | +++ b/target/riscv/insn_trans/trans_rvv.inc.c | ||
43 | @@ -XXX,XX +XXX,XX @@ static bool trans_vrgather_vi(DisasContext *s, arg_rmrr *a) | ||
44 | } | ||
45 | return true; | 28 | return true; |
46 | } | 29 | } |
47 | + | 30 | @@ -XXX,XX +XXX,XX @@ bool trans_fltq_d(DisasContext *ctx, arg_fltq_d *a) |
48 | +/* Vector Compress Instruction */ | 31 | TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); |
49 | +static bool vcompress_vm_check(DisasContext *s, arg_r *a) | 32 | TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2); |
50 | +{ | 33 | |
51 | + return (vext_check_isa_ill(s) && | 34 | - gen_helper_fltq_s(dest, cpu_env, src1, src2); |
52 | + vext_check_reg(s, a->rd, false) && | 35 | + gen_helper_fltq_d(dest, cpu_env, src1, src2); |
53 | + vext_check_reg(s, a->rs2, false) && | 36 | gen_set_gpr(ctx, a->rd, dest); |
54 | + vext_check_overlap_group(a->rd, 1 << s->lmul, a->rs1, 1) && | 37 | return true; |
55 | + (a->rd != a->rs2)); | 38 | } |
56 | +} | ||
57 | + | ||
58 | +static bool trans_vcompress_vm(DisasContext *s, arg_r *a) | ||
59 | +{ | ||
60 | + if (vcompress_vm_check(s, a)) { | ||
61 | + uint32_t data = 0; | ||
62 | + static gen_helper_gvec_4_ptr * const fns[4] = { | ||
63 | + gen_helper_vcompress_vm_b, gen_helper_vcompress_vm_h, | ||
64 | + gen_helper_vcompress_vm_w, gen_helper_vcompress_vm_d, | ||
65 | + }; | ||
66 | + TCGLabel *over = gen_new_label(); | ||
67 | + tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); | ||
68 | + | ||
69 | + data = FIELD_DP32(data, VDATA, MLEN, s->mlen); | ||
70 | + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); | ||
71 | + tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), | ||
72 | + vreg_ofs(s, a->rs1), vreg_ofs(s, a->rs2), | ||
73 | + cpu_env, 0, s->vlen / 8, data, fns[s->sew]); | ||
74 | + gen_set_label(over); | ||
75 | + return true; | ||
76 | + } | ||
77 | + return false; | ||
78 | +} | ||
79 | diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c | ||
80 | index XXXXXXX..XXXXXXX 100644 | ||
81 | --- a/target/riscv/vector_helper.c | ||
82 | +++ b/target/riscv/vector_helper.c | ||
83 | @@ -XXX,XX +XXX,XX @@ GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t, H1, clearb) | ||
84 | GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2, clearh) | ||
85 | GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4, clearl) | ||
86 | GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8, clearq) | ||
87 | + | ||
88 | +/* Vector Compress Instruction */ | ||
89 | +#define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H, CLEAR_FN) \ | ||
90 | +void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ | ||
91 | + CPURISCVState *env, uint32_t desc) \ | ||
92 | +{ \ | ||
93 | + uint32_t mlen = vext_mlen(desc); \ | ||
94 | + uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen; \ | ||
95 | + uint32_t vl = env->vl; \ | ||
96 | + uint32_t num = 0, i; \ | ||
97 | + \ | ||
98 | + for (i = 0; i < vl; i++) { \ | ||
99 | + if (!vext_elem_mask(vs1, mlen, i)) { \ | ||
100 | + continue; \ | ||
101 | + } \ | ||
102 | + *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i)); \ | ||
103 | + num++; \ | ||
104 | + } \ | ||
105 | + CLEAR_FN(vd, num, num * sizeof(ETYPE), vlmax * sizeof(ETYPE)); \ | ||
106 | +} | ||
107 | + | ||
108 | +/* Compress into vd elements of vs2 where vs1 is enabled */ | ||
109 | +GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t, H1, clearb) | ||
110 | +GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2, clearh) | ||
111 | +GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4, clearl) | ||
112 | +GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8, clearq) | ||
113 | -- | 39 | -- |
114 | 2.27.0 | 40 | 2.41.0 |
115 | |||
116 | diff view generated by jsdifflib |
1 | From: LIU Zhiwei <zhiwei_liu@c-sky.com> | 1 | From: Jason Chien <jason.chien@sifive.com> |
---|---|---|---|
2 | 2 | ||
3 | Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> | 3 | When writing the upper mtime, we should keep the original lower mtime |
4 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 4 | whose value is given by cpu_riscv_read_rtc() instead of |
5 | cpu_riscv_read_rtc_raw(). The same logic applies to writes to lower mtime. | ||
6 | |||
7 | Signed-off-by: Jason Chien <jason.chien@sifive.com> | ||
5 | Reviewed-by: Alistair Francis <alistair.francis@wdc.com> | 8 | Reviewed-by: Alistair Francis <alistair.francis@wdc.com> |
6 | Message-id: 20200623215920.2594-45-zhiwei_liu@c-sky.com | 9 | Message-ID: <20230728082502.26439-1-jason.chien@sifive.com> |
7 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | 10 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> |
8 | --- | 11 | --- |
9 | target/riscv/helper.h | 11 ++++++ | 12 | hw/intc/riscv_aclint.c | 5 +++-- |
10 | target/riscv/insn32.decode | 5 +++ | 13 | 1 file changed, 3 insertions(+), 2 deletions(-) |
11 | target/riscv/insn_trans/trans_rvv.inc.c | 48 +++++++++++++++++++++++++ | ||
12 | target/riscv/vector_helper.c | 39 ++++++++++++++++++++ | ||
13 | 4 files changed, 103 insertions(+) | ||
14 | 14 | ||
15 | diff --git a/target/riscv/helper.h b/target/riscv/helper.h | 15 | diff --git a/hw/intc/riscv_aclint.c b/hw/intc/riscv_aclint.c |
16 | index XXXXXXX..XXXXXXX 100644 | 16 | index XXXXXXX..XXXXXXX 100644 |
17 | --- a/target/riscv/helper.h | 17 | --- a/hw/intc/riscv_aclint.c |
18 | +++ b/target/riscv/helper.h | 18 | +++ b/hw/intc/riscv_aclint.c |
19 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_5(vfwcvt_f_x_v_h, void, ptr, ptr, ptr, env, i32) | 19 | @@ -XXX,XX +XXX,XX @@ static void riscv_aclint_mtimer_write(void *opaque, hwaddr addr, |
20 | DEF_HELPER_5(vfwcvt_f_x_v_w, void, ptr, ptr, ptr, env, i32) | 20 | return; |
21 | DEF_HELPER_5(vfwcvt_f_f_v_h, void, ptr, ptr, ptr, env, i32) | 21 | } else if (addr == mtimer->time_base || addr == mtimer->time_base + 4) { |
22 | DEF_HELPER_5(vfwcvt_f_f_v_w, void, ptr, ptr, ptr, env, i32) | 22 | uint64_t rtc_r = cpu_riscv_read_rtc_raw(mtimer->timebase_freq); |
23 | + | 23 | + uint64_t rtc = cpu_riscv_read_rtc(mtimer); |
24 | +DEF_HELPER_5(vfncvt_xu_f_v_h, void, ptr, ptr, ptr, env, i32) | 24 | |
25 | +DEF_HELPER_5(vfncvt_xu_f_v_w, void, ptr, ptr, ptr, env, i32) | 25 | if (addr == mtimer->time_base) { |
26 | +DEF_HELPER_5(vfncvt_x_f_v_h, void, ptr, ptr, ptr, env, i32) | 26 | if (size == 4) { |
27 | +DEF_HELPER_5(vfncvt_x_f_v_w, void, ptr, ptr, ptr, env, i32) | 27 | /* time_lo for RV32/RV64 */ |
28 | +DEF_HELPER_5(vfncvt_f_xu_v_h, void, ptr, ptr, ptr, env, i32) | 28 | - mtimer->time_delta = ((rtc_r & ~0xFFFFFFFFULL) | value) - rtc_r; |
29 | +DEF_HELPER_5(vfncvt_f_xu_v_w, void, ptr, ptr, ptr, env, i32) | 29 | + mtimer->time_delta = ((rtc & ~0xFFFFFFFFULL) | value) - rtc_r; |
30 | +DEF_HELPER_5(vfncvt_f_x_v_h, void, ptr, ptr, ptr, env, i32) | 30 | } else { |
31 | +DEF_HELPER_5(vfncvt_f_x_v_w, void, ptr, ptr, ptr, env, i32) | 31 | /* time for RV64 */ |
32 | +DEF_HELPER_5(vfncvt_f_f_v_h, void, ptr, ptr, ptr, env, i32) | 32 | mtimer->time_delta = value - rtc_r; |
33 | +DEF_HELPER_5(vfncvt_f_f_v_w, void, ptr, ptr, ptr, env, i32) | 33 | @@ -XXX,XX +XXX,XX @@ static void riscv_aclint_mtimer_write(void *opaque, hwaddr addr, |
34 | diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode | 34 | } else { |
35 | index XXXXXXX..XXXXXXX 100644 | 35 | if (size == 4) { |
36 | --- a/target/riscv/insn32.decode | 36 | /* time_hi for RV32/RV64 */ |
37 | +++ b/target/riscv/insn32.decode | 37 | - mtimer->time_delta = (value << 32 | (rtc_r & 0xFFFFFFFF)) - rtc_r; |
38 | @@ -XXX,XX +XXX,XX @@ vfwcvt_x_f_v 100010 . ..... 01001 001 ..... 1010111 @r2_vm | 38 | + mtimer->time_delta = (value << 32 | (rtc & 0xFFFFFFFF)) - rtc_r; |
39 | vfwcvt_f_xu_v 100010 . ..... 01010 001 ..... 1010111 @r2_vm | 39 | } else { |
40 | vfwcvt_f_x_v 100010 . ..... 01011 001 ..... 1010111 @r2_vm | 40 | qemu_log_mask(LOG_GUEST_ERROR, |
41 | vfwcvt_f_f_v 100010 . ..... 01100 001 ..... 1010111 @r2_vm | 41 | "aclint-mtimer: invalid time_hi write: %08x", |
42 | +vfncvt_xu_f_v 100010 . ..... 10000 001 ..... 1010111 @r2_vm | ||
43 | +vfncvt_x_f_v 100010 . ..... 10001 001 ..... 1010111 @r2_vm | ||
44 | +vfncvt_f_xu_v 100010 . ..... 10010 001 ..... 1010111 @r2_vm | ||
45 | +vfncvt_f_x_v 100010 . ..... 10011 001 ..... 1010111 @r2_vm | ||
46 | +vfncvt_f_f_v 100010 . ..... 10100 001 ..... 1010111 @r2_vm | ||
47 | |||
48 | vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm | ||
49 | vsetvl 1000000 ..... ..... 111 ..... 1010111 @r | ||
50 | diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c | ||
51 | index XXXXXXX..XXXXXXX 100644 | ||
52 | --- a/target/riscv/insn_trans/trans_rvv.inc.c | ||
53 | +++ b/target/riscv/insn_trans/trans_rvv.inc.c | ||
54 | @@ -XXX,XX +XXX,XX @@ GEN_OPFV_WIDEN_TRANS(vfwcvt_x_f_v) | ||
55 | GEN_OPFV_WIDEN_TRANS(vfwcvt_f_xu_v) | ||
56 | GEN_OPFV_WIDEN_TRANS(vfwcvt_f_x_v) | ||
57 | GEN_OPFV_WIDEN_TRANS(vfwcvt_f_f_v) | ||
58 | + | ||
59 | +/* Narrowing Floating-Point/Integer Type-Convert Instructions */ | ||
60 | + | ||
61 | +/* | ||
62 | + * If the current SEW does not correspond to a supported IEEE floating-point | ||
63 | + * type, an illegal instruction exception is raised | ||
64 | + */ | ||
65 | +static bool opfv_narrow_check(DisasContext *s, arg_rmr *a) | ||
66 | +{ | ||
67 | + return (vext_check_isa_ill(s) && | ||
68 | + vext_check_overlap_mask(s, a->rd, a->vm, false) && | ||
69 | + vext_check_reg(s, a->rd, false) && | ||
70 | + vext_check_reg(s, a->rs2, true) && | ||
71 | + vext_check_overlap_group(a->rd, 1 << s->lmul, a->rs2, | ||
72 | + 2 << s->lmul) && | ||
73 | + (s->lmul < 0x3) && (s->sew < 0x3) && (s->sew != 0)); | ||
74 | +} | ||
75 | + | ||
76 | +#define GEN_OPFV_NARROW_TRANS(NAME) \ | ||
77 | +static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ | ||
78 | +{ \ | ||
79 | + if (opfv_narrow_check(s, a)) { \ | ||
80 | + uint32_t data = 0; \ | ||
81 | + static gen_helper_gvec_3_ptr * const fns[2] = { \ | ||
82 | + gen_helper_##NAME##_h, \ | ||
83 | + gen_helper_##NAME##_w, \ | ||
84 | + }; \ | ||
85 | + TCGLabel *over = gen_new_label(); \ | ||
86 | + gen_set_rm(s, 7); \ | ||
87 | + tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ | ||
88 | + \ | ||
89 | + data = FIELD_DP32(data, VDATA, MLEN, s->mlen); \ | ||
90 | + data = FIELD_DP32(data, VDATA, VM, a->vm); \ | ||
91 | + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ | ||
92 | + tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ | ||
93 | + vreg_ofs(s, a->rs2), cpu_env, 0, \ | ||
94 | + s->vlen / 8, data, fns[s->sew - 1]); \ | ||
95 | + gen_set_label(over); \ | ||
96 | + return true; \ | ||
97 | + } \ | ||
98 | + return false; \ | ||
99 | +} | ||
100 | + | ||
101 | +GEN_OPFV_NARROW_TRANS(vfncvt_xu_f_v) | ||
102 | +GEN_OPFV_NARROW_TRANS(vfncvt_x_f_v) | ||
103 | +GEN_OPFV_NARROW_TRANS(vfncvt_f_xu_v) | ||
104 | +GEN_OPFV_NARROW_TRANS(vfncvt_f_x_v) | ||
105 | +GEN_OPFV_NARROW_TRANS(vfncvt_f_f_v) | ||
106 | diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c | ||
107 | index XXXXXXX..XXXXXXX 100644 | ||
108 | --- a/target/riscv/vector_helper.c | ||
109 | +++ b/target/riscv/vector_helper.c | ||
110 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16) | ||
111 | RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64) | ||
112 | GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 2, 4, clearl) | ||
113 | GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 4, 8, clearq) | ||
114 | + | ||
115 | +/* Narrowing Floating-Point/Integer Type-Convert Instructions */ | ||
116 | +/* (TD, T2, TX2) */ | ||
117 | +#define NOP_UU_H uint16_t, uint32_t, uint32_t | ||
118 | +#define NOP_UU_W uint32_t, uint64_t, uint64_t | ||
119 | +/* vfncvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */ | ||
120 | +RVVCALL(OPFVV1, vfncvt_xu_f_v_h, NOP_UU_H, H2, H4, float32_to_uint16) | ||
121 | +RVVCALL(OPFVV1, vfncvt_xu_f_v_w, NOP_UU_W, H4, H8, float64_to_uint32) | ||
122 | +GEN_VEXT_V_ENV(vfncvt_xu_f_v_h, 2, 2, clearh) | ||
123 | +GEN_VEXT_V_ENV(vfncvt_xu_f_v_w, 4, 4, clearl) | ||
124 | + | ||
125 | +/* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */ | ||
126 | +RVVCALL(OPFVV1, vfncvt_x_f_v_h, NOP_UU_H, H2, H4, float32_to_int16) | ||
127 | +RVVCALL(OPFVV1, vfncvt_x_f_v_w, NOP_UU_W, H4, H8, float64_to_int32) | ||
128 | +GEN_VEXT_V_ENV(vfncvt_x_f_v_h, 2, 2, clearh) | ||
129 | +GEN_VEXT_V_ENV(vfncvt_x_f_v_w, 4, 4, clearl) | ||
130 | + | ||
131 | +/* vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float */ | ||
132 | +RVVCALL(OPFVV1, vfncvt_f_xu_v_h, NOP_UU_H, H2, H4, uint32_to_float16) | ||
133 | +RVVCALL(OPFVV1, vfncvt_f_xu_v_w, NOP_UU_W, H4, H8, uint64_to_float32) | ||
134 | +GEN_VEXT_V_ENV(vfncvt_f_xu_v_h, 2, 2, clearh) | ||
135 | +GEN_VEXT_V_ENV(vfncvt_f_xu_v_w, 4, 4, clearl) | ||
136 | + | ||
137 | +/* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */ | ||
138 | +RVVCALL(OPFVV1, vfncvt_f_x_v_h, NOP_UU_H, H2, H4, int32_to_float16) | ||
139 | +RVVCALL(OPFVV1, vfncvt_f_x_v_w, NOP_UU_W, H4, H8, int64_to_float32) | ||
140 | +GEN_VEXT_V_ENV(vfncvt_f_x_v_h, 2, 2, clearh) | ||
141 | +GEN_VEXT_V_ENV(vfncvt_f_x_v_w, 4, 4, clearl) | ||
142 | + | ||
143 | +/* vfncvt.f.f.v vd, vs2, vm # Convert double float to single-width float. */ | ||
144 | +static uint16_t vfncvtffv16(uint32_t a, float_status *s) | ||
145 | +{ | ||
146 | + return float32_to_float16(a, true, s); | ||
147 | +} | ||
148 | + | ||
149 | +RVVCALL(OPFVV1, vfncvt_f_f_v_h, NOP_UU_H, H2, H4, vfncvtffv16) | ||
150 | +RVVCALL(OPFVV1, vfncvt_f_f_v_w, NOP_UU_W, H4, H8, float64_to_float32) | ||
151 | +GEN_VEXT_V_ENV(vfncvt_f_f_v_h, 2, 2, clearh) | ||
152 | +GEN_VEXT_V_ENV(vfncvt_f_f_v_w, 4, 4, clearl) | ||
153 | -- | 42 | -- |
154 | 2.27.0 | 43 | 2.41.0 |
155 | |||
156 | diff view generated by jsdifflib |
1 | From: LIU Zhiwei <zhiwei_liu@c-sky.com> | 1 | From: Jason Chien <jason.chien@sifive.com> |
---|---|---|---|
2 | 2 | ||
3 | Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> | 3 | The variables whose values are given by cpu_riscv_read_rtc() should be named |
4 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 4 | "rtc". The variables whose values are given by cpu_riscv_read_rtc_raw() |
5 | should be named "rtc_r". | ||
6 | |||
7 | Signed-off-by: Jason Chien <jason.chien@sifive.com> | ||
5 | Reviewed-by: Alistair Francis <alistair.francis@wdc.com> | 8 | Reviewed-by: Alistair Francis <alistair.francis@wdc.com> |
6 | Message-id: 20200623215920.2594-44-zhiwei_liu@c-sky.com | 9 | Message-ID: <20230728082502.26439-2-jason.chien@sifive.com> |
7 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | 10 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> |
8 | --- | 11 | --- |
9 | target/riscv/helper.h | 11 ++++++ | 12 | hw/intc/riscv_aclint.c | 6 +++--- |
10 | target/riscv/insn32.decode | 5 +++ | 13 | 1 file changed, 3 insertions(+), 3 deletions(-) |
11 | target/riscv/insn_trans/trans_rvv.inc.c | 48 +++++++++++++++++++++++++ | ||
12 | target/riscv/vector_helper.c | 42 ++++++++++++++++++++++ | ||
13 | 4 files changed, 106 insertions(+) | ||
14 | 14 | ||
15 | diff --git a/target/riscv/helper.h b/target/riscv/helper.h | 15 | diff --git a/hw/intc/riscv_aclint.c b/hw/intc/riscv_aclint.c |
16 | index XXXXXXX..XXXXXXX 100644 | 16 | index XXXXXXX..XXXXXXX 100644 |
17 | --- a/target/riscv/helper.h | 17 | --- a/hw/intc/riscv_aclint.c |
18 | +++ b/target/riscv/helper.h | 18 | +++ b/hw/intc/riscv_aclint.c |
19 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_5(vfcvt_f_xu_v_d, void, ptr, ptr, ptr, env, i32) | 19 | @@ -XXX,XX +XXX,XX @@ static void riscv_aclint_mtimer_write_timecmp(RISCVAclintMTimerState *mtimer, |
20 | DEF_HELPER_5(vfcvt_f_x_v_h, void, ptr, ptr, ptr, env, i32) | 20 | uint64_t next; |
21 | DEF_HELPER_5(vfcvt_f_x_v_w, void, ptr, ptr, ptr, env, i32) | 21 | uint64_t diff; |
22 | DEF_HELPER_5(vfcvt_f_x_v_d, void, ptr, ptr, ptr, env, i32) | 22 | |
23 | + | 23 | - uint64_t rtc_r = cpu_riscv_read_rtc(mtimer); |
24 | +DEF_HELPER_5(vfwcvt_xu_f_v_h, void, ptr, ptr, ptr, env, i32) | 24 | + uint64_t rtc = cpu_riscv_read_rtc(mtimer); |
25 | +DEF_HELPER_5(vfwcvt_xu_f_v_w, void, ptr, ptr, ptr, env, i32) | 25 | |
26 | +DEF_HELPER_5(vfwcvt_x_f_v_h, void, ptr, ptr, ptr, env, i32) | 26 | /* Compute the relative hartid w.r.t the socket */ |
27 | +DEF_HELPER_5(vfwcvt_x_f_v_w, void, ptr, ptr, ptr, env, i32) | 27 | hartid = hartid - mtimer->hartid_base; |
28 | +DEF_HELPER_5(vfwcvt_f_xu_v_h, void, ptr, ptr, ptr, env, i32) | 28 | |
29 | +DEF_HELPER_5(vfwcvt_f_xu_v_w, void, ptr, ptr, ptr, env, i32) | 29 | mtimer->timecmp[hartid] = value; |
30 | +DEF_HELPER_5(vfwcvt_f_x_v_h, void, ptr, ptr, ptr, env, i32) | 30 | - if (mtimer->timecmp[hartid] <= rtc_r) { |
31 | +DEF_HELPER_5(vfwcvt_f_x_v_w, void, ptr, ptr, ptr, env, i32) | 31 | + if (mtimer->timecmp[hartid] <= rtc) { |
32 | +DEF_HELPER_5(vfwcvt_f_f_v_h, void, ptr, ptr, ptr, env, i32) | 32 | /* |
33 | +DEF_HELPER_5(vfwcvt_f_f_v_w, void, ptr, ptr, ptr, env, i32) | 33 | * If we're setting an MTIMECMP value in the "past", |
34 | diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode | 34 | * immediately raise the timer interrupt |
35 | index XXXXXXX..XXXXXXX 100644 | 35 | @@ -XXX,XX +XXX,XX @@ static void riscv_aclint_mtimer_write_timecmp(RISCVAclintMTimerState *mtimer, |
36 | --- a/target/riscv/insn32.decode | 36 | |
37 | +++ b/target/riscv/insn32.decode | 37 | /* otherwise, set up the future timer interrupt */ |
38 | @@ -XXX,XX +XXX,XX @@ vfcvt_xu_f_v 100010 . ..... 00000 001 ..... 1010111 @r2_vm | 38 | qemu_irq_lower(mtimer->timer_irqs[hartid]); |
39 | vfcvt_x_f_v 100010 . ..... 00001 001 ..... 1010111 @r2_vm | 39 | - diff = mtimer->timecmp[hartid] - rtc_r; |
40 | vfcvt_f_xu_v 100010 . ..... 00010 001 ..... 1010111 @r2_vm | 40 | + diff = mtimer->timecmp[hartid] - rtc; |
41 | vfcvt_f_x_v 100010 . ..... 00011 001 ..... 1010111 @r2_vm | 41 | /* back to ns (note args switched in muldiv64) */ |
42 | +vfwcvt_xu_f_v 100010 . ..... 01000 001 ..... 1010111 @r2_vm | 42 | uint64_t ns_diff = muldiv64(diff, NANOSECONDS_PER_SECOND, timebase_freq); |
43 | +vfwcvt_x_f_v 100010 . ..... 01001 001 ..... 1010111 @r2_vm | 43 | |
44 | +vfwcvt_f_xu_v 100010 . ..... 01010 001 ..... 1010111 @r2_vm | ||
45 | +vfwcvt_f_x_v 100010 . ..... 01011 001 ..... 1010111 @r2_vm | ||
46 | +vfwcvt_f_f_v 100010 . ..... 01100 001 ..... 1010111 @r2_vm | ||
47 | |||
48 | vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm | ||
49 | vsetvl 1000000 ..... ..... 111 ..... 1010111 @r | ||
50 | diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c | ||
51 | index XXXXXXX..XXXXXXX 100644 | ||
52 | --- a/target/riscv/insn_trans/trans_rvv.inc.c | ||
53 | +++ b/target/riscv/insn_trans/trans_rvv.inc.c | ||
54 | @@ -XXX,XX +XXX,XX @@ GEN_OPFV_TRANS(vfcvt_xu_f_v, opfv_check) | ||
55 | GEN_OPFV_TRANS(vfcvt_x_f_v, opfv_check) | ||
56 | GEN_OPFV_TRANS(vfcvt_f_xu_v, opfv_check) | ||
57 | GEN_OPFV_TRANS(vfcvt_f_x_v, opfv_check) | ||
58 | + | ||
59 | +/* Widening Floating-Point/Integer Type-Convert Instructions */ | ||
60 | + | ||
61 | +/* | ||
62 | + * If the current SEW does not correspond to a supported IEEE floating-point | ||
63 | + * type, an illegal instruction exception is raised | ||
64 | + */ | ||
65 | +static bool opfv_widen_check(DisasContext *s, arg_rmr *a) | ||
66 | +{ | ||
67 | + return (vext_check_isa_ill(s) && | ||
68 | + vext_check_overlap_mask(s, a->rd, a->vm, true) && | ||
69 | + vext_check_reg(s, a->rd, true) && | ||
70 | + vext_check_reg(s, a->rs2, false) && | ||
71 | + vext_check_overlap_group(a->rd, 2 << s->lmul, a->rs2, | ||
72 | + 1 << s->lmul) && | ||
73 | + (s->lmul < 0x3) && (s->sew < 0x3) && (s->sew != 0)); | ||
74 | +} | ||
75 | + | ||
76 | +#define GEN_OPFV_WIDEN_TRANS(NAME) \ | ||
77 | +static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ | ||
78 | +{ \ | ||
79 | + if (opfv_widen_check(s, a)) { \ | ||
80 | + uint32_t data = 0; \ | ||
81 | + static gen_helper_gvec_3_ptr * const fns[2] = { \ | ||
82 | + gen_helper_##NAME##_h, \ | ||
83 | + gen_helper_##NAME##_w, \ | ||
84 | + }; \ | ||
85 | + TCGLabel *over = gen_new_label(); \ | ||
86 | + gen_set_rm(s, 7); \ | ||
87 | + tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ | ||
88 | + \ | ||
89 | + data = FIELD_DP32(data, VDATA, MLEN, s->mlen); \ | ||
90 | + data = FIELD_DP32(data, VDATA, VM, a->vm); \ | ||
91 | + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ | ||
92 | + tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ | ||
93 | + vreg_ofs(s, a->rs2), cpu_env, 0, \ | ||
94 | + s->vlen / 8, data, fns[s->sew - 1]); \ | ||
95 | + gen_set_label(over); \ | ||
96 | + return true; \ | ||
97 | + } \ | ||
98 | + return false; \ | ||
99 | +} | ||
100 | + | ||
101 | +GEN_OPFV_WIDEN_TRANS(vfwcvt_xu_f_v) | ||
102 | +GEN_OPFV_WIDEN_TRANS(vfwcvt_x_f_v) | ||
103 | +GEN_OPFV_WIDEN_TRANS(vfwcvt_f_xu_v) | ||
104 | +GEN_OPFV_WIDEN_TRANS(vfwcvt_f_x_v) | ||
105 | +GEN_OPFV_WIDEN_TRANS(vfwcvt_f_f_v) | ||
106 | diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c | ||
107 | index XXXXXXX..XXXXXXX 100644 | ||
108 | --- a/target/riscv/vector_helper.c | ||
109 | +++ b/target/riscv/vector_helper.c | ||
110 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64) | ||
111 | GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2, 2, clearh) | ||
112 | GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4, 4, clearl) | ||
113 | GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8, 8, clearq) | ||
114 | + | ||
115 | +/* Widening Floating-Point/Integer Type-Convert Instructions */ | ||
116 | +/* (TD, T2, TX2) */ | ||
117 | +#define WOP_UU_H uint32_t, uint16_t, uint16_t | ||
118 | +#define WOP_UU_W uint64_t, uint32_t, uint32_t | ||
119 | +/* vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer.*/ | ||
120 | +RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32) | ||
121 | +RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64) | ||
122 | +GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 2, 4, clearl) | ||
123 | +GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 4, 8, clearq) | ||
124 | + | ||
125 | +/* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */ | ||
126 | +RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32) | ||
127 | +RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64) | ||
128 | +GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 2, 4, clearl) | ||
129 | +GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 4, 8, clearq) | ||
130 | + | ||
131 | +/* vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float */ | ||
132 | +RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32) | ||
133 | +RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64) | ||
134 | +GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 2, 4, clearl) | ||
135 | +GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 4, 8, clearq) | ||
136 | + | ||
137 | +/* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */ | ||
138 | +RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32) | ||
139 | +RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64) | ||
140 | +GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 2, 4, clearl) | ||
141 | +GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 4, 8, clearq) | ||
142 | + | ||
143 | +/* | ||
144 | + * vfwcvt.f.f.v vd, vs2, vm # | ||
145 | + * Convert single-width float to double-width float. | ||
146 | + */ | ||
147 | +static uint32_t vfwcvtffv16(uint16_t a, float_status *s) | ||
148 | +{ | ||
149 | + return float16_to_float32(a, true, s); | ||
150 | +} | ||
151 | + | ||
152 | +RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16) | ||
153 | +RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64) | ||
154 | +GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 2, 4, clearl) | ||
155 | +GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 4, 8, clearq) | ||
156 | -- | 44 | -- |
157 | 2.27.0 | 45 | 2.41.0 |
158 | |||
159 | diff view generated by jsdifflib |
1 | From: LIU Zhiwei <zhiwei_liu@c-sky.com> | 1 | From: LIU Zhiwei <zhiwei_liu@linux.alibaba.com> |
---|---|---|---|
2 | 2 | ||
3 | Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> | 3 | We should not use types dependent on host arch for target_ucontext. |
4 | Reviewed-by: Alistair Francis <alistair.francis@wdc.com> | 4 | This bug was found when running rv32 applications. |
5 | |||
6 | Signed-off-by: LIU Zhiwei <zhiwei_liu@linux.alibaba.com> | ||
5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 7 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
6 | Message-id: 20200623215920.2594-42-zhiwei_liu@c-sky.com | 8 | Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> |
9 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
10 | Message-ID: <20230811055438.1945-1-zhiwei_liu@linux.alibaba.com> | ||
7 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | 11 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> |
8 | --- | 12 | --- |
9 | target/riscv/helper.h | 4 +++ | 13 | linux-user/riscv/signal.c | 4 ++-- |
10 | target/riscv/insn32.decode | 2 ++ | 14 | 1 file changed, 2 insertions(+), 2 deletions(-) |
11 | target/riscv/insn_trans/trans_rvv.inc.c | 38 +++++++++++++++++++++++++ | ||
12 | target/riscv/vector_helper.c | 24 ++++++++++++++++ | ||
13 | 4 files changed, 68 insertions(+) | ||
14 | 15 | ||
15 | diff --git a/target/riscv/helper.h b/target/riscv/helper.h | 16 | diff --git a/linux-user/riscv/signal.c b/linux-user/riscv/signal.c |
16 | index XXXXXXX..XXXXXXX 100644 | 17 | index XXXXXXX..XXXXXXX 100644 |
17 | --- a/target/riscv/helper.h | 18 | --- a/linux-user/riscv/signal.c |
18 | +++ b/target/riscv/helper.h | 19 | +++ b/linux-user/riscv/signal.c |
19 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vmford_vf_d, void, ptr, ptr, i64, ptr, env, i32) | 20 | @@ -XXX,XX +XXX,XX @@ struct target_sigcontext { |
20 | DEF_HELPER_5(vfclass_v_h, void, ptr, ptr, ptr, env, i32) | 21 | }; /* cf. riscv-linux:arch/riscv/include/uapi/asm/ptrace.h */ |
21 | DEF_HELPER_5(vfclass_v_w, void, ptr, ptr, ptr, env, i32) | 22 | |
22 | DEF_HELPER_5(vfclass_v_d, void, ptr, ptr, ptr, env, i32) | 23 | struct target_ucontext { |
23 | + | 24 | - unsigned long uc_flags; |
24 | +DEF_HELPER_6(vfmerge_vfm_h, void, ptr, ptr, i64, ptr, env, i32) | 25 | - struct target_ucontext *uc_link; |
25 | +DEF_HELPER_6(vfmerge_vfm_w, void, ptr, ptr, i64, ptr, env, i32) | 26 | + abi_ulong uc_flags; |
26 | +DEF_HELPER_6(vfmerge_vfm_d, void, ptr, ptr, i64, ptr, env, i32) | 27 | + abi_ptr uc_link; |
27 | diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode | 28 | target_stack_t uc_stack; |
28 | index XXXXXXX..XXXXXXX 100644 | 29 | target_sigset_t uc_sigmask; |
29 | --- a/target/riscv/insn32.decode | 30 | uint8_t __unused[1024 / 8 - sizeof(target_sigset_t)]; |
30 | +++ b/target/riscv/insn32.decode | ||
31 | @@ -XXX,XX +XXX,XX @@ vmfge_vf 011111 . ..... ..... 101 ..... 1010111 @r_vm | ||
32 | vmford_vv 011010 . ..... ..... 001 ..... 1010111 @r_vm | ||
33 | vmford_vf 011010 . ..... ..... 101 ..... 1010111 @r_vm | ||
34 | vfclass_v 100011 . ..... 10000 001 ..... 1010111 @r2_vm | ||
35 | +vfmerge_vfm 010111 0 ..... ..... 101 ..... 1010111 @r_vm_0 | ||
36 | +vfmv_v_f 010111 1 00000 ..... 101 ..... 1010111 @r2 | ||
37 | |||
38 | vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm | ||
39 | vsetvl 1000000 ..... ..... 111 ..... 1010111 @r | ||
40 | diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c | ||
41 | index XXXXXXX..XXXXXXX 100644 | ||
42 | --- a/target/riscv/insn_trans/trans_rvv.inc.c | ||
43 | +++ b/target/riscv/insn_trans/trans_rvv.inc.c | ||
44 | @@ -XXX,XX +XXX,XX @@ GEN_OPFVF_TRANS(vmford_vf, opfvf_cmp_check) | ||
45 | |||
46 | /* Vector Floating-Point Classify Instruction */ | ||
47 | GEN_OPFV_TRANS(vfclass_v, opfv_check) | ||
48 | + | ||
49 | +/* Vector Floating-Point Merge Instruction */ | ||
50 | +GEN_OPFVF_TRANS(vfmerge_vfm, opfvf_check) | ||
51 | + | ||
52 | +static bool trans_vfmv_v_f(DisasContext *s, arg_vfmv_v_f *a) | ||
53 | +{ | ||
54 | + if (vext_check_isa_ill(s) && | ||
55 | + vext_check_reg(s, a->rd, false) && | ||
56 | + (s->sew != 0)) { | ||
57 | + | ||
58 | + if (s->vl_eq_vlmax) { | ||
59 | + tcg_gen_gvec_dup_i64(s->sew, vreg_ofs(s, a->rd), | ||
60 | + MAXSZ(s), MAXSZ(s), cpu_fpr[a->rs1]); | ||
61 | + } else { | ||
62 | + TCGv_ptr dest; | ||
63 | + TCGv_i32 desc; | ||
64 | + uint32_t data = FIELD_DP32(0, VDATA, LMUL, s->lmul); | ||
65 | + static gen_helper_vmv_vx * const fns[3] = { | ||
66 | + gen_helper_vmv_v_x_h, | ||
67 | + gen_helper_vmv_v_x_w, | ||
68 | + gen_helper_vmv_v_x_d, | ||
69 | + }; | ||
70 | + TCGLabel *over = gen_new_label(); | ||
71 | + tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); | ||
72 | + | ||
73 | + dest = tcg_temp_new_ptr(); | ||
74 | + desc = tcg_const_i32(simd_desc(0, s->vlen / 8, data)); | ||
75 | + tcg_gen_addi_ptr(dest, cpu_env, vreg_ofs(s, a->rd)); | ||
76 | + fns[s->sew - 1](dest, cpu_fpr[a->rs1], cpu_env, desc); | ||
77 | + | ||
78 | + tcg_temp_free_ptr(dest); | ||
79 | + tcg_temp_free_i32(desc); | ||
80 | + gen_set_label(over); | ||
81 | + } | ||
82 | + return true; | ||
83 | + } | ||
84 | + return false; | ||
85 | +} | ||
86 | diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c | ||
87 | index XXXXXXX..XXXXXXX 100644 | ||
88 | --- a/target/riscv/vector_helper.c | ||
89 | +++ b/target/riscv/vector_helper.c | ||
90 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d) | ||
91 | GEN_VEXT_V(vfclass_v_h, 2, 2, clearh) | ||
92 | GEN_VEXT_V(vfclass_v_w, 4, 4, clearl) | ||
93 | GEN_VEXT_V(vfclass_v_d, 8, 8, clearq) | ||
94 | + | ||
95 | +/* Vector Floating-Point Merge Instruction */ | ||
96 | +#define GEN_VFMERGE_VF(NAME, ETYPE, H, CLEAR_FN) \ | ||
97 | +void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ | ||
98 | + CPURISCVState *env, uint32_t desc) \ | ||
99 | +{ \ | ||
100 | + uint32_t mlen = vext_mlen(desc); \ | ||
101 | + uint32_t vm = vext_vm(desc); \ | ||
102 | + uint32_t vl = env->vl; \ | ||
103 | + uint32_t esz = sizeof(ETYPE); \ | ||
104 | + uint32_t vlmax = vext_maxsz(desc) / esz; \ | ||
105 | + uint32_t i; \ | ||
106 | + \ | ||
107 | + for (i = 0; i < vl; i++) { \ | ||
108 | + ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ | ||
109 | + *((ETYPE *)vd + H(i)) \ | ||
110 | + = (!vm && !vext_elem_mask(v0, mlen, i) ? s2 : s1); \ | ||
111 | + } \ | ||
112 | + CLEAR_FN(vd, vl, vl * esz, vlmax * esz); \ | ||
113 | +} | ||
114 | + | ||
115 | +GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2, clearh) | ||
116 | +GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4, clearl) | ||
117 | +GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8, clearq) | ||
118 | -- | 31 | -- |
119 | 2.27.0 | 32 | 2.41.0 |
120 | 33 | ||
121 | 34 | diff view generated by jsdifflib |
1 | From: Jessica Clarke <jrtc27@jrtc27.com> | 1 | From: Yong-Xuan Wang <yongxuan.wang@sifive.com> |
---|---|---|---|
2 | 2 | ||
3 | The source priorities can be used to order sources with respect to other | 3 | In this patch, we create the APLIC and IMSIC FDT helper functions and |
4 | sources, not just as a way to enable/disable them based off a threshold. | 4 | remove M mode AIA devices when using KVM acceleration. |
5 | We must therefore always claim the highest-priority source, rather than | ||
6 | the first source we find. | ||
7 | 5 | ||
8 | Signed-off-by: Jessica Clarke <jrtc27@jrtc27.com> | 6 | Signed-off-by: Yong-Xuan Wang <yongxuan.wang@sifive.com> |
9 | Reviewed-by: Alistair Francis <alistair.francis@wdc.com> | 7 | Reviewed-by: Jim Shu <jim.shu@sifive.com> |
10 | Message-Id: <20200618202343.20455-1-jrtc27@jrtc27.com> | 8 | Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> |
9 | Reviewed-by: Andrew Jones <ajones@ventanamicro.com> | ||
10 | Message-ID: <20230727102439.22554-2-yongxuan.wang@sifive.com> | ||
11 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | 11 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> |
12 | --- | 12 | --- |
13 | hw/riscv/sifive_plic.c | 17 ++++++++++++----- | 13 | hw/riscv/virt.c | 290 +++++++++++++++++++++++------------------------- |
14 | 1 file changed, 12 insertions(+), 5 deletions(-) | 14 | 1 file changed, 137 insertions(+), 153 deletions(-) |
15 | 15 | ||
16 | diff --git a/hw/riscv/sifive_plic.c b/hw/riscv/sifive_plic.c | 16 | diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c |
17 | index XXXXXXX..XXXXXXX 100644 | 17 | index XXXXXXX..XXXXXXX 100644 |
18 | --- a/hw/riscv/sifive_plic.c | 18 | --- a/hw/riscv/virt.c |
19 | +++ b/hw/riscv/sifive_plic.c | 19 | +++ b/hw/riscv/virt.c |
20 | @@ -XXX,XX +XXX,XX @@ static void sifive_plic_update(SiFivePLICState *plic) | 20 | @@ -XXX,XX +XXX,XX @@ static uint32_t imsic_num_bits(uint32_t count) |
21 | static uint32_t sifive_plic_claim(SiFivePLICState *plic, uint32_t addrid) | 21 | return ret; |
22 | } | ||
23 | |||
24 | -static void create_fdt_imsic(RISCVVirtState *s, const MemMapEntry *memmap, | ||
25 | - uint32_t *phandle, uint32_t *intc_phandles, | ||
26 | - uint32_t *msi_m_phandle, uint32_t *msi_s_phandle) | ||
27 | +static void create_fdt_one_imsic(RISCVVirtState *s, hwaddr base_addr, | ||
28 | + uint32_t *intc_phandles, uint32_t msi_phandle, | ||
29 | + bool m_mode, uint32_t imsic_guest_bits) | ||
22 | { | 30 | { |
23 | int i, j; | 31 | int cpu, socket; |
24 | + uint32_t max_irq = 0; | 32 | char *imsic_name; |
25 | + uint32_t max_prio = plic->target_priority[addrid]; | 33 | MachineState *ms = MACHINE(s); |
26 | + | 34 | int socket_count = riscv_socket_count(ms); |
27 | for (i = 0; i < plic->bitfield_words; i++) { | 35 | - uint32_t imsic_max_hart_per_socket, imsic_guest_bits; |
28 | uint32_t pending_enabled_not_claimed = | 36 | + uint32_t imsic_max_hart_per_socket; |
29 | (plic->pending[i] & ~plic->claimed[i]) & | 37 | uint32_t *imsic_cells, *imsic_regs, imsic_addr, imsic_size; |
30 | @@ -XXX,XX +XXX,XX @@ static uint32_t sifive_plic_claim(SiFivePLICState *plic, uint32_t addrid) | 38 | |
31 | int irq = (i << 5) + j; | 39 | - *msi_m_phandle = (*phandle)++; |
32 | uint32_t prio = plic->source_priority[irq]; | 40 | - *msi_s_phandle = (*phandle)++; |
33 | int enabled = pending_enabled_not_claimed & (1 << j); | 41 | imsic_cells = g_new0(uint32_t, ms->smp.cpus * 2); |
34 | - if (enabled && prio > plic->target_priority[addrid]) { | 42 | imsic_regs = g_new0(uint32_t, socket_count * 4); |
35 | - sifive_plic_set_pending(plic, irq, false); | 43 | |
36 | - sifive_plic_set_claimed(plic, irq, true); | 44 | - /* M-level IMSIC node */ |
37 | - return irq; | 45 | for (cpu = 0; cpu < ms->smp.cpus; cpu++) { |
38 | + if (enabled && prio > max_prio) { | 46 | imsic_cells[cpu * 2 + 0] = cpu_to_be32(intc_phandles[cpu]); |
39 | + max_irq = irq; | 47 | - imsic_cells[cpu * 2 + 1] = cpu_to_be32(IRQ_M_EXT); |
40 | + max_prio = prio; | 48 | + imsic_cells[cpu * 2 + 1] = cpu_to_be32(m_mode ? IRQ_M_EXT : IRQ_S_EXT); |
41 | } | 49 | } |
50 | - imsic_max_hart_per_socket = 0; | ||
51 | - for (socket = 0; socket < socket_count; socket++) { | ||
52 | - imsic_addr = memmap[VIRT_IMSIC_M].base + | ||
53 | - socket * VIRT_IMSIC_GROUP_MAX_SIZE; | ||
54 | - imsic_size = IMSIC_HART_SIZE(0) * s->soc[socket].num_harts; | ||
55 | - imsic_regs[socket * 4 + 0] = 0; | ||
56 | - imsic_regs[socket * 4 + 1] = cpu_to_be32(imsic_addr); | ||
57 | - imsic_regs[socket * 4 + 2] = 0; | ||
58 | - imsic_regs[socket * 4 + 3] = cpu_to_be32(imsic_size); | ||
59 | - if (imsic_max_hart_per_socket < s->soc[socket].num_harts) { | ||
60 | - imsic_max_hart_per_socket = s->soc[socket].num_harts; | ||
61 | - } | ||
62 | - } | ||
63 | - imsic_name = g_strdup_printf("/soc/imsics@%lx", | ||
64 | - (unsigned long)memmap[VIRT_IMSIC_M].base); | ||
65 | - qemu_fdt_add_subnode(ms->fdt, imsic_name); | ||
66 | - qemu_fdt_setprop_string(ms->fdt, imsic_name, "compatible", | ||
67 | - "riscv,imsics"); | ||
68 | - qemu_fdt_setprop_cell(ms->fdt, imsic_name, "#interrupt-cells", | ||
69 | - FDT_IMSIC_INT_CELLS); | ||
70 | - qemu_fdt_setprop(ms->fdt, imsic_name, "interrupt-controller", | ||
71 | - NULL, 0); | ||
72 | - qemu_fdt_setprop(ms->fdt, imsic_name, "msi-controller", | ||
73 | - NULL, 0); | ||
74 | - qemu_fdt_setprop(ms->fdt, imsic_name, "interrupts-extended", | ||
75 | - imsic_cells, ms->smp.cpus * sizeof(uint32_t) * 2); | ||
76 | - qemu_fdt_setprop(ms->fdt, imsic_name, "reg", imsic_regs, | ||
77 | - socket_count * sizeof(uint32_t) * 4); | ||
78 | - qemu_fdt_setprop_cell(ms->fdt, imsic_name, "riscv,num-ids", | ||
79 | - VIRT_IRQCHIP_NUM_MSIS); | ||
80 | - if (socket_count > 1) { | ||
81 | - qemu_fdt_setprop_cell(ms->fdt, imsic_name, "riscv,hart-index-bits", | ||
82 | - imsic_num_bits(imsic_max_hart_per_socket)); | ||
83 | - qemu_fdt_setprop_cell(ms->fdt, imsic_name, "riscv,group-index-bits", | ||
84 | - imsic_num_bits(socket_count)); | ||
85 | - qemu_fdt_setprop_cell(ms->fdt, imsic_name, "riscv,group-index-shift", | ||
86 | - IMSIC_MMIO_GROUP_MIN_SHIFT); | ||
87 | - } | ||
88 | - qemu_fdt_setprop_cell(ms->fdt, imsic_name, "phandle", *msi_m_phandle); | ||
89 | - | ||
90 | - g_free(imsic_name); | ||
91 | |||
92 | - /* S-level IMSIC node */ | ||
93 | - for (cpu = 0; cpu < ms->smp.cpus; cpu++) { | ||
94 | - imsic_cells[cpu * 2 + 0] = cpu_to_be32(intc_phandles[cpu]); | ||
95 | - imsic_cells[cpu * 2 + 1] = cpu_to_be32(IRQ_S_EXT); | ||
96 | - } | ||
97 | - imsic_guest_bits = imsic_num_bits(s->aia_guests + 1); | ||
98 | imsic_max_hart_per_socket = 0; | ||
99 | for (socket = 0; socket < socket_count; socket++) { | ||
100 | - imsic_addr = memmap[VIRT_IMSIC_S].base + | ||
101 | - socket * VIRT_IMSIC_GROUP_MAX_SIZE; | ||
102 | + imsic_addr = base_addr + socket * VIRT_IMSIC_GROUP_MAX_SIZE; | ||
103 | imsic_size = IMSIC_HART_SIZE(imsic_guest_bits) * | ||
104 | s->soc[socket].num_harts; | ||
105 | imsic_regs[socket * 4 + 0] = 0; | ||
106 | @@ -XXX,XX +XXX,XX @@ static void create_fdt_imsic(RISCVVirtState *s, const MemMapEntry *memmap, | ||
107 | imsic_max_hart_per_socket = s->soc[socket].num_harts; | ||
42 | } | 108 | } |
43 | } | 109 | } |
44 | - return 0; | 110 | - imsic_name = g_strdup_printf("/soc/imsics@%lx", |
45 | + | 111 | - (unsigned long)memmap[VIRT_IMSIC_S].base); |
46 | + if (max_irq) { | 112 | + |
47 | + sifive_plic_set_pending(plic, max_irq, false); | 113 | + imsic_name = g_strdup_printf("/soc/imsics@%lx", (unsigned long)base_addr); |
48 | + sifive_plic_set_claimed(plic, max_irq, true); | 114 | qemu_fdt_add_subnode(ms->fdt, imsic_name); |
115 | - qemu_fdt_setprop_string(ms->fdt, imsic_name, "compatible", | ||
116 | - "riscv,imsics"); | ||
117 | + qemu_fdt_setprop_string(ms->fdt, imsic_name, "compatible", "riscv,imsics"); | ||
118 | qemu_fdt_setprop_cell(ms->fdt, imsic_name, "#interrupt-cells", | ||
119 | - FDT_IMSIC_INT_CELLS); | ||
120 | - qemu_fdt_setprop(ms->fdt, imsic_name, "interrupt-controller", | ||
121 | - NULL, 0); | ||
122 | - qemu_fdt_setprop(ms->fdt, imsic_name, "msi-controller", | ||
123 | - NULL, 0); | ||
124 | + FDT_IMSIC_INT_CELLS); | ||
125 | + qemu_fdt_setprop(ms->fdt, imsic_name, "interrupt-controller", NULL, 0); | ||
126 | + qemu_fdt_setprop(ms->fdt, imsic_name, "msi-controller", NULL, 0); | ||
127 | qemu_fdt_setprop(ms->fdt, imsic_name, "interrupts-extended", | ||
128 | - imsic_cells, ms->smp.cpus * sizeof(uint32_t) * 2); | ||
129 | + imsic_cells, ms->smp.cpus * sizeof(uint32_t) * 2); | ||
130 | qemu_fdt_setprop(ms->fdt, imsic_name, "reg", imsic_regs, | ||
131 | - socket_count * sizeof(uint32_t) * 4); | ||
132 | + socket_count * sizeof(uint32_t) * 4); | ||
133 | qemu_fdt_setprop_cell(ms->fdt, imsic_name, "riscv,num-ids", | ||
134 | - VIRT_IRQCHIP_NUM_MSIS); | ||
135 | + VIRT_IRQCHIP_NUM_MSIS); | ||
136 | + | ||
137 | if (imsic_guest_bits) { | ||
138 | qemu_fdt_setprop_cell(ms->fdt, imsic_name, "riscv,guest-index-bits", | ||
139 | - imsic_guest_bits); | ||
140 | + imsic_guest_bits); | ||
141 | } | ||
142 | + | ||
143 | if (socket_count > 1) { | ||
144 | qemu_fdt_setprop_cell(ms->fdt, imsic_name, "riscv,hart-index-bits", | ||
145 | - imsic_num_bits(imsic_max_hart_per_socket)); | ||
146 | + imsic_num_bits(imsic_max_hart_per_socket)); | ||
147 | qemu_fdt_setprop_cell(ms->fdt, imsic_name, "riscv,group-index-bits", | ||
148 | - imsic_num_bits(socket_count)); | ||
149 | + imsic_num_bits(socket_count)); | ||
150 | qemu_fdt_setprop_cell(ms->fdt, imsic_name, "riscv,group-index-shift", | ||
151 | - IMSIC_MMIO_GROUP_MIN_SHIFT); | ||
152 | + IMSIC_MMIO_GROUP_MIN_SHIFT); | ||
153 | } | ||
154 | - qemu_fdt_setprop_cell(ms->fdt, imsic_name, "phandle", *msi_s_phandle); | ||
155 | - g_free(imsic_name); | ||
156 | + qemu_fdt_setprop_cell(ms->fdt, imsic_name, "phandle", msi_phandle); | ||
157 | |||
158 | + g_free(imsic_name); | ||
159 | g_free(imsic_regs); | ||
160 | g_free(imsic_cells); | ||
161 | } | ||
162 | |||
163 | -static void create_fdt_socket_aplic(RISCVVirtState *s, | ||
164 | - const MemMapEntry *memmap, int socket, | ||
165 | - uint32_t msi_m_phandle, | ||
166 | - uint32_t msi_s_phandle, | ||
167 | - uint32_t *phandle, | ||
168 | - uint32_t *intc_phandles, | ||
169 | - uint32_t *aplic_phandles) | ||
170 | +static void create_fdt_imsic(RISCVVirtState *s, const MemMapEntry *memmap, | ||
171 | + uint32_t *phandle, uint32_t *intc_phandles, | ||
172 | + uint32_t *msi_m_phandle, uint32_t *msi_s_phandle) | ||
173 | +{ | ||
174 | + *msi_m_phandle = (*phandle)++; | ||
175 | + *msi_s_phandle = (*phandle)++; | ||
176 | + | ||
177 | + if (!kvm_enabled()) { | ||
178 | + /* M-level IMSIC node */ | ||
179 | + create_fdt_one_imsic(s, memmap[VIRT_IMSIC_M].base, intc_phandles, | ||
180 | + *msi_m_phandle, true, 0); | ||
49 | + } | 181 | + } |
50 | + return max_irq; | 182 | + |
183 | + /* S-level IMSIC node */ | ||
184 | + create_fdt_one_imsic(s, memmap[VIRT_IMSIC_S].base, intc_phandles, | ||
185 | + *msi_s_phandle, false, | ||
186 | + imsic_num_bits(s->aia_guests + 1)); | ||
187 | + | ||
188 | +} | ||
189 | + | ||
190 | +static void create_fdt_one_aplic(RISCVVirtState *s, int socket, | ||
191 | + unsigned long aplic_addr, uint32_t aplic_size, | ||
192 | + uint32_t msi_phandle, | ||
193 | + uint32_t *intc_phandles, | ||
194 | + uint32_t aplic_phandle, | ||
195 | + uint32_t aplic_child_phandle, | ||
196 | + bool m_mode) | ||
197 | { | ||
198 | int cpu; | ||
199 | char *aplic_name; | ||
200 | uint32_t *aplic_cells; | ||
201 | - unsigned long aplic_addr; | ||
202 | MachineState *ms = MACHINE(s); | ||
203 | - uint32_t aplic_m_phandle, aplic_s_phandle; | ||
204 | |||
205 | - aplic_m_phandle = (*phandle)++; | ||
206 | - aplic_s_phandle = (*phandle)++; | ||
207 | aplic_cells = g_new0(uint32_t, s->soc[socket].num_harts * 2); | ||
208 | |||
209 | - /* M-level APLIC node */ | ||
210 | for (cpu = 0; cpu < s->soc[socket].num_harts; cpu++) { | ||
211 | aplic_cells[cpu * 2 + 0] = cpu_to_be32(intc_phandles[cpu]); | ||
212 | - aplic_cells[cpu * 2 + 1] = cpu_to_be32(IRQ_M_EXT); | ||
213 | + aplic_cells[cpu * 2 + 1] = cpu_to_be32(m_mode ? IRQ_M_EXT : IRQ_S_EXT); | ||
214 | } | ||
215 | - aplic_addr = memmap[VIRT_APLIC_M].base + | ||
216 | - (memmap[VIRT_APLIC_M].size * socket); | ||
217 | + | ||
218 | aplic_name = g_strdup_printf("/soc/aplic@%lx", aplic_addr); | ||
219 | qemu_fdt_add_subnode(ms->fdt, aplic_name); | ||
220 | qemu_fdt_setprop_string(ms->fdt, aplic_name, "compatible", "riscv,aplic"); | ||
221 | qemu_fdt_setprop_cell(ms->fdt, aplic_name, | ||
222 | - "#interrupt-cells", FDT_APLIC_INT_CELLS); | ||
223 | + "#interrupt-cells", FDT_APLIC_INT_CELLS); | ||
224 | qemu_fdt_setprop(ms->fdt, aplic_name, "interrupt-controller", NULL, 0); | ||
225 | + | ||
226 | if (s->aia_type == VIRT_AIA_TYPE_APLIC) { | ||
227 | qemu_fdt_setprop(ms->fdt, aplic_name, "interrupts-extended", | ||
228 | - aplic_cells, s->soc[socket].num_harts * sizeof(uint32_t) * 2); | ||
229 | + aplic_cells, | ||
230 | + s->soc[socket].num_harts * sizeof(uint32_t) * 2); | ||
231 | } else { | ||
232 | - qemu_fdt_setprop_cell(ms->fdt, aplic_name, "msi-parent", | ||
233 | - msi_m_phandle); | ||
234 | + qemu_fdt_setprop_cell(ms->fdt, aplic_name, "msi-parent", msi_phandle); | ||
235 | } | ||
236 | + | ||
237 | qemu_fdt_setprop_cells(ms->fdt, aplic_name, "reg", | ||
238 | - 0x0, aplic_addr, 0x0, memmap[VIRT_APLIC_M].size); | ||
239 | + 0x0, aplic_addr, 0x0, aplic_size); | ||
240 | qemu_fdt_setprop_cell(ms->fdt, aplic_name, "riscv,num-sources", | ||
241 | - VIRT_IRQCHIP_NUM_SOURCES); | ||
242 | - qemu_fdt_setprop_cell(ms->fdt, aplic_name, "riscv,children", | ||
243 | - aplic_s_phandle); | ||
244 | - qemu_fdt_setprop_cells(ms->fdt, aplic_name, "riscv,delegate", | ||
245 | - aplic_s_phandle, 0x1, VIRT_IRQCHIP_NUM_SOURCES); | ||
246 | + VIRT_IRQCHIP_NUM_SOURCES); | ||
247 | + | ||
248 | + if (aplic_child_phandle) { | ||
249 | + qemu_fdt_setprop_cell(ms->fdt, aplic_name, "riscv,children", | ||
250 | + aplic_child_phandle); | ||
251 | + qemu_fdt_setprop_cells(ms->fdt, aplic_name, "riscv,delegate", | ||
252 | + aplic_child_phandle, 0x1, | ||
253 | + VIRT_IRQCHIP_NUM_SOURCES); | ||
254 | + } | ||
255 | + | ||
256 | riscv_socket_fdt_write_id(ms, aplic_name, socket); | ||
257 | - qemu_fdt_setprop_cell(ms->fdt, aplic_name, "phandle", aplic_m_phandle); | ||
258 | + qemu_fdt_setprop_cell(ms->fdt, aplic_name, "phandle", aplic_phandle); | ||
259 | + | ||
260 | g_free(aplic_name); | ||
261 | + g_free(aplic_cells); | ||
262 | +} | ||
263 | |||
264 | - /* S-level APLIC node */ | ||
265 | - for (cpu = 0; cpu < s->soc[socket].num_harts; cpu++) { | ||
266 | - aplic_cells[cpu * 2 + 0] = cpu_to_be32(intc_phandles[cpu]); | ||
267 | - aplic_cells[cpu * 2 + 1] = cpu_to_be32(IRQ_S_EXT); | ||
268 | +static void create_fdt_socket_aplic(RISCVVirtState *s, | ||
269 | + const MemMapEntry *memmap, int socket, | ||
270 | + uint32_t msi_m_phandle, | ||
271 | + uint32_t msi_s_phandle, | ||
272 | + uint32_t *phandle, | ||
273 | + uint32_t *intc_phandles, | ||
274 | + uint32_t *aplic_phandles) | ||
275 | +{ | ||
276 | + char *aplic_name; | ||
277 | + unsigned long aplic_addr; | ||
278 | + MachineState *ms = MACHINE(s); | ||
279 | + uint32_t aplic_m_phandle, aplic_s_phandle; | ||
280 | + | ||
281 | + aplic_m_phandle = (*phandle)++; | ||
282 | + aplic_s_phandle = (*phandle)++; | ||
283 | + | ||
284 | + if (!kvm_enabled()) { | ||
285 | + /* M-level APLIC node */ | ||
286 | + aplic_addr = memmap[VIRT_APLIC_M].base + | ||
287 | + (memmap[VIRT_APLIC_M].size * socket); | ||
288 | + create_fdt_one_aplic(s, socket, aplic_addr, memmap[VIRT_APLIC_M].size, | ||
289 | + msi_m_phandle, intc_phandles, | ||
290 | + aplic_m_phandle, aplic_s_phandle, | ||
291 | + true); | ||
292 | } | ||
293 | + | ||
294 | + /* S-level APLIC node */ | ||
295 | aplic_addr = memmap[VIRT_APLIC_S].base + | ||
296 | (memmap[VIRT_APLIC_S].size * socket); | ||
297 | + create_fdt_one_aplic(s, socket, aplic_addr, memmap[VIRT_APLIC_S].size, | ||
298 | + msi_s_phandle, intc_phandles, | ||
299 | + aplic_s_phandle, 0, | ||
300 | + false); | ||
301 | + | ||
302 | aplic_name = g_strdup_printf("/soc/aplic@%lx", aplic_addr); | ||
303 | - qemu_fdt_add_subnode(ms->fdt, aplic_name); | ||
304 | - qemu_fdt_setprop_string(ms->fdt, aplic_name, "compatible", "riscv,aplic"); | ||
305 | - qemu_fdt_setprop_cell(ms->fdt, aplic_name, | ||
306 | - "#interrupt-cells", FDT_APLIC_INT_CELLS); | ||
307 | - qemu_fdt_setprop(ms->fdt, aplic_name, "interrupt-controller", NULL, 0); | ||
308 | - if (s->aia_type == VIRT_AIA_TYPE_APLIC) { | ||
309 | - qemu_fdt_setprop(ms->fdt, aplic_name, "interrupts-extended", | ||
310 | - aplic_cells, s->soc[socket].num_harts * sizeof(uint32_t) * 2); | ||
311 | - } else { | ||
312 | - qemu_fdt_setprop_cell(ms->fdt, aplic_name, "msi-parent", | ||
313 | - msi_s_phandle); | ||
314 | - } | ||
315 | - qemu_fdt_setprop_cells(ms->fdt, aplic_name, "reg", | ||
316 | - 0x0, aplic_addr, 0x0, memmap[VIRT_APLIC_S].size); | ||
317 | - qemu_fdt_setprop_cell(ms->fdt, aplic_name, "riscv,num-sources", | ||
318 | - VIRT_IRQCHIP_NUM_SOURCES); | ||
319 | - riscv_socket_fdt_write_id(ms, aplic_name, socket); | ||
320 | - qemu_fdt_setprop_cell(ms->fdt, aplic_name, "phandle", aplic_s_phandle); | ||
321 | |||
322 | if (!socket) { | ||
323 | platform_bus_add_all_fdt_nodes(ms->fdt, aplic_name, | ||
324 | @@ -XXX,XX +XXX,XX @@ static void create_fdt_socket_aplic(RISCVVirtState *s, | ||
325 | |||
326 | g_free(aplic_name); | ||
327 | |||
328 | - g_free(aplic_cells); | ||
329 | aplic_phandles[socket] = aplic_s_phandle; | ||
51 | } | 330 | } |
52 | 331 | ||
53 | static uint64_t sifive_plic_read(void *opaque, hwaddr addr, unsigned size) | 332 | @@ -XXX,XX +XXX,XX @@ static DeviceState *virt_create_aia(RISCVVirtAIAType aia_type, int aia_guests, |
333 | int i; | ||
334 | hwaddr addr; | ||
335 | uint32_t guest_bits; | ||
336 | - DeviceState *aplic_m; | ||
337 | - bool msimode = (aia_type == VIRT_AIA_TYPE_APLIC_IMSIC) ? true : false; | ||
338 | + DeviceState *aplic_s = NULL; | ||
339 | + DeviceState *aplic_m = NULL; | ||
340 | + bool msimode = aia_type == VIRT_AIA_TYPE_APLIC_IMSIC; | ||
341 | |||
342 | if (msimode) { | ||
343 | - /* Per-socket M-level IMSICs */ | ||
344 | - addr = memmap[VIRT_IMSIC_M].base + socket * VIRT_IMSIC_GROUP_MAX_SIZE; | ||
345 | - for (i = 0; i < hart_count; i++) { | ||
346 | - riscv_imsic_create(addr + i * IMSIC_HART_SIZE(0), | ||
347 | - base_hartid + i, true, 1, | ||
348 | - VIRT_IRQCHIP_NUM_MSIS); | ||
349 | + if (!kvm_enabled()) { | ||
350 | + /* Per-socket M-level IMSICs */ | ||
351 | + addr = memmap[VIRT_IMSIC_M].base + | ||
352 | + socket * VIRT_IMSIC_GROUP_MAX_SIZE; | ||
353 | + for (i = 0; i < hart_count; i++) { | ||
354 | + riscv_imsic_create(addr + i * IMSIC_HART_SIZE(0), | ||
355 | + base_hartid + i, true, 1, | ||
356 | + VIRT_IRQCHIP_NUM_MSIS); | ||
357 | + } | ||
358 | } | ||
359 | |||
360 | /* Per-socket S-level IMSICs */ | ||
361 | @@ -XXX,XX +XXX,XX @@ static DeviceState *virt_create_aia(RISCVVirtAIAType aia_type, int aia_guests, | ||
362 | } | ||
363 | } | ||
364 | |||
365 | - /* Per-socket M-level APLIC */ | ||
366 | - aplic_m = riscv_aplic_create( | ||
367 | - memmap[VIRT_APLIC_M].base + socket * memmap[VIRT_APLIC_M].size, | ||
368 | - memmap[VIRT_APLIC_M].size, | ||
369 | - (msimode) ? 0 : base_hartid, | ||
370 | - (msimode) ? 0 : hart_count, | ||
371 | - VIRT_IRQCHIP_NUM_SOURCES, | ||
372 | - VIRT_IRQCHIP_NUM_PRIO_BITS, | ||
373 | - msimode, true, NULL); | ||
374 | - | ||
375 | - if (aplic_m) { | ||
376 | - /* Per-socket S-level APLIC */ | ||
377 | - riscv_aplic_create( | ||
378 | - memmap[VIRT_APLIC_S].base + socket * memmap[VIRT_APLIC_S].size, | ||
379 | - memmap[VIRT_APLIC_S].size, | ||
380 | - (msimode) ? 0 : base_hartid, | ||
381 | - (msimode) ? 0 : hart_count, | ||
382 | - VIRT_IRQCHIP_NUM_SOURCES, | ||
383 | - VIRT_IRQCHIP_NUM_PRIO_BITS, | ||
384 | - msimode, false, aplic_m); | ||
385 | + if (!kvm_enabled()) { | ||
386 | + /* Per-socket M-level APLIC */ | ||
387 | + aplic_m = riscv_aplic_create(memmap[VIRT_APLIC_M].base + | ||
388 | + socket * memmap[VIRT_APLIC_M].size, | ||
389 | + memmap[VIRT_APLIC_M].size, | ||
390 | + (msimode) ? 0 : base_hartid, | ||
391 | + (msimode) ? 0 : hart_count, | ||
392 | + VIRT_IRQCHIP_NUM_SOURCES, | ||
393 | + VIRT_IRQCHIP_NUM_PRIO_BITS, | ||
394 | + msimode, true, NULL); | ||
395 | } | ||
396 | |||
397 | - return aplic_m; | ||
398 | + /* Per-socket S-level APLIC */ | ||
399 | + aplic_s = riscv_aplic_create(memmap[VIRT_APLIC_S].base + | ||
400 | + socket * memmap[VIRT_APLIC_S].size, | ||
401 | + memmap[VIRT_APLIC_S].size, | ||
402 | + (msimode) ? 0 : base_hartid, | ||
403 | + (msimode) ? 0 : hart_count, | ||
404 | + VIRT_IRQCHIP_NUM_SOURCES, | ||
405 | + VIRT_IRQCHIP_NUM_PRIO_BITS, | ||
406 | + msimode, false, aplic_m); | ||
407 | + | ||
408 | + return kvm_enabled() ? aplic_s : aplic_m; | ||
409 | } | ||
410 | |||
411 | static void create_platform_bus(RISCVVirtState *s, DeviceState *irqchip) | ||
54 | -- | 412 | -- |
55 | 2.27.0 | 413 | 2.41.0 |
56 | |||
57 | diff view generated by jsdifflib |
1 | From: LIU Zhiwei <zhiwei_liu@c-sky.com> | 1 | From: Yong-Xuan Wang <yongxuan.wang@sifive.com> |
---|---|---|---|
2 | 2 | ||
3 | Vector AMOs operate as if aq and rl bits were zero on each element | 3 | We check the in-kernel irqchip support when using KVM acceleration. |
4 | with regard to ordering relative to other instructions in the same hart. | ||
5 | Vector AMOs provide no ordering guarantee between element operations | ||
6 | in the same vector AMO instruction | ||
7 | 4 | ||
8 | Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> | 5 | Signed-off-by: Yong-Xuan Wang <yongxuan.wang@sifive.com> |
9 | Reviewed-by: Alistair Francis <alistair.francis@wdc.com> | 6 | Reviewed-by: Jim Shu <jim.shu@sifive.com> |
10 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 7 | Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> |
11 | Message-id: 20200623215920.2594-10-zhiwei_liu@c-sky.com | 8 | Reviewed-by: Andrew Jones <ajones@ventanamicro.com> |
9 | Message-ID: <20230727102439.22554-3-yongxuan.wang@sifive.com> | ||
12 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | 10 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> |
13 | --- | 11 | --- |
14 | target/riscv/helper.h | 29 +++++ | 12 | target/riscv/kvm.c | 10 +++++++++- |
15 | target/riscv/internals.h | 1 + | 13 | 1 file changed, 9 insertions(+), 1 deletion(-) |
16 | target/riscv/insn32-64.decode | 11 ++ | ||
17 | target/riscv/insn32.decode | 13 +++ | ||
18 | target/riscv/insn_trans/trans_rvv.inc.c | 138 ++++++++++++++++++++++ | ||
19 | target/riscv/vector_helper.c | 147 ++++++++++++++++++++++++ | ||
20 | 6 files changed, 339 insertions(+) | ||
21 | 14 | ||
22 | diff --git a/target/riscv/helper.h b/target/riscv/helper.h | 15 | diff --git a/target/riscv/kvm.c b/target/riscv/kvm.c |
23 | index XXXXXXX..XXXXXXX 100644 | 16 | index XXXXXXX..XXXXXXX 100644 |
24 | --- a/target/riscv/helper.h | 17 | --- a/target/riscv/kvm.c |
25 | +++ b/target/riscv/helper.h | 18 | +++ b/target/riscv/kvm.c |
26 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_5(vlhuff_v_w, void, ptr, ptr, tl, env, i32) | 19 | @@ -XXX,XX +XXX,XX @@ int kvm_arch_init(MachineState *ms, KVMState *s) |
27 | DEF_HELPER_5(vlhuff_v_d, void, ptr, ptr, tl, env, i32) | 20 | |
28 | DEF_HELPER_5(vlwuff_v_w, void, ptr, ptr, tl, env, i32) | 21 | int kvm_arch_irqchip_create(KVMState *s) |
29 | DEF_HELPER_5(vlwuff_v_d, void, ptr, ptr, tl, env, i32) | 22 | { |
30 | +#ifdef TARGET_RISCV64 | 23 | - return 0; |
31 | +DEF_HELPER_6(vamoswapw_v_d, void, ptr, ptr, tl, ptr, env, i32) | 24 | + if (kvm_kernel_irqchip_split()) { |
32 | +DEF_HELPER_6(vamoswapd_v_d, void, ptr, ptr, tl, ptr, env, i32) | 25 | + error_report("-machine kernel_irqchip=split is not supported on RISC-V."); |
33 | +DEF_HELPER_6(vamoaddw_v_d, void, ptr, ptr, tl, ptr, env, i32) | 26 | + exit(1); |
34 | +DEF_HELPER_6(vamoaddd_v_d, void, ptr, ptr, tl, ptr, env, i32) | ||
35 | +DEF_HELPER_6(vamoxorw_v_d, void, ptr, ptr, tl, ptr, env, i32) | ||
36 | +DEF_HELPER_6(vamoxord_v_d, void, ptr, ptr, tl, ptr, env, i32) | ||
37 | +DEF_HELPER_6(vamoandw_v_d, void, ptr, ptr, tl, ptr, env, i32) | ||
38 | +DEF_HELPER_6(vamoandd_v_d, void, ptr, ptr, tl, ptr, env, i32) | ||
39 | +DEF_HELPER_6(vamoorw_v_d, void, ptr, ptr, tl, ptr, env, i32) | ||
40 | +DEF_HELPER_6(vamoord_v_d, void, ptr, ptr, tl, ptr, env, i32) | ||
41 | +DEF_HELPER_6(vamominw_v_d, void, ptr, ptr, tl, ptr, env, i32) | ||
42 | +DEF_HELPER_6(vamomind_v_d, void, ptr, ptr, tl, ptr, env, i32) | ||
43 | +DEF_HELPER_6(vamomaxw_v_d, void, ptr, ptr, tl, ptr, env, i32) | ||
44 | +DEF_HELPER_6(vamomaxd_v_d, void, ptr, ptr, tl, ptr, env, i32) | ||
45 | +DEF_HELPER_6(vamominuw_v_d, void, ptr, ptr, tl, ptr, env, i32) | ||
46 | +DEF_HELPER_6(vamominud_v_d, void, ptr, ptr, tl, ptr, env, i32) | ||
47 | +DEF_HELPER_6(vamomaxuw_v_d, void, ptr, ptr, tl, ptr, env, i32) | ||
48 | +DEF_HELPER_6(vamomaxud_v_d, void, ptr, ptr, tl, ptr, env, i32) | ||
49 | +#endif | ||
50 | +DEF_HELPER_6(vamoswapw_v_w, void, ptr, ptr, tl, ptr, env, i32) | ||
51 | +DEF_HELPER_6(vamoaddw_v_w, void, ptr, ptr, tl, ptr, env, i32) | ||
52 | +DEF_HELPER_6(vamoxorw_v_w, void, ptr, ptr, tl, ptr, env, i32) | ||
53 | +DEF_HELPER_6(vamoandw_v_w, void, ptr, ptr, tl, ptr, env, i32) | ||
54 | +DEF_HELPER_6(vamoorw_v_w, void, ptr, ptr, tl, ptr, env, i32) | ||
55 | +DEF_HELPER_6(vamominw_v_w, void, ptr, ptr, tl, ptr, env, i32) | ||
56 | +DEF_HELPER_6(vamomaxw_v_w, void, ptr, ptr, tl, ptr, env, i32) | ||
57 | +DEF_HELPER_6(vamominuw_v_w, void, ptr, ptr, tl, ptr, env, i32) | ||
58 | +DEF_HELPER_6(vamomaxuw_v_w, void, ptr, ptr, tl, ptr, env, i32) | ||
59 | diff --git a/target/riscv/internals.h b/target/riscv/internals.h | ||
60 | index XXXXXXX..XXXXXXX 100644 | ||
61 | --- a/target/riscv/internals.h | ||
62 | +++ b/target/riscv/internals.h | ||
63 | @@ -XXX,XX +XXX,XX @@ FIELD(VDATA, MLEN, 0, 8) | ||
64 | FIELD(VDATA, VM, 8, 1) | ||
65 | FIELD(VDATA, LMUL, 9, 2) | ||
66 | FIELD(VDATA, NF, 11, 4) | ||
67 | +FIELD(VDATA, WD, 11, 1) | ||
68 | #endif | ||
69 | diff --git a/target/riscv/insn32-64.decode b/target/riscv/insn32-64.decode | ||
70 | index XXXXXXX..XXXXXXX 100644 | ||
71 | --- a/target/riscv/insn32-64.decode | ||
72 | +++ b/target/riscv/insn32-64.decode | ||
73 | @@ -XXX,XX +XXX,XX @@ amomax_d 10100 . . ..... ..... 011 ..... 0101111 @atom_st | ||
74 | amominu_d 11000 . . ..... ..... 011 ..... 0101111 @atom_st | ||
75 | amomaxu_d 11100 . . ..... ..... 011 ..... 0101111 @atom_st | ||
76 | |||
77 | +#*** Vector AMO operations (in addition to Zvamo) *** | ||
78 | +vamoswapd_v 00001 . . ..... ..... 111 ..... 0101111 @r_wdvm | ||
79 | +vamoaddd_v 00000 . . ..... ..... 111 ..... 0101111 @r_wdvm | ||
80 | +vamoxord_v 00100 . . ..... ..... 111 ..... 0101111 @r_wdvm | ||
81 | +vamoandd_v 01100 . . ..... ..... 111 ..... 0101111 @r_wdvm | ||
82 | +vamoord_v 01000 . . ..... ..... 111 ..... 0101111 @r_wdvm | ||
83 | +vamomind_v 10000 . . ..... ..... 111 ..... 0101111 @r_wdvm | ||
84 | +vamomaxd_v 10100 . . ..... ..... 111 ..... 0101111 @r_wdvm | ||
85 | +vamominud_v 11000 . . ..... ..... 111 ..... 0101111 @r_wdvm | ||
86 | +vamomaxud_v 11100 . . ..... ..... 111 ..... 0101111 @r_wdvm | ||
87 | + | ||
88 | # *** RV64F Standard Extension (in addition to RV32F) *** | ||
89 | fcvt_l_s 1100000 00010 ..... ... ..... 1010011 @r2_rm | ||
90 | fcvt_lu_s 1100000 00011 ..... ... ..... 1010011 @r2_rm | ||
91 | diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode | ||
92 | index XXXXXXX..XXXXXXX 100644 | ||
93 | --- a/target/riscv/insn32.decode | ||
94 | +++ b/target/riscv/insn32.decode | ||
95 | @@ -XXX,XX +XXX,XX @@ | ||
96 | &u imm rd | ||
97 | &shift shamt rs1 rd | ||
98 | &atomic aq rl rs2 rs1 rd | ||
99 | +&rwdvm vm wd rd rs1 rs2 | ||
100 | &r2nfvm vm rd rs1 nf | ||
101 | &rnfvm vm rd rs1 rs2 nf | ||
102 | |||
103 | @@ -XXX,XX +XXX,XX @@ | ||
104 | @r2 ....... ..... ..... ... ..... ....... %rs1 %rd | ||
105 | @r2_nfvm ... ... vm:1 ..... ..... ... ..... ....... &r2nfvm %nf %rs1 %rd | ||
106 | @r_nfvm ... ... vm:1 ..... ..... ... ..... ....... &rnfvm %nf %rs2 %rs1 %rd | ||
107 | +@r_wdvm ..... wd:1 vm:1 ..... ..... ... ..... ....... &rwdvm %rs2 %rs1 %rd | ||
108 | @r2_zimm . zimm:11 ..... ... ..... ....... %rs1 %rd | ||
109 | |||
110 | @hfence_gvma ....... ..... ..... ... ..... ....... %rs2 %rs1 | ||
111 | @@ -XXX,XX +XXX,XX @@ vsxh_v ... -11 . ..... ..... 101 ..... 0100111 @r_nfvm | ||
112 | vsxw_v ... -11 . ..... ..... 110 ..... 0100111 @r_nfvm | ||
113 | vsxe_v ... -11 . ..... ..... 111 ..... 0100111 @r_nfvm | ||
114 | |||
115 | +#*** Vector AMO operations are encoded under the standard AMO major opcode *** | ||
116 | +vamoswapw_v 00001 . . ..... ..... 110 ..... 0101111 @r_wdvm | ||
117 | +vamoaddw_v 00000 . . ..... ..... 110 ..... 0101111 @r_wdvm | ||
118 | +vamoxorw_v 00100 . . ..... ..... 110 ..... 0101111 @r_wdvm | ||
119 | +vamoandw_v 01100 . . ..... ..... 110 ..... 0101111 @r_wdvm | ||
120 | +vamoorw_v 01000 . . ..... ..... 110 ..... 0101111 @r_wdvm | ||
121 | +vamominw_v 10000 . . ..... ..... 110 ..... 0101111 @r_wdvm | ||
122 | +vamomaxw_v 10100 . . ..... ..... 110 ..... 0101111 @r_wdvm | ||
123 | +vamominuw_v 11000 . . ..... ..... 110 ..... 0101111 @r_wdvm | ||
124 | +vamomaxuw_v 11100 . . ..... ..... 110 ..... 0101111 @r_wdvm | ||
125 | + | ||
126 | # *** new major opcode OP-V *** | ||
127 | vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm | ||
128 | vsetvl 1000000 ..... ..... 111 ..... 1010111 @r | ||
129 | diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c | ||
130 | index XXXXXXX..XXXXXXX 100644 | ||
131 | --- a/target/riscv/insn_trans/trans_rvv.inc.c | ||
132 | +++ b/target/riscv/insn_trans/trans_rvv.inc.c | ||
133 | @@ -XXX,XX +XXX,XX @@ GEN_VEXT_TRANS(vleff_v, 3, r2nfvm, ldff_op, ld_us_check) | ||
134 | GEN_VEXT_TRANS(vlbuff_v, 4, r2nfvm, ldff_op, ld_us_check) | ||
135 | GEN_VEXT_TRANS(vlhuff_v, 5, r2nfvm, ldff_op, ld_us_check) | ||
136 | GEN_VEXT_TRANS(vlwuff_v, 6, r2nfvm, ldff_op, ld_us_check) | ||
137 | + | ||
138 | +/* | ||
139 | + *** vector atomic operation | ||
140 | + */ | ||
141 | +typedef void gen_helper_amo(TCGv_ptr, TCGv_ptr, TCGv, TCGv_ptr, | ||
142 | + TCGv_env, TCGv_i32); | ||
143 | + | ||
144 | +static bool amo_trans(uint32_t vd, uint32_t rs1, uint32_t vs2, | ||
145 | + uint32_t data, gen_helper_amo *fn, DisasContext *s) | ||
146 | +{ | ||
147 | + TCGv_ptr dest, mask, index; | ||
148 | + TCGv base; | ||
149 | + TCGv_i32 desc; | ||
150 | + | ||
151 | + TCGLabel *over = gen_new_label(); | ||
152 | + tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); | ||
153 | + | ||
154 | + dest = tcg_temp_new_ptr(); | ||
155 | + mask = tcg_temp_new_ptr(); | ||
156 | + index = tcg_temp_new_ptr(); | ||
157 | + base = tcg_temp_new(); | ||
158 | + desc = tcg_const_i32(simd_desc(0, s->vlen / 8, data)); | ||
159 | + | ||
160 | + gen_get_gpr(base, rs1); | ||
161 | + tcg_gen_addi_ptr(dest, cpu_env, vreg_ofs(s, vd)); | ||
162 | + tcg_gen_addi_ptr(index, cpu_env, vreg_ofs(s, vs2)); | ||
163 | + tcg_gen_addi_ptr(mask, cpu_env, vreg_ofs(s, 0)); | ||
164 | + | ||
165 | + fn(dest, mask, base, index, cpu_env, desc); | ||
166 | + | ||
167 | + tcg_temp_free_ptr(dest); | ||
168 | + tcg_temp_free_ptr(mask); | ||
169 | + tcg_temp_free_ptr(index); | ||
170 | + tcg_temp_free(base); | ||
171 | + tcg_temp_free_i32(desc); | ||
172 | + gen_set_label(over); | ||
173 | + return true; | ||
174 | +} | ||
175 | + | ||
176 | +static bool amo_op(DisasContext *s, arg_rwdvm *a, uint8_t seq) | ||
177 | +{ | ||
178 | + uint32_t data = 0; | ||
179 | + gen_helper_amo *fn; | ||
180 | + static gen_helper_amo *const fnsw[9] = { | ||
181 | + /* no atomic operation */ | ||
182 | + gen_helper_vamoswapw_v_w, | ||
183 | + gen_helper_vamoaddw_v_w, | ||
184 | + gen_helper_vamoxorw_v_w, | ||
185 | + gen_helper_vamoandw_v_w, | ||
186 | + gen_helper_vamoorw_v_w, | ||
187 | + gen_helper_vamominw_v_w, | ||
188 | + gen_helper_vamomaxw_v_w, | ||
189 | + gen_helper_vamominuw_v_w, | ||
190 | + gen_helper_vamomaxuw_v_w | ||
191 | + }; | ||
192 | +#ifdef TARGET_RISCV64 | ||
193 | + static gen_helper_amo *const fnsd[18] = { | ||
194 | + gen_helper_vamoswapw_v_d, | ||
195 | + gen_helper_vamoaddw_v_d, | ||
196 | + gen_helper_vamoxorw_v_d, | ||
197 | + gen_helper_vamoandw_v_d, | ||
198 | + gen_helper_vamoorw_v_d, | ||
199 | + gen_helper_vamominw_v_d, | ||
200 | + gen_helper_vamomaxw_v_d, | ||
201 | + gen_helper_vamominuw_v_d, | ||
202 | + gen_helper_vamomaxuw_v_d, | ||
203 | + gen_helper_vamoswapd_v_d, | ||
204 | + gen_helper_vamoaddd_v_d, | ||
205 | + gen_helper_vamoxord_v_d, | ||
206 | + gen_helper_vamoandd_v_d, | ||
207 | + gen_helper_vamoord_v_d, | ||
208 | + gen_helper_vamomind_v_d, | ||
209 | + gen_helper_vamomaxd_v_d, | ||
210 | + gen_helper_vamominud_v_d, | ||
211 | + gen_helper_vamomaxud_v_d | ||
212 | + }; | ||
213 | +#endif | ||
214 | + | ||
215 | + if (tb_cflags(s->base.tb) & CF_PARALLEL) { | ||
216 | + gen_helper_exit_atomic(cpu_env); | ||
217 | + s->base.is_jmp = DISAS_NORETURN; | ||
218 | + return true; | ||
219 | + } else { | ||
220 | + if (s->sew == 3) { | ||
221 | +#ifdef TARGET_RISCV64 | ||
222 | + fn = fnsd[seq]; | ||
223 | +#else | ||
224 | + /* Check done in amo_check(). */ | ||
225 | + g_assert_not_reached(); | ||
226 | +#endif | ||
227 | + } else { | ||
228 | + fn = fnsw[seq]; | ||
229 | + } | ||
230 | + } | 27 | + } |
231 | + | 28 | + |
232 | + data = FIELD_DP32(data, VDATA, MLEN, s->mlen); | 29 | + /* |
233 | + data = FIELD_DP32(data, VDATA, VM, a->vm); | 30 | + * We can create the VAIA using the newer device control API. |
234 | + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); | 31 | + */ |
235 | + data = FIELD_DP32(data, VDATA, WD, a->wd); | 32 | + return kvm_check_extension(s, KVM_CAP_DEVICE_CTRL); |
236 | + return amo_trans(a->rd, a->rs1, a->rs2, data, fn, s); | ||
237 | +} | ||
238 | +/* | ||
239 | + * There are two rules check here. | ||
240 | + * | ||
241 | + * 1. SEW must be at least as wide as the AMO memory element size. | ||
242 | + * | ||
243 | + * 2. If SEW is greater than XLEN, an illegal instruction exception is raised. | ||
244 | + */ | ||
245 | +static bool amo_check(DisasContext *s, arg_rwdvm* a) | ||
246 | +{ | ||
247 | + return (!s->vill && has_ext(s, RVA) && | ||
248 | + (!a->wd || vext_check_overlap_mask(s, a->rd, a->vm, false)) && | ||
249 | + vext_check_reg(s, a->rd, false) && | ||
250 | + vext_check_reg(s, a->rs2, false) && | ||
251 | + ((1 << s->sew) <= sizeof(target_ulong)) && | ||
252 | + ((1 << s->sew) >= 4)); | ||
253 | +} | ||
254 | + | ||
255 | +GEN_VEXT_TRANS(vamoswapw_v, 0, rwdvm, amo_op, amo_check) | ||
256 | +GEN_VEXT_TRANS(vamoaddw_v, 1, rwdvm, amo_op, amo_check) | ||
257 | +GEN_VEXT_TRANS(vamoxorw_v, 2, rwdvm, amo_op, amo_check) | ||
258 | +GEN_VEXT_TRANS(vamoandw_v, 3, rwdvm, amo_op, amo_check) | ||
259 | +GEN_VEXT_TRANS(vamoorw_v, 4, rwdvm, amo_op, amo_check) | ||
260 | +GEN_VEXT_TRANS(vamominw_v, 5, rwdvm, amo_op, amo_check) | ||
261 | +GEN_VEXT_TRANS(vamomaxw_v, 6, rwdvm, amo_op, amo_check) | ||
262 | +GEN_VEXT_TRANS(vamominuw_v, 7, rwdvm, amo_op, amo_check) | ||
263 | +GEN_VEXT_TRANS(vamomaxuw_v, 8, rwdvm, amo_op, amo_check) | ||
264 | +#ifdef TARGET_RISCV64 | ||
265 | +GEN_VEXT_TRANS(vamoswapd_v, 9, rwdvm, amo_op, amo_check) | ||
266 | +GEN_VEXT_TRANS(vamoaddd_v, 10, rwdvm, amo_op, amo_check) | ||
267 | +GEN_VEXT_TRANS(vamoxord_v, 11, rwdvm, amo_op, amo_check) | ||
268 | +GEN_VEXT_TRANS(vamoandd_v, 12, rwdvm, amo_op, amo_check) | ||
269 | +GEN_VEXT_TRANS(vamoord_v, 13, rwdvm, amo_op, amo_check) | ||
270 | +GEN_VEXT_TRANS(vamomind_v, 14, rwdvm, amo_op, amo_check) | ||
271 | +GEN_VEXT_TRANS(vamomaxd_v, 15, rwdvm, amo_op, amo_check) | ||
272 | +GEN_VEXT_TRANS(vamominud_v, 16, rwdvm, amo_op, amo_check) | ||
273 | +GEN_VEXT_TRANS(vamomaxud_v, 17, rwdvm, amo_op, amo_check) | ||
274 | +#endif | ||
275 | diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c | ||
276 | index XXXXXXX..XXXXXXX 100644 | ||
277 | --- a/target/riscv/vector_helper.c | ||
278 | +++ b/target/riscv/vector_helper.c | ||
279 | @@ -XXX,XX +XXX,XX @@ static inline uint32_t vext_lmul(uint32_t desc) | ||
280 | return FIELD_EX32(simd_data(desc), VDATA, LMUL); | ||
281 | } | 33 | } |
282 | 34 | ||
283 | +static uint32_t vext_wd(uint32_t desc) | 35 | int kvm_arch_process_async_events(CPUState *cs) |
284 | +{ | ||
285 | + return (simd_data(desc) >> 11) & 0x1; | ||
286 | +} | ||
287 | + | ||
288 | /* | ||
289 | * Get vector group length in bytes. Its range is [64, 2048]. | ||
290 | * | ||
291 | @@ -XXX,XX +XXX,XX @@ GEN_VEXT_LDFF(vlhuff_v_w, uint16_t, uint32_t, ldhu_w, clearl) | ||
292 | GEN_VEXT_LDFF(vlhuff_v_d, uint16_t, uint64_t, ldhu_d, clearq) | ||
293 | GEN_VEXT_LDFF(vlwuff_v_w, uint32_t, uint32_t, ldwu_w, clearl) | ||
294 | GEN_VEXT_LDFF(vlwuff_v_d, uint32_t, uint64_t, ldwu_d, clearq) | ||
295 | + | ||
296 | +/* | ||
297 | + *** Vector AMO Operations (Zvamo) | ||
298 | + */ | ||
299 | +typedef void vext_amo_noatomic_fn(void *vs3, target_ulong addr, | ||
300 | + uint32_t wd, uint32_t idx, CPURISCVState *env, | ||
301 | + uintptr_t retaddr); | ||
302 | + | ||
303 | +/* no atomic operation for vector atomic instructions */ | ||
304 | +#define DO_SWAP(N, M) (M) | ||
305 | +#define DO_AND(N, M) (N & M) | ||
306 | +#define DO_XOR(N, M) (N ^ M) | ||
307 | +#define DO_OR(N, M) (N | M) | ||
308 | +#define DO_ADD(N, M) (N + M) | ||
309 | + | ||
310 | +#define GEN_VEXT_AMO_NOATOMIC_OP(NAME, ESZ, MSZ, H, DO_OP, SUF) \ | ||
311 | +static void \ | ||
312 | +vext_##NAME##_noatomic_op(void *vs3, target_ulong addr, \ | ||
313 | + uint32_t wd, uint32_t idx, \ | ||
314 | + CPURISCVState *env, uintptr_t retaddr)\ | ||
315 | +{ \ | ||
316 | + typedef int##ESZ##_t ETYPE; \ | ||
317 | + typedef int##MSZ##_t MTYPE; \ | ||
318 | + typedef uint##MSZ##_t UMTYPE __attribute__((unused)); \ | ||
319 | + ETYPE *pe3 = (ETYPE *)vs3 + H(idx); \ | ||
320 | + MTYPE a = cpu_ld##SUF##_data(env, addr), b = *pe3; \ | ||
321 | + \ | ||
322 | + cpu_st##SUF##_data(env, addr, DO_OP(a, b)); \ | ||
323 | + if (wd) { \ | ||
324 | + *pe3 = a; \ | ||
325 | + } \ | ||
326 | +} | ||
327 | + | ||
328 | +/* Signed min/max */ | ||
329 | +#define DO_MAX(N, M) ((N) >= (M) ? (N) : (M)) | ||
330 | +#define DO_MIN(N, M) ((N) >= (M) ? (M) : (N)) | ||
331 | + | ||
332 | +/* Unsigned min/max */ | ||
333 | +#define DO_MAXU(N, M) DO_MAX((UMTYPE)N, (UMTYPE)M) | ||
334 | +#define DO_MINU(N, M) DO_MIN((UMTYPE)N, (UMTYPE)M) | ||
335 | + | ||
336 | +GEN_VEXT_AMO_NOATOMIC_OP(vamoswapw_v_w, 32, 32, H4, DO_SWAP, l) | ||
337 | +GEN_VEXT_AMO_NOATOMIC_OP(vamoaddw_v_w, 32, 32, H4, DO_ADD, l) | ||
338 | +GEN_VEXT_AMO_NOATOMIC_OP(vamoxorw_v_w, 32, 32, H4, DO_XOR, l) | ||
339 | +GEN_VEXT_AMO_NOATOMIC_OP(vamoandw_v_w, 32, 32, H4, DO_AND, l) | ||
340 | +GEN_VEXT_AMO_NOATOMIC_OP(vamoorw_v_w, 32, 32, H4, DO_OR, l) | ||
341 | +GEN_VEXT_AMO_NOATOMIC_OP(vamominw_v_w, 32, 32, H4, DO_MIN, l) | ||
342 | +GEN_VEXT_AMO_NOATOMIC_OP(vamomaxw_v_w, 32, 32, H4, DO_MAX, l) | ||
343 | +GEN_VEXT_AMO_NOATOMIC_OP(vamominuw_v_w, 32, 32, H4, DO_MINU, l) | ||
344 | +GEN_VEXT_AMO_NOATOMIC_OP(vamomaxuw_v_w, 32, 32, H4, DO_MAXU, l) | ||
345 | +#ifdef TARGET_RISCV64 | ||
346 | +GEN_VEXT_AMO_NOATOMIC_OP(vamoswapw_v_d, 64, 32, H8, DO_SWAP, l) | ||
347 | +GEN_VEXT_AMO_NOATOMIC_OP(vamoswapd_v_d, 64, 64, H8, DO_SWAP, q) | ||
348 | +GEN_VEXT_AMO_NOATOMIC_OP(vamoaddw_v_d, 64, 32, H8, DO_ADD, l) | ||
349 | +GEN_VEXT_AMO_NOATOMIC_OP(vamoaddd_v_d, 64, 64, H8, DO_ADD, q) | ||
350 | +GEN_VEXT_AMO_NOATOMIC_OP(vamoxorw_v_d, 64, 32, H8, DO_XOR, l) | ||
351 | +GEN_VEXT_AMO_NOATOMIC_OP(vamoxord_v_d, 64, 64, H8, DO_XOR, q) | ||
352 | +GEN_VEXT_AMO_NOATOMIC_OP(vamoandw_v_d, 64, 32, H8, DO_AND, l) | ||
353 | +GEN_VEXT_AMO_NOATOMIC_OP(vamoandd_v_d, 64, 64, H8, DO_AND, q) | ||
354 | +GEN_VEXT_AMO_NOATOMIC_OP(vamoorw_v_d, 64, 32, H8, DO_OR, l) | ||
355 | +GEN_VEXT_AMO_NOATOMIC_OP(vamoord_v_d, 64, 64, H8, DO_OR, q) | ||
356 | +GEN_VEXT_AMO_NOATOMIC_OP(vamominw_v_d, 64, 32, H8, DO_MIN, l) | ||
357 | +GEN_VEXT_AMO_NOATOMIC_OP(vamomind_v_d, 64, 64, H8, DO_MIN, q) | ||
358 | +GEN_VEXT_AMO_NOATOMIC_OP(vamomaxw_v_d, 64, 32, H8, DO_MAX, l) | ||
359 | +GEN_VEXT_AMO_NOATOMIC_OP(vamomaxd_v_d, 64, 64, H8, DO_MAX, q) | ||
360 | +GEN_VEXT_AMO_NOATOMIC_OP(vamominuw_v_d, 64, 32, H8, DO_MINU, l) | ||
361 | +GEN_VEXT_AMO_NOATOMIC_OP(vamominud_v_d, 64, 64, H8, DO_MINU, q) | ||
362 | +GEN_VEXT_AMO_NOATOMIC_OP(vamomaxuw_v_d, 64, 32, H8, DO_MAXU, l) | ||
363 | +GEN_VEXT_AMO_NOATOMIC_OP(vamomaxud_v_d, 64, 64, H8, DO_MAXU, q) | ||
364 | +#endif | ||
365 | + | ||
366 | +static inline void | ||
367 | +vext_amo_noatomic(void *vs3, void *v0, target_ulong base, | ||
368 | + void *vs2, CPURISCVState *env, uint32_t desc, | ||
369 | + vext_get_index_addr get_index_addr, | ||
370 | + vext_amo_noatomic_fn *noatomic_op, | ||
371 | + clear_fn *clear_elem, | ||
372 | + uint32_t esz, uint32_t msz, uintptr_t ra) | ||
373 | +{ | ||
374 | + uint32_t i; | ||
375 | + target_long addr; | ||
376 | + uint32_t wd = vext_wd(desc); | ||
377 | + uint32_t vm = vext_vm(desc); | ||
378 | + uint32_t mlen = vext_mlen(desc); | ||
379 | + uint32_t vlmax = vext_maxsz(desc) / esz; | ||
380 | + | ||
381 | + for (i = 0; i < env->vl; i++) { | ||
382 | + if (!vm && !vext_elem_mask(v0, mlen, i)) { | ||
383 | + continue; | ||
384 | + } | ||
385 | + probe_pages(env, get_index_addr(base, i, vs2), msz, ra, MMU_DATA_LOAD); | ||
386 | + probe_pages(env, get_index_addr(base, i, vs2), msz, ra, MMU_DATA_STORE); | ||
387 | + } | ||
388 | + for (i = 0; i < env->vl; i++) { | ||
389 | + if (!vm && !vext_elem_mask(v0, mlen, i)) { | ||
390 | + continue; | ||
391 | + } | ||
392 | + addr = get_index_addr(base, i, vs2); | ||
393 | + noatomic_op(vs3, addr, wd, i, env, ra); | ||
394 | + } | ||
395 | + clear_elem(vs3, env->vl, env->vl * esz, vlmax * esz); | ||
396 | +} | ||
397 | + | ||
398 | +#define GEN_VEXT_AMO(NAME, MTYPE, ETYPE, INDEX_FN, CLEAR_FN) \ | ||
399 | +void HELPER(NAME)(void *vs3, void *v0, target_ulong base, \ | ||
400 | + void *vs2, CPURISCVState *env, uint32_t desc) \ | ||
401 | +{ \ | ||
402 | + vext_amo_noatomic(vs3, v0, base, vs2, env, desc, \ | ||
403 | + INDEX_FN, vext_##NAME##_noatomic_op, \ | ||
404 | + CLEAR_FN, sizeof(ETYPE), sizeof(MTYPE), \ | ||
405 | + GETPC()); \ | ||
406 | +} | ||
407 | + | ||
408 | +#ifdef TARGET_RISCV64 | ||
409 | +GEN_VEXT_AMO(vamoswapw_v_d, int32_t, int64_t, idx_d, clearq) | ||
410 | +GEN_VEXT_AMO(vamoswapd_v_d, int64_t, int64_t, idx_d, clearq) | ||
411 | +GEN_VEXT_AMO(vamoaddw_v_d, int32_t, int64_t, idx_d, clearq) | ||
412 | +GEN_VEXT_AMO(vamoaddd_v_d, int64_t, int64_t, idx_d, clearq) | ||
413 | +GEN_VEXT_AMO(vamoxorw_v_d, int32_t, int64_t, idx_d, clearq) | ||
414 | +GEN_VEXT_AMO(vamoxord_v_d, int64_t, int64_t, idx_d, clearq) | ||
415 | +GEN_VEXT_AMO(vamoandw_v_d, int32_t, int64_t, idx_d, clearq) | ||
416 | +GEN_VEXT_AMO(vamoandd_v_d, int64_t, int64_t, idx_d, clearq) | ||
417 | +GEN_VEXT_AMO(vamoorw_v_d, int32_t, int64_t, idx_d, clearq) | ||
418 | +GEN_VEXT_AMO(vamoord_v_d, int64_t, int64_t, idx_d, clearq) | ||
419 | +GEN_VEXT_AMO(vamominw_v_d, int32_t, int64_t, idx_d, clearq) | ||
420 | +GEN_VEXT_AMO(vamomind_v_d, int64_t, int64_t, idx_d, clearq) | ||
421 | +GEN_VEXT_AMO(vamomaxw_v_d, int32_t, int64_t, idx_d, clearq) | ||
422 | +GEN_VEXT_AMO(vamomaxd_v_d, int64_t, int64_t, idx_d, clearq) | ||
423 | +GEN_VEXT_AMO(vamominuw_v_d, uint32_t, uint64_t, idx_d, clearq) | ||
424 | +GEN_VEXT_AMO(vamominud_v_d, uint64_t, uint64_t, idx_d, clearq) | ||
425 | +GEN_VEXT_AMO(vamomaxuw_v_d, uint32_t, uint64_t, idx_d, clearq) | ||
426 | +GEN_VEXT_AMO(vamomaxud_v_d, uint64_t, uint64_t, idx_d, clearq) | ||
427 | +#endif | ||
428 | +GEN_VEXT_AMO(vamoswapw_v_w, int32_t, int32_t, idx_w, clearl) | ||
429 | +GEN_VEXT_AMO(vamoaddw_v_w, int32_t, int32_t, idx_w, clearl) | ||
430 | +GEN_VEXT_AMO(vamoxorw_v_w, int32_t, int32_t, idx_w, clearl) | ||
431 | +GEN_VEXT_AMO(vamoandw_v_w, int32_t, int32_t, idx_w, clearl) | ||
432 | +GEN_VEXT_AMO(vamoorw_v_w, int32_t, int32_t, idx_w, clearl) | ||
433 | +GEN_VEXT_AMO(vamominw_v_w, int32_t, int32_t, idx_w, clearl) | ||
434 | +GEN_VEXT_AMO(vamomaxw_v_w, int32_t, int32_t, idx_w, clearl) | ||
435 | +GEN_VEXT_AMO(vamominuw_v_w, uint32_t, uint32_t, idx_w, clearl) | ||
436 | +GEN_VEXT_AMO(vamomaxuw_v_w, uint32_t, uint32_t, idx_w, clearl) | ||
437 | -- | 36 | -- |
438 | 2.27.0 | 37 | 2.41.0 |
439 | |||
440 | diff view generated by jsdifflib |
1 | From: LIU Zhiwei <zhiwei_liu@c-sky.com> | 1 | From: Yong-Xuan Wang <yongxuan.wang@sifive.com> |
---|---|---|---|
2 | 2 | ||
3 | Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> | 3 | We create a vAIA chip by using the KVM_DEV_TYPE_RISCV_AIA and then set up |
4 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 4 | the chip with the KVM_DEV_RISCV_AIA_GRP_* APIs. |
5 | Message-id: 20200623215920.2594-51-zhiwei_liu@c-sky.com | 5 | We also extend KVM accelerator to specify the KVM AIA mode. The "riscv-aia" |
6 | parameter is passed along with --accel in QEMU command-line. | ||
7 | 1) "riscv-aia=emul": IMSIC is emulated by hypervisor | ||
8 | 2) "riscv-aia=hwaccel": use hardware guest IMSIC | ||
9 | 3) "riscv-aia=auto": use the hardware guest IMSICs whenever available | ||
10 | otherwise we fall back to software emulation. | ||
11 | |||
12 | Signed-off-by: Yong-Xuan Wang <yongxuan.wang@sifive.com> | ||
13 | Reviewed-by: Jim Shu <jim.shu@sifive.com> | ||
14 | Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> | ||
15 | Reviewed-by: Andrew Jones <ajones@ventanamicro.com> | ||
16 | Message-ID: <20230727102439.22554-4-yongxuan.wang@sifive.com> | ||
6 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | 17 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> |
7 | --- | 18 | --- |
8 | target/riscv/helper.h | 2 ++ | 19 | target/riscv/kvm_riscv.h | 4 + |
9 | target/riscv/insn32.decode | 1 + | 20 | target/riscv/kvm.c | 186 +++++++++++++++++++++++++++++++++++++++ |
10 | target/riscv/insn_trans/trans_rvv.inc.c | 32 +++++++++++++++++++++++++ | 21 | 2 files changed, 190 insertions(+) |
11 | target/riscv/vector_helper.c | 20 ++++++++++++++++ | ||
12 | 4 files changed, 55 insertions(+) | ||
13 | 22 | ||
14 | diff --git a/target/riscv/helper.h b/target/riscv/helper.h | 23 | diff --git a/target/riscv/kvm_riscv.h b/target/riscv/kvm_riscv.h |
15 | index XXXXXXX..XXXXXXX 100644 | 24 | index XXXXXXX..XXXXXXX 100644 |
16 | --- a/target/riscv/helper.h | 25 | --- a/target/riscv/kvm_riscv.h |
17 | +++ b/target/riscv/helper.h | 26 | +++ b/target/riscv/kvm_riscv.h |
18 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vmor_mm, void, ptr, ptr, ptr, ptr, env, i32) | 27 | @@ -XXX,XX +XXX,XX @@ |
19 | DEF_HELPER_6(vmnor_mm, void, ptr, ptr, ptr, ptr, env, i32) | 28 | void kvm_riscv_init_user_properties(Object *cpu_obj); |
20 | DEF_HELPER_6(vmornot_mm, void, ptr, ptr, ptr, ptr, env, i32) | 29 | void kvm_riscv_reset_vcpu(RISCVCPU *cpu); |
21 | DEF_HELPER_6(vmxnor_mm, void, ptr, ptr, ptr, ptr, env, i32) | 30 | void kvm_riscv_set_irq(RISCVCPU *cpu, int irq, int level); |
22 | + | 31 | +void kvm_riscv_aia_create(MachineState *machine, uint64_t group_shift, |
23 | +DEF_HELPER_4(vmpopc_m, tl, ptr, ptr, env, i32) | 32 | + uint64_t aia_irq_num, uint64_t aia_msi_num, |
24 | diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode | 33 | + uint64_t aplic_base, uint64_t imsic_base, |
34 | + uint64_t guest_num); | ||
35 | |||
36 | #endif | ||
37 | diff --git a/target/riscv/kvm.c b/target/riscv/kvm.c | ||
25 | index XXXXXXX..XXXXXXX 100644 | 38 | index XXXXXXX..XXXXXXX 100644 |
26 | --- a/target/riscv/insn32.decode | 39 | --- a/target/riscv/kvm.c |
27 | +++ b/target/riscv/insn32.decode | 40 | +++ b/target/riscv/kvm.c |
28 | @@ -XXX,XX +XXX,XX @@ vmor_mm 011010 - ..... ..... 010 ..... 1010111 @r | 41 | @@ -XXX,XX +XXX,XX @@ |
29 | vmnor_mm 011110 - ..... ..... 010 ..... 1010111 @r | 42 | #include "exec/address-spaces.h" |
30 | vmornot_mm 011100 - ..... ..... 010 ..... 1010111 @r | 43 | #include "hw/boards.h" |
31 | vmxnor_mm 011111 - ..... ..... 010 ..... 1010111 @r | 44 | #include "hw/irq.h" |
32 | +vmpopc_m 010100 . ..... ----- 010 ..... 1010111 @r2_vm | 45 | +#include "hw/intc/riscv_imsic.h" |
33 | 46 | #include "qemu/log.h" | |
34 | vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm | 47 | #include "hw/loader.h" |
35 | vsetvl 1000000 ..... ..... 111 ..... 1010111 @r | 48 | #include "kvm_riscv.h" |
36 | diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c | 49 | @@ -XXX,XX +XXX,XX @@ |
37 | index XXXXXXX..XXXXXXX 100644 | 50 | #include "chardev/char-fe.h" |
38 | --- a/target/riscv/insn_trans/trans_rvv.inc.c | 51 | #include "migration/migration.h" |
39 | +++ b/target/riscv/insn_trans/trans_rvv.inc.c | 52 | #include "sysemu/runstate.h" |
40 | @@ -XXX,XX +XXX,XX @@ GEN_MM_TRANS(vmor_mm) | 53 | +#include "hw/riscv/numa.h" |
41 | GEN_MM_TRANS(vmnor_mm) | 54 | |
42 | GEN_MM_TRANS(vmornot_mm) | 55 | static uint64_t kvm_riscv_reg_id(CPURISCVState *env, uint64_t type, |
43 | GEN_MM_TRANS(vmxnor_mm) | 56 | uint64_t idx) |
44 | + | 57 | @@ -XXX,XX +XXX,XX @@ bool kvm_arch_cpu_check_are_resettable(void) |
45 | +/* Vector mask population count vmpopc */ | 58 | return true; |
46 | +static bool trans_vmpopc_m(DisasContext *s, arg_rmr *a) | 59 | } |
47 | +{ | 60 | |
48 | + if (vext_check_isa_ill(s)) { | 61 | +static int aia_mode; |
49 | + TCGv_ptr src2, mask; | 62 | + |
50 | + TCGv dst; | 63 | +static const char *kvm_aia_mode_str(uint64_t mode) |
51 | + TCGv_i32 desc; | 64 | +{ |
52 | + uint32_t data = 0; | 65 | + switch (mode) { |
53 | + data = FIELD_DP32(data, VDATA, MLEN, s->mlen); | 66 | + case KVM_DEV_RISCV_AIA_MODE_EMUL: |
54 | + data = FIELD_DP32(data, VDATA, VM, a->vm); | 67 | + return "emul"; |
55 | + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); | 68 | + case KVM_DEV_RISCV_AIA_MODE_HWACCEL: |
56 | + | 69 | + return "hwaccel"; |
57 | + mask = tcg_temp_new_ptr(); | 70 | + case KVM_DEV_RISCV_AIA_MODE_AUTO: |
58 | + src2 = tcg_temp_new_ptr(); | 71 | + default: |
59 | + dst = tcg_temp_new(); | 72 | + return "auto"; |
60 | + desc = tcg_const_i32(simd_desc(0, s->vlen / 8, data)); | 73 | + }; |
61 | + | 74 | +} |
62 | + tcg_gen_addi_ptr(src2, cpu_env, vreg_ofs(s, a->rs2)); | 75 | + |
63 | + tcg_gen_addi_ptr(mask, cpu_env, vreg_ofs(s, 0)); | 76 | +static char *riscv_get_kvm_aia(Object *obj, Error **errp) |
64 | + | 77 | +{ |
65 | + gen_helper_vmpopc_m(dst, mask, src2, cpu_env, desc); | 78 | + return g_strdup(kvm_aia_mode_str(aia_mode)); |
66 | + gen_set_gpr(a->rd, dst); | 79 | +} |
67 | + | 80 | + |
68 | + tcg_temp_free_ptr(mask); | 81 | +static void riscv_set_kvm_aia(Object *obj, const char *val, Error **errp) |
69 | + tcg_temp_free_ptr(src2); | 82 | +{ |
70 | + tcg_temp_free(dst); | 83 | + if (!strcmp(val, "emul")) { |
71 | + tcg_temp_free_i32(desc); | 84 | + aia_mode = KVM_DEV_RISCV_AIA_MODE_EMUL; |
72 | + return true; | 85 | + } else if (!strcmp(val, "hwaccel")) { |
73 | + } | 86 | + aia_mode = KVM_DEV_RISCV_AIA_MODE_HWACCEL; |
74 | + return false; | 87 | + } else if (!strcmp(val, "auto")) { |
75 | +} | 88 | + aia_mode = KVM_DEV_RISCV_AIA_MODE_AUTO; |
76 | diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c | 89 | + } else { |
77 | index XXXXXXX..XXXXXXX 100644 | 90 | + error_setg(errp, "Invalid KVM AIA mode"); |
78 | --- a/target/riscv/vector_helper.c | 91 | + error_append_hint(errp, "Valid values are emul, hwaccel, and auto.\n"); |
79 | +++ b/target/riscv/vector_helper.c | 92 | + } |
80 | @@ -XXX,XX +XXX,XX @@ GEN_VEXT_MASK_VV(vmor_mm, DO_OR) | 93 | +} |
81 | GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR) | 94 | + |
82 | GEN_VEXT_MASK_VV(vmornot_mm, DO_ORNOT) | 95 | void kvm_arch_accel_class_init(ObjectClass *oc) |
83 | GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR) | 96 | { |
84 | + | 97 | + object_class_property_add_str(oc, "riscv-aia", riscv_get_kvm_aia, |
85 | +/* Vector mask population count vmpopc */ | 98 | + riscv_set_kvm_aia); |
86 | +target_ulong HELPER(vmpopc_m)(void *v0, void *vs2, CPURISCVState *env, | 99 | + object_class_property_set_description(oc, "riscv-aia", |
87 | + uint32_t desc) | 100 | + "Set KVM AIA mode. Valid values are " |
88 | +{ | 101 | + "emul, hwaccel, and auto. Default " |
89 | + target_ulong cnt = 0; | 102 | + "is auto."); |
90 | + uint32_t mlen = vext_mlen(desc); | 103 | + object_property_set_default_str(object_class_property_find(oc, "riscv-aia"), |
91 | + uint32_t vm = vext_vm(desc); | 104 | + "auto"); |
92 | + uint32_t vl = env->vl; | 105 | +} |
93 | + int i; | 106 | + |
94 | + | 107 | +void kvm_riscv_aia_create(MachineState *machine, uint64_t group_shift, |
95 | + for (i = 0; i < vl; i++) { | 108 | + uint64_t aia_irq_num, uint64_t aia_msi_num, |
96 | + if (vm || vext_elem_mask(v0, mlen, i)) { | 109 | + uint64_t aplic_base, uint64_t imsic_base, |
97 | + if (vext_elem_mask(vs2, mlen, i)) { | 110 | + uint64_t guest_num) |
98 | + cnt++; | 111 | +{ |
112 | + int ret, i; | ||
113 | + int aia_fd = -1; | ||
114 | + uint64_t default_aia_mode; | ||
115 | + uint64_t socket_count = riscv_socket_count(machine); | ||
116 | + uint64_t max_hart_per_socket = 0; | ||
117 | + uint64_t socket, base_hart, hart_count, socket_imsic_base, imsic_addr; | ||
118 | + uint64_t socket_bits, hart_bits, guest_bits; | ||
119 | + | ||
120 | + aia_fd = kvm_create_device(kvm_state, KVM_DEV_TYPE_RISCV_AIA, false); | ||
121 | + | ||
122 | + if (aia_fd < 0) { | ||
123 | + error_report("Unable to create in-kernel irqchip"); | ||
124 | + exit(1); | ||
125 | + } | ||
126 | + | ||
127 | + ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_CONFIG, | ||
128 | + KVM_DEV_RISCV_AIA_CONFIG_MODE, | ||
129 | + &default_aia_mode, false, NULL); | ||
130 | + if (ret < 0) { | ||
131 | + error_report("KVM AIA: failed to get current KVM AIA mode"); | ||
132 | + exit(1); | ||
133 | + } | ||
134 | + qemu_log("KVM AIA: default mode is %s\n", | ||
135 | + kvm_aia_mode_str(default_aia_mode)); | ||
136 | + | ||
137 | + if (default_aia_mode != aia_mode) { | ||
138 | + ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_CONFIG, | ||
139 | + KVM_DEV_RISCV_AIA_CONFIG_MODE, | ||
140 | + &aia_mode, true, NULL); | ||
141 | + if (ret < 0) | ||
142 | + warn_report("KVM AIA: failed to set KVM AIA mode"); | ||
143 | + else | ||
144 | + qemu_log("KVM AIA: set current mode to %s\n", | ||
145 | + kvm_aia_mode_str(aia_mode)); | ||
146 | + } | ||
147 | + | ||
148 | + ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_CONFIG, | ||
149 | + KVM_DEV_RISCV_AIA_CONFIG_SRCS, | ||
150 | + &aia_irq_num, true, NULL); | ||
151 | + if (ret < 0) { | ||
152 | + error_report("KVM AIA: failed to set number of input irq lines"); | ||
153 | + exit(1); | ||
154 | + } | ||
155 | + | ||
156 | + ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_CONFIG, | ||
157 | + KVM_DEV_RISCV_AIA_CONFIG_IDS, | ||
158 | + &aia_msi_num, true, NULL); | ||
159 | + if (ret < 0) { | ||
160 | + error_report("KVM AIA: failed to set number of msi"); | ||
161 | + exit(1); | ||
162 | + } | ||
163 | + | ||
164 | + socket_bits = find_last_bit(&socket_count, BITS_PER_LONG) + 1; | ||
165 | + ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_CONFIG, | ||
166 | + KVM_DEV_RISCV_AIA_CONFIG_GROUP_BITS, | ||
167 | + &socket_bits, true, NULL); | ||
168 | + if (ret < 0) { | ||
169 | + error_report("KVM AIA: failed to set group_bits"); | ||
170 | + exit(1); | ||
171 | + } | ||
172 | + | ||
173 | + ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_CONFIG, | ||
174 | + KVM_DEV_RISCV_AIA_CONFIG_GROUP_SHIFT, | ||
175 | + &group_shift, true, NULL); | ||
176 | + if (ret < 0) { | ||
177 | + error_report("KVM AIA: failed to set group_shift"); | ||
178 | + exit(1); | ||
179 | + } | ||
180 | + | ||
181 | + guest_bits = guest_num == 0 ? 0 : | ||
182 | + find_last_bit(&guest_num, BITS_PER_LONG) + 1; | ||
183 | + ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_CONFIG, | ||
184 | + KVM_DEV_RISCV_AIA_CONFIG_GUEST_BITS, | ||
185 | + &guest_bits, true, NULL); | ||
186 | + if (ret < 0) { | ||
187 | + error_report("KVM AIA: failed to set guest_bits"); | ||
188 | + exit(1); | ||
189 | + } | ||
190 | + | ||
191 | + ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_ADDR, | ||
192 | + KVM_DEV_RISCV_AIA_ADDR_APLIC, | ||
193 | + &aplic_base, true, NULL); | ||
194 | + if (ret < 0) { | ||
195 | + error_report("KVM AIA: failed to set the base address of APLIC"); | ||
196 | + exit(1); | ||
197 | + } | ||
198 | + | ||
199 | + for (socket = 0; socket < socket_count; socket++) { | ||
200 | + socket_imsic_base = imsic_base + socket * (1U << group_shift); | ||
201 | + hart_count = riscv_socket_hart_count(machine, socket); | ||
202 | + base_hart = riscv_socket_first_hartid(machine, socket); | ||
203 | + | ||
204 | + if (max_hart_per_socket < hart_count) { | ||
205 | + max_hart_per_socket = hart_count; | ||
206 | + } | ||
207 | + | ||
208 | + for (i = 0; i < hart_count; i++) { | ||
209 | + imsic_addr = socket_imsic_base + i * IMSIC_HART_SIZE(guest_bits); | ||
210 | + ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_ADDR, | ||
211 | + KVM_DEV_RISCV_AIA_ADDR_IMSIC(i + base_hart), | ||
212 | + &imsic_addr, true, NULL); | ||
213 | + if (ret < 0) { | ||
214 | + error_report("KVM AIA: failed to set the IMSIC address for hart %d", i); | ||
215 | + exit(1); | ||
99 | + } | 216 | + } |
100 | + } | 217 | + } |
101 | + } | 218 | + } |
102 | + return cnt; | 219 | + |
103 | +} | 220 | + hart_bits = find_last_bit(&max_hart_per_socket, BITS_PER_LONG) + 1; |
221 | + ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_CONFIG, | ||
222 | + KVM_DEV_RISCV_AIA_CONFIG_HART_BITS, | ||
223 | + &hart_bits, true, NULL); | ||
224 | + if (ret < 0) { | ||
225 | + error_report("KVM AIA: failed to set hart_bits"); | ||
226 | + exit(1); | ||
227 | + } | ||
228 | + | ||
229 | + if (kvm_has_gsi_routing()) { | ||
230 | + for (uint64_t idx = 0; idx < aia_irq_num + 1; ++idx) { | ||
231 | + /* KVM AIA only has one APLIC instance */ | ||
232 | + kvm_irqchip_add_irq_route(kvm_state, idx, 0, idx); | ||
233 | + } | ||
234 | + kvm_gsi_routing_allowed = true; | ||
235 | + kvm_irqchip_commit_routes(kvm_state); | ||
236 | + } | ||
237 | + | ||
238 | + ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_CTRL, | ||
239 | + KVM_DEV_RISCV_AIA_CTRL_INIT, | ||
240 | + NULL, true, NULL); | ||
241 | + if (ret < 0) { | ||
242 | + error_report("KVM AIA: initialized fail"); | ||
243 | + exit(1); | ||
244 | + } | ||
245 | + | ||
246 | + kvm_msi_via_irqfd_allowed = kvm_irqfds_enabled(); | ||
247 | } | ||
104 | -- | 248 | -- |
105 | 2.27.0 | 249 | 2.41.0 |
106 | |||
107 | diff view generated by jsdifflib |
1 | From: LIU Zhiwei <zhiwei_liu@c-sky.com> | 1 | From: Yong-Xuan Wang <yongxuan.wang@sifive.com> |
---|---|---|---|
2 | 2 | ||
3 | The unit-stride fault-only-first load instructions are used to | 3 | KVM AIA can't emulate APLIC only. When the "aia=aplic" parameter is passed, |
4 | vectorize loops with data-dependent exit conditions (while loops). | 4 | APLIC devices are emulated by QEMU. For "aia=aplic-imsic", remove the |
5 | These instructions execute as a regular load except that they | 5 | mmio operations of APLIC when using KVM AIA and send wired interrupt |
6 | will only take a trap on element 0. | 6 | signal via KVM_IRQ_LINE API. |
7 | After KVM AIA is enabled, MSI messages are delivered by the KVM_SIGNAL_MSI API | ||
8 | when the IMSICs receive mmio write requests. | ||
7 | 9 | ||
8 | Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> | 10 | Signed-off-by: Yong-Xuan Wang <yongxuan.wang@sifive.com> |
9 | Reviewed-by: Alistair Francis <alistair.francis@wdc.com> | 11 | Reviewed-by: Jim Shu <jim.shu@sifive.com> |
10 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 12 | Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> |
11 | Message-id: 20200623215920.2594-9-zhiwei_liu@c-sky.com | 13 | Reviewed-by: Andrew Jones <ajones@ventanamicro.com> |
14 | Message-ID: <20230727102439.22554-5-yongxuan.wang@sifive.com> | ||
12 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | 15 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> |
13 | --- | 16 | --- |
14 | target/riscv/helper.h | 22 +++++ | 17 | hw/intc/riscv_aplic.c | 56 ++++++++++++++++++++++++++++++------------- |
15 | target/riscv/insn32.decode | 7 ++ | 18 | hw/intc/riscv_imsic.c | 25 +++++++++++++++---- |
16 | target/riscv/insn_trans/trans_rvv.inc.c | 73 ++++++++++++++++ | 19 | 2 files changed, 61 insertions(+), 20 deletions(-) |
17 | target/riscv/vector_helper.c | 110 ++++++++++++++++++++++++ | ||
18 | 4 files changed, 212 insertions(+) | ||
19 | 20 | ||
20 | diff --git a/target/riscv/helper.h b/target/riscv/helper.h | 21 | diff --git a/hw/intc/riscv_aplic.c b/hw/intc/riscv_aplic.c |
21 | index XXXXXXX..XXXXXXX 100644 | 22 | index XXXXXXX..XXXXXXX 100644 |
22 | --- a/target/riscv/helper.h | 23 | --- a/hw/intc/riscv_aplic.c |
23 | +++ b/target/riscv/helper.h | 24 | +++ b/hw/intc/riscv_aplic.c |
24 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vsxe_v_b, void, ptr, ptr, tl, ptr, env, i32) | 25 | @@ -XXX,XX +XXX,XX @@ |
25 | DEF_HELPER_6(vsxe_v_h, void, ptr, ptr, tl, ptr, env, i32) | 26 | #include "hw/irq.h" |
26 | DEF_HELPER_6(vsxe_v_w, void, ptr, ptr, tl, ptr, env, i32) | 27 | #include "target/riscv/cpu.h" |
27 | DEF_HELPER_6(vsxe_v_d, void, ptr, ptr, tl, ptr, env, i32) | 28 | #include "sysemu/sysemu.h" |
28 | +DEF_HELPER_5(vlbff_v_b, void, ptr, ptr, tl, env, i32) | 29 | +#include "sysemu/kvm.h" |
29 | +DEF_HELPER_5(vlbff_v_h, void, ptr, ptr, tl, env, i32) | 30 | #include "migration/vmstate.h" |
30 | +DEF_HELPER_5(vlbff_v_w, void, ptr, ptr, tl, env, i32) | 31 | |
31 | +DEF_HELPER_5(vlbff_v_d, void, ptr, ptr, tl, env, i32) | 32 | #define APLIC_MAX_IDC (1UL << 14) |
32 | +DEF_HELPER_5(vlhff_v_h, void, ptr, ptr, tl, env, i32) | 33 | @@ -XXX,XX +XXX,XX @@ |
33 | +DEF_HELPER_5(vlhff_v_w, void, ptr, ptr, tl, env, i32) | 34 | |
34 | +DEF_HELPER_5(vlhff_v_d, void, ptr, ptr, tl, env, i32) | 35 | #define APLIC_IDC_CLAIMI 0x1c |
35 | +DEF_HELPER_5(vlwff_v_w, void, ptr, ptr, tl, env, i32) | 36 | |
36 | +DEF_HELPER_5(vlwff_v_d, void, ptr, ptr, tl, env, i32) | ||
37 | +DEF_HELPER_5(vleff_v_b, void, ptr, ptr, tl, env, i32) | ||
38 | +DEF_HELPER_5(vleff_v_h, void, ptr, ptr, tl, env, i32) | ||
39 | +DEF_HELPER_5(vleff_v_w, void, ptr, ptr, tl, env, i32) | ||
40 | +DEF_HELPER_5(vleff_v_d, void, ptr, ptr, tl, env, i32) | ||
41 | +DEF_HELPER_5(vlbuff_v_b, void, ptr, ptr, tl, env, i32) | ||
42 | +DEF_HELPER_5(vlbuff_v_h, void, ptr, ptr, tl, env, i32) | ||
43 | +DEF_HELPER_5(vlbuff_v_w, void, ptr, ptr, tl, env, i32) | ||
44 | +DEF_HELPER_5(vlbuff_v_d, void, ptr, ptr, tl, env, i32) | ||
45 | +DEF_HELPER_5(vlhuff_v_h, void, ptr, ptr, tl, env, i32) | ||
46 | +DEF_HELPER_5(vlhuff_v_w, void, ptr, ptr, tl, env, i32) | ||
47 | +DEF_HELPER_5(vlhuff_v_d, void, ptr, ptr, tl, env, i32) | ||
48 | +DEF_HELPER_5(vlwuff_v_w, void, ptr, ptr, tl, env, i32) | ||
49 | +DEF_HELPER_5(vlwuff_v_d, void, ptr, ptr, tl, env, i32) | ||
50 | diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode | ||
51 | index XXXXXXX..XXXXXXX 100644 | ||
52 | --- a/target/riscv/insn32.decode | ||
53 | +++ b/target/riscv/insn32.decode | ||
54 | @@ -XXX,XX +XXX,XX @@ vle_v ... 000 . 00000 ..... 111 ..... 0000111 @r2_nfvm | ||
55 | vlbu_v ... 000 . 00000 ..... 000 ..... 0000111 @r2_nfvm | ||
56 | vlhu_v ... 000 . 00000 ..... 101 ..... 0000111 @r2_nfvm | ||
57 | vlwu_v ... 000 . 00000 ..... 110 ..... 0000111 @r2_nfvm | ||
58 | +vlbff_v ... 100 . 10000 ..... 000 ..... 0000111 @r2_nfvm | ||
59 | +vlhff_v ... 100 . 10000 ..... 101 ..... 0000111 @r2_nfvm | ||
60 | +vlwff_v ... 100 . 10000 ..... 110 ..... 0000111 @r2_nfvm | ||
61 | +vleff_v ... 000 . 10000 ..... 111 ..... 0000111 @r2_nfvm | ||
62 | +vlbuff_v ... 000 . 10000 ..... 000 ..... 0000111 @r2_nfvm | ||
63 | +vlhuff_v ... 000 . 10000 ..... 101 ..... 0000111 @r2_nfvm | ||
64 | +vlwuff_v ... 000 . 10000 ..... 110 ..... 0000111 @r2_nfvm | ||
65 | vsb_v ... 000 . 00000 ..... 000 ..... 0100111 @r2_nfvm | ||
66 | vsh_v ... 000 . 00000 ..... 101 ..... 0100111 @r2_nfvm | ||
67 | vsw_v ... 000 . 00000 ..... 110 ..... 0100111 @r2_nfvm | ||
68 | diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c | ||
69 | index XXXXXXX..XXXXXXX 100644 | ||
70 | --- a/target/riscv/insn_trans/trans_rvv.inc.c | ||
71 | +++ b/target/riscv/insn_trans/trans_rvv.inc.c | ||
72 | @@ -XXX,XX +XXX,XX @@ GEN_VEXT_TRANS(vsxb_v, 0, rnfvm, st_index_op, st_index_check) | ||
73 | GEN_VEXT_TRANS(vsxh_v, 1, rnfvm, st_index_op, st_index_check) | ||
74 | GEN_VEXT_TRANS(vsxw_v, 2, rnfvm, st_index_op, st_index_check) | ||
75 | GEN_VEXT_TRANS(vsxe_v, 3, rnfvm, st_index_op, st_index_check) | ||
76 | + | ||
77 | +/* | 37 | +/* |
78 | + *** unit stride fault-only-first load | 38 | + * KVM AIA only supports APLIC MSI, fallback to QEMU emulation if we want to use |
39 | + * APLIC Wired. | ||
79 | + */ | 40 | + */ |
80 | +static bool ldff_trans(uint32_t vd, uint32_t rs1, uint32_t data, | 41 | +static bool is_kvm_aia(bool msimode) |
81 | + gen_helper_ldst_us *fn, DisasContext *s) | ||
82 | +{ | 42 | +{ |
83 | + TCGv_ptr dest, mask; | 43 | + return kvm_irqchip_in_kernel() && msimode; |
84 | + TCGv base; | ||
85 | + TCGv_i32 desc; | ||
86 | + | ||
87 | + TCGLabel *over = gen_new_label(); | ||
88 | + tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); | ||
89 | + | ||
90 | + dest = tcg_temp_new_ptr(); | ||
91 | + mask = tcg_temp_new_ptr(); | ||
92 | + base = tcg_temp_new(); | ||
93 | + desc = tcg_const_i32(simd_desc(0, s->vlen / 8, data)); | ||
94 | + | ||
95 | + gen_get_gpr(base, rs1); | ||
96 | + tcg_gen_addi_ptr(dest, cpu_env, vreg_ofs(s, vd)); | ||
97 | + tcg_gen_addi_ptr(mask, cpu_env, vreg_ofs(s, 0)); | ||
98 | + | ||
99 | + fn(dest, mask, base, cpu_env, desc); | ||
100 | + | ||
101 | + tcg_temp_free_ptr(dest); | ||
102 | + tcg_temp_free_ptr(mask); | ||
103 | + tcg_temp_free(base); | ||
104 | + tcg_temp_free_i32(desc); | ||
105 | + gen_set_label(over); | ||
106 | + return true; | ||
107 | +} | 44 | +} |
108 | + | 45 | + |
109 | +static bool ldff_op(DisasContext *s, arg_r2nfvm *a, uint8_t seq) | 46 | static uint32_t riscv_aplic_read_input_word(RISCVAPLICState *aplic, |
47 | uint32_t word) | ||
48 | { | ||
49 | @@ -XXX,XX +XXX,XX @@ static uint32_t riscv_aplic_idc_claimi(RISCVAPLICState *aplic, uint32_t idc) | ||
50 | return topi; | ||
51 | } | ||
52 | |||
53 | +static void riscv_kvm_aplic_request(void *opaque, int irq, int level) | ||
110 | +{ | 54 | +{ |
111 | + uint32_t data = 0; | 55 | + kvm_set_irq(kvm_state, irq, !!level); |
112 | + gen_helper_ldst_us *fn; | ||
113 | + static gen_helper_ldst_us * const fns[7][4] = { | ||
114 | + { gen_helper_vlbff_v_b, gen_helper_vlbff_v_h, | ||
115 | + gen_helper_vlbff_v_w, gen_helper_vlbff_v_d }, | ||
116 | + { NULL, gen_helper_vlhff_v_h, | ||
117 | + gen_helper_vlhff_v_w, gen_helper_vlhff_v_d }, | ||
118 | + { NULL, NULL, | ||
119 | + gen_helper_vlwff_v_w, gen_helper_vlwff_v_d }, | ||
120 | + { gen_helper_vleff_v_b, gen_helper_vleff_v_h, | ||
121 | + gen_helper_vleff_v_w, gen_helper_vleff_v_d }, | ||
122 | + { gen_helper_vlbuff_v_b, gen_helper_vlbuff_v_h, | ||
123 | + gen_helper_vlbuff_v_w, gen_helper_vlbuff_v_d }, | ||
124 | + { NULL, gen_helper_vlhuff_v_h, | ||
125 | + gen_helper_vlhuff_v_w, gen_helper_vlhuff_v_d }, | ||
126 | + { NULL, NULL, | ||
127 | + gen_helper_vlwuff_v_w, gen_helper_vlwuff_v_d } | ||
128 | + }; | ||
129 | + | ||
130 | + fn = fns[seq][s->sew]; | ||
131 | + if (fn == NULL) { | ||
132 | + return false; | ||
133 | + } | ||
134 | + | ||
135 | + data = FIELD_DP32(data, VDATA, MLEN, s->mlen); | ||
136 | + data = FIELD_DP32(data, VDATA, VM, a->vm); | ||
137 | + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); | ||
138 | + data = FIELD_DP32(data, VDATA, NF, a->nf); | ||
139 | + return ldff_trans(a->rd, a->rs1, data, fn, s); | ||
140 | +} | 56 | +} |
141 | + | 57 | + |
142 | +GEN_VEXT_TRANS(vlbff_v, 0, r2nfvm, ldff_op, ld_us_check) | 58 | static void riscv_aplic_request(void *opaque, int irq, int level) |
143 | +GEN_VEXT_TRANS(vlhff_v, 1, r2nfvm, ldff_op, ld_us_check) | 59 | { |
144 | +GEN_VEXT_TRANS(vlwff_v, 2, r2nfvm, ldff_op, ld_us_check) | 60 | bool update = false; |
145 | +GEN_VEXT_TRANS(vleff_v, 3, r2nfvm, ldff_op, ld_us_check) | 61 | @@ -XXX,XX +XXX,XX @@ static void riscv_aplic_realize(DeviceState *dev, Error **errp) |
146 | +GEN_VEXT_TRANS(vlbuff_v, 4, r2nfvm, ldff_op, ld_us_check) | 62 | uint32_t i; |
147 | +GEN_VEXT_TRANS(vlhuff_v, 5, r2nfvm, ldff_op, ld_us_check) | 63 | RISCVAPLICState *aplic = RISCV_APLIC(dev); |
148 | +GEN_VEXT_TRANS(vlwuff_v, 6, r2nfvm, ldff_op, ld_us_check) | 64 | |
149 | diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c | 65 | - aplic->bitfield_words = (aplic->num_irqs + 31) >> 5; |
66 | - aplic->sourcecfg = g_new0(uint32_t, aplic->num_irqs); | ||
67 | - aplic->state = g_new0(uint32_t, aplic->num_irqs); | ||
68 | - aplic->target = g_new0(uint32_t, aplic->num_irqs); | ||
69 | - if (!aplic->msimode) { | ||
70 | - for (i = 0; i < aplic->num_irqs; i++) { | ||
71 | - aplic->target[i] = 1; | ||
72 | + if (!is_kvm_aia(aplic->msimode)) { | ||
73 | + aplic->bitfield_words = (aplic->num_irqs + 31) >> 5; | ||
74 | + aplic->sourcecfg = g_new0(uint32_t, aplic->num_irqs); | ||
75 | + aplic->state = g_new0(uint32_t, aplic->num_irqs); | ||
76 | + aplic->target = g_new0(uint32_t, aplic->num_irqs); | ||
77 | + if (!aplic->msimode) { | ||
78 | + for (i = 0; i < aplic->num_irqs; i++) { | ||
79 | + aplic->target[i] = 1; | ||
80 | + } | ||
81 | } | ||
82 | - } | ||
83 | - aplic->idelivery = g_new0(uint32_t, aplic->num_harts); | ||
84 | - aplic->iforce = g_new0(uint32_t, aplic->num_harts); | ||
85 | - aplic->ithreshold = g_new0(uint32_t, aplic->num_harts); | ||
86 | + aplic->idelivery = g_new0(uint32_t, aplic->num_harts); | ||
87 | + aplic->iforce = g_new0(uint32_t, aplic->num_harts); | ||
88 | + aplic->ithreshold = g_new0(uint32_t, aplic->num_harts); | ||
89 | |||
90 | - memory_region_init_io(&aplic->mmio, OBJECT(dev), &riscv_aplic_ops, aplic, | ||
91 | - TYPE_RISCV_APLIC, aplic->aperture_size); | ||
92 | - sysbus_init_mmio(SYS_BUS_DEVICE(dev), &aplic->mmio); | ||
93 | + memory_region_init_io(&aplic->mmio, OBJECT(dev), &riscv_aplic_ops, | ||
94 | + aplic, TYPE_RISCV_APLIC, aplic->aperture_size); | ||
95 | + sysbus_init_mmio(SYS_BUS_DEVICE(dev), &aplic->mmio); | ||
96 | + } | ||
97 | |||
98 | /* | ||
99 | * Only root APLICs have hardware IRQ lines. All non-root APLICs | ||
100 | * have IRQ lines delegated by their parent APLIC. | ||
101 | */ | ||
102 | if (!aplic->parent) { | ||
103 | - qdev_init_gpio_in(dev, riscv_aplic_request, aplic->num_irqs); | ||
104 | + if (is_kvm_aia(aplic->msimode)) { | ||
105 | + qdev_init_gpio_in(dev, riscv_kvm_aplic_request, aplic->num_irqs); | ||
106 | + } else { | ||
107 | + qdev_init_gpio_in(dev, riscv_aplic_request, aplic->num_irqs); | ||
108 | + } | ||
109 | } | ||
110 | |||
111 | /* Create output IRQ lines for non-MSI mode */ | ||
112 | @@ -XXX,XX +XXX,XX @@ DeviceState *riscv_aplic_create(hwaddr addr, hwaddr size, | ||
113 | qdev_prop_set_bit(dev, "mmode", mmode); | ||
114 | |||
115 | sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); | ||
116 | - sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, addr); | ||
117 | + | ||
118 | + if (!is_kvm_aia(msimode)) { | ||
119 | + sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, addr); | ||
120 | + } | ||
121 | |||
122 | if (parent) { | ||
123 | riscv_aplic_add_child(parent, dev); | ||
124 | diff --git a/hw/intc/riscv_imsic.c b/hw/intc/riscv_imsic.c | ||
150 | index XXXXXXX..XXXXXXX 100644 | 125 | index XXXXXXX..XXXXXXX 100644 |
151 | --- a/target/riscv/vector_helper.c | 126 | --- a/hw/intc/riscv_imsic.c |
152 | +++ b/target/riscv/vector_helper.c | 127 | +++ b/hw/intc/riscv_imsic.c |
153 | @@ -XXX,XX +XXX,XX @@ GEN_VEXT_ST_INDEX(vsxe_v_b, int8_t, int8_t, idx_b, ste_b) | 128 | @@ -XXX,XX +XXX,XX @@ |
154 | GEN_VEXT_ST_INDEX(vsxe_v_h, int16_t, int16_t, idx_h, ste_h) | 129 | #include "target/riscv/cpu.h" |
155 | GEN_VEXT_ST_INDEX(vsxe_v_w, int32_t, int32_t, idx_w, ste_w) | 130 | #include "target/riscv/cpu_bits.h" |
156 | GEN_VEXT_ST_INDEX(vsxe_v_d, int64_t, int64_t, idx_d, ste_d) | 131 | #include "sysemu/sysemu.h" |
132 | +#include "sysemu/kvm.h" | ||
133 | #include "migration/vmstate.h" | ||
134 | |||
135 | #define IMSIC_MMIO_PAGE_LE 0x00 | ||
136 | @@ -XXX,XX +XXX,XX @@ static void riscv_imsic_write(void *opaque, hwaddr addr, uint64_t value, | ||
137 | goto err; | ||
138 | } | ||
139 | |||
140 | +#if defined(CONFIG_KVM) | ||
141 | + if (kvm_irqchip_in_kernel()) { | ||
142 | + struct kvm_msi msi; | ||
157 | + | 143 | + |
158 | +/* | 144 | + msi.address_lo = extract64(imsic->mmio.addr + addr, 0, 32); |
159 | + *** unit-stride fault-only-fisrt load instructions | 145 | + msi.address_hi = extract64(imsic->mmio.addr + addr, 32, 32); |
160 | + */ | 146 | + msi.data = le32_to_cpu(value); |
161 | +static inline void | ||
162 | +vext_ldff(void *vd, void *v0, target_ulong base, | ||
163 | + CPURISCVState *env, uint32_t desc, | ||
164 | + vext_ldst_elem_fn *ldst_elem, | ||
165 | + clear_fn *clear_elem, | ||
166 | + uint32_t esz, uint32_t msz, uintptr_t ra) | ||
167 | +{ | ||
168 | + void *host; | ||
169 | + uint32_t i, k, vl = 0; | ||
170 | + uint32_t mlen = vext_mlen(desc); | ||
171 | + uint32_t nf = vext_nf(desc); | ||
172 | + uint32_t vm = vext_vm(desc); | ||
173 | + uint32_t vlmax = vext_maxsz(desc) / esz; | ||
174 | + target_ulong addr, offset, remain; | ||
175 | + | 147 | + |
176 | + /* probe every access*/ | 148 | + kvm_vm_ioctl(kvm_state, KVM_SIGNAL_MSI, &msi); |
177 | + for (i = 0; i < env->vl; i++) { | 149 | + |
178 | + if (!vm && !vext_elem_mask(v0, mlen, i)) { | ||
179 | + continue; | ||
180 | + } | ||
181 | + addr = base + nf * i * msz; | ||
182 | + if (i == 0) { | ||
183 | + probe_pages(env, addr, nf * msz, ra, MMU_DATA_LOAD); | ||
184 | + } else { | ||
185 | + /* if it triggers an exception, no need to check watchpoint */ | ||
186 | + remain = nf * msz; | ||
187 | + while (remain > 0) { | ||
188 | + offset = -(addr | TARGET_PAGE_MASK); | ||
189 | + host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD, | ||
190 | + cpu_mmu_index(env, false)); | ||
191 | + if (host) { | ||
192 | +#ifdef CONFIG_USER_ONLY | ||
193 | + if (page_check_range(addr, nf * msz, PAGE_READ) < 0) { | ||
194 | + vl = i; | ||
195 | + goto ProbeSuccess; | ||
196 | + } | ||
197 | +#else | ||
198 | + probe_pages(env, addr, nf * msz, ra, MMU_DATA_LOAD); | ||
199 | +#endif | ||
200 | + } else { | ||
201 | + vl = i; | ||
202 | + goto ProbeSuccess; | ||
203 | + } | ||
204 | + if (remain <= offset) { | ||
205 | + break; | ||
206 | + } | ||
207 | + remain -= offset; | ||
208 | + addr += offset; | ||
209 | + } | ||
210 | + } | ||
211 | + } | ||
212 | +ProbeSuccess: | ||
213 | + /* load bytes from guest memory */ | ||
214 | + if (vl != 0) { | ||
215 | + env->vl = vl; | ||
216 | + } | ||
217 | + for (i = 0; i < env->vl; i++) { | ||
218 | + k = 0; | ||
219 | + if (!vm && !vext_elem_mask(v0, mlen, i)) { | ||
220 | + continue; | ||
221 | + } | ||
222 | + while (k < nf) { | ||
223 | + target_ulong addr = base + (i * nf + k) * msz; | ||
224 | + ldst_elem(env, addr, i + k * vlmax, vd, ra); | ||
225 | + k++; | ||
226 | + } | ||
227 | + } | ||
228 | + /* clear tail elements */ | ||
229 | + if (vl != 0) { | ||
230 | + return; | 150 | + return; |
231 | + } | 151 | + } |
232 | + for (k = 0; k < nf; k++) { | 152 | +#endif |
233 | + clear_elem(vd, env->vl + k * vlmax, env->vl * esz, vlmax * esz); | 153 | + |
154 | /* Writes only supported for MSI little-endian registers */ | ||
155 | page = addr >> IMSIC_MMIO_PAGE_SHIFT; | ||
156 | if ((addr & (IMSIC_MMIO_PAGE_SZ - 1)) == IMSIC_MMIO_PAGE_LE) { | ||
157 | @@ -XXX,XX +XXX,XX @@ static void riscv_imsic_realize(DeviceState *dev, Error **errp) | ||
158 | CPUState *cpu = cpu_by_arch_id(imsic->hartid); | ||
159 | CPURISCVState *env = cpu ? cpu->env_ptr : NULL; | ||
160 | |||
161 | - imsic->num_eistate = imsic->num_pages * imsic->num_irqs; | ||
162 | - imsic->eidelivery = g_new0(uint32_t, imsic->num_pages); | ||
163 | - imsic->eithreshold = g_new0(uint32_t, imsic->num_pages); | ||
164 | - imsic->eistate = g_new0(uint32_t, imsic->num_eistate); | ||
165 | + if (!kvm_irqchip_in_kernel()) { | ||
166 | + imsic->num_eistate = imsic->num_pages * imsic->num_irqs; | ||
167 | + imsic->eidelivery = g_new0(uint32_t, imsic->num_pages); | ||
168 | + imsic->eithreshold = g_new0(uint32_t, imsic->num_pages); | ||
169 | + imsic->eistate = g_new0(uint32_t, imsic->num_eistate); | ||
234 | + } | 170 | + } |
235 | +} | 171 | |
236 | + | 172 | memory_region_init_io(&imsic->mmio, OBJECT(dev), &riscv_imsic_ops, |
237 | +#define GEN_VEXT_LDFF(NAME, MTYPE, ETYPE, LOAD_FN, CLEAR_FN) \ | 173 | imsic, TYPE_RISCV_IMSIC, |
238 | +void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ | ||
239 | + CPURISCVState *env, uint32_t desc) \ | ||
240 | +{ \ | ||
241 | + vext_ldff(vd, v0, base, env, desc, LOAD_FN, CLEAR_FN, \ | ||
242 | + sizeof(ETYPE), sizeof(MTYPE), GETPC()); \ | ||
243 | +} | ||
244 | + | ||
245 | +GEN_VEXT_LDFF(vlbff_v_b, int8_t, int8_t, ldb_b, clearb) | ||
246 | +GEN_VEXT_LDFF(vlbff_v_h, int8_t, int16_t, ldb_h, clearh) | ||
247 | +GEN_VEXT_LDFF(vlbff_v_w, int8_t, int32_t, ldb_w, clearl) | ||
248 | +GEN_VEXT_LDFF(vlbff_v_d, int8_t, int64_t, ldb_d, clearq) | ||
249 | +GEN_VEXT_LDFF(vlhff_v_h, int16_t, int16_t, ldh_h, clearh) | ||
250 | +GEN_VEXT_LDFF(vlhff_v_w, int16_t, int32_t, ldh_w, clearl) | ||
251 | +GEN_VEXT_LDFF(vlhff_v_d, int16_t, int64_t, ldh_d, clearq) | ||
252 | +GEN_VEXT_LDFF(vlwff_v_w, int32_t, int32_t, ldw_w, clearl) | ||
253 | +GEN_VEXT_LDFF(vlwff_v_d, int32_t, int64_t, ldw_d, clearq) | ||
254 | +GEN_VEXT_LDFF(vleff_v_b, int8_t, int8_t, lde_b, clearb) | ||
255 | +GEN_VEXT_LDFF(vleff_v_h, int16_t, int16_t, lde_h, clearh) | ||
256 | +GEN_VEXT_LDFF(vleff_v_w, int32_t, int32_t, lde_w, clearl) | ||
257 | +GEN_VEXT_LDFF(vleff_v_d, int64_t, int64_t, lde_d, clearq) | ||
258 | +GEN_VEXT_LDFF(vlbuff_v_b, uint8_t, uint8_t, ldbu_b, clearb) | ||
259 | +GEN_VEXT_LDFF(vlbuff_v_h, uint8_t, uint16_t, ldbu_h, clearh) | ||
260 | +GEN_VEXT_LDFF(vlbuff_v_w, uint8_t, uint32_t, ldbu_w, clearl) | ||
261 | +GEN_VEXT_LDFF(vlbuff_v_d, uint8_t, uint64_t, ldbu_d, clearq) | ||
262 | +GEN_VEXT_LDFF(vlhuff_v_h, uint16_t, uint16_t, ldhu_h, clearh) | ||
263 | +GEN_VEXT_LDFF(vlhuff_v_w, uint16_t, uint32_t, ldhu_w, clearl) | ||
264 | +GEN_VEXT_LDFF(vlhuff_v_d, uint16_t, uint64_t, ldhu_d, clearq) | ||
265 | +GEN_VEXT_LDFF(vlwuff_v_w, uint32_t, uint32_t, ldwu_w, clearl) | ||
266 | +GEN_VEXT_LDFF(vlwuff_v_d, uint32_t, uint64_t, ldwu_d, clearq) | ||
267 | -- | 174 | -- |
268 | 2.27.0 | 175 | 2.41.0 |
269 | |||
270 | diff view generated by jsdifflib |
1 | From: LIU Zhiwei <zhiwei_liu@c-sky.com> | 1 | From: Yong-Xuan Wang <yongxuan.wang@sifive.com> |
---|---|---|---|
2 | 2 | ||
3 | Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> | 3 | Select KVM AIA when the host kernel has in-kernel AIA chip support. |
4 | Reviewed-by: Alistair Francis <alistair.francis@wdc.com> | 4 | Since KVM AIA only has one APLIC instance, we map the QEMU APLIC |
5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 5 | devices to KVM APLIC. |
6 | Message-id: 20200623215920.2594-27-zhiwei_liu@c-sky.com | 6 | |
7 | Signed-off-by: Yong-Xuan Wang <yongxuan.wang@sifive.com> | ||
8 | Reviewed-by: Jim Shu <jim.shu@sifive.com> | ||
9 | Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> | ||
10 | Reviewed-by: Andrew Jones <ajones@ventanamicro.com> | ||
11 | Message-ID: <20230727102439.22554-6-yongxuan.wang@sifive.com> | ||
7 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | 12 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> |
8 | --- | 13 | --- |
9 | target/riscv/helper.h | 9 ++ | 14 | hw/riscv/virt.c | 94 +++++++++++++++++++++++++++++++++---------------- |
10 | target/riscv/insn32.decode | 2 + | 15 | 1 file changed, 63 insertions(+), 31 deletions(-) |
11 | target/riscv/insn_trans/trans_rvv.inc.c | 4 + | ||
12 | target/riscv/vector_helper.c | 107 ++++++++++++++++++++++++ | ||
13 | 4 files changed, 122 insertions(+) | ||
14 | 16 | ||
15 | diff --git a/target/riscv/helper.h b/target/riscv/helper.h | 17 | diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c |
16 | index XXXXXXX..XXXXXXX 100644 | 18 | index XXXXXXX..XXXXXXX 100644 |
17 | --- a/target/riscv/helper.h | 19 | --- a/hw/riscv/virt.c |
18 | +++ b/target/riscv/helper.h | 20 | +++ b/hw/riscv/virt.c |
19 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vasub_vx_b, void, ptr, ptr, tl, ptr, env, i32) | 21 | @@ -XXX,XX +XXX,XX @@ |
20 | DEF_HELPER_6(vasub_vx_h, void, ptr, ptr, tl, ptr, env, i32) | 22 | #include "hw/riscv/virt.h" |
21 | DEF_HELPER_6(vasub_vx_w, void, ptr, ptr, tl, ptr, env, i32) | 23 | #include "hw/riscv/boot.h" |
22 | DEF_HELPER_6(vasub_vx_d, void, ptr, ptr, tl, ptr, env, i32) | 24 | #include "hw/riscv/numa.h" |
23 | + | 25 | +#include "kvm_riscv.h" |
24 | +DEF_HELPER_6(vsmul_vv_b, void, ptr, ptr, ptr, ptr, env, i32) | 26 | #include "hw/intc/riscv_aclint.h" |
25 | +DEF_HELPER_6(vsmul_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | 27 | #include "hw/intc/riscv_aplic.h" |
26 | +DEF_HELPER_6(vsmul_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | 28 | #include "hw/intc/riscv_imsic.h" |
27 | +DEF_HELPER_6(vsmul_vv_d, void, ptr, ptr, ptr, ptr, env, i32) | 29 | @@ -XXX,XX +XXX,XX @@ |
28 | +DEF_HELPER_6(vsmul_vx_b, void, ptr, ptr, tl, ptr, env, i32) | 30 | #error "Can't accommodate all IMSIC groups in address space" |
29 | +DEF_HELPER_6(vsmul_vx_h, void, ptr, ptr, tl, ptr, env, i32) | 31 | #endif |
30 | +DEF_HELPER_6(vsmul_vx_w, void, ptr, ptr, tl, ptr, env, i32) | 32 | |
31 | +DEF_HELPER_6(vsmul_vx_d, void, ptr, ptr, tl, ptr, env, i32) | 33 | +/* KVM AIA only supports APLIC MSI. APLIC Wired is always emulated by QEMU. */ |
32 | diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode | 34 | +static bool virt_use_kvm_aia(RISCVVirtState *s) |
33 | index XXXXXXX..XXXXXXX 100644 | ||
34 | --- a/target/riscv/insn32.decode | ||
35 | +++ b/target/riscv/insn32.decode | ||
36 | @@ -XXX,XX +XXX,XX @@ vaadd_vx 100100 . ..... ..... 100 ..... 1010111 @r_vm | ||
37 | vaadd_vi 100100 . ..... ..... 011 ..... 1010111 @r_vm | ||
38 | vasub_vv 100110 . ..... ..... 000 ..... 1010111 @r_vm | ||
39 | vasub_vx 100110 . ..... ..... 100 ..... 1010111 @r_vm | ||
40 | +vsmul_vv 100111 . ..... ..... 000 ..... 1010111 @r_vm | ||
41 | +vsmul_vx 100111 . ..... ..... 100 ..... 1010111 @r_vm | ||
42 | |||
43 | vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm | ||
44 | vsetvl 1000000 ..... ..... 111 ..... 1010111 @r | ||
45 | diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c | ||
46 | index XXXXXXX..XXXXXXX 100644 | ||
47 | --- a/target/riscv/insn_trans/trans_rvv.inc.c | ||
48 | +++ b/target/riscv/insn_trans/trans_rvv.inc.c | ||
49 | @@ -XXX,XX +XXX,XX @@ GEN_OPIVV_TRANS(vasub_vv, opivv_check) | ||
50 | GEN_OPIVX_TRANS(vaadd_vx, opivx_check) | ||
51 | GEN_OPIVX_TRANS(vasub_vx, opivx_check) | ||
52 | GEN_OPIVI_TRANS(vaadd_vi, 0, vaadd_vx, opivx_check) | ||
53 | + | ||
54 | +/* Vector Single-Width Fractional Multiply with Rounding and Saturation */ | ||
55 | +GEN_OPIVV_TRANS(vsmul_vv, opivv_check) | ||
56 | +GEN_OPIVX_TRANS(vsmul_vx, opivx_check) | ||
57 | diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c | ||
58 | index XXXXXXX..XXXXXXX 100644 | ||
59 | --- a/target/riscv/vector_helper.c | ||
60 | +++ b/target/riscv/vector_helper.c | ||
61 | @@ -XXX,XX +XXX,XX @@ GEN_VEXT_VX_RM(vasub_vx_b, 1, 1, clearb) | ||
62 | GEN_VEXT_VX_RM(vasub_vx_h, 2, 2, clearh) | ||
63 | GEN_VEXT_VX_RM(vasub_vx_w, 4, 4, clearl) | ||
64 | GEN_VEXT_VX_RM(vasub_vx_d, 8, 8, clearq) | ||
65 | + | ||
66 | +/* Vector Single-Width Fractional Multiply with Rounding and Saturation */ | ||
67 | +static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) | ||
68 | +{ | 35 | +{ |
69 | + uint8_t round; | 36 | + return kvm_irqchip_in_kernel() && s->aia_type == VIRT_AIA_TYPE_APLIC_IMSIC; |
70 | + int16_t res; | ||
71 | + | ||
72 | + res = (int16_t)a * (int16_t)b; | ||
73 | + round = get_round(vxrm, res, 7); | ||
74 | + res = (res >> 7) + round; | ||
75 | + | ||
76 | + if (res > INT8_MAX) { | ||
77 | + env->vxsat = 0x1; | ||
78 | + return INT8_MAX; | ||
79 | + } else if (res < INT8_MIN) { | ||
80 | + env->vxsat = 0x1; | ||
81 | + return INT8_MIN; | ||
82 | + } else { | ||
83 | + return res; | ||
84 | + } | ||
85 | +} | 37 | +} |
86 | + | 38 | + |
87 | +static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) | 39 | static const MemMapEntry virt_memmap[] = { |
88 | +{ | 40 | [VIRT_DEBUG] = { 0x0, 0x100 }, |
89 | + uint8_t round; | 41 | [VIRT_MROM] = { 0x1000, 0xf000 }, |
90 | + int32_t res; | 42 | @@ -XXX,XX +XXX,XX @@ static void create_fdt_one_aplic(RISCVVirtState *s, int socket, |
43 | uint32_t *intc_phandles, | ||
44 | uint32_t aplic_phandle, | ||
45 | uint32_t aplic_child_phandle, | ||
46 | - bool m_mode) | ||
47 | + bool m_mode, int num_harts) | ||
48 | { | ||
49 | int cpu; | ||
50 | char *aplic_name; | ||
51 | uint32_t *aplic_cells; | ||
52 | MachineState *ms = MACHINE(s); | ||
53 | |||
54 | - aplic_cells = g_new0(uint32_t, s->soc[socket].num_harts * 2); | ||
55 | + aplic_cells = g_new0(uint32_t, num_harts * 2); | ||
56 | |||
57 | - for (cpu = 0; cpu < s->soc[socket].num_harts; cpu++) { | ||
58 | + for (cpu = 0; cpu < num_harts; cpu++) { | ||
59 | aplic_cells[cpu * 2 + 0] = cpu_to_be32(intc_phandles[cpu]); | ||
60 | aplic_cells[cpu * 2 + 1] = cpu_to_be32(m_mode ? IRQ_M_EXT : IRQ_S_EXT); | ||
61 | } | ||
62 | @@ -XXX,XX +XXX,XX @@ static void create_fdt_one_aplic(RISCVVirtState *s, int socket, | ||
63 | |||
64 | if (s->aia_type == VIRT_AIA_TYPE_APLIC) { | ||
65 | qemu_fdt_setprop(ms->fdt, aplic_name, "interrupts-extended", | ||
66 | - aplic_cells, | ||
67 | - s->soc[socket].num_harts * sizeof(uint32_t) * 2); | ||
68 | + aplic_cells, num_harts * sizeof(uint32_t) * 2); | ||
69 | } else { | ||
70 | qemu_fdt_setprop_cell(ms->fdt, aplic_name, "msi-parent", msi_phandle); | ||
71 | } | ||
72 | @@ -XXX,XX +XXX,XX @@ static void create_fdt_socket_aplic(RISCVVirtState *s, | ||
73 | uint32_t msi_s_phandle, | ||
74 | uint32_t *phandle, | ||
75 | uint32_t *intc_phandles, | ||
76 | - uint32_t *aplic_phandles) | ||
77 | + uint32_t *aplic_phandles, | ||
78 | + int num_harts) | ||
79 | { | ||
80 | char *aplic_name; | ||
81 | unsigned long aplic_addr; | ||
82 | @@ -XXX,XX +XXX,XX @@ static void create_fdt_socket_aplic(RISCVVirtState *s, | ||
83 | create_fdt_one_aplic(s, socket, aplic_addr, memmap[VIRT_APLIC_M].size, | ||
84 | msi_m_phandle, intc_phandles, | ||
85 | aplic_m_phandle, aplic_s_phandle, | ||
86 | - true); | ||
87 | + true, num_harts); | ||
88 | } | ||
89 | |||
90 | /* S-level APLIC node */ | ||
91 | @@ -XXX,XX +XXX,XX @@ static void create_fdt_socket_aplic(RISCVVirtState *s, | ||
92 | create_fdt_one_aplic(s, socket, aplic_addr, memmap[VIRT_APLIC_S].size, | ||
93 | msi_s_phandle, intc_phandles, | ||
94 | aplic_s_phandle, 0, | ||
95 | - false); | ||
96 | + false, num_harts); | ||
97 | |||
98 | aplic_name = g_strdup_printf("/soc/aplic@%lx", aplic_addr); | ||
99 | |||
100 | @@ -XXX,XX +XXX,XX @@ static void create_fdt_sockets(RISCVVirtState *s, const MemMapEntry *memmap, | ||
101 | *msi_pcie_phandle = msi_s_phandle; | ||
102 | } | ||
103 | |||
104 | - phandle_pos = ms->smp.cpus; | ||
105 | - for (socket = (socket_count - 1); socket >= 0; socket--) { | ||
106 | - phandle_pos -= s->soc[socket].num_harts; | ||
107 | - | ||
108 | - if (s->aia_type == VIRT_AIA_TYPE_NONE) { | ||
109 | - create_fdt_socket_plic(s, memmap, socket, phandle, | ||
110 | - &intc_phandles[phandle_pos], xplic_phandles); | ||
111 | - } else { | ||
112 | - create_fdt_socket_aplic(s, memmap, socket, | ||
113 | - msi_m_phandle, msi_s_phandle, phandle, | ||
114 | - &intc_phandles[phandle_pos], xplic_phandles); | ||
115 | + /* KVM AIA only has one APLIC instance */ | ||
116 | + if (virt_use_kvm_aia(s)) { | ||
117 | + create_fdt_socket_aplic(s, memmap, 0, | ||
118 | + msi_m_phandle, msi_s_phandle, phandle, | ||
119 | + &intc_phandles[0], xplic_phandles, | ||
120 | + ms->smp.cpus); | ||
121 | + } else { | ||
122 | + phandle_pos = ms->smp.cpus; | ||
123 | + for (socket = (socket_count - 1); socket >= 0; socket--) { | ||
124 | + phandle_pos -= s->soc[socket].num_harts; | ||
91 | + | 125 | + |
92 | + res = (int32_t)a * (int32_t)b; | 126 | + if (s->aia_type == VIRT_AIA_TYPE_NONE) { |
93 | + round = get_round(vxrm, res, 15); | 127 | + create_fdt_socket_plic(s, memmap, socket, phandle, |
94 | + res = (res >> 15) + round; | 128 | + &intc_phandles[phandle_pos], |
95 | + | 129 | + xplic_phandles); |
96 | + if (res > INT16_MAX) { | 130 | + } else { |
97 | + env->vxsat = 0x1; | 131 | + create_fdt_socket_aplic(s, memmap, socket, |
98 | + return INT16_MAX; | 132 | + msi_m_phandle, msi_s_phandle, phandle, |
99 | + } else if (res < INT16_MIN) { | 133 | + &intc_phandles[phandle_pos], |
100 | + env->vxsat = 0x1; | 134 | + xplic_phandles, |
101 | + return INT16_MIN; | 135 | + s->soc[socket].num_harts); |
136 | + } | ||
137 | } | ||
138 | } | ||
139 | |||
140 | g_free(intc_phandles); | ||
141 | |||
142 | - for (socket = 0; socket < socket_count; socket++) { | ||
143 | - if (socket == 0) { | ||
144 | - *irq_mmio_phandle = xplic_phandles[socket]; | ||
145 | - *irq_virtio_phandle = xplic_phandles[socket]; | ||
146 | - *irq_pcie_phandle = xplic_phandles[socket]; | ||
147 | - } | ||
148 | - if (socket == 1) { | ||
149 | - *irq_virtio_phandle = xplic_phandles[socket]; | ||
150 | - *irq_pcie_phandle = xplic_phandles[socket]; | ||
151 | - } | ||
152 | - if (socket == 2) { | ||
153 | - *irq_pcie_phandle = xplic_phandles[socket]; | ||
154 | + if (virt_use_kvm_aia(s)) { | ||
155 | + *irq_mmio_phandle = xplic_phandles[0]; | ||
156 | + *irq_virtio_phandle = xplic_phandles[0]; | ||
157 | + *irq_pcie_phandle = xplic_phandles[0]; | ||
102 | + } else { | 158 | + } else { |
103 | + return res; | 159 | + for (socket = 0; socket < socket_count; socket++) { |
104 | + } | 160 | + if (socket == 0) { |
105 | +} | 161 | + *irq_mmio_phandle = xplic_phandles[socket]; |
106 | + | 162 | + *irq_virtio_phandle = xplic_phandles[socket]; |
107 | +static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) | 163 | + *irq_pcie_phandle = xplic_phandles[socket]; |
108 | +{ | 164 | + } |
109 | + uint8_t round; | 165 | + if (socket == 1) { |
110 | + int64_t res; | 166 | + *irq_virtio_phandle = xplic_phandles[socket]; |
111 | + | 167 | + *irq_pcie_phandle = xplic_phandles[socket]; |
112 | + res = (int64_t)a * (int64_t)b; | 168 | + } |
113 | + round = get_round(vxrm, res, 31); | 169 | + if (socket == 2) { |
114 | + res = (res >> 31) + round; | 170 | + *irq_pcie_phandle = xplic_phandles[socket]; |
115 | + | 171 | + } |
116 | + if (res > INT32_MAX) { | 172 | } |
117 | + env->vxsat = 0x1; | 173 | } |
118 | + return INT32_MAX; | 174 | |
119 | + } else if (res < INT32_MIN) { | 175 | @@ -XXX,XX +XXX,XX @@ static void virt_machine_init(MachineState *machine) |
120 | + env->vxsat = 0x1; | 176 | } |
121 | + return INT32_MIN; | 177 | } |
122 | + } else { | 178 | |
123 | + return res; | 179 | + if (virt_use_kvm_aia(s)) { |
124 | + } | 180 | + kvm_riscv_aia_create(machine, IMSIC_MMIO_GROUP_MIN_SHIFT, |
125 | +} | 181 | + VIRT_IRQCHIP_NUM_SOURCES, VIRT_IRQCHIP_NUM_MSIS, |
126 | + | 182 | + memmap[VIRT_APLIC_S].base, |
127 | +static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) | 183 | + memmap[VIRT_IMSIC_S].base, |
128 | +{ | 184 | + s->aia_guests); |
129 | + uint8_t round; | ||
130 | + uint64_t hi_64, lo_64; | ||
131 | + int64_t res; | ||
132 | + | ||
133 | + if (a == INT64_MIN && b == INT64_MIN) { | ||
134 | + env->vxsat = 1; | ||
135 | + return INT64_MAX; | ||
136 | + } | 185 | + } |
137 | + | 186 | + |
138 | + muls64(&lo_64, &hi_64, a, b); | 187 | if (riscv_is_32bit(&s->soc[0])) { |
139 | + round = get_round(vxrm, lo_64, 63); | 188 | #if HOST_LONG_BITS == 64 |
140 | + /* | 189 | /* limit RAM size in a 32-bit system */ |
141 | + * Cannot overflow, as there are always | ||
142 | + * 2 sign bits after multiply. | ||
143 | + */ | ||
144 | + res = (hi_64 << 1) | (lo_64 >> 63); | ||
145 | + if (round) { | ||
146 | + if (res == INT64_MAX) { | ||
147 | + env->vxsat = 1; | ||
148 | + } else { | ||
149 | + res += 1; | ||
150 | + } | ||
151 | + } | ||
152 | + return res; | ||
153 | +} | ||
154 | + | ||
155 | +RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8) | ||
156 | +RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16) | ||
157 | +RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32) | ||
158 | +RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64) | ||
159 | +GEN_VEXT_VV_RM(vsmul_vv_b, 1, 1, clearb) | ||
160 | +GEN_VEXT_VV_RM(vsmul_vv_h, 2, 2, clearh) | ||
161 | +GEN_VEXT_VV_RM(vsmul_vv_w, 4, 4, clearl) | ||
162 | +GEN_VEXT_VV_RM(vsmul_vv_d, 8, 8, clearq) | ||
163 | + | ||
164 | +RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8) | ||
165 | +RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16) | ||
166 | +RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32) | ||
167 | +RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64) | ||
168 | +GEN_VEXT_VX_RM(vsmul_vx_b, 1, 1, clearb) | ||
169 | +GEN_VEXT_VX_RM(vsmul_vx_h, 2, 2, clearh) | ||
170 | +GEN_VEXT_VX_RM(vsmul_vx_w, 4, 4, clearl) | ||
171 | +GEN_VEXT_VX_RM(vsmul_vx_d, 8, 8, clearq) | ||
172 | -- | 190 | -- |
173 | 2.27.0 | 191 | 2.41.0 |
174 | |||
175 | diff view generated by jsdifflib |
1 | From: LIU Zhiwei <zhiwei_liu@c-sky.com> | 1 | From: Conor Dooley <conor.dooley@microchip.com> |
---|---|---|---|
2 | 2 | ||
3 | Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> | 3 | On a dtb dumped from the virt machine, dt-validate complains: |
4 | Reviewed-by: Alistair Francis <alistair.francis@wdc.com> | 4 | soc: pmu: {'riscv,event-to-mhpmcounters': [[1, 1, 524281], [2, 2, 524284], [65561, 65561, 524280], [65563, 65563, 524280], [65569, 65569, 524280]], 'compatible': ['riscv,pmu']} should not be valid under {'type': 'object'} |
5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 5 | from schema $id: http://devicetree.org/schemas/simple-bus.yaml# |
6 | Message-id: 20200623215920.2594-40-zhiwei_liu@c-sky.com | 6 | That's pretty cryptic, but running the dtb back through dtc produces |
7 | something a lot more reasonable: | ||
8 | Warning (simple_bus_reg): /soc/pmu: missing or empty reg/ranges property | ||
9 | |||
10 | Moving the riscv,pmu node out of the soc bus solves the problem. | ||
11 | |||
12 | Signed-off-by: Conor Dooley <conor.dooley@microchip.com> | ||
13 | Acked-by: Alistair Francis <alistair.francis@wdc.com> | ||
14 | Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> | ||
15 | Message-ID: <20230727-groom-decline-2c57ce42841c@spud> | ||
7 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | 16 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> |
8 | --- | 17 | --- |
9 | target/riscv/helper.h | 37 +++++ | 18 | hw/riscv/virt.c | 2 +- |
10 | target/riscv/insn32.decode | 12 ++ | 19 | 1 file changed, 1 insertion(+), 1 deletion(-) |
11 | target/riscv/insn_trans/trans_rvv.inc.c | 35 +++++ | ||
12 | target/riscv/vector_helper.c | 174 ++++++++++++++++++++++++ | ||
13 | 4 files changed, 258 insertions(+) | ||
14 | 20 | ||
15 | diff --git a/target/riscv/helper.h b/target/riscv/helper.h | 21 | diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c |
16 | index XXXXXXX..XXXXXXX 100644 | 22 | index XXXXXXX..XXXXXXX 100644 |
17 | --- a/target/riscv/helper.h | 23 | --- a/hw/riscv/virt.c |
18 | +++ b/target/riscv/helper.h | 24 | +++ b/hw/riscv/virt.c |
19 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vfsgnjn_vf_d, void, ptr, ptr, i64, ptr, env, i32) | 25 | @@ -XXX,XX +XXX,XX @@ static void create_fdt_pmu(RISCVVirtState *s) |
20 | DEF_HELPER_6(vfsgnjx_vf_h, void, ptr, ptr, i64, ptr, env, i32) | 26 | MachineState *ms = MACHINE(s); |
21 | DEF_HELPER_6(vfsgnjx_vf_w, void, ptr, ptr, i64, ptr, env, i32) | 27 | RISCVCPU hart = s->soc[0].harts[0]; |
22 | DEF_HELPER_6(vfsgnjx_vf_d, void, ptr, ptr, i64, ptr, env, i32) | 28 | |
23 | + | 29 | - pmu_name = g_strdup_printf("/soc/pmu"); |
24 | +DEF_HELPER_6(vmfeq_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | 30 | + pmu_name = g_strdup_printf("/pmu"); |
25 | +DEF_HELPER_6(vmfeq_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | 31 | qemu_fdt_add_subnode(ms->fdt, pmu_name); |
26 | +DEF_HELPER_6(vmfeq_vv_d, void, ptr, ptr, ptr, ptr, env, i32) | 32 | qemu_fdt_setprop_string(ms->fdt, pmu_name, "compatible", "riscv,pmu"); |
27 | +DEF_HELPER_6(vmfne_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | 33 | riscv_pmu_generate_fdt_node(ms->fdt, hart.cfg.pmu_num, pmu_name); |
28 | +DEF_HELPER_6(vmfne_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
29 | +DEF_HELPER_6(vmfne_vv_d, void, ptr, ptr, ptr, ptr, env, i32) | ||
30 | +DEF_HELPER_6(vmflt_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | ||
31 | +DEF_HELPER_6(vmflt_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
32 | +DEF_HELPER_6(vmflt_vv_d, void, ptr, ptr, ptr, ptr, env, i32) | ||
33 | +DEF_HELPER_6(vmfle_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | ||
34 | +DEF_HELPER_6(vmfle_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
35 | +DEF_HELPER_6(vmfle_vv_d, void, ptr, ptr, ptr, ptr, env, i32) | ||
36 | +DEF_HELPER_6(vmfeq_vf_h, void, ptr, ptr, i64, ptr, env, i32) | ||
37 | +DEF_HELPER_6(vmfeq_vf_w, void, ptr, ptr, i64, ptr, env, i32) | ||
38 | +DEF_HELPER_6(vmfeq_vf_d, void, ptr, ptr, i64, ptr, env, i32) | ||
39 | +DEF_HELPER_6(vmfne_vf_h, void, ptr, ptr, i64, ptr, env, i32) | ||
40 | +DEF_HELPER_6(vmfne_vf_w, void, ptr, ptr, i64, ptr, env, i32) | ||
41 | +DEF_HELPER_6(vmfne_vf_d, void, ptr, ptr, i64, ptr, env, i32) | ||
42 | +DEF_HELPER_6(vmflt_vf_h, void, ptr, ptr, i64, ptr, env, i32) | ||
43 | +DEF_HELPER_6(vmflt_vf_w, void, ptr, ptr, i64, ptr, env, i32) | ||
44 | +DEF_HELPER_6(vmflt_vf_d, void, ptr, ptr, i64, ptr, env, i32) | ||
45 | +DEF_HELPER_6(vmfle_vf_h, void, ptr, ptr, i64, ptr, env, i32) | ||
46 | +DEF_HELPER_6(vmfle_vf_w, void, ptr, ptr, i64, ptr, env, i32) | ||
47 | +DEF_HELPER_6(vmfle_vf_d, void, ptr, ptr, i64, ptr, env, i32) | ||
48 | +DEF_HELPER_6(vmfgt_vf_h, void, ptr, ptr, i64, ptr, env, i32) | ||
49 | +DEF_HELPER_6(vmfgt_vf_w, void, ptr, ptr, i64, ptr, env, i32) | ||
50 | +DEF_HELPER_6(vmfgt_vf_d, void, ptr, ptr, i64, ptr, env, i32) | ||
51 | +DEF_HELPER_6(vmfge_vf_h, void, ptr, ptr, i64, ptr, env, i32) | ||
52 | +DEF_HELPER_6(vmfge_vf_w, void, ptr, ptr, i64, ptr, env, i32) | ||
53 | +DEF_HELPER_6(vmfge_vf_d, void, ptr, ptr, i64, ptr, env, i32) | ||
54 | +DEF_HELPER_6(vmford_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | ||
55 | +DEF_HELPER_6(vmford_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
56 | +DEF_HELPER_6(vmford_vv_d, void, ptr, ptr, ptr, ptr, env, i32) | ||
57 | +DEF_HELPER_6(vmford_vf_h, void, ptr, ptr, i64, ptr, env, i32) | ||
58 | +DEF_HELPER_6(vmford_vf_w, void, ptr, ptr, i64, ptr, env, i32) | ||
59 | +DEF_HELPER_6(vmford_vf_d, void, ptr, ptr, i64, ptr, env, i32) | ||
60 | diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode | ||
61 | index XXXXXXX..XXXXXXX 100644 | ||
62 | --- a/target/riscv/insn32.decode | ||
63 | +++ b/target/riscv/insn32.decode | ||
64 | @@ -XXX,XX +XXX,XX @@ vfsgnjn_vv 001001 . ..... ..... 001 ..... 1010111 @r_vm | ||
65 | vfsgnjn_vf 001001 . ..... ..... 101 ..... 1010111 @r_vm | ||
66 | vfsgnjx_vv 001010 . ..... ..... 001 ..... 1010111 @r_vm | ||
67 | vfsgnjx_vf 001010 . ..... ..... 101 ..... 1010111 @r_vm | ||
68 | +vmfeq_vv 011000 . ..... ..... 001 ..... 1010111 @r_vm | ||
69 | +vmfeq_vf 011000 . ..... ..... 101 ..... 1010111 @r_vm | ||
70 | +vmfne_vv 011100 . ..... ..... 001 ..... 1010111 @r_vm | ||
71 | +vmfne_vf 011100 . ..... ..... 101 ..... 1010111 @r_vm | ||
72 | +vmflt_vv 011011 . ..... ..... 001 ..... 1010111 @r_vm | ||
73 | +vmflt_vf 011011 . ..... ..... 101 ..... 1010111 @r_vm | ||
74 | +vmfle_vv 011001 . ..... ..... 001 ..... 1010111 @r_vm | ||
75 | +vmfle_vf 011001 . ..... ..... 101 ..... 1010111 @r_vm | ||
76 | +vmfgt_vf 011101 . ..... ..... 101 ..... 1010111 @r_vm | ||
77 | +vmfge_vf 011111 . ..... ..... 101 ..... 1010111 @r_vm | ||
78 | +vmford_vv 011010 . ..... ..... 001 ..... 1010111 @r_vm | ||
79 | +vmford_vf 011010 . ..... ..... 101 ..... 1010111 @r_vm | ||
80 | |||
81 | vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm | ||
82 | vsetvl 1000000 ..... ..... 111 ..... 1010111 @r | ||
83 | diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c | ||
84 | index XXXXXXX..XXXXXXX 100644 | ||
85 | --- a/target/riscv/insn_trans/trans_rvv.inc.c | ||
86 | +++ b/target/riscv/insn_trans/trans_rvv.inc.c | ||
87 | @@ -XXX,XX +XXX,XX @@ GEN_OPFVV_TRANS(vfsgnjx_vv, opfvv_check) | ||
88 | GEN_OPFVF_TRANS(vfsgnj_vf, opfvf_check) | ||
89 | GEN_OPFVF_TRANS(vfsgnjn_vf, opfvf_check) | ||
90 | GEN_OPFVF_TRANS(vfsgnjx_vf, opfvf_check) | ||
91 | + | ||
92 | +/* Vector Floating-Point Compare Instructions */ | ||
93 | +static bool opfvv_cmp_check(DisasContext *s, arg_rmrr *a) | ||
94 | +{ | ||
95 | + return (vext_check_isa_ill(s) && | ||
96 | + vext_check_reg(s, a->rs2, false) && | ||
97 | + vext_check_reg(s, a->rs1, false) && | ||
98 | + (s->sew != 0) && | ||
99 | + ((vext_check_overlap_group(a->rd, 1, a->rs1, 1 << s->lmul) && | ||
100 | + vext_check_overlap_group(a->rd, 1, a->rs2, 1 << s->lmul)) || | ||
101 | + (s->lmul == 0))); | ||
102 | +} | ||
103 | + | ||
104 | +GEN_OPFVV_TRANS(vmfeq_vv, opfvv_cmp_check) | ||
105 | +GEN_OPFVV_TRANS(vmfne_vv, opfvv_cmp_check) | ||
106 | +GEN_OPFVV_TRANS(vmflt_vv, opfvv_cmp_check) | ||
107 | +GEN_OPFVV_TRANS(vmfle_vv, opfvv_cmp_check) | ||
108 | +GEN_OPFVV_TRANS(vmford_vv, opfvv_cmp_check) | ||
109 | + | ||
110 | +static bool opfvf_cmp_check(DisasContext *s, arg_rmrr *a) | ||
111 | +{ | ||
112 | + return (vext_check_isa_ill(s) && | ||
113 | + vext_check_reg(s, a->rs2, false) && | ||
114 | + (s->sew != 0) && | ||
115 | + (vext_check_overlap_group(a->rd, 1, a->rs2, 1 << s->lmul) || | ||
116 | + (s->lmul == 0))); | ||
117 | +} | ||
118 | + | ||
119 | +GEN_OPFVF_TRANS(vmfeq_vf, opfvf_cmp_check) | ||
120 | +GEN_OPFVF_TRANS(vmfne_vf, opfvf_cmp_check) | ||
121 | +GEN_OPFVF_TRANS(vmflt_vf, opfvf_cmp_check) | ||
122 | +GEN_OPFVF_TRANS(vmfle_vf, opfvf_cmp_check) | ||
123 | +GEN_OPFVF_TRANS(vmfgt_vf, opfvf_cmp_check) | ||
124 | +GEN_OPFVF_TRANS(vmfge_vf, opfvf_cmp_check) | ||
125 | +GEN_OPFVF_TRANS(vmford_vf, opfvf_cmp_check) | ||
126 | diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c | ||
127 | index XXXXXXX..XXXXXXX 100644 | ||
128 | --- a/target/riscv/vector_helper.c | ||
129 | +++ b/target/riscv/vector_helper.c | ||
130 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64) | ||
131 | GEN_VEXT_VF(vfsgnjx_vf_h, 2, 2, clearh) | ||
132 | GEN_VEXT_VF(vfsgnjx_vf_w, 4, 4, clearl) | ||
133 | GEN_VEXT_VF(vfsgnjx_vf_d, 8, 8, clearq) | ||
134 | + | ||
135 | +/* Vector Floating-Point Compare Instructions */ | ||
136 | +#define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP) \ | ||
137 | +void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ | ||
138 | + CPURISCVState *env, uint32_t desc) \ | ||
139 | +{ \ | ||
140 | + uint32_t mlen = vext_mlen(desc); \ | ||
141 | + uint32_t vm = vext_vm(desc); \ | ||
142 | + uint32_t vl = env->vl; \ | ||
143 | + uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE); \ | ||
144 | + uint32_t i; \ | ||
145 | + \ | ||
146 | + for (i = 0; i < vl; i++) { \ | ||
147 | + ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ | ||
148 | + ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ | ||
149 | + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ | ||
150 | + continue; \ | ||
151 | + } \ | ||
152 | + vext_set_elem_mask(vd, mlen, i, \ | ||
153 | + DO_OP(s2, s1, &env->fp_status)); \ | ||
154 | + } \ | ||
155 | + for (; i < vlmax; i++) { \ | ||
156 | + vext_set_elem_mask(vd, mlen, i, 0); \ | ||
157 | + } \ | ||
158 | +} | ||
159 | + | ||
160 | +static bool float16_eq_quiet(uint16_t a, uint16_t b, float_status *s) | ||
161 | +{ | ||
162 | + FloatRelation compare = float16_compare_quiet(a, b, s); | ||
163 | + return compare == float_relation_equal; | ||
164 | +} | ||
165 | + | ||
166 | +GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet) | ||
167 | +GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet) | ||
168 | +GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet) | ||
169 | + | ||
170 | +#define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP) \ | ||
171 | +void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ | ||
172 | + CPURISCVState *env, uint32_t desc) \ | ||
173 | +{ \ | ||
174 | + uint32_t mlen = vext_mlen(desc); \ | ||
175 | + uint32_t vm = vext_vm(desc); \ | ||
176 | + uint32_t vl = env->vl; \ | ||
177 | + uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE); \ | ||
178 | + uint32_t i; \ | ||
179 | + \ | ||
180 | + for (i = 0; i < vl; i++) { \ | ||
181 | + ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ | ||
182 | + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ | ||
183 | + continue; \ | ||
184 | + } \ | ||
185 | + vext_set_elem_mask(vd, mlen, i, \ | ||
186 | + DO_OP(s2, (ETYPE)s1, &env->fp_status)); \ | ||
187 | + } \ | ||
188 | + for (; i < vlmax; i++) { \ | ||
189 | + vext_set_elem_mask(vd, mlen, i, 0); \ | ||
190 | + } \ | ||
191 | +} | ||
192 | + | ||
193 | +GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet) | ||
194 | +GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet) | ||
195 | +GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet) | ||
196 | + | ||
197 | +static bool vmfne16(uint16_t a, uint16_t b, float_status *s) | ||
198 | +{ | ||
199 | + FloatRelation compare = float16_compare_quiet(a, b, s); | ||
200 | + return compare != float_relation_equal; | ||
201 | +} | ||
202 | + | ||
203 | +static bool vmfne32(uint32_t a, uint32_t b, float_status *s) | ||
204 | +{ | ||
205 | + FloatRelation compare = float32_compare_quiet(a, b, s); | ||
206 | + return compare != float_relation_equal; | ||
207 | +} | ||
208 | + | ||
209 | +static bool vmfne64(uint64_t a, uint64_t b, float_status *s) | ||
210 | +{ | ||
211 | + FloatRelation compare = float64_compare_quiet(a, b, s); | ||
212 | + return compare != float_relation_equal; | ||
213 | +} | ||
214 | + | ||
215 | +GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, uint16_t, H2, vmfne16) | ||
216 | +GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32) | ||
217 | +GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64) | ||
218 | +GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16) | ||
219 | +GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32) | ||
220 | +GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64) | ||
221 | + | ||
222 | +static bool float16_lt(uint16_t a, uint16_t b, float_status *s) | ||
223 | +{ | ||
224 | + FloatRelation compare = float16_compare(a, b, s); | ||
225 | + return compare == float_relation_less; | ||
226 | +} | ||
227 | + | ||
228 | +GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt) | ||
229 | +GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt) | ||
230 | +GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt) | ||
231 | +GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt) | ||
232 | +GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt) | ||
233 | +GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt) | ||
234 | + | ||
235 | +static bool float16_le(uint16_t a, uint16_t b, float_status *s) | ||
236 | +{ | ||
237 | + FloatRelation compare = float16_compare(a, b, s); | ||
238 | + return compare == float_relation_less || | ||
239 | + compare == float_relation_equal; | ||
240 | +} | ||
241 | + | ||
242 | +GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le) | ||
243 | +GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le) | ||
244 | +GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le) | ||
245 | +GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le) | ||
246 | +GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le) | ||
247 | +GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le) | ||
248 | + | ||
249 | +static bool vmfgt16(uint16_t a, uint16_t b, float_status *s) | ||
250 | +{ | ||
251 | + FloatRelation compare = float16_compare(a, b, s); | ||
252 | + return compare == float_relation_greater; | ||
253 | +} | ||
254 | + | ||
255 | +static bool vmfgt32(uint32_t a, uint32_t b, float_status *s) | ||
256 | +{ | ||
257 | + FloatRelation compare = float32_compare(a, b, s); | ||
258 | + return compare == float_relation_greater; | ||
259 | +} | ||
260 | + | ||
261 | +static bool vmfgt64(uint64_t a, uint64_t b, float_status *s) | ||
262 | +{ | ||
263 | + FloatRelation compare = float64_compare(a, b, s); | ||
264 | + return compare == float_relation_greater; | ||
265 | +} | ||
266 | + | ||
267 | +GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16) | ||
268 | +GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32) | ||
269 | +GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64) | ||
270 | + | ||
271 | +static bool vmfge16(uint16_t a, uint16_t b, float_status *s) | ||
272 | +{ | ||
273 | + FloatRelation compare = float16_compare(a, b, s); | ||
274 | + return compare == float_relation_greater || | ||
275 | + compare == float_relation_equal; | ||
276 | +} | ||
277 | + | ||
278 | +static bool vmfge32(uint32_t a, uint32_t b, float_status *s) | ||
279 | +{ | ||
280 | + FloatRelation compare = float32_compare(a, b, s); | ||
281 | + return compare == float_relation_greater || | ||
282 | + compare == float_relation_equal; | ||
283 | +} | ||
284 | + | ||
285 | +static bool vmfge64(uint64_t a, uint64_t b, float_status *s) | ||
286 | +{ | ||
287 | + FloatRelation compare = float64_compare(a, b, s); | ||
288 | + return compare == float_relation_greater || | ||
289 | + compare == float_relation_equal; | ||
290 | +} | ||
291 | + | ||
292 | +GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16) | ||
293 | +GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32) | ||
294 | +GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64) | ||
295 | + | ||
296 | +static bool float16_unordered_quiet(uint16_t a, uint16_t b, float_status *s) | ||
297 | +{ | ||
298 | + FloatRelation compare = float16_compare_quiet(a, b, s); | ||
299 | + return compare == float_relation_unordered; | ||
300 | +} | ||
301 | + | ||
302 | +GEN_VEXT_CMP_VV_ENV(vmford_vv_h, uint16_t, H2, !float16_unordered_quiet) | ||
303 | +GEN_VEXT_CMP_VV_ENV(vmford_vv_w, uint32_t, H4, !float32_unordered_quiet) | ||
304 | +GEN_VEXT_CMP_VV_ENV(vmford_vv_d, uint64_t, H8, !float64_unordered_quiet) | ||
305 | +GEN_VEXT_CMP_VF(vmford_vf_h, uint16_t, H2, !float16_unordered_quiet) | ||
306 | +GEN_VEXT_CMP_VF(vmford_vf_w, uint32_t, H4, !float32_unordered_quiet) | ||
307 | +GEN_VEXT_CMP_VF(vmford_vf_d, uint64_t, H8, !float64_unordered_quiet) | ||
308 | -- | 34 | -- |
309 | 2.27.0 | 35 | 2.41.0 |
310 | |||
311 | diff view generated by jsdifflib |
1 | From: LIU Zhiwei <zhiwei_liu@c-sky.com> | 1 | From: Weiwei Li <liweiwei@iscas.ac.cn> |
---|---|---|---|
2 | 2 | ||
3 | The v0.7.1 specification does not define vector status within mstatus. | 3 | The Svadu specification updated the name of the *envcfg bit from |
4 | A future revision will define the privileged portion of the vector status. | 4 | HADE to ADUE. |
5 | 5 | ||
6 | Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> | 6 | Signed-off-by: Weiwei Li <liweiwei@iscas.ac.cn> |
7 | Reviewed-by: Alistair Francis <alistair.francis@wdc.com> | 7 | Signed-off-by: Junqiang Wang <wangjunqiang@iscas.ac.cn> |
8 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 8 | Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> |
9 | Message-id: 20200623215920.2594-4-zhiwei_liu@c-sky.com | 9 | Message-ID: <20230816141916.66898-1-liweiwei@iscas.ac.cn> |
10 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | 10 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> |
11 | --- | 11 | --- |
12 | target/riscv/cpu_bits.h | 15 +++++++++ | 12 | target/riscv/cpu_bits.h | 8 ++++---- |
13 | target/riscv/csr.c | 75 ++++++++++++++++++++++++++++++++++++++++- | 13 | target/riscv/cpu.c | 4 ++-- |
14 | 2 files changed, 89 insertions(+), 1 deletion(-) | 14 | target/riscv/cpu_helper.c | 6 +++--- |
15 | target/riscv/csr.c | 12 ++++++------ | ||
16 | 4 files changed, 15 insertions(+), 15 deletions(-) | ||
15 | 17 | ||
16 | diff --git a/target/riscv/cpu_bits.h b/target/riscv/cpu_bits.h | 18 | diff --git a/target/riscv/cpu_bits.h b/target/riscv/cpu_bits.h |
17 | index XXXXXXX..XXXXXXX 100644 | 19 | index XXXXXXX..XXXXXXX 100644 |
18 | --- a/target/riscv/cpu_bits.h | 20 | --- a/target/riscv/cpu_bits.h |
19 | +++ b/target/riscv/cpu_bits.h | 21 | +++ b/target/riscv/cpu_bits.h |
20 | @@ -XXX,XX +XXX,XX @@ | 22 | @@ -XXX,XX +XXX,XX @@ typedef enum RISCVException { |
21 | #define FSR_NXA (FPEXC_NX << FSR_AEXC_SHIFT) | 23 | #define MENVCFG_CBIE (3UL << 4) |
22 | #define FSR_AEXC (FSR_NVA | FSR_OFA | FSR_UFA | FSR_DZA | FSR_NXA) | 24 | #define MENVCFG_CBCFE BIT(6) |
23 | 25 | #define MENVCFG_CBZE BIT(7) | |
24 | +/* Vector Fixed-Point rounding mode */ | 26 | -#define MENVCFG_HADE (1ULL << 61) |
25 | +#define FSR_VXRM_SHIFT 9 | 27 | +#define MENVCFG_ADUE (1ULL << 61) |
26 | +#define FSR_VXRM (0x3 << FSR_VXRM_SHIFT) | 28 | #define MENVCFG_PBMTE (1ULL << 62) |
27 | + | 29 | #define MENVCFG_STCE (1ULL << 63) |
28 | +/* Vector Fixed-Point saturation flag */ | 30 | |
29 | +#define FSR_VXSAT_SHIFT 8 | 31 | /* For RV32 */ |
30 | +#define FSR_VXSAT (0x1 << FSR_VXSAT_SHIFT) | 32 | -#define MENVCFGH_HADE BIT(29) |
31 | + | 33 | +#define MENVCFGH_ADUE BIT(29) |
32 | /* Control and Status Registers */ | 34 | #define MENVCFGH_PBMTE BIT(30) |
33 | 35 | #define MENVCFGH_STCE BIT(31) | |
34 | /* User Trap Setup */ | 36 | |
35 | @@ -XXX,XX +XXX,XX @@ | 37 | @@ -XXX,XX +XXX,XX @@ typedef enum RISCVException { |
36 | #define CSR_FRM 0x002 | 38 | #define HENVCFG_CBIE MENVCFG_CBIE |
37 | #define CSR_FCSR 0x003 | 39 | #define HENVCFG_CBCFE MENVCFG_CBCFE |
38 | 40 | #define HENVCFG_CBZE MENVCFG_CBZE | |
39 | +/* User Vector CSRs */ | 41 | -#define HENVCFG_HADE MENVCFG_HADE |
40 | +#define CSR_VSTART 0x008 | 42 | +#define HENVCFG_ADUE MENVCFG_ADUE |
41 | +#define CSR_VXSAT 0x009 | 43 | #define HENVCFG_PBMTE MENVCFG_PBMTE |
42 | +#define CSR_VXRM 0x00a | 44 | #define HENVCFG_STCE MENVCFG_STCE |
43 | +#define CSR_VL 0xc20 | 45 | |
44 | +#define CSR_VTYPE 0xc21 | 46 | /* For RV32 */ |
45 | + | 47 | -#define HENVCFGH_HADE MENVCFGH_HADE |
46 | /* User Timers and Counters */ | 48 | +#define HENVCFGH_ADUE MENVCFGH_ADUE |
47 | #define CSR_CYCLE 0xc00 | 49 | #define HENVCFGH_PBMTE MENVCFGH_PBMTE |
48 | #define CSR_TIME 0xc01 | 50 | #define HENVCFGH_STCE MENVCFGH_STCE |
51 | |||
52 | diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c | ||
53 | index XXXXXXX..XXXXXXX 100644 | ||
54 | --- a/target/riscv/cpu.c | ||
55 | +++ b/target/riscv/cpu.c | ||
56 | @@ -XXX,XX +XXX,XX @@ static void riscv_cpu_reset_hold(Object *obj) | ||
57 | env->two_stage_lookup = false; | ||
58 | |||
59 | env->menvcfg = (cpu->cfg.ext_svpbmt ? MENVCFG_PBMTE : 0) | | ||
60 | - (cpu->cfg.ext_svadu ? MENVCFG_HADE : 0); | ||
61 | + (cpu->cfg.ext_svadu ? MENVCFG_ADUE : 0); | ||
62 | env->henvcfg = (cpu->cfg.ext_svpbmt ? HENVCFG_PBMTE : 0) | | ||
63 | - (cpu->cfg.ext_svadu ? HENVCFG_HADE : 0); | ||
64 | + (cpu->cfg.ext_svadu ? HENVCFG_ADUE : 0); | ||
65 | |||
66 | /* Initialized default priorities of local interrupts. */ | ||
67 | for (i = 0; i < ARRAY_SIZE(env->miprio); i++) { | ||
68 | diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c | ||
69 | index XXXXXXX..XXXXXXX 100644 | ||
70 | --- a/target/riscv/cpu_helper.c | ||
71 | +++ b/target/riscv/cpu_helper.c | ||
72 | @@ -XXX,XX +XXX,XX @@ static int get_physical_address(CPURISCVState *env, hwaddr *physical, | ||
73 | } | ||
74 | |||
75 | bool pbmte = env->menvcfg & MENVCFG_PBMTE; | ||
76 | - bool hade = env->menvcfg & MENVCFG_HADE; | ||
77 | + bool adue = env->menvcfg & MENVCFG_ADUE; | ||
78 | |||
79 | if (first_stage && two_stage && env->virt_enabled) { | ||
80 | pbmte = pbmte && (env->henvcfg & HENVCFG_PBMTE); | ||
81 | - hade = hade && (env->henvcfg & HENVCFG_HADE); | ||
82 | + adue = adue && (env->henvcfg & HENVCFG_ADUE); | ||
83 | } | ||
84 | |||
85 | int ptshift = (levels - 1) * ptidxbits; | ||
86 | @@ -XXX,XX +XXX,XX @@ restart: | ||
87 | |||
88 | /* Page table updates need to be atomic with MTTCG enabled */ | ||
89 | if (updated_pte != pte && !is_debug) { | ||
90 | - if (!hade) { | ||
91 | + if (!adue) { | ||
92 | return TRANSLATE_FAIL; | ||
93 | } | ||
94 | |||
49 | diff --git a/target/riscv/csr.c b/target/riscv/csr.c | 95 | diff --git a/target/riscv/csr.c b/target/riscv/csr.c |
50 | index XXXXXXX..XXXXXXX 100644 | 96 | index XXXXXXX..XXXXXXX 100644 |
51 | --- a/target/riscv/csr.c | 97 | --- a/target/riscv/csr.c |
52 | +++ b/target/riscv/csr.c | 98 | +++ b/target/riscv/csr.c |
53 | @@ -XXX,XX +XXX,XX @@ void riscv_set_csr_ops(int csrno, riscv_csr_operations *ops) | 99 | @@ -XXX,XX +XXX,XX @@ static RISCVException write_menvcfg(CPURISCVState *env, int csrno, |
54 | static int fs(CPURISCVState *env, int csrno) | 100 | if (riscv_cpu_mxl(env) == MXL_RV64) { |
101 | mask |= (cfg->ext_svpbmt ? MENVCFG_PBMTE : 0) | | ||
102 | (cfg->ext_sstc ? MENVCFG_STCE : 0) | | ||
103 | - (cfg->ext_svadu ? MENVCFG_HADE : 0); | ||
104 | + (cfg->ext_svadu ? MENVCFG_ADUE : 0); | ||
105 | } | ||
106 | env->menvcfg = (env->menvcfg & ~mask) | (val & mask); | ||
107 | |||
108 | @@ -XXX,XX +XXX,XX @@ static RISCVException write_menvcfgh(CPURISCVState *env, int csrno, | ||
109 | const RISCVCPUConfig *cfg = riscv_cpu_cfg(env); | ||
110 | uint64_t mask = (cfg->ext_svpbmt ? MENVCFG_PBMTE : 0) | | ||
111 | (cfg->ext_sstc ? MENVCFG_STCE : 0) | | ||
112 | - (cfg->ext_svadu ? MENVCFG_HADE : 0); | ||
113 | + (cfg->ext_svadu ? MENVCFG_ADUE : 0); | ||
114 | uint64_t valh = (uint64_t)val << 32; | ||
115 | |||
116 | env->menvcfg = (env->menvcfg & ~mask) | (valh & mask); | ||
117 | @@ -XXX,XX +XXX,XX @@ static RISCVException read_henvcfg(CPURISCVState *env, int csrno, | ||
118 | * henvcfg.stce is read_only 0 when menvcfg.stce = 0 | ||
119 | * henvcfg.hade is read_only 0 when menvcfg.hade = 0 | ||
120 | */ | ||
121 | - *val = env->henvcfg & (~(HENVCFG_PBMTE | HENVCFG_STCE | HENVCFG_HADE) | | ||
122 | + *val = env->henvcfg & (~(HENVCFG_PBMTE | HENVCFG_STCE | HENVCFG_ADUE) | | ||
123 | env->menvcfg); | ||
124 | return RISCV_EXCP_NONE; | ||
125 | } | ||
126 | @@ -XXX,XX +XXX,XX @@ static RISCVException write_henvcfg(CPURISCVState *env, int csrno, | ||
127 | } | ||
128 | |||
129 | if (riscv_cpu_mxl(env) == MXL_RV64) { | ||
130 | - mask |= env->menvcfg & (HENVCFG_PBMTE | HENVCFG_STCE | HENVCFG_HADE); | ||
131 | + mask |= env->menvcfg & (HENVCFG_PBMTE | HENVCFG_STCE | HENVCFG_ADUE); | ||
132 | } | ||
133 | |||
134 | env->henvcfg = (env->henvcfg & ~mask) | (val & mask); | ||
135 | @@ -XXX,XX +XXX,XX @@ static RISCVException read_henvcfgh(CPURISCVState *env, int csrno, | ||
136 | return ret; | ||
137 | } | ||
138 | |||
139 | - *val = (env->henvcfg & (~(HENVCFG_PBMTE | HENVCFG_STCE | HENVCFG_HADE) | | ||
140 | + *val = (env->henvcfg & (~(HENVCFG_PBMTE | HENVCFG_STCE | HENVCFG_ADUE) | | ||
141 | env->menvcfg)) >> 32; | ||
142 | return RISCV_EXCP_NONE; | ||
143 | } | ||
144 | @@ -XXX,XX +XXX,XX @@ static RISCVException write_henvcfgh(CPURISCVState *env, int csrno, | ||
145 | target_ulong val) | ||
55 | { | 146 | { |
56 | #if !defined(CONFIG_USER_ONLY) | 147 | uint64_t mask = env->menvcfg & (HENVCFG_PBMTE | HENVCFG_STCE | |
57 | + /* loose check condition for fcsr in vector extension */ | 148 | - HENVCFG_HADE); |
58 | + if ((csrno == CSR_FCSR) && (env->misa & RVV)) { | 149 | + HENVCFG_ADUE); |
59 | + return 0; | 150 | uint64_t valh = (uint64_t)val << 32; |
60 | + } | 151 | RISCVException ret; |
61 | if (!env->debugger && !riscv_cpu_fp_enabled(env)) { | 152 | |
62 | return -1; | ||
63 | } | ||
64 | @@ -XXX,XX +XXX,XX @@ static int fs(CPURISCVState *env, int csrno) | ||
65 | return 0; | ||
66 | } | ||
67 | |||
68 | +static int vs(CPURISCVState *env, int csrno) | ||
69 | +{ | ||
70 | + if (env->misa & RVV) { | ||
71 | + return 0; | ||
72 | + } | ||
73 | + return -1; | ||
74 | +} | ||
75 | + | ||
76 | static int ctr(CPURISCVState *env, int csrno) | ||
77 | { | ||
78 | #if !defined(CONFIG_USER_ONLY) | ||
79 | @@ -XXX,XX +XXX,XX @@ static int read_fcsr(CPURISCVState *env, int csrno, target_ulong *val) | ||
80 | #endif | ||
81 | *val = (riscv_cpu_get_fflags(env) << FSR_AEXC_SHIFT) | ||
82 | | (env->frm << FSR_RD_SHIFT); | ||
83 | + if (vs(env, csrno) >= 0) { | ||
84 | + *val |= (env->vxrm << FSR_VXRM_SHIFT) | ||
85 | + | (env->vxsat << FSR_VXSAT_SHIFT); | ||
86 | + } | ||
87 | return 0; | ||
88 | } | ||
89 | |||
90 | @@ -XXX,XX +XXX,XX @@ static int write_fcsr(CPURISCVState *env, int csrno, target_ulong val) | ||
91 | env->mstatus |= MSTATUS_FS; | ||
92 | #endif | ||
93 | env->frm = (val & FSR_RD) >> FSR_RD_SHIFT; | ||
94 | + if (vs(env, csrno) >= 0) { | ||
95 | + env->vxrm = (val & FSR_VXRM) >> FSR_VXRM_SHIFT; | ||
96 | + env->vxsat = (val & FSR_VXSAT) >> FSR_VXSAT_SHIFT; | ||
97 | + } | ||
98 | riscv_cpu_set_fflags(env, (val & FSR_AEXC) >> FSR_AEXC_SHIFT); | ||
99 | return 0; | ||
100 | } | ||
101 | |||
102 | +static int read_vtype(CPURISCVState *env, int csrno, target_ulong *val) | ||
103 | +{ | ||
104 | + *val = env->vtype; | ||
105 | + return 0; | ||
106 | +} | ||
107 | + | ||
108 | +static int read_vl(CPURISCVState *env, int csrno, target_ulong *val) | ||
109 | +{ | ||
110 | + *val = env->vl; | ||
111 | + return 0; | ||
112 | +} | ||
113 | + | ||
114 | +static int read_vxrm(CPURISCVState *env, int csrno, target_ulong *val) | ||
115 | +{ | ||
116 | + *val = env->vxrm; | ||
117 | + return 0; | ||
118 | +} | ||
119 | + | ||
120 | +static int write_vxrm(CPURISCVState *env, int csrno, target_ulong val) | ||
121 | +{ | ||
122 | + env->vxrm = val; | ||
123 | + return 0; | ||
124 | +} | ||
125 | + | ||
126 | +static int read_vxsat(CPURISCVState *env, int csrno, target_ulong *val) | ||
127 | +{ | ||
128 | + *val = env->vxsat; | ||
129 | + return 0; | ||
130 | +} | ||
131 | + | ||
132 | +static int write_vxsat(CPURISCVState *env, int csrno, target_ulong val) | ||
133 | +{ | ||
134 | + env->vxsat = val; | ||
135 | + return 0; | ||
136 | +} | ||
137 | + | ||
138 | +static int read_vstart(CPURISCVState *env, int csrno, target_ulong *val) | ||
139 | +{ | ||
140 | + *val = env->vstart; | ||
141 | + return 0; | ||
142 | +} | ||
143 | + | ||
144 | +static int write_vstart(CPURISCVState *env, int csrno, target_ulong val) | ||
145 | +{ | ||
146 | + env->vstart = val; | ||
147 | + return 0; | ||
148 | +} | ||
149 | + | ||
150 | /* User Timers and Counters */ | ||
151 | static int read_instret(CPURISCVState *env, int csrno, target_ulong *val) | ||
152 | { | ||
153 | @@ -XXX,XX +XXX,XX @@ static riscv_csr_operations csr_ops[CSR_TABLE_SIZE] = { | ||
154 | [CSR_FFLAGS] = { fs, read_fflags, write_fflags }, | ||
155 | [CSR_FRM] = { fs, read_frm, write_frm }, | ||
156 | [CSR_FCSR] = { fs, read_fcsr, write_fcsr }, | ||
157 | - | ||
158 | + /* Vector CSRs */ | ||
159 | + [CSR_VSTART] = { vs, read_vstart, write_vstart }, | ||
160 | + [CSR_VXSAT] = { vs, read_vxsat, write_vxsat }, | ||
161 | + [CSR_VXRM] = { vs, read_vxrm, write_vxrm }, | ||
162 | + [CSR_VL] = { vs, read_vl }, | ||
163 | + [CSR_VTYPE] = { vs, read_vtype }, | ||
164 | /* User Timers and Counters */ | ||
165 | [CSR_CYCLE] = { ctr, read_instret }, | ||
166 | [CSR_INSTRET] = { ctr, read_instret }, | ||
167 | -- | 153 | -- |
168 | 2.27.0 | 154 | 2.41.0 |
169 | |||
170 | diff view generated by jsdifflib |
1 | From: LIU Zhiwei <zhiwei_liu@c-sky.com> | 1 | From: Daniel Henrique Barboza <dbarboza@ventanamicro.com> |
---|---|---|---|
2 | 2 | ||
3 | Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> | 3 | In the same emulated RISC-V host, the 'host' KVM CPU takes 4 times |
4 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 4 | longer to boot than the 'rv64' KVM CPU. |
5 | Reviewed-by: Alistair Francis <alistair.francis@wdc.com> | 5 | |
6 | Message-id: 20200623215920.2594-25-zhiwei_liu@c-sky.com | 6 | The reason is an unintended behavior of riscv_cpu_satp_mode_finalize() |
7 | when satp_mode.supported = 0, i.e. when cpu_init() does not set | ||
8 | satp_mode_max_supported(). satp_mode_max_from_map(map) does: | ||
9 | |||
10 | 31 - __builtin_clz(map) | ||
11 | |||
12 | This means that, if satp_mode.supported = 0, satp_mode_supported_max | ||
13 | will be '31 - 32'. But this is C, so satp_mode_supported_max will gladly | ||
14 | set it to UINT_MAX (4294967295). After that, if the user didn't set a | ||
15 | satp_mode, set_satp_mode_default_map(cpu) will make | ||
16 | |||
17 | cfg.satp_mode.map = cfg.satp_mode.supported | ||
18 | |||
19 | So satp_mode.map = 0. And then satp_mode_map_max will be set to | ||
20 | satp_mode_max_from_map(cpu->cfg.satp_mode.map), i.e. also UINT_MAX. The | ||
21 | guard "satp_mode_map_max > satp_mode_supported_max" doesn't protect us | ||
22 | here since both are UINT_MAX. | ||
23 | |||
24 | And finally we have 2 loops: | ||
25 | |||
26 | for (int i = satp_mode_map_max - 1; i >= 0; --i) { | ||
27 | |||
28 | Which are, in fact, 2 loops from UINT_MAX -1 to -1. This is where the | ||
29 | extra delay when booting the 'host' CPU is coming from. | ||
30 | |||
31 | Commit 43d1de32f8 already set a precedent for satp_mode.supported = 0 | ||
32 | in a different manner. We're doing the same here. If supported == 0, | ||
33 | interpret as 'the CPU wants the OS to handle satp mode alone' and skip | ||
34 | satp_mode_finalize(). | ||
35 | |||
36 | We'll also put a guard in satp_mode_max_from_map() to assert out if map | ||
37 | is 0 since the function is not ready to deal with it. | ||
38 | |||
39 | Cc: Alexandre Ghiti <alexghiti@rivosinc.com> | ||
40 | Fixes: 6f23aaeb9b ("riscv: Allow user to set the satp mode") | ||
41 | Signed-off-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> | ||
42 | Reviewed-by: Andrew Jones <ajones@ventanamicro.com> | ||
43 | Message-ID: <20230817152903.694926-1-dbarboza@ventanamicro.com> | ||
7 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | 44 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> |
8 | --- | 45 | --- |
9 | target/riscv/helper.h | 33 ++ | 46 | target/riscv/cpu.c | 23 ++++++++++++++++++++--- |
10 | target/riscv/insn32.decode | 10 + | 47 | 1 file changed, 20 insertions(+), 3 deletions(-) |
11 | target/riscv/insn_trans/trans_rvv.inc.c | 16 + | ||
12 | target/riscv/vector_helper.c | 385 ++++++++++++++++++++++++ | ||
13 | 4 files changed, 444 insertions(+) | ||
14 | 48 | ||
15 | diff --git a/target/riscv/helper.h b/target/riscv/helper.h | 49 | diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c |
16 | index XXXXXXX..XXXXXXX 100644 | 50 | index XXXXXXX..XXXXXXX 100644 |
17 | --- a/target/riscv/helper.h | 51 | --- a/target/riscv/cpu.c |
18 | +++ b/target/riscv/helper.h | 52 | +++ b/target/riscv/cpu.c |
19 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_4(vmv_v_x_b, void, ptr, i64, env, i32) | 53 | @@ -XXX,XX +XXX,XX @@ static uint8_t satp_mode_from_str(const char *satp_mode_str) |
20 | DEF_HELPER_4(vmv_v_x_h, void, ptr, i64, env, i32) | 54 | |
21 | DEF_HELPER_4(vmv_v_x_w, void, ptr, i64, env, i32) | 55 | uint8_t satp_mode_max_from_map(uint32_t map) |
22 | DEF_HELPER_4(vmv_v_x_d, void, ptr, i64, env, i32) | 56 | { |
57 | + /* | ||
58 | + * 'map = 0' will make us return (31 - 32), which C will | ||
59 | + * happily overflow to UINT_MAX. There's no good result to | ||
60 | + * return if 'map = 0' (e.g. returning 0 will be ambiguous | ||
61 | + * with the result for 'map = 1'). | ||
62 | + * | ||
63 | + * Assert out if map = 0. Callers will have to deal with | ||
64 | + * it outside of this function. | ||
65 | + */ | ||
66 | + g_assert(map > 0); | ||
23 | + | 67 | + |
24 | +DEF_HELPER_6(vsaddu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) | 68 | /* map here has at least one bit set, so no problem with clz */ |
25 | +DEF_HELPER_6(vsaddu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | 69 | return 31 - __builtin_clz(map); |
26 | +DEF_HELPER_6(vsaddu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | 70 | } |
27 | +DEF_HELPER_6(vsaddu_vv_d, void, ptr, ptr, ptr, ptr, env, i32) | 71 | @@ -XXX,XX +XXX,XX @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp) |
28 | +DEF_HELPER_6(vsadd_vv_b, void, ptr, ptr, ptr, ptr, env, i32) | 72 | static void riscv_cpu_satp_mode_finalize(RISCVCPU *cpu, Error **errp) |
29 | +DEF_HELPER_6(vsadd_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | 73 | { |
30 | +DEF_HELPER_6(vsadd_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | 74 | bool rv32 = riscv_cpu_mxl(&cpu->env) == MXL_RV32; |
31 | +DEF_HELPER_6(vsadd_vv_d, void, ptr, ptr, ptr, ptr, env, i32) | 75 | - uint8_t satp_mode_map_max; |
32 | +DEF_HELPER_6(vssubu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) | 76 | - uint8_t satp_mode_supported_max = |
33 | +DEF_HELPER_6(vssubu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | 77 | - satp_mode_max_from_map(cpu->cfg.satp_mode.supported); |
34 | +DEF_HELPER_6(vssubu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | 78 | + uint8_t satp_mode_map_max, satp_mode_supported_max; |
35 | +DEF_HELPER_6(vssubu_vv_d, void, ptr, ptr, ptr, ptr, env, i32) | ||
36 | +DEF_HELPER_6(vssub_vv_b, void, ptr, ptr, ptr, ptr, env, i32) | ||
37 | +DEF_HELPER_6(vssub_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | ||
38 | +DEF_HELPER_6(vssub_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
39 | +DEF_HELPER_6(vssub_vv_d, void, ptr, ptr, ptr, ptr, env, i32) | ||
40 | +DEF_HELPER_6(vsaddu_vx_b, void, ptr, ptr, tl, ptr, env, i32) | ||
41 | +DEF_HELPER_6(vsaddu_vx_h, void, ptr, ptr, tl, ptr, env, i32) | ||
42 | +DEF_HELPER_6(vsaddu_vx_w, void, ptr, ptr, tl, ptr, env, i32) | ||
43 | +DEF_HELPER_6(vsaddu_vx_d, void, ptr, ptr, tl, ptr, env, i32) | ||
44 | +DEF_HELPER_6(vsadd_vx_b, void, ptr, ptr, tl, ptr, env, i32) | ||
45 | +DEF_HELPER_6(vsadd_vx_h, void, ptr, ptr, tl, ptr, env, i32) | ||
46 | +DEF_HELPER_6(vsadd_vx_w, void, ptr, ptr, tl, ptr, env, i32) | ||
47 | +DEF_HELPER_6(vsadd_vx_d, void, ptr, ptr, tl, ptr, env, i32) | ||
48 | +DEF_HELPER_6(vssubu_vx_b, void, ptr, ptr, tl, ptr, env, i32) | ||
49 | +DEF_HELPER_6(vssubu_vx_h, void, ptr, ptr, tl, ptr, env, i32) | ||
50 | +DEF_HELPER_6(vssubu_vx_w, void, ptr, ptr, tl, ptr, env, i32) | ||
51 | +DEF_HELPER_6(vssubu_vx_d, void, ptr, ptr, tl, ptr, env, i32) | ||
52 | +DEF_HELPER_6(vssub_vx_b, void, ptr, ptr, tl, ptr, env, i32) | ||
53 | +DEF_HELPER_6(vssub_vx_h, void, ptr, ptr, tl, ptr, env, i32) | ||
54 | +DEF_HELPER_6(vssub_vx_w, void, ptr, ptr, tl, ptr, env, i32) | ||
55 | +DEF_HELPER_6(vssub_vx_d, void, ptr, ptr, tl, ptr, env, i32) | ||
56 | diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode | ||
57 | index XXXXXXX..XXXXXXX 100644 | ||
58 | --- a/target/riscv/insn32.decode | ||
59 | +++ b/target/riscv/insn32.decode | ||
60 | @@ -XXX,XX +XXX,XX @@ vmv_v_i 010111 1 00000 ..... 011 ..... 1010111 @r2 | ||
61 | vmerge_vvm 010111 0 ..... ..... 000 ..... 1010111 @r_vm_0 | ||
62 | vmerge_vxm 010111 0 ..... ..... 100 ..... 1010111 @r_vm_0 | ||
63 | vmerge_vim 010111 0 ..... ..... 011 ..... 1010111 @r_vm_0 | ||
64 | +vsaddu_vv 100000 . ..... ..... 000 ..... 1010111 @r_vm | ||
65 | +vsaddu_vx 100000 . ..... ..... 100 ..... 1010111 @r_vm | ||
66 | +vsaddu_vi 100000 . ..... ..... 011 ..... 1010111 @r_vm | ||
67 | +vsadd_vv 100001 . ..... ..... 000 ..... 1010111 @r_vm | ||
68 | +vsadd_vx 100001 . ..... ..... 100 ..... 1010111 @r_vm | ||
69 | +vsadd_vi 100001 . ..... ..... 011 ..... 1010111 @r_vm | ||
70 | +vssubu_vv 100010 . ..... ..... 000 ..... 1010111 @r_vm | ||
71 | +vssubu_vx 100010 . ..... ..... 100 ..... 1010111 @r_vm | ||
72 | +vssub_vv 100011 . ..... ..... 000 ..... 1010111 @r_vm | ||
73 | +vssub_vx 100011 . ..... ..... 100 ..... 1010111 @r_vm | ||
74 | |||
75 | vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm | ||
76 | vsetvl 1000000 ..... ..... 111 ..... 1010111 @r | ||
77 | diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c | ||
78 | index XXXXXXX..XXXXXXX 100644 | ||
79 | --- a/target/riscv/insn_trans/trans_rvv.inc.c | ||
80 | +++ b/target/riscv/insn_trans/trans_rvv.inc.c | ||
81 | @@ -XXX,XX +XXX,XX @@ static bool trans_vmv_v_i(DisasContext *s, arg_vmv_v_i *a) | ||
82 | GEN_OPIVV_TRANS(vmerge_vvm, opivv_vadc_check) | ||
83 | GEN_OPIVX_TRANS(vmerge_vxm, opivx_vadc_check) | ||
84 | GEN_OPIVI_TRANS(vmerge_vim, 0, vmerge_vxm, opivx_vadc_check) | ||
85 | + | 79 | + |
86 | +/* | 80 | + /* The CPU wants the OS to decide which satp mode to use */ |
87 | + *** Vector Fixed-Point Arithmetic Instructions | 81 | + if (cpu->cfg.satp_mode.supported == 0) { |
88 | + */ | 82 | + return; |
89 | + | ||
90 | +/* Vector Single-Width Saturating Add and Subtract */ | ||
91 | +GEN_OPIVV_TRANS(vsaddu_vv, opivv_check) | ||
92 | +GEN_OPIVV_TRANS(vsadd_vv, opivv_check) | ||
93 | +GEN_OPIVV_TRANS(vssubu_vv, opivv_check) | ||
94 | +GEN_OPIVV_TRANS(vssub_vv, opivv_check) | ||
95 | +GEN_OPIVX_TRANS(vsaddu_vx, opivx_check) | ||
96 | +GEN_OPIVX_TRANS(vsadd_vx, opivx_check) | ||
97 | +GEN_OPIVX_TRANS(vssubu_vx, opivx_check) | ||
98 | +GEN_OPIVX_TRANS(vssub_vx, opivx_check) | ||
99 | +GEN_OPIVI_TRANS(vsaddu_vi, 1, vsaddu_vx, opivx_check) | ||
100 | +GEN_OPIVI_TRANS(vsadd_vi, 0, vsadd_vx, opivx_check) | ||
101 | diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c | ||
102 | index XXXXXXX..XXXXXXX 100644 | ||
103 | --- a/target/riscv/vector_helper.c | ||
104 | +++ b/target/riscv/vector_helper.c | ||
105 | @@ -XXX,XX +XXX,XX @@ GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t, H1, clearb) | ||
106 | GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2, clearh) | ||
107 | GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4, clearl) | ||
108 | GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8, clearq) | ||
109 | + | ||
110 | +/* | ||
111 | + *** Vector Fixed-Point Arithmetic Instructions | ||
112 | + */ | ||
113 | + | ||
114 | +/* Vector Single-Width Saturating Add and Subtract */ | ||
115 | + | ||
116 | +/* | ||
117 | + * As fixed point instructions probably have rounding mode and saturation, | 84 | + |
118 | + * define common macros for fixed point here. | ||
119 | + */ | ||
120 | +typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i, | ||
121 | + CPURISCVState *env, int vxrm); | ||
122 | + | ||
123 | +#define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ | ||
124 | +static inline void \ | ||
125 | +do_##NAME(void *vd, void *vs1, void *vs2, int i, \ | ||
126 | + CPURISCVState *env, int vxrm) \ | ||
127 | +{ \ | ||
128 | + TX1 s1 = *((T1 *)vs1 + HS1(i)); \ | ||
129 | + TX2 s2 = *((T2 *)vs2 + HS2(i)); \ | ||
130 | + *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1); \ | ||
131 | +} | ||
132 | + | ||
133 | +static inline void | ||
134 | +vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2, | ||
135 | + CPURISCVState *env, | ||
136 | + uint32_t vl, uint32_t vm, uint32_t mlen, int vxrm, | ||
137 | + opivv2_rm_fn *fn) | ||
138 | +{ | ||
139 | + for (uint32_t i = 0; i < vl; i++) { | ||
140 | + if (!vm && !vext_elem_mask(v0, mlen, i)) { | ||
141 | + continue; | ||
142 | + } | ||
143 | + fn(vd, vs1, vs2, i, env, vxrm); | ||
144 | + } | ||
145 | +} | ||
146 | + | ||
147 | +static inline void | ||
148 | +vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2, | ||
149 | + CPURISCVState *env, | ||
150 | + uint32_t desc, uint32_t esz, uint32_t dsz, | ||
151 | + opivv2_rm_fn *fn, clear_fn *clearfn) | ||
152 | +{ | ||
153 | + uint32_t vlmax = vext_maxsz(desc) / esz; | ||
154 | + uint32_t mlen = vext_mlen(desc); | ||
155 | + uint32_t vm = vext_vm(desc); | ||
156 | + uint32_t vl = env->vl; | ||
157 | + | ||
158 | + switch (env->vxrm) { | ||
159 | + case 0: /* rnu */ | ||
160 | + vext_vv_rm_1(vd, v0, vs1, vs2, | ||
161 | + env, vl, vm, mlen, 0, fn); | ||
162 | + break; | ||
163 | + case 1: /* rne */ | ||
164 | + vext_vv_rm_1(vd, v0, vs1, vs2, | ||
165 | + env, vl, vm, mlen, 1, fn); | ||
166 | + break; | ||
167 | + case 2: /* rdn */ | ||
168 | + vext_vv_rm_1(vd, v0, vs1, vs2, | ||
169 | + env, vl, vm, mlen, 2, fn); | ||
170 | + break; | ||
171 | + default: /* rod */ | ||
172 | + vext_vv_rm_1(vd, v0, vs1, vs2, | ||
173 | + env, vl, vm, mlen, 3, fn); | ||
174 | + break; | ||
175 | + } | 83 | + } |
176 | + | 84 | + |
177 | + clearfn(vd, vl, vl * dsz, vlmax * dsz); | 85 | + satp_mode_supported_max = |
178 | +} | 86 | + satp_mode_max_from_map(cpu->cfg.satp_mode.supported); |
179 | + | 87 | |
180 | +/* generate helpers for fixed point instructions with OPIVV format */ | 88 | if (cpu->cfg.satp_mode.map == 0) { |
181 | +#define GEN_VEXT_VV_RM(NAME, ESZ, DSZ, CLEAR_FN) \ | 89 | if (cpu->cfg.satp_mode.init == 0) { |
182 | +void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ | ||
183 | + CPURISCVState *env, uint32_t desc) \ | ||
184 | +{ \ | ||
185 | + vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, ESZ, DSZ, \ | ||
186 | + do_##NAME, CLEAR_FN); \ | ||
187 | +} | ||
188 | + | ||
189 | +static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) | ||
190 | +{ | ||
191 | + uint8_t res = a + b; | ||
192 | + if (res < a) { | ||
193 | + res = UINT8_MAX; | ||
194 | + env->vxsat = 0x1; | ||
195 | + } | ||
196 | + return res; | ||
197 | +} | ||
198 | + | ||
199 | +static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a, | ||
200 | + uint16_t b) | ||
201 | +{ | ||
202 | + uint16_t res = a + b; | ||
203 | + if (res < a) { | ||
204 | + res = UINT16_MAX; | ||
205 | + env->vxsat = 0x1; | ||
206 | + } | ||
207 | + return res; | ||
208 | +} | ||
209 | + | ||
210 | +static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a, | ||
211 | + uint32_t b) | ||
212 | +{ | ||
213 | + uint32_t res = a + b; | ||
214 | + if (res < a) { | ||
215 | + res = UINT32_MAX; | ||
216 | + env->vxsat = 0x1; | ||
217 | + } | ||
218 | + return res; | ||
219 | +} | ||
220 | + | ||
221 | +static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a, | ||
222 | + uint64_t b) | ||
223 | +{ | ||
224 | + uint64_t res = a + b; | ||
225 | + if (res < a) { | ||
226 | + res = UINT64_MAX; | ||
227 | + env->vxsat = 0x1; | ||
228 | + } | ||
229 | + return res; | ||
230 | +} | ||
231 | + | ||
232 | +RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8) | ||
233 | +RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16) | ||
234 | +RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32) | ||
235 | +RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64) | ||
236 | +GEN_VEXT_VV_RM(vsaddu_vv_b, 1, 1, clearb) | ||
237 | +GEN_VEXT_VV_RM(vsaddu_vv_h, 2, 2, clearh) | ||
238 | +GEN_VEXT_VV_RM(vsaddu_vv_w, 4, 4, clearl) | ||
239 | +GEN_VEXT_VV_RM(vsaddu_vv_d, 8, 8, clearq) | ||
240 | + | ||
241 | +typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i, | ||
242 | + CPURISCVState *env, int vxrm); | ||
243 | + | ||
244 | +#define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ | ||
245 | +static inline void \ | ||
246 | +do_##NAME(void *vd, target_long s1, void *vs2, int i, \ | ||
247 | + CPURISCVState *env, int vxrm) \ | ||
248 | +{ \ | ||
249 | + TX2 s2 = *((T2 *)vs2 + HS2(i)); \ | ||
250 | + *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1); \ | ||
251 | +} | ||
252 | + | ||
253 | +static inline void | ||
254 | +vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2, | ||
255 | + CPURISCVState *env, | ||
256 | + uint32_t vl, uint32_t vm, uint32_t mlen, int vxrm, | ||
257 | + opivx2_rm_fn *fn) | ||
258 | +{ | ||
259 | + for (uint32_t i = 0; i < vl; i++) { | ||
260 | + if (!vm && !vext_elem_mask(v0, mlen, i)) { | ||
261 | + continue; | ||
262 | + } | ||
263 | + fn(vd, s1, vs2, i, env, vxrm); | ||
264 | + } | ||
265 | +} | ||
266 | + | ||
267 | +static inline void | ||
268 | +vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2, | ||
269 | + CPURISCVState *env, | ||
270 | + uint32_t desc, uint32_t esz, uint32_t dsz, | ||
271 | + opivx2_rm_fn *fn, clear_fn *clearfn) | ||
272 | +{ | ||
273 | + uint32_t vlmax = vext_maxsz(desc) / esz; | ||
274 | + uint32_t mlen = vext_mlen(desc); | ||
275 | + uint32_t vm = vext_vm(desc); | ||
276 | + uint32_t vl = env->vl; | ||
277 | + | ||
278 | + switch (env->vxrm) { | ||
279 | + case 0: /* rnu */ | ||
280 | + vext_vx_rm_1(vd, v0, s1, vs2, | ||
281 | + env, vl, vm, mlen, 0, fn); | ||
282 | + break; | ||
283 | + case 1: /* rne */ | ||
284 | + vext_vx_rm_1(vd, v0, s1, vs2, | ||
285 | + env, vl, vm, mlen, 1, fn); | ||
286 | + break; | ||
287 | + case 2: /* rdn */ | ||
288 | + vext_vx_rm_1(vd, v0, s1, vs2, | ||
289 | + env, vl, vm, mlen, 2, fn); | ||
290 | + break; | ||
291 | + default: /* rod */ | ||
292 | + vext_vx_rm_1(vd, v0, s1, vs2, | ||
293 | + env, vl, vm, mlen, 3, fn); | ||
294 | + break; | ||
295 | + } | ||
296 | + | ||
297 | + clearfn(vd, vl, vl * dsz, vlmax * dsz); | ||
298 | +} | ||
299 | + | ||
300 | +/* generate helpers for fixed point instructions with OPIVX format */ | ||
301 | +#define GEN_VEXT_VX_RM(NAME, ESZ, DSZ, CLEAR_FN) \ | ||
302 | +void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ | ||
303 | + void *vs2, CPURISCVState *env, uint32_t desc) \ | ||
304 | +{ \ | ||
305 | + vext_vx_rm_2(vd, v0, s1, vs2, env, desc, ESZ, DSZ, \ | ||
306 | + do_##NAME, CLEAR_FN); \ | ||
307 | +} | ||
308 | + | ||
309 | +RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8) | ||
310 | +RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16) | ||
311 | +RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32) | ||
312 | +RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64) | ||
313 | +GEN_VEXT_VX_RM(vsaddu_vx_b, 1, 1, clearb) | ||
314 | +GEN_VEXT_VX_RM(vsaddu_vx_h, 2, 2, clearh) | ||
315 | +GEN_VEXT_VX_RM(vsaddu_vx_w, 4, 4, clearl) | ||
316 | +GEN_VEXT_VX_RM(vsaddu_vx_d, 8, 8, clearq) | ||
317 | + | ||
318 | +static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) | ||
319 | +{ | ||
320 | + int8_t res = a + b; | ||
321 | + if ((res ^ a) & (res ^ b) & INT8_MIN) { | ||
322 | + res = a > 0 ? INT8_MAX : INT8_MIN; | ||
323 | + env->vxsat = 0x1; | ||
324 | + } | ||
325 | + return res; | ||
326 | +} | ||
327 | + | ||
328 | +static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) | ||
329 | +{ | ||
330 | + int16_t res = a + b; | ||
331 | + if ((res ^ a) & (res ^ b) & INT16_MIN) { | ||
332 | + res = a > 0 ? INT16_MAX : INT16_MIN; | ||
333 | + env->vxsat = 0x1; | ||
334 | + } | ||
335 | + return res; | ||
336 | +} | ||
337 | + | ||
338 | +static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) | ||
339 | +{ | ||
340 | + int32_t res = a + b; | ||
341 | + if ((res ^ a) & (res ^ b) & INT32_MIN) { | ||
342 | + res = a > 0 ? INT32_MAX : INT32_MIN; | ||
343 | + env->vxsat = 0x1; | ||
344 | + } | ||
345 | + return res; | ||
346 | +} | ||
347 | + | ||
348 | +static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) | ||
349 | +{ | ||
350 | + int64_t res = a + b; | ||
351 | + if ((res ^ a) & (res ^ b) & INT64_MIN) { | ||
352 | + res = a > 0 ? INT64_MAX : INT64_MIN; | ||
353 | + env->vxsat = 0x1; | ||
354 | + } | ||
355 | + return res; | ||
356 | +} | ||
357 | + | ||
358 | +RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8) | ||
359 | +RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16) | ||
360 | +RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32) | ||
361 | +RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64) | ||
362 | +GEN_VEXT_VV_RM(vsadd_vv_b, 1, 1, clearb) | ||
363 | +GEN_VEXT_VV_RM(vsadd_vv_h, 2, 2, clearh) | ||
364 | +GEN_VEXT_VV_RM(vsadd_vv_w, 4, 4, clearl) | ||
365 | +GEN_VEXT_VV_RM(vsadd_vv_d, 8, 8, clearq) | ||
366 | + | ||
367 | +RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8) | ||
368 | +RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16) | ||
369 | +RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32) | ||
370 | +RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64) | ||
371 | +GEN_VEXT_VX_RM(vsadd_vx_b, 1, 1, clearb) | ||
372 | +GEN_VEXT_VX_RM(vsadd_vx_h, 2, 2, clearh) | ||
373 | +GEN_VEXT_VX_RM(vsadd_vx_w, 4, 4, clearl) | ||
374 | +GEN_VEXT_VX_RM(vsadd_vx_d, 8, 8, clearq) | ||
375 | + | ||
376 | +static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) | ||
377 | +{ | ||
378 | + uint8_t res = a - b; | ||
379 | + if (res > a) { | ||
380 | + res = 0; | ||
381 | + env->vxsat = 0x1; | ||
382 | + } | ||
383 | + return res; | ||
384 | +} | ||
385 | + | ||
386 | +static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a, | ||
387 | + uint16_t b) | ||
388 | +{ | ||
389 | + uint16_t res = a - b; | ||
390 | + if (res > a) { | ||
391 | + res = 0; | ||
392 | + env->vxsat = 0x1; | ||
393 | + } | ||
394 | + return res; | ||
395 | +} | ||
396 | + | ||
397 | +static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a, | ||
398 | + uint32_t b) | ||
399 | +{ | ||
400 | + uint32_t res = a - b; | ||
401 | + if (res > a) { | ||
402 | + res = 0; | ||
403 | + env->vxsat = 0x1; | ||
404 | + } | ||
405 | + return res; | ||
406 | +} | ||
407 | + | ||
408 | +static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a, | ||
409 | + uint64_t b) | ||
410 | +{ | ||
411 | + uint64_t res = a - b; | ||
412 | + if (res > a) { | ||
413 | + res = 0; | ||
414 | + env->vxsat = 0x1; | ||
415 | + } | ||
416 | + return res; | ||
417 | +} | ||
418 | + | ||
419 | +RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8) | ||
420 | +RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16) | ||
421 | +RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32) | ||
422 | +RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64) | ||
423 | +GEN_VEXT_VV_RM(vssubu_vv_b, 1, 1, clearb) | ||
424 | +GEN_VEXT_VV_RM(vssubu_vv_h, 2, 2, clearh) | ||
425 | +GEN_VEXT_VV_RM(vssubu_vv_w, 4, 4, clearl) | ||
426 | +GEN_VEXT_VV_RM(vssubu_vv_d, 8, 8, clearq) | ||
427 | + | ||
428 | +RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8) | ||
429 | +RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16) | ||
430 | +RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32) | ||
431 | +RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64) | ||
432 | +GEN_VEXT_VX_RM(vssubu_vx_b, 1, 1, clearb) | ||
433 | +GEN_VEXT_VX_RM(vssubu_vx_h, 2, 2, clearh) | ||
434 | +GEN_VEXT_VX_RM(vssubu_vx_w, 4, 4, clearl) | ||
435 | +GEN_VEXT_VX_RM(vssubu_vx_d, 8, 8, clearq) | ||
436 | + | ||
437 | +static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) | ||
438 | +{ | ||
439 | + int8_t res = a - b; | ||
440 | + if ((res ^ a) & (a ^ b) & INT8_MIN) { | ||
441 | + res = a > 0 ? INT8_MAX : INT8_MIN; | ||
442 | + env->vxsat = 0x1; | ||
443 | + } | ||
444 | + return res; | ||
445 | +} | ||
446 | + | ||
447 | +static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) | ||
448 | +{ | ||
449 | + int16_t res = a - b; | ||
450 | + if ((res ^ a) & (a ^ b) & INT16_MIN) { | ||
451 | + res = a > 0 ? INT16_MAX : INT16_MIN; | ||
452 | + env->vxsat = 0x1; | ||
453 | + } | ||
454 | + return res; | ||
455 | +} | ||
456 | + | ||
457 | +static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) | ||
458 | +{ | ||
459 | + int32_t res = a - b; | ||
460 | + if ((res ^ a) & (a ^ b) & INT32_MIN) { | ||
461 | + res = a > 0 ? INT32_MAX : INT32_MIN; | ||
462 | + env->vxsat = 0x1; | ||
463 | + } | ||
464 | + return res; | ||
465 | +} | ||
466 | + | ||
467 | +static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) | ||
468 | +{ | ||
469 | + int64_t res = a - b; | ||
470 | + if ((res ^ a) & (a ^ b) & INT64_MIN) { | ||
471 | + res = a > 0 ? INT64_MAX : INT64_MIN; | ||
472 | + env->vxsat = 0x1; | ||
473 | + } | ||
474 | + return res; | ||
475 | +} | ||
476 | + | ||
477 | +RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8) | ||
478 | +RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16) | ||
479 | +RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32) | ||
480 | +RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64) | ||
481 | +GEN_VEXT_VV_RM(vssub_vv_b, 1, 1, clearb) | ||
482 | +GEN_VEXT_VV_RM(vssub_vv_h, 2, 2, clearh) | ||
483 | +GEN_VEXT_VV_RM(vssub_vv_w, 4, 4, clearl) | ||
484 | +GEN_VEXT_VV_RM(vssub_vv_d, 8, 8, clearq) | ||
485 | + | ||
486 | +RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8) | ||
487 | +RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16) | ||
488 | +RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32) | ||
489 | +RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64) | ||
490 | +GEN_VEXT_VX_RM(vssub_vx_b, 1, 1, clearb) | ||
491 | +GEN_VEXT_VX_RM(vssub_vx_h, 2, 2, clearh) | ||
492 | +GEN_VEXT_VX_RM(vssub_vx_w, 4, 4, clearl) | ||
493 | +GEN_VEXT_VX_RM(vssub_vx_d, 8, 8, clearq) | ||
494 | -- | 90 | -- |
495 | 2.27.0 | 91 | 2.41.0 |
496 | |||
497 | diff view generated by jsdifflib |
1 | From: LIU Zhiwei <zhiwei_liu@c-sky.com> | 1 | From: Vineet Gupta <vineetg@rivosinc.com> |
---|---|---|---|
2 | 2 | ||
3 | vlen is the vector register length in bits. | 3 | zicond is now codegen supported in both llvm and gcc. |
4 | elen is the max element size in bits. | ||
5 | vext_spec is the vector specification version, default value is v0.7.1. | ||
6 | 4 | ||
7 | Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> | 5 | This change allows seamless enabling/testing of zicond in downstream |
6 | projects. e.g. currently riscv-gnu-toolchain parses elf attributes | ||
7 | to create a cmdline for qemu but falls short of enabling it because of | ||
8 | the "x-" prefix. | ||
9 | |||
10 | Signed-off-by: Vineet Gupta <vineetg@rivosinc.com> | ||
11 | Message-ID: <20230808181715.436395-1-vineetg@rivosinc.com> | ||
8 | Reviewed-by: Alistair Francis <alistair.francis@wdc.com> | 12 | Reviewed-by: Alistair Francis <alistair.francis@wdc.com> |
9 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
10 | Message-id: 20200623215920.2594-3-zhiwei_liu@c-sky.com | ||
11 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | 13 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> |
12 | --- | 14 | --- |
13 | target/riscv/cpu.h | 5 +++++ | 15 | target/riscv/cpu.c | 2 +- |
14 | target/riscv/cpu.c | 7 +++++++ | 16 | 1 file changed, 1 insertion(+), 1 deletion(-) |
15 | 2 files changed, 12 insertions(+) | ||
16 | 17 | ||
17 | diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h | ||
18 | index XXXXXXX..XXXXXXX 100644 | ||
19 | --- a/target/riscv/cpu.h | ||
20 | +++ b/target/riscv/cpu.h | ||
21 | @@ -XXX,XX +XXX,XX @@ enum { | ||
22 | #define PRIV_VERSION_1_10_0 0x00011000 | ||
23 | #define PRIV_VERSION_1_11_0 0x00011100 | ||
24 | |||
25 | +#define VEXT_VERSION_0_07_1 0x00000701 | ||
26 | + | ||
27 | #define TRANSLATE_PMP_FAIL 2 | ||
28 | #define TRANSLATE_FAIL 1 | ||
29 | #define TRANSLATE_SUCCESS 0 | ||
30 | @@ -XXX,XX +XXX,XX @@ struct CPURISCVState { | ||
31 | target_ulong guest_phys_fault_addr; | ||
32 | |||
33 | target_ulong priv_ver; | ||
34 | + target_ulong vext_ver; | ||
35 | target_ulong misa; | ||
36 | target_ulong misa_mask; | ||
37 | |||
38 | @@ -XXX,XX +XXX,XX @@ typedef struct RISCVCPU { | ||
39 | |||
40 | char *priv_spec; | ||
41 | char *user_spec; | ||
42 | + uint16_t vlen; | ||
43 | + uint16_t elen; | ||
44 | bool mmu; | ||
45 | bool pmp; | ||
46 | } cfg; | ||
47 | diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c | 18 | diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c |
48 | index XXXXXXX..XXXXXXX 100644 | 19 | index XXXXXXX..XXXXXXX 100644 |
49 | --- a/target/riscv/cpu.c | 20 | --- a/target/riscv/cpu.c |
50 | +++ b/target/riscv/cpu.c | 21 | +++ b/target/riscv/cpu.c |
51 | @@ -XXX,XX +XXX,XX @@ static void set_priv_version(CPURISCVState *env, int priv_ver) | 22 | @@ -XXX,XX +XXX,XX @@ static Property riscv_cpu_extensions[] = { |
52 | env->priv_ver = priv_ver; | 23 | DEFINE_PROP_BOOL("zcf", RISCVCPU, cfg.ext_zcf, false), |
53 | } | 24 | DEFINE_PROP_BOOL("zcmp", RISCVCPU, cfg.ext_zcmp, false), |
54 | 25 | DEFINE_PROP_BOOL("zcmt", RISCVCPU, cfg.ext_zcmt, false), | |
55 | +static void set_vext_version(CPURISCVState *env, int vext_ver) | 26 | + DEFINE_PROP_BOOL("zicond", RISCVCPU, cfg.ext_zicond, false), |
56 | +{ | 27 | |
57 | + env->vext_ver = vext_ver; | 28 | /* Vendor-specific custom extensions */ |
58 | +} | 29 | DEFINE_PROP_BOOL("xtheadba", RISCVCPU, cfg.ext_xtheadba, false), |
59 | + | 30 | @@ -XXX,XX +XXX,XX @@ static Property riscv_cpu_extensions[] = { |
60 | static void set_feature(CPURISCVState *env, int feature) | 31 | DEFINE_PROP_BOOL("xventanacondops", RISCVCPU, cfg.ext_XVentanaCondOps, false), |
61 | { | 32 | |
62 | env->features |= (1ULL << feature); | 33 | /* These are experimental so mark with 'x-' */ |
63 | @@ -XXX,XX +XXX,XX @@ static void riscv_cpu_realize(DeviceState *dev, Error **errp) | 34 | - DEFINE_PROP_BOOL("x-zicond", RISCVCPU, cfg.ext_zicond, false), |
64 | CPURISCVState *env = &cpu->env; | 35 | |
65 | RISCVCPUClass *mcc = RISCV_CPU_GET_CLASS(dev); | 36 | /* ePMP 0.9.3 */ |
66 | int priv_version = PRIV_VERSION_1_11_0; | 37 | DEFINE_PROP_BOOL("x-epmp", RISCVCPU, cfg.epmp, false), |
67 | + int vext_version = VEXT_VERSION_0_07_1; | ||
68 | target_ulong target_misa = 0; | ||
69 | Error *local_err = NULL; | ||
70 | |||
71 | @@ -XXX,XX +XXX,XX @@ static void riscv_cpu_realize(DeviceState *dev, Error **errp) | ||
72 | } | ||
73 | |||
74 | set_priv_version(env, priv_version); | ||
75 | + set_vext_version(env, vext_version); | ||
76 | |||
77 | if (cpu->cfg.mmu) { | ||
78 | set_feature(env, RISCV_FEATURE_MMU); | ||
79 | -- | 38 | -- |
80 | 2.27.0 | 39 | 2.41.0 |
81 | |||
82 | diff view generated by jsdifflib |
1 | From: Jessica Clarke <jrtc27@jrtc27.com> | 1 | From: Daniel Henrique Barboza <dbarboza@ventanamicro.com> |
---|---|---|---|
2 | 2 | ||
3 | Claiming an interrupt and changing the source priority both potentially | 3 | A build with --enable-debug and without KVM will fail as follows: |
4 | affect whether an interrupt is pending, thus we must re-compute xEIP. | ||
5 | Note that we don't put the sifive_plic_update inside sifive_plic_claim | ||
6 | so that the logging of a claim (and the resulting IRQ) happens before | ||
7 | the state update, making the causal effect clear, and that we drop the | ||
8 | explicit call to sifive_plic_print_state when claiming since | ||
9 | sifive_plic_update already does that automatically at the end for us. | ||
10 | 4 | ||
11 | This can result in both spurious interrupt storms if you fail to | 5 | /usr/bin/ld: libqemu-riscv64-softmmu.fa.p/hw_riscv_virt.c.o: in function `virt_machine_init': |
12 | complete an IRQ before enabling interrupts (and no other actions occur | 6 | ./qemu/build/../hw/riscv/virt.c:1465: undefined reference to `kvm_riscv_aia_create' |
13 | that result in a call to sifive_plic_update), but also more importantly | ||
14 | lost interrupts if a disabled interrupt is pending and then becomes | ||
15 | enabled. | ||
16 | 7 | ||
17 | Signed-off-by: Jessica Clarke <jrtc27@jrtc27.com> | 8 | This happens because the code block with "if virt_use_kvm_aia(s)" isn't |
18 | Reviewed-by: Alistair Francis <alistair.francis@wdc.com> | 9 | being ignored by the debug build, resulting in an undefined reference to |
19 | Message-id: 20200618210649.22451-1-jrtc27@jrtc27.com | 10 | a KVM only function. |
20 | Message-Id: <20200618210649.22451-1-jrtc27@jrtc27.com> | 11 | |
12 | Adding a 'kvm_enabled()' conditional together with virt_use_kvm_aia() will | ||
13 | make the compiler crop the kvm_riscv_aia_create() call entirely from a | ||
14 | non-KVM build. Note that adding the 'kvm_enabled()' conditional inside | ||
15 | virt_use_kvm_aia() won't fix the build because this function would need | ||
16 | to be inlined multiple times to make the compiler zero out the entire | ||
17 | block. | ||
18 | |||
19 | While we're at it, use kvm_enabled() in all instances where | ||
20 | virt_use_kvm_aia() is checked to allow the compiler to elide these other | ||
21 | kvm-only instances as well. | ||
22 | |||
23 | Suggested-by: Richard Henderson <richard.henderson@linaro.org> | ||
24 | Fixes: dbdb99948e ("target/riscv: select KVM AIA in riscv virt machine") | ||
25 | Signed-off-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> | ||
26 | Reviewed-by: Andrew Jones <ajones@ventanamicro.com> | ||
27 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
28 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
29 | Message-ID: <20230830133503.711138-2-dbarboza@ventanamicro.com> | ||
21 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | 30 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> |
22 | --- | 31 | --- |
23 | hw/riscv/sifive_plic.c | 3 ++- | 32 | hw/riscv/virt.c | 6 +++--- |
24 | 1 file changed, 2 insertions(+), 1 deletion(-) | 33 | 1 file changed, 3 insertions(+), 3 deletions(-) |
25 | 34 | ||
26 | diff --git a/hw/riscv/sifive_plic.c b/hw/riscv/sifive_plic.c | 35 | diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c |
27 | index XXXXXXX..XXXXXXX 100644 | 36 | index XXXXXXX..XXXXXXX 100644 |
28 | --- a/hw/riscv/sifive_plic.c | 37 | --- a/hw/riscv/virt.c |
29 | +++ b/hw/riscv/sifive_plic.c | 38 | +++ b/hw/riscv/virt.c |
30 | @@ -XXX,XX +XXX,XX @@ static uint64_t sifive_plic_read(void *opaque, hwaddr addr, unsigned size) | 39 | @@ -XXX,XX +XXX,XX @@ static void create_fdt_sockets(RISCVVirtState *s, const MemMapEntry *memmap, |
31 | plic->addr_config[addrid].hartid, | 40 | } |
32 | mode_to_char(plic->addr_config[addrid].mode), | 41 | |
33 | value); | 42 | /* KVM AIA only has one APLIC instance */ |
34 | - sifive_plic_print_state(plic); | 43 | - if (virt_use_kvm_aia(s)) { |
35 | } | 44 | + if (kvm_enabled() && virt_use_kvm_aia(s)) { |
36 | + sifive_plic_update(plic); | 45 | create_fdt_socket_aplic(s, memmap, 0, |
37 | return value; | 46 | msi_m_phandle, msi_s_phandle, phandle, |
47 | &intc_phandles[0], xplic_phandles, | ||
48 | @@ -XXX,XX +XXX,XX @@ static void create_fdt_sockets(RISCVVirtState *s, const MemMapEntry *memmap, | ||
49 | |||
50 | g_free(intc_phandles); | ||
51 | |||
52 | - if (virt_use_kvm_aia(s)) { | ||
53 | + if (kvm_enabled() && virt_use_kvm_aia(s)) { | ||
54 | *irq_mmio_phandle = xplic_phandles[0]; | ||
55 | *irq_virtio_phandle = xplic_phandles[0]; | ||
56 | *irq_pcie_phandle = xplic_phandles[0]; | ||
57 | @@ -XXX,XX +XXX,XX @@ static void virt_machine_init(MachineState *machine) | ||
38 | } | 58 | } |
39 | } | 59 | } |
40 | @@ -XXX,XX +XXX,XX @@ static void sifive_plic_write(void *opaque, hwaddr addr, uint64_t value, | 60 | |
41 | qemu_log("plic: write priority: irq=%d priority=%d\n", | 61 | - if (virt_use_kvm_aia(s)) { |
42 | irq, plic->source_priority[irq]); | 62 | + if (kvm_enabled() && virt_use_kvm_aia(s)) { |
43 | } | 63 | kvm_riscv_aia_create(machine, IMSIC_MMIO_GROUP_MIN_SHIFT, |
44 | + sifive_plic_update(plic); | 64 | VIRT_IRQCHIP_NUM_SOURCES, VIRT_IRQCHIP_NUM_MSIS, |
45 | return; | 65 | memmap[VIRT_APLIC_S].base, |
46 | } else if (addr >= plic->pending_base && /* 1 bit per source */ | ||
47 | addr < plic->pending_base + (plic->num_sources >> 3)) | ||
48 | -- | 66 | -- |
49 | 2.27.0 | 67 | 2.41.0 |
50 | 68 | ||
51 | 69 | diff view generated by jsdifflib |
1 | From: LIU Zhiwei <zhiwei_liu@c-sky.com> | 1 | From: Daniel Henrique Barboza <dbarboza@ventanamicro.com> |
---|---|---|---|
2 | 2 | ||
3 | Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> | 3 | Commit 6df0b37e2ab breaks a --enable-debug build in a non-KVM |
4 | environment with the following error: | ||
5 | |||
6 | /usr/bin/ld: libqemu-riscv64-softmmu.fa.p/hw_intc_riscv_aplic.c.o: in function `riscv_kvm_aplic_request': | ||
7 | ./qemu/build/../hw/intc/riscv_aplic.c:486: undefined reference to `kvm_set_irq' | ||
8 | collect2: error: ld returned 1 exit status | ||
9 | |||
10 | This happens because the debug build will poke into the | ||
11 | 'if (is_kvm_aia(aplic->msimode))' block and fail to find a reference to | ||
12 | the KVM only function riscv_kvm_aplic_request(). | ||
13 | |||
14 | There are multiple solutions to fix this. We'll go with the same | ||
15 | solution from the previous patch, i.e. add a kvm_enabled() conditional | ||
16 | to filter out the block. But there's a catch: riscv_kvm_aplic_request() | ||
17 | is a local function that would end up being unused if the compiler crops | ||
18 | the block, and this won't work. Quoting Richard Henderson's explanation | ||
19 | in [1]: | ||
20 | |||
21 | "(...) the compiler won't eliminate entire unused functions with -O0" | ||
22 | |||
23 | We'll solve it by moving riscv_kvm_aplic_request() to kvm.c and add its | ||
24 | declaration in kvm_riscv.h, where all other KVM specific public | ||
25 | functions are already declared. Other archs handle KVM specific code in | ||
26 | this manner and we expect to do the same from now on. | ||
27 | |||
28 | [1] https://lore.kernel.org/qemu-riscv/d2f1ad02-eb03-138f-9d08-db676deeed05@linaro.org/ | ||
29 | |||
30 | Signed-off-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> | ||
31 | Reviewed-by: Andrew Jones <ajones@ventanamicro.com> | ||
32 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
4 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 33 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
5 | Reviewed-by: Alistair Francis <alistair.francis@wdc.com> | 34 | Message-ID: <20230830133503.711138-3-dbarboza@ventanamicro.com> |
6 | Message-id: 20200623215920.2594-39-zhiwei_liu@c-sky.com | ||
7 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | 35 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> |
8 | --- | 36 | --- |
9 | target/riscv/helper.h | 19 ++++++ | 37 | target/riscv/kvm_riscv.h | 1 + |
10 | target/riscv/insn32.decode | 6 ++ | 38 | hw/intc/riscv_aplic.c | 8 ++------ |
11 | target/riscv/insn_trans/trans_rvv.inc.c | 8 +++ | 39 | target/riscv/kvm.c | 5 +++++ |
12 | target/riscv/vector_helper.c | 85 +++++++++++++++++++++++++ | 40 | 3 files changed, 8 insertions(+), 6 deletions(-) |
13 | 4 files changed, 118 insertions(+) | ||
14 | 41 | ||
15 | diff --git a/target/riscv/helper.h b/target/riscv/helper.h | 42 | diff --git a/target/riscv/kvm_riscv.h b/target/riscv/kvm_riscv.h |
16 | index XXXXXXX..XXXXXXX 100644 | 43 | index XXXXXXX..XXXXXXX 100644 |
17 | --- a/target/riscv/helper.h | 44 | --- a/target/riscv/kvm_riscv.h |
18 | +++ b/target/riscv/helper.h | 45 | +++ b/target/riscv/kvm_riscv.h |
19 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vfmin_vf_d, void, ptr, ptr, i64, ptr, env, i32) | 46 | @@ -XXX,XX +XXX,XX @@ void kvm_riscv_aia_create(MachineState *machine, uint64_t group_shift, |
20 | DEF_HELPER_6(vfmax_vf_h, void, ptr, ptr, i64, ptr, env, i32) | 47 | uint64_t aia_irq_num, uint64_t aia_msi_num, |
21 | DEF_HELPER_6(vfmax_vf_w, void, ptr, ptr, i64, ptr, env, i32) | 48 | uint64_t aplic_base, uint64_t imsic_base, |
22 | DEF_HELPER_6(vfmax_vf_d, void, ptr, ptr, i64, ptr, env, i32) | 49 | uint64_t guest_num); |
23 | + | 50 | +void riscv_kvm_aplic_request(void *opaque, int irq, int level); |
24 | +DEF_HELPER_6(vfsgnj_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | 51 | |
25 | +DEF_HELPER_6(vfsgnj_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | 52 | #endif |
26 | +DEF_HELPER_6(vfsgnj_vv_d, void, ptr, ptr, ptr, ptr, env, i32) | 53 | diff --git a/hw/intc/riscv_aplic.c b/hw/intc/riscv_aplic.c |
27 | +DEF_HELPER_6(vfsgnjn_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | ||
28 | +DEF_HELPER_6(vfsgnjn_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
29 | +DEF_HELPER_6(vfsgnjn_vv_d, void, ptr, ptr, ptr, ptr, env, i32) | ||
30 | +DEF_HELPER_6(vfsgnjx_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | ||
31 | +DEF_HELPER_6(vfsgnjx_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
32 | +DEF_HELPER_6(vfsgnjx_vv_d, void, ptr, ptr, ptr, ptr, env, i32) | ||
33 | +DEF_HELPER_6(vfsgnj_vf_h, void, ptr, ptr, i64, ptr, env, i32) | ||
34 | +DEF_HELPER_6(vfsgnj_vf_w, void, ptr, ptr, i64, ptr, env, i32) | ||
35 | +DEF_HELPER_6(vfsgnj_vf_d, void, ptr, ptr, i64, ptr, env, i32) | ||
36 | +DEF_HELPER_6(vfsgnjn_vf_h, void, ptr, ptr, i64, ptr, env, i32) | ||
37 | +DEF_HELPER_6(vfsgnjn_vf_w, void, ptr, ptr, i64, ptr, env, i32) | ||
38 | +DEF_HELPER_6(vfsgnjn_vf_d, void, ptr, ptr, i64, ptr, env, i32) | ||
39 | +DEF_HELPER_6(vfsgnjx_vf_h, void, ptr, ptr, i64, ptr, env, i32) | ||
40 | +DEF_HELPER_6(vfsgnjx_vf_w, void, ptr, ptr, i64, ptr, env, i32) | ||
41 | +DEF_HELPER_6(vfsgnjx_vf_d, void, ptr, ptr, i64, ptr, env, i32) | ||
42 | diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode | ||
43 | index XXXXXXX..XXXXXXX 100644 | 54 | index XXXXXXX..XXXXXXX 100644 |
44 | --- a/target/riscv/insn32.decode | 55 | --- a/hw/intc/riscv_aplic.c |
45 | +++ b/target/riscv/insn32.decode | 56 | +++ b/hw/intc/riscv_aplic.c |
46 | @@ -XXX,XX +XXX,XX @@ vfmin_vv 000100 . ..... ..... 001 ..... 1010111 @r_vm | 57 | @@ -XXX,XX +XXX,XX @@ |
47 | vfmin_vf 000100 . ..... ..... 101 ..... 1010111 @r_vm | 58 | #include "target/riscv/cpu.h" |
48 | vfmax_vv 000110 . ..... ..... 001 ..... 1010111 @r_vm | 59 | #include "sysemu/sysemu.h" |
49 | vfmax_vf 000110 . ..... ..... 101 ..... 1010111 @r_vm | 60 | #include "sysemu/kvm.h" |
50 | +vfsgnj_vv 001000 . ..... ..... 001 ..... 1010111 @r_vm | 61 | +#include "kvm_riscv.h" |
51 | +vfsgnj_vf 001000 . ..... ..... 101 ..... 1010111 @r_vm | 62 | #include "migration/vmstate.h" |
52 | +vfsgnjn_vv 001001 . ..... ..... 001 ..... 1010111 @r_vm | 63 | |
53 | +vfsgnjn_vf 001001 . ..... ..... 101 ..... 1010111 @r_vm | 64 | #define APLIC_MAX_IDC (1UL << 14) |
54 | +vfsgnjx_vv 001010 . ..... ..... 001 ..... 1010111 @r_vm | 65 | @@ -XXX,XX +XXX,XX @@ static uint32_t riscv_aplic_idc_claimi(RISCVAPLICState *aplic, uint32_t idc) |
55 | +vfsgnjx_vf 001010 . ..... ..... 101 ..... 1010111 @r_vm | 66 | return topi; |
56 | 67 | } | |
57 | vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm | 68 | |
58 | vsetvl 1000000 ..... ..... 111 ..... 1010111 @r | 69 | -static void riscv_kvm_aplic_request(void *opaque, int irq, int level) |
59 | diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c | 70 | -{ |
71 | - kvm_set_irq(kvm_state, irq, !!level); | ||
72 | -} | ||
73 | - | ||
74 | static void riscv_aplic_request(void *opaque, int irq, int level) | ||
75 | { | ||
76 | bool update = false; | ||
77 | @@ -XXX,XX +XXX,XX @@ static void riscv_aplic_realize(DeviceState *dev, Error **errp) | ||
78 | * have IRQ lines delegated by their parent APLIC. | ||
79 | */ | ||
80 | if (!aplic->parent) { | ||
81 | - if (is_kvm_aia(aplic->msimode)) { | ||
82 | + if (kvm_enabled() && is_kvm_aia(aplic->msimode)) { | ||
83 | qdev_init_gpio_in(dev, riscv_kvm_aplic_request, aplic->num_irqs); | ||
84 | } else { | ||
85 | qdev_init_gpio_in(dev, riscv_aplic_request, aplic->num_irqs); | ||
86 | diff --git a/target/riscv/kvm.c b/target/riscv/kvm.c | ||
60 | index XXXXXXX..XXXXXXX 100644 | 87 | index XXXXXXX..XXXXXXX 100644 |
61 | --- a/target/riscv/insn_trans/trans_rvv.inc.c | 88 | --- a/target/riscv/kvm.c |
62 | +++ b/target/riscv/insn_trans/trans_rvv.inc.c | 89 | +++ b/target/riscv/kvm.c |
63 | @@ -XXX,XX +XXX,XX @@ GEN_OPFVV_TRANS(vfmin_vv, opfvv_check) | 90 | @@ -XXX,XX +XXX,XX @@ |
64 | GEN_OPFVV_TRANS(vfmax_vv, opfvv_check) | 91 | #include "sysemu/runstate.h" |
65 | GEN_OPFVF_TRANS(vfmin_vf, opfvf_check) | 92 | #include "hw/riscv/numa.h" |
66 | GEN_OPFVF_TRANS(vfmax_vf, opfvf_check) | 93 | |
67 | + | 94 | +void riscv_kvm_aplic_request(void *opaque, int irq, int level) |
68 | +/* Vector Floating-Point Sign-Injection Instructions */ | ||
69 | +GEN_OPFVV_TRANS(vfsgnj_vv, opfvv_check) | ||
70 | +GEN_OPFVV_TRANS(vfsgnjn_vv, opfvv_check) | ||
71 | +GEN_OPFVV_TRANS(vfsgnjx_vv, opfvv_check) | ||
72 | +GEN_OPFVF_TRANS(vfsgnj_vf, opfvf_check) | ||
73 | +GEN_OPFVF_TRANS(vfsgnjn_vf, opfvf_check) | ||
74 | +GEN_OPFVF_TRANS(vfsgnjx_vf, opfvf_check) | ||
75 | diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c | ||
76 | index XXXXXXX..XXXXXXX 100644 | ||
77 | --- a/target/riscv/vector_helper.c | ||
78 | +++ b/target/riscv/vector_helper.c | ||
79 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maxnum) | ||
80 | GEN_VEXT_VF(vfmax_vf_h, 2, 2, clearh) | ||
81 | GEN_VEXT_VF(vfmax_vf_w, 4, 4, clearl) | ||
82 | GEN_VEXT_VF(vfmax_vf_d, 8, 8, clearq) | ||
83 | + | ||
84 | +/* Vector Floating-Point Sign-Injection Instructions */ | ||
85 | +static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s) | ||
86 | +{ | 95 | +{ |
87 | + return deposit64(b, 0, 15, a); | 96 | + kvm_set_irq(kvm_state, irq, !!level); |
88 | +} | 97 | +} |
89 | + | 98 | + |
90 | +static uint32_t fsgnj32(uint32_t a, uint32_t b, float_status *s) | 99 | static uint64_t kvm_riscv_reg_id(CPURISCVState *env, uint64_t type, |
91 | +{ | 100 | uint64_t idx) |
92 | + return deposit64(b, 0, 31, a); | 101 | { |
93 | +} | ||
94 | + | ||
95 | +static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s) | ||
96 | +{ | ||
97 | + return deposit64(b, 0, 63, a); | ||
98 | +} | ||
99 | + | ||
100 | +RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16) | ||
101 | +RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32) | ||
102 | +RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64) | ||
103 | +GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2, 2, clearh) | ||
104 | +GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4, 4, clearl) | ||
105 | +GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8, 8, clearq) | ||
106 | +RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16) | ||
107 | +RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32) | ||
108 | +RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64) | ||
109 | +GEN_VEXT_VF(vfsgnj_vf_h, 2, 2, clearh) | ||
110 | +GEN_VEXT_VF(vfsgnj_vf_w, 4, 4, clearl) | ||
111 | +GEN_VEXT_VF(vfsgnj_vf_d, 8, 8, clearq) | ||
112 | + | ||
113 | +static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s) | ||
114 | +{ | ||
115 | + return deposit64(~b, 0, 15, a); | ||
116 | +} | ||
117 | + | ||
118 | +static uint32_t fsgnjn32(uint32_t a, uint32_t b, float_status *s) | ||
119 | +{ | ||
120 | + return deposit64(~b, 0, 31, a); | ||
121 | +} | ||
122 | + | ||
123 | +static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s) | ||
124 | +{ | ||
125 | + return deposit64(~b, 0, 63, a); | ||
126 | +} | ||
127 | + | ||
128 | +RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16) | ||
129 | +RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32) | ||
130 | +RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64) | ||
131 | +GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2, 2, clearh) | ||
132 | +GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4, 4, clearl) | ||
133 | +GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8, 8, clearq) | ||
134 | +RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16) | ||
135 | +RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32) | ||
136 | +RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64) | ||
137 | +GEN_VEXT_VF(vfsgnjn_vf_h, 2, 2, clearh) | ||
138 | +GEN_VEXT_VF(vfsgnjn_vf_w, 4, 4, clearl) | ||
139 | +GEN_VEXT_VF(vfsgnjn_vf_d, 8, 8, clearq) | ||
140 | + | ||
141 | +static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s) | ||
142 | +{ | ||
143 | + return deposit64(b ^ a, 0, 15, a); | ||
144 | +} | ||
145 | + | ||
146 | +static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s) | ||
147 | +{ | ||
148 | + return deposit64(b ^ a, 0, 31, a); | ||
149 | +} | ||
150 | + | ||
151 | +static uint64_t fsgnjx64(uint64_t a, uint64_t b, float_status *s) | ||
152 | +{ | ||
153 | + return deposit64(b ^ a, 0, 63, a); | ||
154 | +} | ||
155 | + | ||
156 | +RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16) | ||
157 | +RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32) | ||
158 | +RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64) | ||
159 | +GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2, 2, clearh) | ||
160 | +GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4, 4, clearl) | ||
161 | +GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8, 8, clearq) | ||
162 | +RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16) | ||
163 | +RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32) | ||
164 | +RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64) | ||
165 | +GEN_VEXT_VF(vfsgnjx_vf_h, 2, 2, clearh) | ||
166 | +GEN_VEXT_VF(vfsgnjx_vf_w, 4, 4, clearl) | ||
167 | +GEN_VEXT_VF(vfsgnjx_vf_d, 8, 8, clearq) | ||
168 | -- | 102 | -- |
169 | 2.27.0 | 103 | 2.41.0 |
170 | 104 | ||
171 | 105 | diff view generated by jsdifflib |
1 | From: LIU Zhiwei <zhiwei_liu@c-sky.com> | 1 | From: Robbin Ehn <rehn@rivosinc.com> |
---|---|---|---|
2 | 2 | ||
3 | Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> | 3 | This patch adds the new extensions in |
4 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 4 | linux 6.5 to the hwprobe syscall. |
5 | Reviewed-by: Alistair Francis <alistair.francis@wdc.com> | 5 | |
6 | Message-id: 20200623215920.2594-37-zhiwei_liu@c-sky.com | 6 | It also fixes the RVC check to OR with the correct value. |
7 | The previous variable contained 0, therefore it | ||
8 | still worked. | ||
9 | |||
10 | Signed-off-by: Robbin Ehn <rehn@rivosinc.com> | ||
11 | Acked-by: Richard Henderson <richard.henderson@linaro.org> | ||
12 | Acked-by: Alistair Francis <alistair.francis@wdc.com> | ||
13 | Message-ID: <bc82203b72d7efb30f1b4a8f9eb3d94699799dc8.camel@rivosinc.com> | ||
7 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | 14 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> |
8 | --- | 15 | --- |
9 | target/riscv/helper.h | 4 +++ | 16 | linux-user/syscall.c | 14 +++++++++++++- |
10 | target/riscv/insn32.decode | 3 ++ | 17 | 1 file changed, 13 insertions(+), 1 deletion(-) |
11 | target/riscv/insn_trans/trans_rvv.inc.c | 43 +++++++++++++++++++++++++ | ||
12 | target/riscv/vector_helper.c | 43 +++++++++++++++++++++++++ | ||
13 | 4 files changed, 93 insertions(+) | ||
14 | 18 | ||
15 | diff --git a/target/riscv/helper.h b/target/riscv/helper.h | 19 | diff --git a/linux-user/syscall.c b/linux-user/syscall.c |
16 | index XXXXXXX..XXXXXXX 100644 | 20 | index XXXXXXX..XXXXXXX 100644 |
17 | --- a/target/riscv/helper.h | 21 | --- a/linux-user/syscall.c |
18 | +++ b/target/riscv/helper.h | 22 | +++ b/linux-user/syscall.c |
19 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vfwmsac_vf_h, void, ptr, ptr, i64, ptr, env, i32) | 23 | @@ -XXX,XX +XXX,XX @@ static int do_getdents64(abi_long dirfd, abi_long arg2, abi_long count) |
20 | DEF_HELPER_6(vfwmsac_vf_w, void, ptr, ptr, i64, ptr, env, i32) | 24 | #define RISCV_HWPROBE_KEY_IMA_EXT_0 4 |
21 | DEF_HELPER_6(vfwnmsac_vf_h, void, ptr, ptr, i64, ptr, env, i32) | 25 | #define RISCV_HWPROBE_IMA_FD (1 << 0) |
22 | DEF_HELPER_6(vfwnmsac_vf_w, void, ptr, ptr, i64, ptr, env, i32) | 26 | #define RISCV_HWPROBE_IMA_C (1 << 1) |
23 | + | 27 | +#define RISCV_HWPROBE_IMA_V (1 << 2) |
24 | +DEF_HELPER_5(vfsqrt_v_h, void, ptr, ptr, ptr, env, i32) | 28 | +#define RISCV_HWPROBE_EXT_ZBA (1 << 3) |
25 | +DEF_HELPER_5(vfsqrt_v_w, void, ptr, ptr, ptr, env, i32) | 29 | +#define RISCV_HWPROBE_EXT_ZBB (1 << 4) |
26 | +DEF_HELPER_5(vfsqrt_v_d, void, ptr, ptr, ptr, env, i32) | 30 | +#define RISCV_HWPROBE_EXT_ZBS (1 << 5) |
27 | diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode | 31 | |
28 | index XXXXXXX..XXXXXXX 100644 | 32 | #define RISCV_HWPROBE_KEY_CPUPERF_0 5 |
29 | --- a/target/riscv/insn32.decode | 33 | #define RISCV_HWPROBE_MISALIGNED_UNKNOWN (0 << 0) |
30 | +++ b/target/riscv/insn32.decode | 34 | @@ -XXX,XX +XXX,XX @@ static void risc_hwprobe_fill_pairs(CPURISCVState *env, |
31 | @@ -XXX,XX +XXX,XX @@ | 35 | riscv_has_ext(env, RVD) ? |
32 | &shift shamt rs1 rd | 36 | RISCV_HWPROBE_IMA_FD : 0; |
33 | &atomic aq rl rs2 rs1 rd | 37 | value |= riscv_has_ext(env, RVC) ? |
34 | &rmrr vm rd rs1 rs2 | 38 | - RISCV_HWPROBE_IMA_C : pair->value; |
35 | +&rmr vm rd rs2 | 39 | + RISCV_HWPROBE_IMA_C : 0; |
36 | &rwdvm vm wd rd rs1 rs2 | 40 | + value |= riscv_has_ext(env, RVV) ? |
37 | &r2nfvm vm rd rs1 nf | 41 | + RISCV_HWPROBE_IMA_V : 0; |
38 | &rnfvm vm rd rs1 rs2 nf | 42 | + value |= cfg->ext_zba ? |
39 | @@ -XXX,XX +XXX,XX @@ | 43 | + RISCV_HWPROBE_EXT_ZBA : 0; |
40 | @r2_rm ....... ..... ..... ... ..... ....... %rs1 %rm %rd | 44 | + value |= cfg->ext_zbb ? |
41 | @r2 ....... ..... ..... ... ..... ....... %rs1 %rd | 45 | + RISCV_HWPROBE_EXT_ZBB : 0; |
42 | @r2_nfvm ... ... vm:1 ..... ..... ... ..... ....... &r2nfvm %nf %rs1 %rd | 46 | + value |= cfg->ext_zbs ? |
43 | +@r2_vm ...... vm:1 ..... ..... ... ..... ....... &rmr %rs2 %rd | 47 | + RISCV_HWPROBE_EXT_ZBS : 0; |
44 | @r_nfvm ... ... vm:1 ..... ..... ... ..... ....... &rnfvm %nf %rs2 %rs1 %rd | 48 | __put_user(value, &pair->value); |
45 | @r_vm ...... vm:1 ..... ..... ... ..... ....... &rmrr %rs2 %rs1 %rd | 49 | break; |
46 | @r_vm_1 ...... . ..... ..... ... ..... ....... &rmrr vm=1 %rs2 %rs1 %rd | 50 | case RISCV_HWPROBE_KEY_CPUPERF_0: |
47 | @@ -XXX,XX +XXX,XX @@ vfwmsac_vv 111110 . ..... ..... 001 ..... 1010111 @r_vm | ||
48 | vfwmsac_vf 111110 . ..... ..... 101 ..... 1010111 @r_vm | ||
49 | vfwnmsac_vv 111111 . ..... ..... 001 ..... 1010111 @r_vm | ||
50 | vfwnmsac_vf 111111 . ..... ..... 101 ..... 1010111 @r_vm | ||
51 | +vfsqrt_v 100011 . ..... 00000 001 ..... 1010111 @r2_vm | ||
52 | |||
53 | vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm | ||
54 | vsetvl 1000000 ..... ..... 111 ..... 1010111 @r | ||
55 | diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c | ||
56 | index XXXXXXX..XXXXXXX 100644 | ||
57 | --- a/target/riscv/insn_trans/trans_rvv.inc.c | ||
58 | +++ b/target/riscv/insn_trans/trans_rvv.inc.c | ||
59 | @@ -XXX,XX +XXX,XX @@ GEN_OPFVF_WIDEN_TRANS(vfwmacc_vf) | ||
60 | GEN_OPFVF_WIDEN_TRANS(vfwnmacc_vf) | ||
61 | GEN_OPFVF_WIDEN_TRANS(vfwmsac_vf) | ||
62 | GEN_OPFVF_WIDEN_TRANS(vfwnmsac_vf) | ||
63 | + | ||
64 | +/* Vector Floating-Point Square-Root Instruction */ | ||
65 | + | ||
66 | +/* | ||
67 | + * If the current SEW does not correspond to a supported IEEE floating-point | ||
68 | + * type, an illegal instruction exception is raised | ||
69 | + */ | ||
70 | +static bool opfv_check(DisasContext *s, arg_rmr *a) | ||
71 | +{ | ||
72 | + return (vext_check_isa_ill(s) && | ||
73 | + vext_check_overlap_mask(s, a->rd, a->vm, false) && | ||
74 | + vext_check_reg(s, a->rd, false) && | ||
75 | + vext_check_reg(s, a->rs2, false) && | ||
76 | + (s->sew != 0)); | ||
77 | +} | ||
78 | + | ||
79 | +#define GEN_OPFV_TRANS(NAME, CHECK) \ | ||
80 | +static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ | ||
81 | +{ \ | ||
82 | + if (CHECK(s, a)) { \ | ||
83 | + uint32_t data = 0; \ | ||
84 | + static gen_helper_gvec_3_ptr * const fns[3] = { \ | ||
85 | + gen_helper_##NAME##_h, \ | ||
86 | + gen_helper_##NAME##_w, \ | ||
87 | + gen_helper_##NAME##_d, \ | ||
88 | + }; \ | ||
89 | + TCGLabel *over = gen_new_label(); \ | ||
90 | + gen_set_rm(s, 7); \ | ||
91 | + tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ | ||
92 | + \ | ||
93 | + data = FIELD_DP32(data, VDATA, MLEN, s->mlen); \ | ||
94 | + data = FIELD_DP32(data, VDATA, VM, a->vm); \ | ||
95 | + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ | ||
96 | + tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ | ||
97 | + vreg_ofs(s, a->rs2), cpu_env, 0, \ | ||
98 | + s->vlen / 8, data, fns[s->sew - 1]); \ | ||
99 | + gen_set_label(over); \ | ||
100 | + return true; \ | ||
101 | + } \ | ||
102 | + return false; \ | ||
103 | +} | ||
104 | + | ||
105 | +GEN_OPFV_TRANS(vfsqrt_v, opfv_check) | ||
106 | diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c | ||
107 | index XXXXXXX..XXXXXXX 100644 | ||
108 | --- a/target/riscv/vector_helper.c | ||
109 | +++ b/target/riscv/vector_helper.c | ||
110 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16) | ||
111 | RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32) | ||
112 | GEN_VEXT_VF(vfwnmsac_vf_h, 2, 4, clearl) | ||
113 | GEN_VEXT_VF(vfwnmsac_vf_w, 4, 8, clearq) | ||
114 | + | ||
115 | +/* Vector Floating-Point Square-Root Instruction */ | ||
116 | +/* (TD, T2, TX2) */ | ||
117 | +#define OP_UU_H uint16_t, uint16_t, uint16_t | ||
118 | +#define OP_UU_W uint32_t, uint32_t, uint32_t | ||
119 | +#define OP_UU_D uint64_t, uint64_t, uint64_t | ||
120 | + | ||
121 | +#define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ | ||
122 | +static void do_##NAME(void *vd, void *vs2, int i, \ | ||
123 | + CPURISCVState *env) \ | ||
124 | +{ \ | ||
125 | + TX2 s2 = *((T2 *)vs2 + HS2(i)); \ | ||
126 | + *((TD *)vd + HD(i)) = OP(s2, &env->fp_status); \ | ||
127 | +} | ||
128 | + | ||
129 | +#define GEN_VEXT_V_ENV(NAME, ESZ, DSZ, CLEAR_FN) \ | ||
130 | +void HELPER(NAME)(void *vd, void *v0, void *vs2, \ | ||
131 | + CPURISCVState *env, uint32_t desc) \ | ||
132 | +{ \ | ||
133 | + uint32_t vlmax = vext_maxsz(desc) / ESZ; \ | ||
134 | + uint32_t mlen = vext_mlen(desc); \ | ||
135 | + uint32_t vm = vext_vm(desc); \ | ||
136 | + uint32_t vl = env->vl; \ | ||
137 | + uint32_t i; \ | ||
138 | + \ | ||
139 | + if (vl == 0) { \ | ||
140 | + return; \ | ||
141 | + } \ | ||
142 | + for (i = 0; i < vl; i++) { \ | ||
143 | + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ | ||
144 | + continue; \ | ||
145 | + } \ | ||
146 | + do_##NAME(vd, vs2, i, env); \ | ||
147 | + } \ | ||
148 | + CLEAR_FN(vd, vl, vl * DSZ, vlmax * DSZ); \ | ||
149 | +} | ||
150 | + | ||
151 | +RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt) | ||
152 | +RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt) | ||
153 | +RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt) | ||
154 | +GEN_VEXT_V_ENV(vfsqrt_v_h, 2, 2, clearh) | ||
155 | +GEN_VEXT_V_ENV(vfsqrt_v_w, 4, 4, clearl) | ||
156 | +GEN_VEXT_V_ENV(vfsqrt_v_d, 8, 8, clearq) | ||
157 | -- | 51 | -- |
158 | 2.27.0 | 52 | 2.41.0 |
159 | |||
160 | diff view generated by jsdifflib |
1 | From: LIU Zhiwei <zhiwei_liu@c-sky.com> | 1 | From: Ard Biesheuvel <ardb@kernel.org> |
---|---|---|---|
2 | 2 | ||
3 | Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> | 3 | Use the accelerated SubBytes/ShiftRows/AddRoundKey AES helper to |
4 | implement the first half of the key schedule derivation. This does not | ||
5 | actually involve shifting rows, so clone the same value into all four | ||
6 | columns of the AES vector to counter that operation. | ||
7 | |||
8 | Cc: Richard Henderson <richard.henderson@linaro.org> | ||
9 | Cc: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
10 | Cc: Palmer Dabbelt <palmer@dabbelt.com> | ||
11 | Cc: Alistair Francis <alistair.francis@wdc.com> | ||
12 | Signed-off-by: Ard Biesheuvel <ardb@kernel.org> | ||
13 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
4 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 14 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
5 | Reviewed-by: Alistair Francis <alistair.francis@wdc.com> | 15 | Message-ID: <20230831154118.138727-1-ardb@kernel.org> |
6 | Message-id: 20200623215920.2594-38-zhiwei_liu@c-sky.com | ||
7 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | 16 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> |
8 | --- | 17 | --- |
9 | target/riscv/helper.h | 13 ++++++++++++ | 18 | target/riscv/crypto_helper.c | 17 +++++------------ |
10 | target/riscv/insn32.decode | 4 ++++ | 19 | 1 file changed, 5 insertions(+), 12 deletions(-) |
11 | target/riscv/insn_trans/trans_rvv.inc.c | 6 ++++++ | ||
12 | target/riscv/vector_helper.c | 27 +++++++++++++++++++++++++ | ||
13 | 4 files changed, 50 insertions(+) | ||
14 | 20 | ||
15 | diff --git a/target/riscv/helper.h b/target/riscv/helper.h | 21 | diff --git a/target/riscv/crypto_helper.c b/target/riscv/crypto_helper.c |
16 | index XXXXXXX..XXXXXXX 100644 | 22 | index XXXXXXX..XXXXXXX 100644 |
17 | --- a/target/riscv/helper.h | 23 | --- a/target/riscv/crypto_helper.c |
18 | +++ b/target/riscv/helper.h | 24 | +++ b/target/riscv/crypto_helper.c |
19 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vfwnmsac_vf_w, void, ptr, ptr, i64, ptr, env, i32) | 25 | @@ -XXX,XX +XXX,XX @@ target_ulong HELPER(aes64ks1i)(target_ulong rs1, target_ulong rnum) |
20 | DEF_HELPER_5(vfsqrt_v_h, void, ptr, ptr, ptr, env, i32) | 26 | |
21 | DEF_HELPER_5(vfsqrt_v_w, void, ptr, ptr, ptr, env, i32) | 27 | uint8_t enc_rnum = rnum; |
22 | DEF_HELPER_5(vfsqrt_v_d, void, ptr, ptr, ptr, env, i32) | 28 | uint32_t temp = (RS1 >> 32) & 0xFFFFFFFF; |
23 | + | 29 | - uint8_t rcon_ = 0; |
24 | +DEF_HELPER_6(vfmin_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | 30 | - target_ulong result; |
25 | +DEF_HELPER_6(vfmin_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | 31 | + AESState t, rc = {}; |
26 | +DEF_HELPER_6(vfmin_vv_d, void, ptr, ptr, ptr, ptr, env, i32) | 32 | |
27 | +DEF_HELPER_6(vfmax_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | 33 | if (enc_rnum != 0xA) { |
28 | +DEF_HELPER_6(vfmax_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | 34 | temp = ror32(temp, 8); /* Rotate right by 8 */ |
29 | +DEF_HELPER_6(vfmax_vv_d, void, ptr, ptr, ptr, ptr, env, i32) | 35 | - rcon_ = round_consts[enc_rnum]; |
30 | +DEF_HELPER_6(vfmin_vf_h, void, ptr, ptr, i64, ptr, env, i32) | 36 | + rc.w[0] = rc.w[1] = round_consts[enc_rnum]; |
31 | +DEF_HELPER_6(vfmin_vf_w, void, ptr, ptr, i64, ptr, env, i32) | 37 | } |
32 | +DEF_HELPER_6(vfmin_vf_d, void, ptr, ptr, i64, ptr, env, i32) | 38 | |
33 | +DEF_HELPER_6(vfmax_vf_h, void, ptr, ptr, i64, ptr, env, i32) | 39 | - temp = ((uint32_t)AES_sbox[(temp >> 24) & 0xFF] << 24) | |
34 | +DEF_HELPER_6(vfmax_vf_w, void, ptr, ptr, i64, ptr, env, i32) | 40 | - ((uint32_t)AES_sbox[(temp >> 16) & 0xFF] << 16) | |
35 | +DEF_HELPER_6(vfmax_vf_d, void, ptr, ptr, i64, ptr, env, i32) | 41 | - ((uint32_t)AES_sbox[(temp >> 8) & 0xFF] << 8) | |
36 | diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode | 42 | - ((uint32_t)AES_sbox[(temp >> 0) & 0xFF] << 0); |
37 | index XXXXXXX..XXXXXXX 100644 | 43 | + t.w[0] = t.w[1] = t.w[2] = t.w[3] = temp; |
38 | --- a/target/riscv/insn32.decode | 44 | + aesenc_SB_SR_AK(&t, &t, &rc, false); |
39 | +++ b/target/riscv/insn32.decode | 45 | |
40 | @@ -XXX,XX +XXX,XX @@ vfwmsac_vf 111110 . ..... ..... 101 ..... 1010111 @r_vm | 46 | - temp ^= rcon_; |
41 | vfwnmsac_vv 111111 . ..... ..... 001 ..... 1010111 @r_vm | 47 | - |
42 | vfwnmsac_vf 111111 . ..... ..... 101 ..... 1010111 @r_vm | 48 | - result = ((uint64_t)temp << 32) | temp; |
43 | vfsqrt_v 100011 . ..... 00000 001 ..... 1010111 @r2_vm | 49 | - |
44 | +vfmin_vv 000100 . ..... ..... 001 ..... 1010111 @r_vm | 50 | - return result; |
45 | +vfmin_vf 000100 . ..... ..... 101 ..... 1010111 @r_vm | 51 | + return t.d[0]; |
46 | +vfmax_vv 000110 . ..... ..... 001 ..... 1010111 @r_vm | ||
47 | +vfmax_vf 000110 . ..... ..... 101 ..... 1010111 @r_vm | ||
48 | |||
49 | vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm | ||
50 | vsetvl 1000000 ..... ..... 111 ..... 1010111 @r | ||
51 | diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c | ||
52 | index XXXXXXX..XXXXXXX 100644 | ||
53 | --- a/target/riscv/insn_trans/trans_rvv.inc.c | ||
54 | +++ b/target/riscv/insn_trans/trans_rvv.inc.c | ||
55 | @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ | ||
56 | } | 52 | } |
57 | 53 | ||
58 | GEN_OPFV_TRANS(vfsqrt_v, opfv_check) | 54 | target_ulong HELPER(aes64im)(target_ulong rs1) |
59 | + | ||
60 | +/* Vector Floating-Point MIN/MAX Instructions */ | ||
61 | +GEN_OPFVV_TRANS(vfmin_vv, opfvv_check) | ||
62 | +GEN_OPFVV_TRANS(vfmax_vv, opfvv_check) | ||
63 | +GEN_OPFVF_TRANS(vfmin_vf, opfvf_check) | ||
64 | +GEN_OPFVF_TRANS(vfmax_vf, opfvf_check) | ||
65 | diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c | ||
66 | index XXXXXXX..XXXXXXX 100644 | ||
67 | --- a/target/riscv/vector_helper.c | ||
68 | +++ b/target/riscv/vector_helper.c | ||
69 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt) | ||
70 | GEN_VEXT_V_ENV(vfsqrt_v_h, 2, 2, clearh) | ||
71 | GEN_VEXT_V_ENV(vfsqrt_v_w, 4, 4, clearl) | ||
72 | GEN_VEXT_V_ENV(vfsqrt_v_d, 8, 8, clearq) | ||
73 | + | ||
74 | +/* Vector Floating-Point MIN/MAX Instructions */ | ||
75 | +RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minnum) | ||
76 | +RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, float32_minnum) | ||
77 | +RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minnum) | ||
78 | +GEN_VEXT_VV_ENV(vfmin_vv_h, 2, 2, clearh) | ||
79 | +GEN_VEXT_VV_ENV(vfmin_vv_w, 4, 4, clearl) | ||
80 | +GEN_VEXT_VV_ENV(vfmin_vv_d, 8, 8, clearq) | ||
81 | +RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minnum) | ||
82 | +RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minnum) | ||
83 | +RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minnum) | ||
84 | +GEN_VEXT_VF(vfmin_vf_h, 2, 2, clearh) | ||
85 | +GEN_VEXT_VF(vfmin_vf_w, 4, 4, clearl) | ||
86 | +GEN_VEXT_VF(vfmin_vf_d, 8, 8, clearq) | ||
87 | + | ||
88 | +RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maxnum) | ||
89 | +RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maxnum) | ||
90 | +RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maxnum) | ||
91 | +GEN_VEXT_VV_ENV(vfmax_vv_h, 2, 2, clearh) | ||
92 | +GEN_VEXT_VV_ENV(vfmax_vv_w, 4, 4, clearl) | ||
93 | +GEN_VEXT_VV_ENV(vfmax_vv_d, 8, 8, clearq) | ||
94 | +RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maxnum) | ||
95 | +RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maxnum) | ||
96 | +RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maxnum) | ||
97 | +GEN_VEXT_VF(vfmax_vf_h, 2, 2, clearh) | ||
98 | +GEN_VEXT_VF(vfmax_vf_w, 4, 4, clearl) | ||
99 | +GEN_VEXT_VF(vfmax_vf_d, 8, 8, clearq) | ||
100 | -- | 55 | -- |
101 | 2.27.0 | 56 | 2.41.0 |
102 | 57 | ||
103 | 58 | diff view generated by jsdifflib |
1 | From: LIU Zhiwei <zhiwei_liu@c-sky.com> | 1 | From: Akihiko Odaki <akihiko.odaki@daynix.com> |
---|---|---|---|
2 | 2 | ||
3 | Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> | 3 | riscv_trigger_init() had been called on reset events that can happen |
4 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 4 | several times for a CPU and it allocated timers for itrigger. If old |
5 | Message-id: 20200623215920.2594-53-zhiwei_liu@c-sky.com | 5 | timers were present, they were simply overwritten by the new timers, |
6 | resulting in a memory leak. | ||
7 | |||
8 | Divide riscv_trigger_init() into two functions, namely | ||
9 | riscv_trigger_realize() and riscv_trigger_reset_hold(), and call them at the | ||
10 | appropriate times. The timer allocation will happen only once for a | ||
11 | CPU in riscv_trigger_realize(). | ||
12 | |||
13 | Fixes: 5a4ae64cac ("target/riscv: Add itrigger support when icount is enabled") | ||
14 | Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com> | ||
15 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
16 | Reviewed-by: LIU Zhiwei <zhiwei_liu@linux.alibaba.com> | ||
17 | Reviewed-by: Alistair Francis <alistair.francis@wdc.com> | ||
18 | Message-ID: <20230818034059.9146-1-akihiko.odaki@daynix.com> | ||
6 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | 19 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> |
7 | --- | 20 | --- |
8 | target/riscv/helper.h | 4 ++ | 21 | target/riscv/debug.h | 3 ++- |
9 | target/riscv/insn32.decode | 3 ++ | 22 | target/riscv/cpu.c | 8 +++++++- |
10 | target/riscv/insn_trans/trans_rvv.inc.c | 28 +++++++++++ | 23 | target/riscv/debug.c | 15 ++++++++++++--- |
11 | target/riscv/vector_helper.c | 63 +++++++++++++++++++++++++ | 24 | 3 files changed, 21 insertions(+), 5 deletions(-) |
12 | 4 files changed, 98 insertions(+) | ||
13 | 25 | ||
14 | diff --git a/target/riscv/helper.h b/target/riscv/helper.h | 26 | diff --git a/target/riscv/debug.h b/target/riscv/debug.h |
15 | index XXXXXXX..XXXXXXX 100644 | 27 | index XXXXXXX..XXXXXXX 100644 |
16 | --- a/target/riscv/helper.h | 28 | --- a/target/riscv/debug.h |
17 | +++ b/target/riscv/helper.h | 29 | +++ b/target/riscv/debug.h |
18 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vmxnor_mm, void, ptr, ptr, ptr, ptr, env, i32) | 30 | @@ -XXX,XX +XXX,XX @@ void riscv_cpu_debug_excp_handler(CPUState *cs); |
19 | DEF_HELPER_4(vmpopc_m, tl, ptr, ptr, env, i32) | 31 | bool riscv_cpu_debug_check_breakpoint(CPUState *cs); |
20 | 32 | bool riscv_cpu_debug_check_watchpoint(CPUState *cs, CPUWatchpoint *wp); | |
21 | DEF_HELPER_4(vmfirst_m, tl, ptr, ptr, env, i32) | 33 | |
34 | -void riscv_trigger_init(CPURISCVState *env); | ||
35 | +void riscv_trigger_realize(CPURISCVState *env); | ||
36 | +void riscv_trigger_reset_hold(CPURISCVState *env); | ||
37 | |||
38 | bool riscv_itrigger_enabled(CPURISCVState *env); | ||
39 | void riscv_itrigger_update_priv(CPURISCVState *env); | ||
40 | diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c | ||
41 | index XXXXXXX..XXXXXXX 100644 | ||
42 | --- a/target/riscv/cpu.c | ||
43 | +++ b/target/riscv/cpu.c | ||
44 | @@ -XXX,XX +XXX,XX @@ static void riscv_cpu_reset_hold(Object *obj) | ||
45 | |||
46 | #ifndef CONFIG_USER_ONLY | ||
47 | if (cpu->cfg.debug) { | ||
48 | - riscv_trigger_init(env); | ||
49 | + riscv_trigger_reset_hold(env); | ||
50 | } | ||
51 | |||
52 | if (kvm_enabled()) { | ||
53 | @@ -XXX,XX +XXX,XX @@ static void riscv_cpu_realize(DeviceState *dev, Error **errp) | ||
54 | |||
55 | riscv_cpu_register_gdb_regs_for_features(cs); | ||
56 | |||
57 | +#ifndef CONFIG_USER_ONLY | ||
58 | + if (cpu->cfg.debug) { | ||
59 | + riscv_trigger_realize(&cpu->env); | ||
60 | + } | ||
61 | +#endif | ||
22 | + | 62 | + |
23 | +DEF_HELPER_5(vmsbf_m, void, ptr, ptr, ptr, env, i32) | 63 | qemu_init_vcpu(cs); |
24 | +DEF_HELPER_5(vmsif_m, void, ptr, ptr, ptr, env, i32) | 64 | cpu_reset(cs); |
25 | +DEF_HELPER_5(vmsof_m, void, ptr, ptr, ptr, env, i32) | 65 | |
26 | diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode | 66 | diff --git a/target/riscv/debug.c b/target/riscv/debug.c |
27 | index XXXXXXX..XXXXXXX 100644 | 67 | index XXXXXXX..XXXXXXX 100644 |
28 | --- a/target/riscv/insn32.decode | 68 | --- a/target/riscv/debug.c |
29 | +++ b/target/riscv/insn32.decode | 69 | +++ b/target/riscv/debug.c |
30 | @@ -XXX,XX +XXX,XX @@ vmornot_mm 011100 - ..... ..... 010 ..... 1010111 @r | 70 | @@ -XXX,XX +XXX,XX @@ bool riscv_cpu_debug_check_watchpoint(CPUState *cs, CPUWatchpoint *wp) |
31 | vmxnor_mm 011111 - ..... ..... 010 ..... 1010111 @r | ||
32 | vmpopc_m 010100 . ..... ----- 010 ..... 1010111 @r2_vm | ||
33 | vmfirst_m 010101 . ..... ----- 010 ..... 1010111 @r2_vm | ||
34 | +vmsbf_m 010110 . ..... 00001 010 ..... 1010111 @r2_vm | ||
35 | +vmsif_m 010110 . ..... 00011 010 ..... 1010111 @r2_vm | ||
36 | +vmsof_m 010110 . ..... 00010 010 ..... 1010111 @r2_vm | ||
37 | |||
38 | vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm | ||
39 | vsetvl 1000000 ..... ..... 111 ..... 1010111 @r | ||
40 | diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c | ||
41 | index XXXXXXX..XXXXXXX 100644 | ||
42 | --- a/target/riscv/insn_trans/trans_rvv.inc.c | ||
43 | +++ b/target/riscv/insn_trans/trans_rvv.inc.c | ||
44 | @@ -XXX,XX +XXX,XX @@ static bool trans_vmfirst_m(DisasContext *s, arg_rmr *a) | ||
45 | } | ||
46 | return false; | 71 | return false; |
47 | } | 72 | } |
73 | |||
74 | -void riscv_trigger_init(CPURISCVState *env) | ||
75 | +void riscv_trigger_realize(CPURISCVState *env) | ||
76 | +{ | ||
77 | + int i; | ||
48 | + | 78 | + |
49 | +/* vmsbf.m set-before-first mask bit */ | 79 | + for (i = 0; i < RV_MAX_TRIGGERS; i++) { |
50 | +/* vmsif.m set-including-first mask bit */ | 80 | + env->itrigger_timer[i] = timer_new_ns(QEMU_CLOCK_VIRTUAL, |
51 | +/* vmsof.m set-only-first mask bit */ | 81 | + riscv_itrigger_timer_cb, env); |
52 | +#define GEN_M_TRANS(NAME) \ | ||
53 | +static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ | ||
54 | +{ \ | ||
55 | + if (vext_check_isa_ill(s)) { \ | ||
56 | + uint32_t data = 0; \ | ||
57 | + gen_helper_gvec_3_ptr *fn = gen_helper_##NAME; \ | ||
58 | + TCGLabel *over = gen_new_label(); \ | ||
59 | + tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ | ||
60 | + \ | ||
61 | + data = FIELD_DP32(data, VDATA, MLEN, s->mlen); \ | ||
62 | + data = FIELD_DP32(data, VDATA, VM, a->vm); \ | ||
63 | + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ | ||
64 | + tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), \ | ||
65 | + vreg_ofs(s, 0), vreg_ofs(s, a->rs2), \ | ||
66 | + cpu_env, 0, s->vlen / 8, data, fn); \ | ||
67 | + gen_set_label(over); \ | ||
68 | + return true; \ | ||
69 | + } \ | ||
70 | + return false; \ | ||
71 | +} | ||
72 | + | ||
73 | +GEN_M_TRANS(vmsbf_m) | ||
74 | +GEN_M_TRANS(vmsif_m) | ||
75 | +GEN_M_TRANS(vmsof_m) | ||
76 | diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c | ||
77 | index XXXXXXX..XXXXXXX 100644 | ||
78 | --- a/target/riscv/vector_helper.c | ||
79 | +++ b/target/riscv/vector_helper.c | ||
80 | @@ -XXX,XX +XXX,XX @@ target_ulong HELPER(vmfirst_m)(void *v0, void *vs2, CPURISCVState *env, | ||
81 | } | ||
82 | return -1LL; | ||
83 | } | ||
84 | + | ||
85 | +enum set_mask_type { | ||
86 | + ONLY_FIRST = 1, | ||
87 | + INCLUDE_FIRST, | ||
88 | + BEFORE_FIRST, | ||
89 | +}; | ||
90 | + | ||
91 | +static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env, | ||
92 | + uint32_t desc, enum set_mask_type type) | ||
93 | +{ | ||
94 | + uint32_t mlen = vext_mlen(desc); | ||
95 | + uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen; | ||
96 | + uint32_t vm = vext_vm(desc); | ||
97 | + uint32_t vl = env->vl; | ||
98 | + int i; | ||
99 | + bool first_mask_bit = false; | ||
100 | + | ||
101 | + for (i = 0; i < vl; i++) { | ||
102 | + if (!vm && !vext_elem_mask(v0, mlen, i)) { | ||
103 | + continue; | ||
104 | + } | ||
105 | + /* write a zero to all following active elements */ | ||
106 | + if (first_mask_bit) { | ||
107 | + vext_set_elem_mask(vd, mlen, i, 0); | ||
108 | + continue; | ||
109 | + } | ||
110 | + if (vext_elem_mask(vs2, mlen, i)) { | ||
111 | + first_mask_bit = true; | ||
112 | + if (type == BEFORE_FIRST) { | ||
113 | + vext_set_elem_mask(vd, mlen, i, 0); | ||
114 | + } else { | ||
115 | + vext_set_elem_mask(vd, mlen, i, 1); | ||
116 | + } | ||
117 | + } else { | ||
118 | + if (type == ONLY_FIRST) { | ||
119 | + vext_set_elem_mask(vd, mlen, i, 0); | ||
120 | + } else { | ||
121 | + vext_set_elem_mask(vd, mlen, i, 1); | ||
122 | + } | ||
123 | + } | ||
124 | + } | ||
125 | + for (; i < vlmax; i++) { | ||
126 | + vext_set_elem_mask(vd, mlen, i, 0); | ||
127 | + } | 82 | + } |
128 | +} | 83 | +} |
129 | + | 84 | + |
130 | +void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, | 85 | +void riscv_trigger_reset_hold(CPURISCVState *env) |
131 | + uint32_t desc) | 86 | { |
132 | +{ | 87 | target_ulong tdata1 = build_tdata1(env, TRIGGER_TYPE_AD_MATCH, 0, 0); |
133 | + vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST); | 88 | int i; |
134 | +} | 89 | @@ -XXX,XX +XXX,XX @@ void riscv_trigger_init(CPURISCVState *env) |
135 | + | 90 | env->tdata3[i] = 0; |
136 | +void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, | 91 | env->cpu_breakpoint[i] = NULL; |
137 | + uint32_t desc) | 92 | env->cpu_watchpoint[i] = NULL; |
138 | +{ | 93 | - env->itrigger_timer[i] = timer_new_ns(QEMU_CLOCK_VIRTUAL, |
139 | + vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST); | 94 | - riscv_itrigger_timer_cb, env); |
140 | +} | 95 | + timer_del(env->itrigger_timer[i]); |
141 | + | 96 | } |
142 | +void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, | 97 | } |
143 | + uint32_t desc) | ||
144 | +{ | ||
145 | + vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST); | ||
146 | +} | ||
147 | -- | 98 | -- |
148 | 2.27.0 | 99 | 2.41.0 |
149 | 100 | ||
150 | 101 | diff view generated by jsdifflib |
1 | From: LIU Zhiwei <zhiwei_liu@c-sky.com> | 1 | From: Leon Schuermann <leons@opentitan.org> |
---|---|---|---|
2 | 2 | ||
3 | Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> | 3 | When the rule-lock bypass (RLB) bit is set in the mseccfg CSR, the PMP |
4 | configuration lock bits must not apply. While this behavior is | ||
5 | implemented for the pmpcfgX CSRs, this bit is not respected for | ||
6 | changes to the pmpaddrX CSRs. This patch ensures that pmpaddrX CSR | ||
7 | writes work even on locked regions when the global rule-lock bypass is | ||
8 | enabled. | ||
9 | |||
10 | Signed-off-by: Leon Schuermann <leons@opentitan.org> | ||
11 | Reviewed-by: Mayuresh Chitale <mchitale@ventanamicro.com> | ||
4 | Reviewed-by: Alistair Francis <alistair.francis@wdc.com> | 12 | Reviewed-by: Alistair Francis <alistair.francis@wdc.com> |
5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 13 | Message-ID: <20230829215046.1430463-1-leon@is.currently.online> |
6 | Message-id: 20200623215920.2594-26-zhiwei_liu@c-sky.com | ||
7 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | 14 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> |
8 | --- | 15 | --- |
9 | target/riscv/helper.h | 17 ++++ | 16 | target/riscv/pmp.c | 4 ++++ |
10 | target/riscv/insn32.decode | 5 ++ | 17 | 1 file changed, 4 insertions(+) |
11 | target/riscv/insn_trans/trans_rvv.inc.c | 7 ++ | ||
12 | target/riscv/vector_helper.c | 100 ++++++++++++++++++++++++ | ||
13 | 4 files changed, 129 insertions(+) | ||
14 | 18 | ||
15 | diff --git a/target/riscv/helper.h b/target/riscv/helper.h | 19 | diff --git a/target/riscv/pmp.c b/target/riscv/pmp.c |
16 | index XXXXXXX..XXXXXXX 100644 | 20 | index XXXXXXX..XXXXXXX 100644 |
17 | --- a/target/riscv/helper.h | 21 | --- a/target/riscv/pmp.c |
18 | +++ b/target/riscv/helper.h | 22 | +++ b/target/riscv/pmp.c |
19 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vssub_vx_b, void, ptr, ptr, tl, ptr, env, i32) | 23 | @@ -XXX,XX +XXX,XX @@ static inline uint8_t pmp_get_a_field(uint8_t cfg) |
20 | DEF_HELPER_6(vssub_vx_h, void, ptr, ptr, tl, ptr, env, i32) | 24 | */ |
21 | DEF_HELPER_6(vssub_vx_w, void, ptr, ptr, tl, ptr, env, i32) | 25 | static inline int pmp_is_locked(CPURISCVState *env, uint32_t pmp_index) |
22 | DEF_HELPER_6(vssub_vx_d, void, ptr, ptr, tl, ptr, env, i32) | 26 | { |
23 | + | 27 | + /* mseccfg.RLB is set */ |
24 | +DEF_HELPER_6(vaadd_vv_b, void, ptr, ptr, ptr, ptr, env, i32) | 28 | + if (MSECCFG_RLB_ISSET(env)) { |
25 | +DEF_HELPER_6(vaadd_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | ||
26 | +DEF_HELPER_6(vaadd_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
27 | +DEF_HELPER_6(vaadd_vv_d, void, ptr, ptr, ptr, ptr, env, i32) | ||
28 | +DEF_HELPER_6(vasub_vv_b, void, ptr, ptr, ptr, ptr, env, i32) | ||
29 | +DEF_HELPER_6(vasub_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | ||
30 | +DEF_HELPER_6(vasub_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
31 | +DEF_HELPER_6(vasub_vv_d, void, ptr, ptr, ptr, ptr, env, i32) | ||
32 | +DEF_HELPER_6(vaadd_vx_b, void, ptr, ptr, tl, ptr, env, i32) | ||
33 | +DEF_HELPER_6(vaadd_vx_h, void, ptr, ptr, tl, ptr, env, i32) | ||
34 | +DEF_HELPER_6(vaadd_vx_w, void, ptr, ptr, tl, ptr, env, i32) | ||
35 | +DEF_HELPER_6(vaadd_vx_d, void, ptr, ptr, tl, ptr, env, i32) | ||
36 | +DEF_HELPER_6(vasub_vx_b, void, ptr, ptr, tl, ptr, env, i32) | ||
37 | +DEF_HELPER_6(vasub_vx_h, void, ptr, ptr, tl, ptr, env, i32) | ||
38 | +DEF_HELPER_6(vasub_vx_w, void, ptr, ptr, tl, ptr, env, i32) | ||
39 | +DEF_HELPER_6(vasub_vx_d, void, ptr, ptr, tl, ptr, env, i32) | ||
40 | diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode | ||
41 | index XXXXXXX..XXXXXXX 100644 | ||
42 | --- a/target/riscv/insn32.decode | ||
43 | +++ b/target/riscv/insn32.decode | ||
44 | @@ -XXX,XX +XXX,XX @@ vssubu_vv 100010 . ..... ..... 000 ..... 1010111 @r_vm | ||
45 | vssubu_vx 100010 . ..... ..... 100 ..... 1010111 @r_vm | ||
46 | vssub_vv 100011 . ..... ..... 000 ..... 1010111 @r_vm | ||
47 | vssub_vx 100011 . ..... ..... 100 ..... 1010111 @r_vm | ||
48 | +vaadd_vv 100100 . ..... ..... 000 ..... 1010111 @r_vm | ||
49 | +vaadd_vx 100100 . ..... ..... 100 ..... 1010111 @r_vm | ||
50 | +vaadd_vi 100100 . ..... ..... 011 ..... 1010111 @r_vm | ||
51 | +vasub_vv 100110 . ..... ..... 000 ..... 1010111 @r_vm | ||
52 | +vasub_vx 100110 . ..... ..... 100 ..... 1010111 @r_vm | ||
53 | |||
54 | vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm | ||
55 | vsetvl 1000000 ..... ..... 111 ..... 1010111 @r | ||
56 | diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c | ||
57 | index XXXXXXX..XXXXXXX 100644 | ||
58 | --- a/target/riscv/insn_trans/trans_rvv.inc.c | ||
59 | +++ b/target/riscv/insn_trans/trans_rvv.inc.c | ||
60 | @@ -XXX,XX +XXX,XX @@ GEN_OPIVX_TRANS(vssubu_vx, opivx_check) | ||
61 | GEN_OPIVX_TRANS(vssub_vx, opivx_check) | ||
62 | GEN_OPIVI_TRANS(vsaddu_vi, 1, vsaddu_vx, opivx_check) | ||
63 | GEN_OPIVI_TRANS(vsadd_vi, 0, vsadd_vx, opivx_check) | ||
64 | + | ||
65 | +/* Vector Single-Width Averaging Add and Subtract */ | ||
66 | +GEN_OPIVV_TRANS(vaadd_vv, opivv_check) | ||
67 | +GEN_OPIVV_TRANS(vasub_vv, opivv_check) | ||
68 | +GEN_OPIVX_TRANS(vaadd_vx, opivx_check) | ||
69 | +GEN_OPIVX_TRANS(vasub_vx, opivx_check) | ||
70 | +GEN_OPIVI_TRANS(vaadd_vi, 0, vaadd_vx, opivx_check) | ||
71 | diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c | ||
72 | index XXXXXXX..XXXXXXX 100644 | ||
73 | --- a/target/riscv/vector_helper.c | ||
74 | +++ b/target/riscv/vector_helper.c | ||
75 | @@ -XXX,XX +XXX,XX @@ GEN_VEXT_VX_RM(vssub_vx_b, 1, 1, clearb) | ||
76 | GEN_VEXT_VX_RM(vssub_vx_h, 2, 2, clearh) | ||
77 | GEN_VEXT_VX_RM(vssub_vx_w, 4, 4, clearl) | ||
78 | GEN_VEXT_VX_RM(vssub_vx_d, 8, 8, clearq) | ||
79 | + | ||
80 | +/* Vector Single-Width Averaging Add and Subtract */ | ||
81 | +static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift) | ||
82 | +{ | ||
83 | + uint8_t d = extract64(v, shift, 1); | ||
84 | + uint8_t d1; | ||
85 | + uint64_t D1, D2; | ||
86 | + | ||
87 | + if (shift == 0 || shift > 64) { | ||
88 | + return 0; | 29 | + return 0; |
89 | + } | 30 | + } |
90 | + | 31 | |
91 | + d1 = extract64(v, shift - 1, 1); | 32 | if (env->pmp_state.pmp[pmp_index].cfg_reg & PMP_LOCK) { |
92 | + D1 = extract64(v, 0, shift); | 33 | return 1; |
93 | + if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */ | ||
94 | + return d1; | ||
95 | + } else if (vxrm == 1) { /* round-to-nearest-even */ | ||
96 | + if (shift > 1) { | ||
97 | + D2 = extract64(v, 0, shift - 1); | ||
98 | + return d1 & ((D2 != 0) | d); | ||
99 | + } else { | ||
100 | + return d1 & d; | ||
101 | + } | ||
102 | + } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */ | ||
103 | + return !d & (D1 != 0); | ||
104 | + } | ||
105 | + return 0; /* round-down (truncate) */ | ||
106 | +} | ||
107 | + | ||
108 | +static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) | ||
109 | +{ | ||
110 | + int64_t res = (int64_t)a + b; | ||
111 | + uint8_t round = get_round(vxrm, res, 1); | ||
112 | + | ||
113 | + return (res >> 1) + round; | ||
114 | +} | ||
115 | + | ||
116 | +static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) | ||
117 | +{ | ||
118 | + int64_t res = a + b; | ||
119 | + uint8_t round = get_round(vxrm, res, 1); | ||
120 | + int64_t over = (res ^ a) & (res ^ b) & INT64_MIN; | ||
121 | + | ||
122 | + /* With signed overflow, bit 64 is inverse of bit 63. */ | ||
123 | + return ((res >> 1) ^ over) + round; | ||
124 | +} | ||
125 | + | ||
126 | +RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32) | ||
127 | +RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32) | ||
128 | +RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32) | ||
129 | +RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64) | ||
130 | +GEN_VEXT_VV_RM(vaadd_vv_b, 1, 1, clearb) | ||
131 | +GEN_VEXT_VV_RM(vaadd_vv_h, 2, 2, clearh) | ||
132 | +GEN_VEXT_VV_RM(vaadd_vv_w, 4, 4, clearl) | ||
133 | +GEN_VEXT_VV_RM(vaadd_vv_d, 8, 8, clearq) | ||
134 | + | ||
135 | +RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32) | ||
136 | +RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32) | ||
137 | +RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32) | ||
138 | +RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64) | ||
139 | +GEN_VEXT_VX_RM(vaadd_vx_b, 1, 1, clearb) | ||
140 | +GEN_VEXT_VX_RM(vaadd_vx_h, 2, 2, clearh) | ||
141 | +GEN_VEXT_VX_RM(vaadd_vx_w, 4, 4, clearl) | ||
142 | +GEN_VEXT_VX_RM(vaadd_vx_d, 8, 8, clearq) | ||
143 | + | ||
144 | +static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) | ||
145 | +{ | ||
146 | + int64_t res = (int64_t)a - b; | ||
147 | + uint8_t round = get_round(vxrm, res, 1); | ||
148 | + | ||
149 | + return (res >> 1) + round; | ||
150 | +} | ||
151 | + | ||
152 | +static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) | ||
153 | +{ | ||
154 | + int64_t res = (int64_t)a - b; | ||
155 | + uint8_t round = get_round(vxrm, res, 1); | ||
156 | + int64_t over = (res ^ a) & (a ^ b) & INT64_MIN; | ||
157 | + | ||
158 | + /* With signed overflow, bit 64 is inverse of bit 63. */ | ||
159 | + return ((res >> 1) ^ over) + round; | ||
160 | +} | ||
161 | + | ||
162 | +RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32) | ||
163 | +RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32) | ||
164 | +RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32) | ||
165 | +RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64) | ||
166 | +GEN_VEXT_VV_RM(vasub_vv_b, 1, 1, clearb) | ||
167 | +GEN_VEXT_VV_RM(vasub_vv_h, 2, 2, clearh) | ||
168 | +GEN_VEXT_VV_RM(vasub_vv_w, 4, 4, clearl) | ||
169 | +GEN_VEXT_VV_RM(vasub_vv_d, 8, 8, clearq) | ||
170 | + | ||
171 | +RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32) | ||
172 | +RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32) | ||
173 | +RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32) | ||
174 | +RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64) | ||
175 | +GEN_VEXT_VX_RM(vasub_vx_b, 1, 1, clearb) | ||
176 | +GEN_VEXT_VX_RM(vasub_vx_h, 2, 2, clearh) | ||
177 | +GEN_VEXT_VX_RM(vasub_vx_w, 4, 4, clearl) | ||
178 | +GEN_VEXT_VX_RM(vasub_vx_d, 8, 8, clearq) | ||
179 | -- | 34 | -- |
180 | 2.27.0 | 35 | 2.41.0 |
181 | |||
182 | diff view generated by jsdifflib |
1 | From: LIU Zhiwei <zhiwei_liu@c-sky.com> | 1 | From: Tommy Wu <tommy.wu@sifive.com> |
---|---|---|---|
2 | 2 | ||
3 | Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> | 3 | According to the new spec, when vsiselect has a reserved value, attempts |
4 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 4 | from M-mode or HS-mode to access vsireg, or from VS-mode to access |
5 | Reviewed-by: Alistair Francis <alistair.francis@wdc.com> | 5 | sireg, should preferably raise an illegal instruction exception. |
6 | Message-id: 20200623215920.2594-36-zhiwei_liu@c-sky.com | 6 | |
7 | Signed-off-by: Tommy Wu <tommy.wu@sifive.com> | ||
8 | Reviewed-by: Frank Chang <frank.chang@sifive.com> | ||
9 | Message-ID: <20230816061647.600672-1-tommy.wu@sifive.com> | ||
7 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | 10 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> |
8 | --- | 11 | --- |
9 | target/riscv/helper.h | 17 +++++ | 12 | target/riscv/csr.c | 7 +++++-- |
10 | target/riscv/insn32.decode | 8 +++ | 13 | 1 file changed, 5 insertions(+), 2 deletions(-) |
11 | target/riscv/insn_trans/trans_rvv.inc.c | 10 +++ | ||
12 | target/riscv/vector_helper.c | 91 +++++++++++++++++++++++++ | ||
13 | 4 files changed, 126 insertions(+) | ||
14 | 14 | ||
15 | diff --git a/target/riscv/helper.h b/target/riscv/helper.h | 15 | diff --git a/target/riscv/csr.c b/target/riscv/csr.c |
16 | index XXXXXXX..XXXXXXX 100644 | 16 | index XXXXXXX..XXXXXXX 100644 |
17 | --- a/target/riscv/helper.h | 17 | --- a/target/riscv/csr.c |
18 | +++ b/target/riscv/helper.h | 18 | +++ b/target/riscv/csr.c |
19 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vfmsub_vf_d, void, ptr, ptr, i64, ptr, env, i32) | 19 | @@ -XXX,XX +XXX,XX @@ static int rmw_iprio(target_ulong xlen, |
20 | DEF_HELPER_6(vfnmsub_vf_h, void, ptr, ptr, i64, ptr, env, i32) | 20 | static int rmw_xireg(CPURISCVState *env, int csrno, target_ulong *val, |
21 | DEF_HELPER_6(vfnmsub_vf_w, void, ptr, ptr, i64, ptr, env, i32) | 21 | target_ulong new_val, target_ulong wr_mask) |
22 | DEF_HELPER_6(vfnmsub_vf_d, void, ptr, ptr, i64, ptr, env, i32) | 22 | { |
23 | + | 23 | - bool virt; |
24 | +DEF_HELPER_6(vfwmacc_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | 24 | + bool virt, isel_reserved; |
25 | +DEF_HELPER_6(vfwmacc_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | 25 | uint8_t *iprio; |
26 | +DEF_HELPER_6(vfwnmacc_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | 26 | int ret = -EINVAL; |
27 | +DEF_HELPER_6(vfwnmacc_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | 27 | target_ulong priv, isel, vgein; |
28 | +DEF_HELPER_6(vfwmsac_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | 28 | @@ -XXX,XX +XXX,XX @@ static int rmw_xireg(CPURISCVState *env, int csrno, target_ulong *val, |
29 | +DEF_HELPER_6(vfwmsac_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | 29 | |
30 | +DEF_HELPER_6(vfwnmsac_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | 30 | /* Decode register details from CSR number */ |
31 | +DEF_HELPER_6(vfwnmsac_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | 31 | virt = false; |
32 | +DEF_HELPER_6(vfwmacc_vf_h, void, ptr, ptr, i64, ptr, env, i32) | 32 | + isel_reserved = false; |
33 | +DEF_HELPER_6(vfwmacc_vf_w, void, ptr, ptr, i64, ptr, env, i32) | 33 | switch (csrno) { |
34 | +DEF_HELPER_6(vfwnmacc_vf_h, void, ptr, ptr, i64, ptr, env, i32) | 34 | case CSR_MIREG: |
35 | +DEF_HELPER_6(vfwnmacc_vf_w, void, ptr, ptr, i64, ptr, env, i32) | 35 | iprio = env->miprio; |
36 | +DEF_HELPER_6(vfwmsac_vf_h, void, ptr, ptr, i64, ptr, env, i32) | 36 | @@ -XXX,XX +XXX,XX @@ static int rmw_xireg(CPURISCVState *env, int csrno, target_ulong *val, |
37 | +DEF_HELPER_6(vfwmsac_vf_w, void, ptr, ptr, i64, ptr, env, i32) | 37 | riscv_cpu_mxl_bits(env)), |
38 | +DEF_HELPER_6(vfwnmsac_vf_h, void, ptr, ptr, i64, ptr, env, i32) | 38 | val, new_val, wr_mask); |
39 | +DEF_HELPER_6(vfwnmsac_vf_w, void, ptr, ptr, i64, ptr, env, i32) | 39 | } |
40 | diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode | 40 | + } else { |
41 | index XXXXXXX..XXXXXXX 100644 | 41 | + isel_reserved = true; |
42 | --- a/target/riscv/insn32.decode | 42 | } |
43 | +++ b/target/riscv/insn32.decode | 43 | |
44 | @@ -XXX,XX +XXX,XX @@ vfmsub_vv 101010 . ..... ..... 001 ..... 1010111 @r_vm | 44 | done: |
45 | vfmsub_vf 101010 . ..... ..... 101 ..... 1010111 @r_vm | 45 | if (ret) { |
46 | vfnmsub_vv 101011 . ..... ..... 001 ..... 1010111 @r_vm | 46 | - return (env->virt_enabled && virt) ? |
47 | vfnmsub_vf 101011 . ..... ..... 101 ..... 1010111 @r_vm | 47 | + return (env->virt_enabled && virt && !isel_reserved) ? |
48 | +vfwmacc_vv 111100 . ..... ..... 001 ..... 1010111 @r_vm | 48 | RISCV_EXCP_VIRT_INSTRUCTION_FAULT : RISCV_EXCP_ILLEGAL_INST; |
49 | +vfwmacc_vf 111100 . ..... ..... 101 ..... 1010111 @r_vm | 49 | } |
50 | +vfwnmacc_vv 111101 . ..... ..... 001 ..... 1010111 @r_vm | 50 | return RISCV_EXCP_NONE; |
51 | +vfwnmacc_vf 111101 . ..... ..... 101 ..... 1010111 @r_vm | ||
52 | +vfwmsac_vv 111110 . ..... ..... 001 ..... 1010111 @r_vm | ||
53 | +vfwmsac_vf 111110 . ..... ..... 101 ..... 1010111 @r_vm | ||
54 | +vfwnmsac_vv 111111 . ..... ..... 001 ..... 1010111 @r_vm | ||
55 | +vfwnmsac_vf 111111 . ..... ..... 101 ..... 1010111 @r_vm | ||
56 | |||
57 | vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm | ||
58 | vsetvl 1000000 ..... ..... 111 ..... 1010111 @r | ||
59 | diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c | ||
60 | index XXXXXXX..XXXXXXX 100644 | ||
61 | --- a/target/riscv/insn_trans/trans_rvv.inc.c | ||
62 | +++ b/target/riscv/insn_trans/trans_rvv.inc.c | ||
63 | @@ -XXX,XX +XXX,XX @@ GEN_OPFVF_TRANS(vfmadd_vf, opfvf_check) | ||
64 | GEN_OPFVF_TRANS(vfnmadd_vf, opfvf_check) | ||
65 | GEN_OPFVF_TRANS(vfmsub_vf, opfvf_check) | ||
66 | GEN_OPFVF_TRANS(vfnmsub_vf, opfvf_check) | ||
67 | + | ||
68 | +/* Vector Widening Floating-Point Fused Multiply-Add Instructions */ | ||
69 | +GEN_OPFVV_WIDEN_TRANS(vfwmacc_vv, opfvv_widen_check) | ||
70 | +GEN_OPFVV_WIDEN_TRANS(vfwnmacc_vv, opfvv_widen_check) | ||
71 | +GEN_OPFVV_WIDEN_TRANS(vfwmsac_vv, opfvv_widen_check) | ||
72 | +GEN_OPFVV_WIDEN_TRANS(vfwnmsac_vv, opfvv_widen_check) | ||
73 | +GEN_OPFVF_WIDEN_TRANS(vfwmacc_vf) | ||
74 | +GEN_OPFVF_WIDEN_TRANS(vfwnmacc_vf) | ||
75 | +GEN_OPFVF_WIDEN_TRANS(vfwmsac_vf) | ||
76 | +GEN_OPFVF_WIDEN_TRANS(vfwnmsac_vf) | ||
77 | diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c | ||
78 | index XXXXXXX..XXXXXXX 100644 | ||
79 | --- a/target/riscv/vector_helper.c | ||
80 | +++ b/target/riscv/vector_helper.c | ||
81 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64) | ||
82 | GEN_VEXT_VF(vfnmsub_vf_h, 2, 2, clearh) | ||
83 | GEN_VEXT_VF(vfnmsub_vf_w, 4, 4, clearl) | ||
84 | GEN_VEXT_VF(vfnmsub_vf_d, 8, 8, clearq) | ||
85 | + | ||
86 | +/* Vector Widening Floating-Point Fused Multiply-Add Instructions */ | ||
87 | +static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) | ||
88 | +{ | ||
89 | + return float32_muladd(float16_to_float32(a, true, s), | ||
90 | + float16_to_float32(b, true, s), d, 0, s); | ||
91 | +} | ||
92 | + | ||
93 | +static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) | ||
94 | +{ | ||
95 | + return float64_muladd(float32_to_float64(a, s), | ||
96 | + float32_to_float64(b, s), d, 0, s); | ||
97 | +} | ||
98 | + | ||
99 | +RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16) | ||
100 | +RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32) | ||
101 | +GEN_VEXT_VV_ENV(vfwmacc_vv_h, 2, 4, clearl) | ||
102 | +GEN_VEXT_VV_ENV(vfwmacc_vv_w, 4, 8, clearq) | ||
103 | +RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16) | ||
104 | +RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32) | ||
105 | +GEN_VEXT_VF(vfwmacc_vf_h, 2, 4, clearl) | ||
106 | +GEN_VEXT_VF(vfwmacc_vf_w, 4, 8, clearq) | ||
107 | + | ||
108 | +static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) | ||
109 | +{ | ||
110 | + return float32_muladd(float16_to_float32(a, true, s), | ||
111 | + float16_to_float32(b, true, s), d, | ||
112 | + float_muladd_negate_c | float_muladd_negate_product, s); | ||
113 | +} | ||
114 | + | ||
115 | +static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) | ||
116 | +{ | ||
117 | + return float64_muladd(float32_to_float64(a, s), | ||
118 | + float32_to_float64(b, s), d, | ||
119 | + float_muladd_negate_c | float_muladd_negate_product, s); | ||
120 | +} | ||
121 | + | ||
122 | +RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16) | ||
123 | +RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32) | ||
124 | +GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 2, 4, clearl) | ||
125 | +GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 4, 8, clearq) | ||
126 | +RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16) | ||
127 | +RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32) | ||
128 | +GEN_VEXT_VF(vfwnmacc_vf_h, 2, 4, clearl) | ||
129 | +GEN_VEXT_VF(vfwnmacc_vf_w, 4, 8, clearq) | ||
130 | + | ||
131 | +static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) | ||
132 | +{ | ||
133 | + return float32_muladd(float16_to_float32(a, true, s), | ||
134 | + float16_to_float32(b, true, s), d, | ||
135 | + float_muladd_negate_c, s); | ||
136 | +} | ||
137 | + | ||
138 | +static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) | ||
139 | +{ | ||
140 | + return float64_muladd(float32_to_float64(a, s), | ||
141 | + float32_to_float64(b, s), d, | ||
142 | + float_muladd_negate_c, s); | ||
143 | +} | ||
144 | + | ||
145 | +RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16) | ||
146 | +RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32) | ||
147 | +GEN_VEXT_VV_ENV(vfwmsac_vv_h, 2, 4, clearl) | ||
148 | +GEN_VEXT_VV_ENV(vfwmsac_vv_w, 4, 8, clearq) | ||
149 | +RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16) | ||
150 | +RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32) | ||
151 | +GEN_VEXT_VF(vfwmsac_vf_h, 2, 4, clearl) | ||
152 | +GEN_VEXT_VF(vfwmsac_vf_w, 4, 8, clearq) | ||
153 | + | ||
154 | +static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) | ||
155 | +{ | ||
156 | + return float32_muladd(float16_to_float32(a, true, s), | ||
157 | + float16_to_float32(b, true, s), d, | ||
158 | + float_muladd_negate_product, s); | ||
159 | +} | ||
160 | + | ||
161 | +static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) | ||
162 | +{ | ||
163 | + return float64_muladd(float32_to_float64(a, s), | ||
164 | + float32_to_float64(b, s), d, | ||
165 | + float_muladd_negate_product, s); | ||
166 | +} | ||
167 | + | ||
168 | +RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16) | ||
169 | +RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32) | ||
170 | +GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 2, 4, clearl) | ||
171 | +GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 4, 8, clearq) | ||
172 | +RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16) | ||
173 | +RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32) | ||
174 | +GEN_VEXT_VF(vfwnmsac_vf_h, 2, 4, clearl) | ||
175 | +GEN_VEXT_VF(vfwnmsac_vf_w, 4, 8, clearq) | ||
176 | -- | 51 | -- |
177 | 2.27.0 | 52 | 2.41.0 |
178 | |||
179 | diff view generated by jsdifflib |
1 | From: LIU Zhiwei <zhiwei_liu@c-sky.com> | 1 | From: Nikita Shubin <n.shubin@yadro.com> |
---|---|---|---|
2 | 2 | ||
3 | The 32 vector registers will be viewed as a continuous memory block. | 3 | As per ISA: |
4 | It avoids the conversion between element index and (regno, offset). | ||
5 | Thus elements can be directly accessed by offset from the first vector | ||
6 | base address. | ||
7 | 4 | ||
8 | Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> | 5 | "For CSRRWI, if rd=x0, then the instruction shall not read the CSR and |
9 | Acked-by: Alistair Francis <alistair.francis@wdc.com> | 6 | shall not cause any of the side effects that might occur on a CSR read." |
10 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 7 | |
11 | Message-id: 20200623215920.2594-2-zhiwei_liu@c-sky.com | 8 | trans_csrrwi() and trans_csrrw() call do_csrw() if rd=x0, do_csrw() calls |
9 | riscv_csrrw_do64(), via helper_csrw() passing NULL as *ret_value. | ||
10 | |||
11 | Signed-off-by: Nikita Shubin <n.shubin@yadro.com> | ||
12 | Reviewed-by: Alistair Francis <alistair.francis@wdc.com> | ||
13 | Message-ID: <20230808090914.17634-1-nikita.shubin@maquefel.me> | ||
12 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | 14 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> |
13 | --- | 15 | --- |
14 | target/riscv/cpu.h | 12 ++++++++++++ | 16 | target/riscv/csr.c | 24 +++++++++++++++--------- |
15 | target/riscv/translate.c | 3 ++- | 17 | 1 file changed, 15 insertions(+), 9 deletions(-) |
16 | 2 files changed, 14 insertions(+), 1 deletion(-) | ||
17 | 18 | ||
18 | diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h | 19 | diff --git a/target/riscv/csr.c b/target/riscv/csr.c |
19 | index XXXXXXX..XXXXXXX 100644 | 20 | index XXXXXXX..XXXXXXX 100644 |
20 | --- a/target/riscv/cpu.h | 21 | --- a/target/riscv/csr.c |
21 | +++ b/target/riscv/cpu.h | 22 | +++ b/target/riscv/csr.c |
22 | @@ -XXX,XX +XXX,XX @@ | 23 | @@ -XXX,XX +XXX,XX @@ static RISCVException riscv_csrrw_do64(CPURISCVState *env, int csrno, |
23 | #define RVA RV('A') | 24 | target_ulong write_mask) |
24 | #define RVF RV('F') | 25 | { |
25 | #define RVD RV('D') | 26 | RISCVException ret; |
26 | +#define RVV RV('V') | 27 | - target_ulong old_value; |
27 | #define RVC RV('C') | 28 | + target_ulong old_value = 0; |
28 | #define RVS RV('S') | 29 | |
29 | #define RVU RV('U') | 30 | /* execute combined read/write operation if it exists */ |
30 | @@ -XXX,XX +XXX,XX @@ typedef struct CPURISCVState CPURISCVState; | 31 | if (csr_ops[csrno].op) { |
31 | 32 | return csr_ops[csrno].op(env, csrno, ret_value, new_value, write_mask); | |
32 | #include "pmp.h" | ||
33 | |||
34 | +#define RV_VLEN_MAX 512 | ||
35 | + | ||
36 | struct CPURISCVState { | ||
37 | target_ulong gpr[32]; | ||
38 | uint64_t fpr[32]; /* assume both F and D extensions */ | ||
39 | + | ||
40 | + /* vector coprocessor state. */ | ||
41 | + uint64_t vreg[32 * RV_VLEN_MAX / 64] QEMU_ALIGNED(16); | ||
42 | + target_ulong vxrm; | ||
43 | + target_ulong vxsat; | ||
44 | + target_ulong vl; | ||
45 | + target_ulong vstart; | ||
46 | + target_ulong vtype; | ||
47 | + | ||
48 | target_ulong pc; | ||
49 | target_ulong load_res; | ||
50 | target_ulong load_val; | ||
51 | diff --git a/target/riscv/translate.c b/target/riscv/translate.c | ||
52 | index XXXXXXX..XXXXXXX 100644 | ||
53 | --- a/target/riscv/translate.c | ||
54 | +++ b/target/riscv/translate.c | ||
55 | @@ -XXX,XX +XXX,XX @@ | ||
56 | #include "instmap.h" | ||
57 | |||
58 | /* global register indices */ | ||
59 | -static TCGv cpu_gpr[32], cpu_pc; | ||
60 | +static TCGv cpu_gpr[32], cpu_pc, cpu_vl; | ||
61 | static TCGv_i64 cpu_fpr[32]; /* assume F and D extensions */ | ||
62 | static TCGv load_res; | ||
63 | static TCGv load_val; | ||
64 | @@ -XXX,XX +XXX,XX @@ void riscv_translate_init(void) | ||
65 | } | 33 | } |
66 | 34 | ||
67 | cpu_pc = tcg_global_mem_new(cpu_env, offsetof(CPURISCVState, pc), "pc"); | 35 | - /* if no accessor exists then return failure */ |
68 | + cpu_vl = tcg_global_mem_new(cpu_env, offsetof(CPURISCVState, vl), "vl"); | 36 | - if (!csr_ops[csrno].read) { |
69 | load_res = tcg_global_mem_new(cpu_env, offsetof(CPURISCVState, load_res), | 37 | - return RISCV_EXCP_ILLEGAL_INST; |
70 | "load_res"); | 38 | - } |
71 | load_val = tcg_global_mem_new(cpu_env, offsetof(CPURISCVState, load_val), | 39 | - /* read old value */ |
40 | - ret = csr_ops[csrno].read(env, csrno, &old_value); | ||
41 | - if (ret != RISCV_EXCP_NONE) { | ||
42 | - return ret; | ||
43 | + /* | ||
44 | + * ret_value == NULL means that rd=x0 and we're coming from helper_csrw() | ||
45 | + * and we can't throw side effects caused by CSR reads. | ||
46 | + */ | ||
47 | + if (ret_value) { | ||
48 | + /* if no accessor exists then return failure */ | ||
49 | + if (!csr_ops[csrno].read) { | ||
50 | + return RISCV_EXCP_ILLEGAL_INST; | ||
51 | + } | ||
52 | + /* read old value */ | ||
53 | + ret = csr_ops[csrno].read(env, csrno, &old_value); | ||
54 | + if (ret != RISCV_EXCP_NONE) { | ||
55 | + return ret; | ||
56 | + } | ||
57 | } | ||
58 | |||
59 | /* write value if writable and write mask set, otherwise drop writes */ | ||
72 | -- | 60 | -- |
73 | 2.27.0 | 61 | 2.41.0 |
74 | |||
75 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: LIU Zhiwei <zhiwei_liu@c-sky.com> | ||
2 | 1 | ||
3 | The internals.h keeps things that are not relevant to the actual architecture, | ||
4 | only to the implementation, separate. | ||
5 | |||
6 | Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> | ||
7 | Reviewed-by: Alistair Francis <alistair.francis@wdc.com> | ||
8 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
9 | Message-id: 20200623215920.2594-6-zhiwei_liu@c-sky.com | ||
10 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | ||
11 | --- | ||
12 | target/riscv/internals.h | 24 ++++++++++++++++++++++++ | ||
13 | 1 file changed, 24 insertions(+) | ||
14 | create mode 100644 target/riscv/internals.h | ||
15 | |||
16 | diff --git a/target/riscv/internals.h b/target/riscv/internals.h | ||
17 | new file mode 100644 | ||
18 | index XXXXXXX..XXXXXXX | ||
19 | --- /dev/null | ||
20 | +++ b/target/riscv/internals.h | ||
21 | @@ -XXX,XX +XXX,XX @@ | ||
22 | +/* | ||
23 | + * QEMU RISC-V CPU -- internal functions and types | ||
24 | + * | ||
25 | + * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved. | ||
26 | + * | ||
27 | + * This program is free software; you can redistribute it and/or modify it | ||
28 | + * under the terms and conditions of the GNU General Public License, | ||
29 | + * version 2 or later, as published by the Free Software Foundation. | ||
30 | + * | ||
31 | + * This program is distributed in the hope it will be useful, but WITHOUT | ||
32 | + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
33 | + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
34 | + * more details. | ||
35 | + * | ||
36 | + * You should have received a copy of the GNU General Public License along with | ||
37 | + * this program. If not, see <http://www.gnu.org/licenses/>. | ||
38 | + */ | ||
39 | + | ||
40 | +#ifndef RISCV_CPU_INTERNALS_H | ||
41 | +#define RISCV_CPU_INTERNALS_H | ||
42 | + | ||
43 | +#include "hw/registerfields.h" | ||
44 | + | ||
45 | +#endif | ||
46 | -- | ||
47 | 2.27.0 | ||
48 | |||
49 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: LIU Zhiwei <zhiwei_liu@c-sky.com> | ||
2 | 1 | ||
3 | Vector indexed operations add the contents of each element of the | ||
4 | vector offset operand specified by vs2 to the base effective address | ||
5 | to give the effective address of each element. | ||
6 | |||
7 | Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> | ||
8 | Reviewed-by: Alistair Francis <alistair.francis@wdc.com> | ||
9 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
10 | Message-id: 20200623215920.2594-8-zhiwei_liu@c-sky.com | ||
11 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | ||
12 | --- | ||
13 | target/riscv/helper.h | 35 +++++++ | ||
14 | target/riscv/insn32.decode | 13 +++ | ||
15 | target/riscv/insn_trans/trans_rvv.inc.c | 129 ++++++++++++++++++++++++ | ||
16 | target/riscv/vector_helper.c | 116 +++++++++++++++++++++ | ||
17 | 4 files changed, 293 insertions(+) | ||
18 | |||
19 | diff --git a/target/riscv/helper.h b/target/riscv/helper.h | ||
20 | index XXXXXXX..XXXXXXX 100644 | ||
21 | --- a/target/riscv/helper.h | ||
22 | +++ b/target/riscv/helper.h | ||
23 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vsse_v_b, void, ptr, ptr, tl, tl, env, i32) | ||
24 | DEF_HELPER_6(vsse_v_h, void, ptr, ptr, tl, tl, env, i32) | ||
25 | DEF_HELPER_6(vsse_v_w, void, ptr, ptr, tl, tl, env, i32) | ||
26 | DEF_HELPER_6(vsse_v_d, void, ptr, ptr, tl, tl, env, i32) | ||
27 | +DEF_HELPER_6(vlxb_v_b, void, ptr, ptr, tl, ptr, env, i32) | ||
28 | +DEF_HELPER_6(vlxb_v_h, void, ptr, ptr, tl, ptr, env, i32) | ||
29 | +DEF_HELPER_6(vlxb_v_w, void, ptr, ptr, tl, ptr, env, i32) | ||
30 | +DEF_HELPER_6(vlxb_v_d, void, ptr, ptr, tl, ptr, env, i32) | ||
31 | +DEF_HELPER_6(vlxh_v_h, void, ptr, ptr, tl, ptr, env, i32) | ||
32 | +DEF_HELPER_6(vlxh_v_w, void, ptr, ptr, tl, ptr, env, i32) | ||
33 | +DEF_HELPER_6(vlxh_v_d, void, ptr, ptr, tl, ptr, env, i32) | ||
34 | +DEF_HELPER_6(vlxw_v_w, void, ptr, ptr, tl, ptr, env, i32) | ||
35 | +DEF_HELPER_6(vlxw_v_d, void, ptr, ptr, tl, ptr, env, i32) | ||
36 | +DEF_HELPER_6(vlxe_v_b, void, ptr, ptr, tl, ptr, env, i32) | ||
37 | +DEF_HELPER_6(vlxe_v_h, void, ptr, ptr, tl, ptr, env, i32) | ||
38 | +DEF_HELPER_6(vlxe_v_w, void, ptr, ptr, tl, ptr, env, i32) | ||
39 | +DEF_HELPER_6(vlxe_v_d, void, ptr, ptr, tl, ptr, env, i32) | ||
40 | +DEF_HELPER_6(vlxbu_v_b, void, ptr, ptr, tl, ptr, env, i32) | ||
41 | +DEF_HELPER_6(vlxbu_v_h, void, ptr, ptr, tl, ptr, env, i32) | ||
42 | +DEF_HELPER_6(vlxbu_v_w, void, ptr, ptr, tl, ptr, env, i32) | ||
43 | +DEF_HELPER_6(vlxbu_v_d, void, ptr, ptr, tl, ptr, env, i32) | ||
44 | +DEF_HELPER_6(vlxhu_v_h, void, ptr, ptr, tl, ptr, env, i32) | ||
45 | +DEF_HELPER_6(vlxhu_v_w, void, ptr, ptr, tl, ptr, env, i32) | ||
46 | +DEF_HELPER_6(vlxhu_v_d, void, ptr, ptr, tl, ptr, env, i32) | ||
47 | +DEF_HELPER_6(vlxwu_v_w, void, ptr, ptr, tl, ptr, env, i32) | ||
48 | +DEF_HELPER_6(vlxwu_v_d, void, ptr, ptr, tl, ptr, env, i32) | ||
49 | +DEF_HELPER_6(vsxb_v_b, void, ptr, ptr, tl, ptr, env, i32) | ||
50 | +DEF_HELPER_6(vsxb_v_h, void, ptr, ptr, tl, ptr, env, i32) | ||
51 | +DEF_HELPER_6(vsxb_v_w, void, ptr, ptr, tl, ptr, env, i32) | ||
52 | +DEF_HELPER_6(vsxb_v_d, void, ptr, ptr, tl, ptr, env, i32) | ||
53 | +DEF_HELPER_6(vsxh_v_h, void, ptr, ptr, tl, ptr, env, i32) | ||
54 | +DEF_HELPER_6(vsxh_v_w, void, ptr, ptr, tl, ptr, env, i32) | ||
55 | +DEF_HELPER_6(vsxh_v_d, void, ptr, ptr, tl, ptr, env, i32) | ||
56 | +DEF_HELPER_6(vsxw_v_w, void, ptr, ptr, tl, ptr, env, i32) | ||
57 | +DEF_HELPER_6(vsxw_v_d, void, ptr, ptr, tl, ptr, env, i32) | ||
58 | +DEF_HELPER_6(vsxe_v_b, void, ptr, ptr, tl, ptr, env, i32) | ||
59 | +DEF_HELPER_6(vsxe_v_h, void, ptr, ptr, tl, ptr, env, i32) | ||
60 | +DEF_HELPER_6(vsxe_v_w, void, ptr, ptr, tl, ptr, env, i32) | ||
61 | +DEF_HELPER_6(vsxe_v_d, void, ptr, ptr, tl, ptr, env, i32) | ||
62 | diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode | ||
63 | index XXXXXXX..XXXXXXX 100644 | ||
64 | --- a/target/riscv/insn32.decode | ||
65 | +++ b/target/riscv/insn32.decode | ||
66 | @@ -XXX,XX +XXX,XX @@ vssh_v ... 010 . ..... ..... 101 ..... 0100111 @r_nfvm | ||
67 | vssw_v ... 010 . ..... ..... 110 ..... 0100111 @r_nfvm | ||
68 | vsse_v ... 010 . ..... ..... 111 ..... 0100111 @r_nfvm | ||
69 | |||
70 | +vlxb_v ... 111 . ..... ..... 000 ..... 0000111 @r_nfvm | ||
71 | +vlxh_v ... 111 . ..... ..... 101 ..... 0000111 @r_nfvm | ||
72 | +vlxw_v ... 111 . ..... ..... 110 ..... 0000111 @r_nfvm | ||
73 | +vlxe_v ... 011 . ..... ..... 111 ..... 0000111 @r_nfvm | ||
74 | +vlxbu_v ... 011 . ..... ..... 000 ..... 0000111 @r_nfvm | ||
75 | +vlxhu_v ... 011 . ..... ..... 101 ..... 0000111 @r_nfvm | ||
76 | +vlxwu_v ... 011 . ..... ..... 110 ..... 0000111 @r_nfvm | ||
77 | +# Vector ordered-indexed and unordered-indexed store insns. | ||
78 | +vsxb_v ... -11 . ..... ..... 000 ..... 0100111 @r_nfvm | ||
79 | +vsxh_v ... -11 . ..... ..... 101 ..... 0100111 @r_nfvm | ||
80 | +vsxw_v ... -11 . ..... ..... 110 ..... 0100111 @r_nfvm | ||
81 | +vsxe_v ... -11 . ..... ..... 111 ..... 0100111 @r_nfvm | ||
82 | + | ||
83 | # *** new major opcode OP-V *** | ||
84 | vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm | ||
85 | vsetvl 1000000 ..... ..... 111 ..... 1010111 @r | ||
86 | diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c | ||
87 | index XXXXXXX..XXXXXXX 100644 | ||
88 | --- a/target/riscv/insn_trans/trans_rvv.inc.c | ||
89 | +++ b/target/riscv/insn_trans/trans_rvv.inc.c | ||
90 | @@ -XXX,XX +XXX,XX @@ GEN_VEXT_TRANS(vssb_v, 0, rnfvm, st_stride_op, st_stride_check) | ||
91 | GEN_VEXT_TRANS(vssh_v, 1, rnfvm, st_stride_op, st_stride_check) | ||
92 | GEN_VEXT_TRANS(vssw_v, 2, rnfvm, st_stride_op, st_stride_check) | ||
93 | GEN_VEXT_TRANS(vsse_v, 3, rnfvm, st_stride_op, st_stride_check) | ||
94 | + | ||
95 | +/* | ||
96 | + *** index load and store | ||
97 | + */ | ||
98 | +typedef void gen_helper_ldst_index(TCGv_ptr, TCGv_ptr, TCGv, | ||
99 | + TCGv_ptr, TCGv_env, TCGv_i32); | ||
100 | + | ||
101 | +static bool ldst_index_trans(uint32_t vd, uint32_t rs1, uint32_t vs2, | ||
102 | + uint32_t data, gen_helper_ldst_index *fn, | ||
103 | + DisasContext *s) | ||
104 | +{ | ||
105 | + TCGv_ptr dest, mask, index; | ||
106 | + TCGv base; | ||
107 | + TCGv_i32 desc; | ||
108 | + | ||
109 | + TCGLabel *over = gen_new_label(); | ||
110 | + tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); | ||
111 | + | ||
112 | + dest = tcg_temp_new_ptr(); | ||
113 | + mask = tcg_temp_new_ptr(); | ||
114 | + index = tcg_temp_new_ptr(); | ||
115 | + base = tcg_temp_new(); | ||
116 | + desc = tcg_const_i32(simd_desc(0, s->vlen / 8, data)); | ||
117 | + | ||
118 | + gen_get_gpr(base, rs1); | ||
119 | + tcg_gen_addi_ptr(dest, cpu_env, vreg_ofs(s, vd)); | ||
120 | + tcg_gen_addi_ptr(index, cpu_env, vreg_ofs(s, vs2)); | ||
121 | + tcg_gen_addi_ptr(mask, cpu_env, vreg_ofs(s, 0)); | ||
122 | + | ||
123 | + fn(dest, mask, base, index, cpu_env, desc); | ||
124 | + | ||
125 | + tcg_temp_free_ptr(dest); | ||
126 | + tcg_temp_free_ptr(mask); | ||
127 | + tcg_temp_free_ptr(index); | ||
128 | + tcg_temp_free(base); | ||
129 | + tcg_temp_free_i32(desc); | ||
130 | + gen_set_label(over); | ||
131 | + return true; | ||
132 | +} | ||
133 | + | ||
134 | +static bool ld_index_op(DisasContext *s, arg_rnfvm *a, uint8_t seq) | ||
135 | +{ | ||
136 | + uint32_t data = 0; | ||
137 | + gen_helper_ldst_index *fn; | ||
138 | + static gen_helper_ldst_index * const fns[7][4] = { | ||
139 | + { gen_helper_vlxb_v_b, gen_helper_vlxb_v_h, | ||
140 | + gen_helper_vlxb_v_w, gen_helper_vlxb_v_d }, | ||
141 | + { NULL, gen_helper_vlxh_v_h, | ||
142 | + gen_helper_vlxh_v_w, gen_helper_vlxh_v_d }, | ||
143 | + { NULL, NULL, | ||
144 | + gen_helper_vlxw_v_w, gen_helper_vlxw_v_d }, | ||
145 | + { gen_helper_vlxe_v_b, gen_helper_vlxe_v_h, | ||
146 | + gen_helper_vlxe_v_w, gen_helper_vlxe_v_d }, | ||
147 | + { gen_helper_vlxbu_v_b, gen_helper_vlxbu_v_h, | ||
148 | + gen_helper_vlxbu_v_w, gen_helper_vlxbu_v_d }, | ||
149 | + { NULL, gen_helper_vlxhu_v_h, | ||
150 | + gen_helper_vlxhu_v_w, gen_helper_vlxhu_v_d }, | ||
151 | + { NULL, NULL, | ||
152 | + gen_helper_vlxwu_v_w, gen_helper_vlxwu_v_d }, | ||
153 | + }; | ||
154 | + | ||
155 | + fn = fns[seq][s->sew]; | ||
156 | + if (fn == NULL) { | ||
157 | + return false; | ||
158 | + } | ||
159 | + | ||
160 | + data = FIELD_DP32(data, VDATA, MLEN, s->mlen); | ||
161 | + data = FIELD_DP32(data, VDATA, VM, a->vm); | ||
162 | + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); | ||
163 | + data = FIELD_DP32(data, VDATA, NF, a->nf); | ||
164 | + return ldst_index_trans(a->rd, a->rs1, a->rs2, data, fn, s); | ||
165 | +} | ||
166 | + | ||
167 | +static bool ld_index_check(DisasContext *s, arg_rnfvm* a) | ||
168 | +{ | ||
169 | + return (vext_check_isa_ill(s) && | ||
170 | + vext_check_overlap_mask(s, a->rd, a->vm, false) && | ||
171 | + vext_check_reg(s, a->rd, false) && | ||
172 | + vext_check_reg(s, a->rs2, false) && | ||
173 | + vext_check_nf(s, a->nf)); | ||
174 | +} | ||
175 | + | ||
176 | +GEN_VEXT_TRANS(vlxb_v, 0, rnfvm, ld_index_op, ld_index_check) | ||
177 | +GEN_VEXT_TRANS(vlxh_v, 1, rnfvm, ld_index_op, ld_index_check) | ||
178 | +GEN_VEXT_TRANS(vlxw_v, 2, rnfvm, ld_index_op, ld_index_check) | ||
179 | +GEN_VEXT_TRANS(vlxe_v, 3, rnfvm, ld_index_op, ld_index_check) | ||
180 | +GEN_VEXT_TRANS(vlxbu_v, 4, rnfvm, ld_index_op, ld_index_check) | ||
181 | +GEN_VEXT_TRANS(vlxhu_v, 5, rnfvm, ld_index_op, ld_index_check) | ||
182 | +GEN_VEXT_TRANS(vlxwu_v, 6, rnfvm, ld_index_op, ld_index_check) | ||
183 | + | ||
184 | +static bool st_index_op(DisasContext *s, arg_rnfvm *a, uint8_t seq) | ||
185 | +{ | ||
186 | + uint32_t data = 0; | ||
187 | + gen_helper_ldst_index *fn; | ||
188 | + static gen_helper_ldst_index * const fns[4][4] = { | ||
189 | + { gen_helper_vsxb_v_b, gen_helper_vsxb_v_h, | ||
190 | + gen_helper_vsxb_v_w, gen_helper_vsxb_v_d }, | ||
191 | + { NULL, gen_helper_vsxh_v_h, | ||
192 | + gen_helper_vsxh_v_w, gen_helper_vsxh_v_d }, | ||
193 | + { NULL, NULL, | ||
194 | + gen_helper_vsxw_v_w, gen_helper_vsxw_v_d }, | ||
195 | + { gen_helper_vsxe_v_b, gen_helper_vsxe_v_h, | ||
196 | + gen_helper_vsxe_v_w, gen_helper_vsxe_v_d } | ||
197 | + }; | ||
198 | + | ||
199 | + fn = fns[seq][s->sew]; | ||
200 | + if (fn == NULL) { | ||
201 | + return false; | ||
202 | + } | ||
203 | + | ||
204 | + data = FIELD_DP32(data, VDATA, MLEN, s->mlen); | ||
205 | + data = FIELD_DP32(data, VDATA, VM, a->vm); | ||
206 | + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); | ||
207 | + data = FIELD_DP32(data, VDATA, NF, a->nf); | ||
208 | + return ldst_index_trans(a->rd, a->rs1, a->rs2, data, fn, s); | ||
209 | +} | ||
210 | + | ||
211 | +static bool st_index_check(DisasContext *s, arg_rnfvm* a) | ||
212 | +{ | ||
213 | + return (vext_check_isa_ill(s) && | ||
214 | + vext_check_reg(s, a->rd, false) && | ||
215 | + vext_check_reg(s, a->rs2, false) && | ||
216 | + vext_check_nf(s, a->nf)); | ||
217 | +} | ||
218 | + | ||
219 | +GEN_VEXT_TRANS(vsxb_v, 0, rnfvm, st_index_op, st_index_check) | ||
220 | +GEN_VEXT_TRANS(vsxh_v, 1, rnfvm, st_index_op, st_index_check) | ||
221 | +GEN_VEXT_TRANS(vsxw_v, 2, rnfvm, st_index_op, st_index_check) | ||
222 | +GEN_VEXT_TRANS(vsxe_v, 3, rnfvm, st_index_op, st_index_check) | ||
223 | diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c | ||
224 | index XXXXXXX..XXXXXXX 100644 | ||
225 | --- a/target/riscv/vector_helper.c | ||
226 | +++ b/target/riscv/vector_helper.c | ||
227 | @@ -XXX,XX +XXX,XX @@ GEN_VEXT_ST_US(vse_v_b, int8_t, int8_t , ste_b) | ||
228 | GEN_VEXT_ST_US(vse_v_h, int16_t, int16_t, ste_h) | ||
229 | GEN_VEXT_ST_US(vse_v_w, int32_t, int32_t, ste_w) | ||
230 | GEN_VEXT_ST_US(vse_v_d, int64_t, int64_t, ste_d) | ||
231 | + | ||
232 | +/* | ||
233 | + *** index: access vector element from indexed memory | ||
234 | + */ | ||
235 | +typedef target_ulong vext_get_index_addr(target_ulong base, | ||
236 | + uint32_t idx, void *vs2); | ||
237 | + | ||
238 | +#define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H) \ | ||
239 | +static target_ulong NAME(target_ulong base, \ | ||
240 | + uint32_t idx, void *vs2) \ | ||
241 | +{ \ | ||
242 | + return (base + *((ETYPE *)vs2 + H(idx))); \ | ||
243 | +} | ||
244 | + | ||
245 | +GEN_VEXT_GET_INDEX_ADDR(idx_b, int8_t, H1) | ||
246 | +GEN_VEXT_GET_INDEX_ADDR(idx_h, int16_t, H2) | ||
247 | +GEN_VEXT_GET_INDEX_ADDR(idx_w, int32_t, H4) | ||
248 | +GEN_VEXT_GET_INDEX_ADDR(idx_d, int64_t, H8) | ||
249 | + | ||
250 | +static inline void | ||
251 | +vext_ldst_index(void *vd, void *v0, target_ulong base, | ||
252 | + void *vs2, CPURISCVState *env, uint32_t desc, | ||
253 | + vext_get_index_addr get_index_addr, | ||
254 | + vext_ldst_elem_fn *ldst_elem, | ||
255 | + clear_fn *clear_elem, | ||
256 | + uint32_t esz, uint32_t msz, uintptr_t ra, | ||
257 | + MMUAccessType access_type) | ||
258 | +{ | ||
259 | + uint32_t i, k; | ||
260 | + uint32_t nf = vext_nf(desc); | ||
261 | + uint32_t vm = vext_vm(desc); | ||
262 | + uint32_t mlen = vext_mlen(desc); | ||
263 | + uint32_t vlmax = vext_maxsz(desc) / esz; | ||
264 | + | ||
265 | + /* probe every access*/ | ||
266 | + for (i = 0; i < env->vl; i++) { | ||
267 | + if (!vm && !vext_elem_mask(v0, mlen, i)) { | ||
268 | + continue; | ||
269 | + } | ||
270 | + probe_pages(env, get_index_addr(base, i, vs2), nf * msz, ra, | ||
271 | + access_type); | ||
272 | + } | ||
273 | + /* load bytes from guest memory */ | ||
274 | + for (i = 0; i < env->vl; i++) { | ||
275 | + k = 0; | ||
276 | + if (!vm && !vext_elem_mask(v0, mlen, i)) { | ||
277 | + continue; | ||
278 | + } | ||
279 | + while (k < nf) { | ||
280 | + abi_ptr addr = get_index_addr(base, i, vs2) + k * msz; | ||
281 | + ldst_elem(env, addr, i + k * vlmax, vd, ra); | ||
282 | + k++; | ||
283 | + } | ||
284 | + } | ||
285 | + /* clear tail elements */ | ||
286 | + if (clear_elem) { | ||
287 | + for (k = 0; k < nf; k++) { | ||
288 | + clear_elem(vd, env->vl + k * vlmax, env->vl * esz, vlmax * esz); | ||
289 | + } | ||
290 | + } | ||
291 | +} | ||
292 | + | ||
293 | +#define GEN_VEXT_LD_INDEX(NAME, MTYPE, ETYPE, INDEX_FN, LOAD_FN, CLEAR_FN) \ | ||
294 | +void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ | ||
295 | + void *vs2, CPURISCVState *env, uint32_t desc) \ | ||
296 | +{ \ | ||
297 | + vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \ | ||
298 | + LOAD_FN, CLEAR_FN, sizeof(ETYPE), sizeof(MTYPE), \ | ||
299 | + GETPC(), MMU_DATA_LOAD); \ | ||
300 | +} | ||
301 | + | ||
302 | +GEN_VEXT_LD_INDEX(vlxb_v_b, int8_t, int8_t, idx_b, ldb_b, clearb) | ||
303 | +GEN_VEXT_LD_INDEX(vlxb_v_h, int8_t, int16_t, idx_h, ldb_h, clearh) | ||
304 | +GEN_VEXT_LD_INDEX(vlxb_v_w, int8_t, int32_t, idx_w, ldb_w, clearl) | ||
305 | +GEN_VEXT_LD_INDEX(vlxb_v_d, int8_t, int64_t, idx_d, ldb_d, clearq) | ||
306 | +GEN_VEXT_LD_INDEX(vlxh_v_h, int16_t, int16_t, idx_h, ldh_h, clearh) | ||
307 | +GEN_VEXT_LD_INDEX(vlxh_v_w, int16_t, int32_t, idx_w, ldh_w, clearl) | ||
308 | +GEN_VEXT_LD_INDEX(vlxh_v_d, int16_t, int64_t, idx_d, ldh_d, clearq) | ||
309 | +GEN_VEXT_LD_INDEX(vlxw_v_w, int32_t, int32_t, idx_w, ldw_w, clearl) | ||
310 | +GEN_VEXT_LD_INDEX(vlxw_v_d, int32_t, int64_t, idx_d, ldw_d, clearq) | ||
311 | +GEN_VEXT_LD_INDEX(vlxe_v_b, int8_t, int8_t, idx_b, lde_b, clearb) | ||
312 | +GEN_VEXT_LD_INDEX(vlxe_v_h, int16_t, int16_t, idx_h, lde_h, clearh) | ||
313 | +GEN_VEXT_LD_INDEX(vlxe_v_w, int32_t, int32_t, idx_w, lde_w, clearl) | ||
314 | +GEN_VEXT_LD_INDEX(vlxe_v_d, int64_t, int64_t, idx_d, lde_d, clearq) | ||
315 | +GEN_VEXT_LD_INDEX(vlxbu_v_b, uint8_t, uint8_t, idx_b, ldbu_b, clearb) | ||
316 | +GEN_VEXT_LD_INDEX(vlxbu_v_h, uint8_t, uint16_t, idx_h, ldbu_h, clearh) | ||
317 | +GEN_VEXT_LD_INDEX(vlxbu_v_w, uint8_t, uint32_t, idx_w, ldbu_w, clearl) | ||
318 | +GEN_VEXT_LD_INDEX(vlxbu_v_d, uint8_t, uint64_t, idx_d, ldbu_d, clearq) | ||
319 | +GEN_VEXT_LD_INDEX(vlxhu_v_h, uint16_t, uint16_t, idx_h, ldhu_h, clearh) | ||
320 | +GEN_VEXT_LD_INDEX(vlxhu_v_w, uint16_t, uint32_t, idx_w, ldhu_w, clearl) | ||
321 | +GEN_VEXT_LD_INDEX(vlxhu_v_d, uint16_t, uint64_t, idx_d, ldhu_d, clearq) | ||
322 | +GEN_VEXT_LD_INDEX(vlxwu_v_w, uint32_t, uint32_t, idx_w, ldwu_w, clearl) | ||
323 | +GEN_VEXT_LD_INDEX(vlxwu_v_d, uint32_t, uint64_t, idx_d, ldwu_d, clearq) | ||
324 | + | ||
325 | +#define GEN_VEXT_ST_INDEX(NAME, MTYPE, ETYPE, INDEX_FN, STORE_FN)\ | ||
326 | +void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ | ||
327 | + void *vs2, CPURISCVState *env, uint32_t desc) \ | ||
328 | +{ \ | ||
329 | + vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \ | ||
330 | + STORE_FN, NULL, sizeof(ETYPE), sizeof(MTYPE),\ | ||
331 | + GETPC(), MMU_DATA_STORE); \ | ||
332 | +} | ||
333 | + | ||
334 | +GEN_VEXT_ST_INDEX(vsxb_v_b, int8_t, int8_t, idx_b, stb_b) | ||
335 | +GEN_VEXT_ST_INDEX(vsxb_v_h, int8_t, int16_t, idx_h, stb_h) | ||
336 | +GEN_VEXT_ST_INDEX(vsxb_v_w, int8_t, int32_t, idx_w, stb_w) | ||
337 | +GEN_VEXT_ST_INDEX(vsxb_v_d, int8_t, int64_t, idx_d, stb_d) | ||
338 | +GEN_VEXT_ST_INDEX(vsxh_v_h, int16_t, int16_t, idx_h, sth_h) | ||
339 | +GEN_VEXT_ST_INDEX(vsxh_v_w, int16_t, int32_t, idx_w, sth_w) | ||
340 | +GEN_VEXT_ST_INDEX(vsxh_v_d, int16_t, int64_t, idx_d, sth_d) | ||
341 | +GEN_VEXT_ST_INDEX(vsxw_v_w, int32_t, int32_t, idx_w, stw_w) | ||
342 | +GEN_VEXT_ST_INDEX(vsxw_v_d, int32_t, int64_t, idx_d, stw_d) | ||
343 | +GEN_VEXT_ST_INDEX(vsxe_v_b, int8_t, int8_t, idx_b, ste_b) | ||
344 | +GEN_VEXT_ST_INDEX(vsxe_v_h, int16_t, int16_t, idx_h, ste_h) | ||
345 | +GEN_VEXT_ST_INDEX(vsxe_v_w, int32_t, int32_t, idx_w, ste_w) | ||
346 | +GEN_VEXT_ST_INDEX(vsxe_v_d, int64_t, int64_t, idx_d, ste_d) | ||
347 | -- | ||
348 | 2.27.0 | ||
349 | |||
350 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: LIU Zhiwei <zhiwei_liu@c-sky.com> | ||
2 | 1 | ||
3 | Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> | ||
4 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
5 | Reviewed-by: Alistair Francis <alistair.francis@wdc.com> | ||
6 | Message-id: 20200623215920.2594-14-zhiwei_liu@c-sky.com | ||
7 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | ||
8 | --- | ||
9 | target/riscv/helper.h | 25 ++++++++++++ | ||
10 | target/riscv/insn32.decode | 9 +++++ | ||
11 | target/riscv/insn_trans/trans_rvv.inc.c | 11 ++++++ | ||
12 | target/riscv/vector_helper.c | 51 +++++++++++++++++++++++++ | ||
13 | 4 files changed, 96 insertions(+) | ||
14 | |||
15 | diff --git a/target/riscv/helper.h b/target/riscv/helper.h | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/target/riscv/helper.h | ||
18 | +++ b/target/riscv/helper.h | ||
19 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vmsbc_vxm_b, void, ptr, ptr, tl, ptr, env, i32) | ||
20 | DEF_HELPER_6(vmsbc_vxm_h, void, ptr, ptr, tl, ptr, env, i32) | ||
21 | DEF_HELPER_6(vmsbc_vxm_w, void, ptr, ptr, tl, ptr, env, i32) | ||
22 | DEF_HELPER_6(vmsbc_vxm_d, void, ptr, ptr, tl, ptr, env, i32) | ||
23 | + | ||
24 | +DEF_HELPER_6(vand_vv_b, void, ptr, ptr, ptr, ptr, env, i32) | ||
25 | +DEF_HELPER_6(vand_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | ||
26 | +DEF_HELPER_6(vand_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
27 | +DEF_HELPER_6(vand_vv_d, void, ptr, ptr, ptr, ptr, env, i32) | ||
28 | +DEF_HELPER_6(vor_vv_b, void, ptr, ptr, ptr, ptr, env, i32) | ||
29 | +DEF_HELPER_6(vor_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | ||
30 | +DEF_HELPER_6(vor_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
31 | +DEF_HELPER_6(vor_vv_d, void, ptr, ptr, ptr, ptr, env, i32) | ||
32 | +DEF_HELPER_6(vxor_vv_b, void, ptr, ptr, ptr, ptr, env, i32) | ||
33 | +DEF_HELPER_6(vxor_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | ||
34 | +DEF_HELPER_6(vxor_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
35 | +DEF_HELPER_6(vxor_vv_d, void, ptr, ptr, ptr, ptr, env, i32) | ||
36 | +DEF_HELPER_6(vand_vx_b, void, ptr, ptr, tl, ptr, env, i32) | ||
37 | +DEF_HELPER_6(vand_vx_h, void, ptr, ptr, tl, ptr, env, i32) | ||
38 | +DEF_HELPER_6(vand_vx_w, void, ptr, ptr, tl, ptr, env, i32) | ||
39 | +DEF_HELPER_6(vand_vx_d, void, ptr, ptr, tl, ptr, env, i32) | ||
40 | +DEF_HELPER_6(vor_vx_b, void, ptr, ptr, tl, ptr, env, i32) | ||
41 | +DEF_HELPER_6(vor_vx_h, void, ptr, ptr, tl, ptr, env, i32) | ||
42 | +DEF_HELPER_6(vor_vx_w, void, ptr, ptr, tl, ptr, env, i32) | ||
43 | +DEF_HELPER_6(vor_vx_d, void, ptr, ptr, tl, ptr, env, i32) | ||
44 | +DEF_HELPER_6(vxor_vx_b, void, ptr, ptr, tl, ptr, env, i32) | ||
45 | +DEF_HELPER_6(vxor_vx_h, void, ptr, ptr, tl, ptr, env, i32) | ||
46 | +DEF_HELPER_6(vxor_vx_w, void, ptr, ptr, tl, ptr, env, i32) | ||
47 | +DEF_HELPER_6(vxor_vx_d, void, ptr, ptr, tl, ptr, env, i32) | ||
48 | diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode | ||
49 | index XXXXXXX..XXXXXXX 100644 | ||
50 | --- a/target/riscv/insn32.decode | ||
51 | +++ b/target/riscv/insn32.decode | ||
52 | @@ -XXX,XX +XXX,XX @@ vsbc_vvm 010010 1 ..... ..... 000 ..... 1010111 @r_vm_1 | ||
53 | vsbc_vxm 010010 1 ..... ..... 100 ..... 1010111 @r_vm_1 | ||
54 | vmsbc_vvm 010011 1 ..... ..... 000 ..... 1010111 @r_vm_1 | ||
55 | vmsbc_vxm 010011 1 ..... ..... 100 ..... 1010111 @r_vm_1 | ||
56 | +vand_vv 001001 . ..... ..... 000 ..... 1010111 @r_vm | ||
57 | +vand_vx 001001 . ..... ..... 100 ..... 1010111 @r_vm | ||
58 | +vand_vi 001001 . ..... ..... 011 ..... 1010111 @r_vm | ||
59 | +vor_vv 001010 . ..... ..... 000 ..... 1010111 @r_vm | ||
60 | +vor_vx 001010 . ..... ..... 100 ..... 1010111 @r_vm | ||
61 | +vor_vi 001010 . ..... ..... 011 ..... 1010111 @r_vm | ||
62 | +vxor_vv 001011 . ..... ..... 000 ..... 1010111 @r_vm | ||
63 | +vxor_vx 001011 . ..... ..... 100 ..... 1010111 @r_vm | ||
64 | +vxor_vi 001011 . ..... ..... 011 ..... 1010111 @r_vm | ||
65 | |||
66 | vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm | ||
67 | vsetvl 1000000 ..... ..... 111 ..... 1010111 @r | ||
68 | diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c | ||
69 | index XXXXXXX..XXXXXXX 100644 | ||
70 | --- a/target/riscv/insn_trans/trans_rvv.inc.c | ||
71 | +++ b/target/riscv/insn_trans/trans_rvv.inc.c | ||
72 | @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ | ||
73 | |||
74 | GEN_OPIVI_TRANS(vadc_vim, 0, vadc_vxm, opivx_vadc_check) | ||
75 | GEN_OPIVI_TRANS(vmadc_vim, 0, vmadc_vxm, opivx_vmadc_check) | ||
76 | + | ||
77 | +/* Vector Bitwise Logical Instructions */ | ||
78 | +GEN_OPIVV_GVEC_TRANS(vand_vv, and) | ||
79 | +GEN_OPIVV_GVEC_TRANS(vor_vv, or) | ||
80 | +GEN_OPIVV_GVEC_TRANS(vxor_vv, xor) | ||
81 | +GEN_OPIVX_GVEC_TRANS(vand_vx, ands) | ||
82 | +GEN_OPIVX_GVEC_TRANS(vor_vx, ors) | ||
83 | +GEN_OPIVX_GVEC_TRANS(vxor_vx, xors) | ||
84 | +GEN_OPIVI_GVEC_TRANS(vand_vi, 0, vand_vx, andi) | ||
85 | +GEN_OPIVI_GVEC_TRANS(vor_vi, 0, vor_vx, ori) | ||
86 | +GEN_OPIVI_GVEC_TRANS(vxor_vi, 0, vxor_vx, xori) | ||
87 | diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c | ||
88 | index XXXXXXX..XXXXXXX 100644 | ||
89 | --- a/target/riscv/vector_helper.c | ||
90 | +++ b/target/riscv/vector_helper.c | ||
91 | @@ -XXX,XX +XXX,XX @@ GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t, H1, DO_MSBC) | ||
92 | GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC) | ||
93 | GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC) | ||
94 | GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC) | ||
95 | + | ||
96 | +/* Vector Bitwise Logical Instructions */ | ||
97 | +RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND) | ||
98 | +RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND) | ||
99 | +RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND) | ||
100 | +RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND) | ||
101 | +RVVCALL(OPIVV2, vor_vv_b, OP_SSS_B, H1, H1, H1, DO_OR) | ||
102 | +RVVCALL(OPIVV2, vor_vv_h, OP_SSS_H, H2, H2, H2, DO_OR) | ||
103 | +RVVCALL(OPIVV2, vor_vv_w, OP_SSS_W, H4, H4, H4, DO_OR) | ||
104 | +RVVCALL(OPIVV2, vor_vv_d, OP_SSS_D, H8, H8, H8, DO_OR) | ||
105 | +RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR) | ||
106 | +RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR) | ||
107 | +RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR) | ||
108 | +RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR) | ||
109 | +GEN_VEXT_VV(vand_vv_b, 1, 1, clearb) | ||
110 | +GEN_VEXT_VV(vand_vv_h, 2, 2, clearh) | ||
111 | +GEN_VEXT_VV(vand_vv_w, 4, 4, clearl) | ||
112 | +GEN_VEXT_VV(vand_vv_d, 8, 8, clearq) | ||
113 | +GEN_VEXT_VV(vor_vv_b, 1, 1, clearb) | ||
114 | +GEN_VEXT_VV(vor_vv_h, 2, 2, clearh) | ||
115 | +GEN_VEXT_VV(vor_vv_w, 4, 4, clearl) | ||
116 | +GEN_VEXT_VV(vor_vv_d, 8, 8, clearq) | ||
117 | +GEN_VEXT_VV(vxor_vv_b, 1, 1, clearb) | ||
118 | +GEN_VEXT_VV(vxor_vv_h, 2, 2, clearh) | ||
119 | +GEN_VEXT_VV(vxor_vv_w, 4, 4, clearl) | ||
120 | +GEN_VEXT_VV(vxor_vv_d, 8, 8, clearq) | ||
121 | + | ||
122 | +RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND) | ||
123 | +RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND) | ||
124 | +RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, DO_AND) | ||
125 | +RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND) | ||
126 | +RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR) | ||
127 | +RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR) | ||
128 | +RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR) | ||
129 | +RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR) | ||
130 | +RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR) | ||
131 | +RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR) | ||
132 | +RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR) | ||
133 | +RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR) | ||
134 | +GEN_VEXT_VX(vand_vx_b, 1, 1, clearb) | ||
135 | +GEN_VEXT_VX(vand_vx_h, 2, 2, clearh) | ||
136 | +GEN_VEXT_VX(vand_vx_w, 4, 4, clearl) | ||
137 | +GEN_VEXT_VX(vand_vx_d, 8, 8, clearq) | ||
138 | +GEN_VEXT_VX(vor_vx_b, 1, 1, clearb) | ||
139 | +GEN_VEXT_VX(vor_vx_h, 2, 2, clearh) | ||
140 | +GEN_VEXT_VX(vor_vx_w, 4, 4, clearl) | ||
141 | +GEN_VEXT_VX(vor_vx_d, 8, 8, clearq) | ||
142 | +GEN_VEXT_VX(vxor_vx_b, 1, 1, clearb) | ||
143 | +GEN_VEXT_VX(vxor_vx_h, 2, 2, clearh) | ||
144 | +GEN_VEXT_VX(vxor_vx_w, 4, 4, clearl) | ||
145 | +GEN_VEXT_VX(vxor_vx_d, 8, 8, clearq) | ||
146 | -- | ||
147 | 2.27.0 | ||
148 | |||
149 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: LIU Zhiwei <zhiwei_liu@c-sky.com> | ||
2 | 1 | ||
3 | Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> | ||
4 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
5 | Reviewed-by: Alistair Francis <alistair.francis@wdc.com> | ||
6 | Message-id: 20200623215920.2594-16-zhiwei_liu@c-sky.com | ||
7 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | ||
8 | --- | ||
9 | target/riscv/helper.h | 13 ++++ | ||
10 | target/riscv/insn32.decode | 6 ++ | ||
11 | target/riscv/insn_trans/trans_rvv.inc.c | 90 +++++++++++++++++++++++++ | ||
12 | target/riscv/vector_helper.c | 14 ++++ | ||
13 | 4 files changed, 123 insertions(+) | ||
14 | |||
15 | diff --git a/target/riscv/helper.h b/target/riscv/helper.h | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/target/riscv/helper.h | ||
18 | +++ b/target/riscv/helper.h | ||
19 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vsra_vx_b, void, ptr, ptr, tl, ptr, env, i32) | ||
20 | DEF_HELPER_6(vsra_vx_h, void, ptr, ptr, tl, ptr, env, i32) | ||
21 | DEF_HELPER_6(vsra_vx_w, void, ptr, ptr, tl, ptr, env, i32) | ||
22 | DEF_HELPER_6(vsra_vx_d, void, ptr, ptr, tl, ptr, env, i32) | ||
23 | + | ||
24 | +DEF_HELPER_6(vnsrl_vv_b, void, ptr, ptr, ptr, ptr, env, i32) | ||
25 | +DEF_HELPER_6(vnsrl_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | ||
26 | +DEF_HELPER_6(vnsrl_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
27 | +DEF_HELPER_6(vnsra_vv_b, void, ptr, ptr, ptr, ptr, env, i32) | ||
28 | +DEF_HELPER_6(vnsra_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | ||
29 | +DEF_HELPER_6(vnsra_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
30 | +DEF_HELPER_6(vnsrl_vx_b, void, ptr, ptr, tl, ptr, env, i32) | ||
31 | +DEF_HELPER_6(vnsrl_vx_h, void, ptr, ptr, tl, ptr, env, i32) | ||
32 | +DEF_HELPER_6(vnsrl_vx_w, void, ptr, ptr, tl, ptr, env, i32) | ||
33 | +DEF_HELPER_6(vnsra_vx_b, void, ptr, ptr, tl, ptr, env, i32) | ||
34 | +DEF_HELPER_6(vnsra_vx_h, void, ptr, ptr, tl, ptr, env, i32) | ||
35 | +DEF_HELPER_6(vnsra_vx_w, void, ptr, ptr, tl, ptr, env, i32) | ||
36 | diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode | ||
37 | index XXXXXXX..XXXXXXX 100644 | ||
38 | --- a/target/riscv/insn32.decode | ||
39 | +++ b/target/riscv/insn32.decode | ||
40 | @@ -XXX,XX +XXX,XX @@ vsrl_vi 101000 . ..... ..... 011 ..... 1010111 @r_vm | ||
41 | vsra_vv 101001 . ..... ..... 000 ..... 1010111 @r_vm | ||
42 | vsra_vx 101001 . ..... ..... 100 ..... 1010111 @r_vm | ||
43 | vsra_vi 101001 . ..... ..... 011 ..... 1010111 @r_vm | ||
44 | +vnsrl_vv 101100 . ..... ..... 000 ..... 1010111 @r_vm | ||
45 | +vnsrl_vx 101100 . ..... ..... 100 ..... 1010111 @r_vm | ||
46 | +vnsrl_vi 101100 . ..... ..... 011 ..... 1010111 @r_vm | ||
47 | +vnsra_vv 101101 . ..... ..... 000 ..... 1010111 @r_vm | ||
48 | +vnsra_vx 101101 . ..... ..... 100 ..... 1010111 @r_vm | ||
49 | +vnsra_vi 101101 . ..... ..... 011 ..... 1010111 @r_vm | ||
50 | |||
51 | vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm | ||
52 | vsetvl 1000000 ..... ..... 111 ..... 1010111 @r | ||
53 | diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c | ||
54 | index XXXXXXX..XXXXXXX 100644 | ||
55 | --- a/target/riscv/insn_trans/trans_rvv.inc.c | ||
56 | +++ b/target/riscv/insn_trans/trans_rvv.inc.c | ||
57 | @@ -XXX,XX +XXX,XX @@ GEN_OPIVX_GVEC_SHIFT_TRANS(vsra_vx, sars) | ||
58 | GEN_OPIVI_GVEC_TRANS(vsll_vi, 1, vsll_vx, shli) | ||
59 | GEN_OPIVI_GVEC_TRANS(vsrl_vi, 1, vsrl_vx, shri) | ||
60 | GEN_OPIVI_GVEC_TRANS(vsra_vi, 1, vsra_vx, sari) | ||
61 | + | ||
62 | +/* Vector Narrowing Integer Right Shift Instructions */ | ||
63 | +static bool opivv_narrow_check(DisasContext *s, arg_rmrr *a) | ||
64 | +{ | ||
65 | + return (vext_check_isa_ill(s) && | ||
66 | + vext_check_overlap_mask(s, a->rd, a->vm, false) && | ||
67 | + vext_check_reg(s, a->rd, false) && | ||
68 | + vext_check_reg(s, a->rs2, true) && | ||
69 | + vext_check_reg(s, a->rs1, false) && | ||
70 | + vext_check_overlap_group(a->rd, 1 << s->lmul, a->rs2, | ||
71 | + 2 << s->lmul) && | ||
72 | + (s->lmul < 0x3) && (s->sew < 0x3)); | ||
73 | +} | ||
74 | + | ||
75 | +/* OPIVV with NARROW */ | ||
76 | +#define GEN_OPIVV_NARROW_TRANS(NAME) \ | ||
77 | +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ | ||
78 | +{ \ | ||
79 | + if (opivv_narrow_check(s, a)) { \ | ||
80 | + uint32_t data = 0; \ | ||
81 | + static gen_helper_gvec_4_ptr * const fns[3] = { \ | ||
82 | + gen_helper_##NAME##_b, \ | ||
83 | + gen_helper_##NAME##_h, \ | ||
84 | + gen_helper_##NAME##_w, \ | ||
85 | + }; \ | ||
86 | + TCGLabel *over = gen_new_label(); \ | ||
87 | + tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ | ||
88 | + \ | ||
89 | + data = FIELD_DP32(data, VDATA, MLEN, s->mlen); \ | ||
90 | + data = FIELD_DP32(data, VDATA, VM, a->vm); \ | ||
91 | + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ | ||
92 | + tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ | ||
93 | + vreg_ofs(s, a->rs1), \ | ||
94 | + vreg_ofs(s, a->rs2), cpu_env, 0, \ | ||
95 | + s->vlen / 8, data, fns[s->sew]); \ | ||
96 | + gen_set_label(over); \ | ||
97 | + return true; \ | ||
98 | + } \ | ||
99 | + return false; \ | ||
100 | +} | ||
101 | +GEN_OPIVV_NARROW_TRANS(vnsra_vv) | ||
102 | +GEN_OPIVV_NARROW_TRANS(vnsrl_vv) | ||
103 | + | ||
104 | +static bool opivx_narrow_check(DisasContext *s, arg_rmrr *a) | ||
105 | +{ | ||
106 | + return (vext_check_isa_ill(s) && | ||
107 | + vext_check_overlap_mask(s, a->rd, a->vm, false) && | ||
108 | + vext_check_reg(s, a->rd, false) && | ||
109 | + vext_check_reg(s, a->rs2, true) && | ||
110 | + vext_check_overlap_group(a->rd, 1 << s->lmul, a->rs2, | ||
111 | + 2 << s->lmul) && | ||
112 | + (s->lmul < 0x3) && (s->sew < 0x3)); | ||
113 | +} | ||
114 | + | ||
115 | +/* OPIVX with NARROW */ | ||
116 | +#define GEN_OPIVX_NARROW_TRANS(NAME) \ | ||
117 | +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ | ||
118 | +{ \ | ||
119 | + if (opivx_narrow_check(s, a)) { \ | ||
120 | + static gen_helper_opivx * const fns[3] = { \ | ||
121 | + gen_helper_##NAME##_b, \ | ||
122 | + gen_helper_##NAME##_h, \ | ||
123 | + gen_helper_##NAME##_w, \ | ||
124 | + }; \ | ||
125 | + return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fns[s->sew], s);\ | ||
126 | + } \ | ||
127 | + return false; \ | ||
128 | +} | ||
129 | + | ||
130 | +GEN_OPIVX_NARROW_TRANS(vnsra_vx) | ||
131 | +GEN_OPIVX_NARROW_TRANS(vnsrl_vx) | ||
132 | + | ||
133 | +/* OPIVI with NARROW */ | ||
134 | +#define GEN_OPIVI_NARROW_TRANS(NAME, ZX, OPIVX) \ | ||
135 | +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ | ||
136 | +{ \ | ||
137 | + if (opivx_narrow_check(s, a)) { \ | ||
138 | + static gen_helper_opivx * const fns[3] = { \ | ||
139 | + gen_helper_##OPIVX##_b, \ | ||
140 | + gen_helper_##OPIVX##_h, \ | ||
141 | + gen_helper_##OPIVX##_w, \ | ||
142 | + }; \ | ||
143 | + return opivi_trans(a->rd, a->rs1, a->rs2, a->vm, \ | ||
144 | + fns[s->sew], s, ZX); \ | ||
145 | + } \ | ||
146 | + return false; \ | ||
147 | +} | ||
148 | + | ||
149 | +GEN_OPIVI_NARROW_TRANS(vnsra_vi, 1, vnsra_vx) | ||
150 | +GEN_OPIVI_NARROW_TRANS(vnsrl_vi, 1, vnsrl_vx) | ||
151 | diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c | ||
152 | index XXXXXXX..XXXXXXX 100644 | ||
153 | --- a/target/riscv/vector_helper.c | ||
154 | +++ b/target/riscv/vector_helper.c | ||
155 | @@ -XXX,XX +XXX,XX @@ GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7, clearb) | ||
156 | GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf, clearh) | ||
157 | GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f, clearl) | ||
158 | GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f, clearq) | ||
159 | + | ||
160 | +/* Vector Narrowing Integer Right Shift Instructions */ | ||
161 | +GEN_VEXT_SHIFT_VV(vnsrl_vv_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf, clearb) | ||
162 | +GEN_VEXT_SHIFT_VV(vnsrl_vv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f, clearh) | ||
163 | +GEN_VEXT_SHIFT_VV(vnsrl_vv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f, clearl) | ||
164 | +GEN_VEXT_SHIFT_VV(vnsra_vv_b, uint8_t, int16_t, H1, H2, DO_SRL, 0xf, clearb) | ||
165 | +GEN_VEXT_SHIFT_VV(vnsra_vv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f, clearh) | ||
166 | +GEN_VEXT_SHIFT_VV(vnsra_vv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f, clearl) | ||
167 | +GEN_VEXT_SHIFT_VX(vnsrl_vx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf, clearb) | ||
168 | +GEN_VEXT_SHIFT_VX(vnsrl_vx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f, clearh) | ||
169 | +GEN_VEXT_SHIFT_VX(vnsrl_vx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f, clearl) | ||
170 | +GEN_VEXT_SHIFT_VX(vnsra_vx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf, clearb) | ||
171 | +GEN_VEXT_SHIFT_VX(vnsra_vx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f, clearh) | ||
172 | +GEN_VEXT_SHIFT_VX(vnsra_vx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f, clearl) | ||
173 | -- | ||
174 | 2.27.0 | ||
175 | |||
176 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: LIU Zhiwei <zhiwei_liu@c-sky.com> | ||
2 | 1 | ||
3 | Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> | ||
4 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
5 | Reviewed-by: Alistair Francis <alistair.francis@wdc.com> | ||
6 | Message-id: 20200623215920.2594-17-zhiwei_liu@c-sky.com | ||
7 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | ||
8 | --- | ||
9 | target/riscv/helper.h | 57 +++++++++++ | ||
10 | target/riscv/insn32.decode | 20 ++++ | ||
11 | target/riscv/insn_trans/trans_rvv.inc.c | 46 +++++++++ | ||
12 | target/riscv/vector_helper.c | 123 ++++++++++++++++++++++++ | ||
13 | 4 files changed, 246 insertions(+) | ||
14 | |||
15 | diff --git a/target/riscv/helper.h b/target/riscv/helper.h | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/target/riscv/helper.h | ||
18 | +++ b/target/riscv/helper.h | ||
19 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vnsrl_vx_w, void, ptr, ptr, tl, ptr, env, i32) | ||
20 | DEF_HELPER_6(vnsra_vx_b, void, ptr, ptr, tl, ptr, env, i32) | ||
21 | DEF_HELPER_6(vnsra_vx_h, void, ptr, ptr, tl, ptr, env, i32) | ||
22 | DEF_HELPER_6(vnsra_vx_w, void, ptr, ptr, tl, ptr, env, i32) | ||
23 | + | ||
24 | +DEF_HELPER_6(vmseq_vv_b, void, ptr, ptr, ptr, ptr, env, i32) | ||
25 | +DEF_HELPER_6(vmseq_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | ||
26 | +DEF_HELPER_6(vmseq_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
27 | +DEF_HELPER_6(vmseq_vv_d, void, ptr, ptr, ptr, ptr, env, i32) | ||
28 | +DEF_HELPER_6(vmsne_vv_b, void, ptr, ptr, ptr, ptr, env, i32) | ||
29 | +DEF_HELPER_6(vmsne_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | ||
30 | +DEF_HELPER_6(vmsne_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
31 | +DEF_HELPER_6(vmsne_vv_d, void, ptr, ptr, ptr, ptr, env, i32) | ||
32 | +DEF_HELPER_6(vmsltu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) | ||
33 | +DEF_HELPER_6(vmsltu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | ||
34 | +DEF_HELPER_6(vmsltu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
35 | +DEF_HELPER_6(vmsltu_vv_d, void, ptr, ptr, ptr, ptr, env, i32) | ||
36 | +DEF_HELPER_6(vmslt_vv_b, void, ptr, ptr, ptr, ptr, env, i32) | ||
37 | +DEF_HELPER_6(vmslt_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | ||
38 | +DEF_HELPER_6(vmslt_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
39 | +DEF_HELPER_6(vmslt_vv_d, void, ptr, ptr, ptr, ptr, env, i32) | ||
40 | +DEF_HELPER_6(vmsleu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) | ||
41 | +DEF_HELPER_6(vmsleu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | ||
42 | +DEF_HELPER_6(vmsleu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
43 | +DEF_HELPER_6(vmsleu_vv_d, void, ptr, ptr, ptr, ptr, env, i32) | ||
44 | +DEF_HELPER_6(vmsle_vv_b, void, ptr, ptr, ptr, ptr, env, i32) | ||
45 | +DEF_HELPER_6(vmsle_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | ||
46 | +DEF_HELPER_6(vmsle_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
47 | +DEF_HELPER_6(vmsle_vv_d, void, ptr, ptr, ptr, ptr, env, i32) | ||
48 | +DEF_HELPER_6(vmseq_vx_b, void, ptr, ptr, tl, ptr, env, i32) | ||
49 | +DEF_HELPER_6(vmseq_vx_h, void, ptr, ptr, tl, ptr, env, i32) | ||
50 | +DEF_HELPER_6(vmseq_vx_w, void, ptr, ptr, tl, ptr, env, i32) | ||
51 | +DEF_HELPER_6(vmseq_vx_d, void, ptr, ptr, tl, ptr, env, i32) | ||
52 | +DEF_HELPER_6(vmsne_vx_b, void, ptr, ptr, tl, ptr, env, i32) | ||
53 | +DEF_HELPER_6(vmsne_vx_h, void, ptr, ptr, tl, ptr, env, i32) | ||
54 | +DEF_HELPER_6(vmsne_vx_w, void, ptr, ptr, tl, ptr, env, i32) | ||
55 | +DEF_HELPER_6(vmsne_vx_d, void, ptr, ptr, tl, ptr, env, i32) | ||
56 | +DEF_HELPER_6(vmsltu_vx_b, void, ptr, ptr, tl, ptr, env, i32) | ||
57 | +DEF_HELPER_6(vmsltu_vx_h, void, ptr, ptr, tl, ptr, env, i32) | ||
58 | +DEF_HELPER_6(vmsltu_vx_w, void, ptr, ptr, tl, ptr, env, i32) | ||
59 | +DEF_HELPER_6(vmsltu_vx_d, void, ptr, ptr, tl, ptr, env, i32) | ||
60 | +DEF_HELPER_6(vmslt_vx_b, void, ptr, ptr, tl, ptr, env, i32) | ||
61 | +DEF_HELPER_6(vmslt_vx_h, void, ptr, ptr, tl, ptr, env, i32) | ||
62 | +DEF_HELPER_6(vmslt_vx_w, void, ptr, ptr, tl, ptr, env, i32) | ||
63 | +DEF_HELPER_6(vmslt_vx_d, void, ptr, ptr, tl, ptr, env, i32) | ||
64 | +DEF_HELPER_6(vmsleu_vx_b, void, ptr, ptr, tl, ptr, env, i32) | ||
65 | +DEF_HELPER_6(vmsleu_vx_h, void, ptr, ptr, tl, ptr, env, i32) | ||
66 | +DEF_HELPER_6(vmsleu_vx_w, void, ptr, ptr, tl, ptr, env, i32) | ||
67 | +DEF_HELPER_6(vmsleu_vx_d, void, ptr, ptr, tl, ptr, env, i32) | ||
68 | +DEF_HELPER_6(vmsle_vx_b, void, ptr, ptr, tl, ptr, env, i32) | ||
69 | +DEF_HELPER_6(vmsle_vx_h, void, ptr, ptr, tl, ptr, env, i32) | ||
70 | +DEF_HELPER_6(vmsle_vx_w, void, ptr, ptr, tl, ptr, env, i32) | ||
71 | +DEF_HELPER_6(vmsle_vx_d, void, ptr, ptr, tl, ptr, env, i32) | ||
72 | +DEF_HELPER_6(vmsgtu_vx_b, void, ptr, ptr, tl, ptr, env, i32) | ||
73 | +DEF_HELPER_6(vmsgtu_vx_h, void, ptr, ptr, tl, ptr, env, i32) | ||
74 | +DEF_HELPER_6(vmsgtu_vx_w, void, ptr, ptr, tl, ptr, env, i32) | ||
75 | +DEF_HELPER_6(vmsgtu_vx_d, void, ptr, ptr, tl, ptr, env, i32) | ||
76 | +DEF_HELPER_6(vmsgt_vx_b, void, ptr, ptr, tl, ptr, env, i32) | ||
77 | +DEF_HELPER_6(vmsgt_vx_h, void, ptr, ptr, tl, ptr, env, i32) | ||
78 | +DEF_HELPER_6(vmsgt_vx_w, void, ptr, ptr, tl, ptr, env, i32) | ||
79 | +DEF_HELPER_6(vmsgt_vx_d, void, ptr, ptr, tl, ptr, env, i32) | ||
80 | diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode | ||
81 | index XXXXXXX..XXXXXXX 100644 | ||
82 | --- a/target/riscv/insn32.decode | ||
83 | +++ b/target/riscv/insn32.decode | ||
84 | @@ -XXX,XX +XXX,XX @@ vnsrl_vi 101100 . ..... ..... 011 ..... 1010111 @r_vm | ||
85 | vnsra_vv 101101 . ..... ..... 000 ..... 1010111 @r_vm | ||
86 | vnsra_vx 101101 . ..... ..... 100 ..... 1010111 @r_vm | ||
87 | vnsra_vi 101101 . ..... ..... 011 ..... 1010111 @r_vm | ||
88 | +vmseq_vv 011000 . ..... ..... 000 ..... 1010111 @r_vm | ||
89 | +vmseq_vx 011000 . ..... ..... 100 ..... 1010111 @r_vm | ||
90 | +vmseq_vi 011000 . ..... ..... 011 ..... 1010111 @r_vm | ||
91 | +vmsne_vv 011001 . ..... ..... 000 ..... 1010111 @r_vm | ||
92 | +vmsne_vx 011001 . ..... ..... 100 ..... 1010111 @r_vm | ||
93 | +vmsne_vi 011001 . ..... ..... 011 ..... 1010111 @r_vm | ||
94 | +vmsltu_vv 011010 . ..... ..... 000 ..... 1010111 @r_vm | ||
95 | +vmsltu_vx 011010 . ..... ..... 100 ..... 1010111 @r_vm | ||
96 | +vmslt_vv 011011 . ..... ..... 000 ..... 1010111 @r_vm | ||
97 | +vmslt_vx 011011 . ..... ..... 100 ..... 1010111 @r_vm | ||
98 | +vmsleu_vv 011100 . ..... ..... 000 ..... 1010111 @r_vm | ||
99 | +vmsleu_vx 011100 . ..... ..... 100 ..... 1010111 @r_vm | ||
100 | +vmsleu_vi 011100 . ..... ..... 011 ..... 1010111 @r_vm | ||
101 | +vmsle_vv 011101 . ..... ..... 000 ..... 1010111 @r_vm | ||
102 | +vmsle_vx 011101 . ..... ..... 100 ..... 1010111 @r_vm | ||
103 | +vmsle_vi 011101 . ..... ..... 011 ..... 1010111 @r_vm | ||
104 | +vmsgtu_vx 011110 . ..... ..... 100 ..... 1010111 @r_vm | ||
105 | +vmsgtu_vi 011110 . ..... ..... 011 ..... 1010111 @r_vm | ||
106 | +vmsgt_vx 011111 . ..... ..... 100 ..... 1010111 @r_vm | ||
107 | +vmsgt_vi 011111 . ..... ..... 011 ..... 1010111 @r_vm | ||
108 | |||
109 | vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm | ||
110 | vsetvl 1000000 ..... ..... 111 ..... 1010111 @r | ||
111 | diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c | ||
112 | index XXXXXXX..XXXXXXX 100644 | ||
113 | --- a/target/riscv/insn_trans/trans_rvv.inc.c | ||
114 | +++ b/target/riscv/insn_trans/trans_rvv.inc.c | ||
115 | @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ | ||
116 | |||
117 | GEN_OPIVI_NARROW_TRANS(vnsra_vi, 1, vnsra_vx) | ||
118 | GEN_OPIVI_NARROW_TRANS(vnsrl_vi, 1, vnsrl_vx) | ||
119 | + | ||
120 | +/* Vector Integer Comparison Instructions */ | ||
121 | +/* | ||
122 | + * For all comparison instructions, an illegal instruction exception is raised | ||
123 | + * if the destination vector register overlaps a source vector register group | ||
124 | + * and LMUL > 1. | ||
125 | + */ | ||
126 | +static bool opivv_cmp_check(DisasContext *s, arg_rmrr *a) | ||
127 | +{ | ||
128 | + return (vext_check_isa_ill(s) && | ||
129 | + vext_check_reg(s, a->rs2, false) && | ||
130 | + vext_check_reg(s, a->rs1, false) && | ||
131 | + ((vext_check_overlap_group(a->rd, 1, a->rs1, 1 << s->lmul) && | ||
132 | + vext_check_overlap_group(a->rd, 1, a->rs2, 1 << s->lmul)) || | ||
133 | + (s->lmul == 0))); | ||
134 | +} | ||
135 | +GEN_OPIVV_TRANS(vmseq_vv, opivv_cmp_check) | ||
136 | +GEN_OPIVV_TRANS(vmsne_vv, opivv_cmp_check) | ||
137 | +GEN_OPIVV_TRANS(vmsltu_vv, opivv_cmp_check) | ||
138 | +GEN_OPIVV_TRANS(vmslt_vv, opivv_cmp_check) | ||
139 | +GEN_OPIVV_TRANS(vmsleu_vv, opivv_cmp_check) | ||
140 | +GEN_OPIVV_TRANS(vmsle_vv, opivv_cmp_check) | ||
141 | + | ||
142 | +static bool opivx_cmp_check(DisasContext *s, arg_rmrr *a) | ||
143 | +{ | ||
144 | + return (vext_check_isa_ill(s) && | ||
145 | + vext_check_reg(s, a->rs2, false) && | ||
146 | + (vext_check_overlap_group(a->rd, 1, a->rs2, 1 << s->lmul) || | ||
147 | + (s->lmul == 0))); | ||
148 | +} | ||
149 | + | ||
150 | +GEN_OPIVX_TRANS(vmseq_vx, opivx_cmp_check) | ||
151 | +GEN_OPIVX_TRANS(vmsne_vx, opivx_cmp_check) | ||
152 | +GEN_OPIVX_TRANS(vmsltu_vx, opivx_cmp_check) | ||
153 | +GEN_OPIVX_TRANS(vmslt_vx, opivx_cmp_check) | ||
154 | +GEN_OPIVX_TRANS(vmsleu_vx, opivx_cmp_check) | ||
155 | +GEN_OPIVX_TRANS(vmsle_vx, opivx_cmp_check) | ||
156 | +GEN_OPIVX_TRANS(vmsgtu_vx, opivx_cmp_check) | ||
157 | +GEN_OPIVX_TRANS(vmsgt_vx, opivx_cmp_check) | ||
158 | + | ||
159 | +GEN_OPIVI_TRANS(vmseq_vi, 0, vmseq_vx, opivx_cmp_check) | ||
160 | +GEN_OPIVI_TRANS(vmsne_vi, 0, vmsne_vx, opivx_cmp_check) | ||
161 | +GEN_OPIVI_TRANS(vmsleu_vi, 1, vmsleu_vx, opivx_cmp_check) | ||
162 | +GEN_OPIVI_TRANS(vmsle_vi, 0, vmsle_vx, opivx_cmp_check) | ||
163 | +GEN_OPIVI_TRANS(vmsgtu_vi, 1, vmsgtu_vx, opivx_cmp_check) | ||
164 | +GEN_OPIVI_TRANS(vmsgt_vi, 0, vmsgt_vx, opivx_cmp_check) | ||
165 | diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c | ||
166 | index XXXXXXX..XXXXXXX 100644 | ||
167 | --- a/target/riscv/vector_helper.c | ||
168 | +++ b/target/riscv/vector_helper.c | ||
169 | @@ -XXX,XX +XXX,XX @@ GEN_VEXT_SHIFT_VX(vnsrl_vx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f, clearl) | ||
170 | GEN_VEXT_SHIFT_VX(vnsra_vx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf, clearb) | ||
171 | GEN_VEXT_SHIFT_VX(vnsra_vx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f, clearh) | ||
172 | GEN_VEXT_SHIFT_VX(vnsra_vx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f, clearl) | ||
173 | + | ||
174 | +/* Vector Integer Comparison Instructions */ | ||
175 | +#define DO_MSEQ(N, M) (N == M) | ||
176 | +#define DO_MSNE(N, M) (N != M) | ||
177 | +#define DO_MSLT(N, M) (N < M) | ||
178 | +#define DO_MSLE(N, M) (N <= M) | ||
179 | +#define DO_MSGT(N, M) (N > M) | ||
180 | + | ||
181 | +#define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP) \ | ||
182 | +void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ | ||
183 | + CPURISCVState *env, uint32_t desc) \ | ||
184 | +{ \ | ||
185 | + uint32_t mlen = vext_mlen(desc); \ | ||
186 | + uint32_t vm = vext_vm(desc); \ | ||
187 | + uint32_t vl = env->vl; \ | ||
188 | + uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE); \ | ||
189 | + uint32_t i; \ | ||
190 | + \ | ||
191 | + for (i = 0; i < vl; i++) { \ | ||
192 | + ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ | ||
193 | + ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ | ||
194 | + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ | ||
195 | + continue; \ | ||
196 | + } \ | ||
197 | + vext_set_elem_mask(vd, mlen, i, DO_OP(s2, s1)); \ | ||
198 | + } \ | ||
199 | + for (; i < vlmax; i++) { \ | ||
200 | + vext_set_elem_mask(vd, mlen, i, 0); \ | ||
201 | + } \ | ||
202 | +} | ||
203 | + | ||
204 | +GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t, H1, DO_MSEQ) | ||
205 | +GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ) | ||
206 | +GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ) | ||
207 | +GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ) | ||
208 | + | ||
209 | +GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t, H1, DO_MSNE) | ||
210 | +GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE) | ||
211 | +GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE) | ||
212 | +GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE) | ||
213 | + | ||
214 | +GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t, H1, DO_MSLT) | ||
215 | +GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT) | ||
216 | +GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT) | ||
217 | +GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT) | ||
218 | + | ||
219 | +GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t, H1, DO_MSLT) | ||
220 | +GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, H2, DO_MSLT) | ||
221 | +GEN_VEXT_CMP_VV(vmslt_vv_w, int32_t, H4, DO_MSLT) | ||
222 | +GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT) | ||
223 | + | ||
224 | +GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t, H1, DO_MSLE) | ||
225 | +GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE) | ||
226 | +GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE) | ||
227 | +GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE) | ||
228 | + | ||
229 | +GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t, H1, DO_MSLE) | ||
230 | +GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE) | ||
231 | +GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE) | ||
232 | +GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE) | ||
233 | + | ||
234 | +#define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP) \ | ||
235 | +void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ | ||
236 | + CPURISCVState *env, uint32_t desc) \ | ||
237 | +{ \ | ||
238 | + uint32_t mlen = vext_mlen(desc); \ | ||
239 | + uint32_t vm = vext_vm(desc); \ | ||
240 | + uint32_t vl = env->vl; \ | ||
241 | + uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE); \ | ||
242 | + uint32_t i; \ | ||
243 | + \ | ||
244 | + for (i = 0; i < vl; i++) { \ | ||
245 | + ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ | ||
246 | + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ | ||
247 | + continue; \ | ||
248 | + } \ | ||
249 | + vext_set_elem_mask(vd, mlen, i, \ | ||
250 | + DO_OP(s2, (ETYPE)(target_long)s1)); \ | ||
251 | + } \ | ||
252 | + for (; i < vlmax; i++) { \ | ||
253 | + vext_set_elem_mask(vd, mlen, i, 0); \ | ||
254 | + } \ | ||
255 | +} | ||
256 | + | ||
257 | +GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t, H1, DO_MSEQ) | ||
258 | +GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ) | ||
259 | +GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ) | ||
260 | +GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ) | ||
261 | + | ||
262 | +GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t, H1, DO_MSNE) | ||
263 | +GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE) | ||
264 | +GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE) | ||
265 | +GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, DO_MSNE) | ||
266 | + | ||
267 | +GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t, H1, DO_MSLT) | ||
268 | +GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT) | ||
269 | +GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT) | ||
270 | +GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT) | ||
271 | + | ||
272 | +GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t, H1, DO_MSLT) | ||
273 | +GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT) | ||
274 | +GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, H4, DO_MSLT) | ||
275 | +GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT) | ||
276 | + | ||
277 | +GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t, H1, DO_MSLE) | ||
278 | +GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE) | ||
279 | +GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE) | ||
280 | +GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE) | ||
281 | + | ||
282 | +GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t, H1, DO_MSLE) | ||
283 | +GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE) | ||
284 | +GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE) | ||
285 | +GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE) | ||
286 | + | ||
287 | +GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t, H1, DO_MSGT) | ||
288 | +GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT) | ||
289 | +GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT) | ||
290 | +GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT) | ||
291 | + | ||
292 | +GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t, H1, DO_MSGT) | ||
293 | +GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT) | ||
294 | +GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT) | ||
295 | +GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT) | ||
296 | -- | ||
297 | 2.27.0 | ||
298 | |||
299 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: LIU Zhiwei <zhiwei_liu@c-sky.com> | ||
2 | 1 | ||
3 | Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> | ||
4 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
5 | Reviewed-by: Alistair Francis <alistair.francis@wdc.com> | ||
6 | Message-id: 20200623215920.2594-18-zhiwei_liu@c-sky.com | ||
7 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | ||
8 | --- | ||
9 | target/riscv/helper.h | 33 ++++++++++++ | ||
10 | target/riscv/insn32.decode | 8 +++ | ||
11 | target/riscv/insn_trans/trans_rvv.inc.c | 10 ++++ | ||
12 | target/riscv/vector_helper.c | 71 +++++++++++++++++++++++++ | ||
13 | 4 files changed, 122 insertions(+) | ||
14 | |||
15 | diff --git a/target/riscv/helper.h b/target/riscv/helper.h | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/target/riscv/helper.h | ||
18 | +++ b/target/riscv/helper.h | ||
19 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vmsgt_vx_b, void, ptr, ptr, tl, ptr, env, i32) | ||
20 | DEF_HELPER_6(vmsgt_vx_h, void, ptr, ptr, tl, ptr, env, i32) | ||
21 | DEF_HELPER_6(vmsgt_vx_w, void, ptr, ptr, tl, ptr, env, i32) | ||
22 | DEF_HELPER_6(vmsgt_vx_d, void, ptr, ptr, tl, ptr, env, i32) | ||
23 | + | ||
24 | +DEF_HELPER_6(vminu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) | ||
25 | +DEF_HELPER_6(vminu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | ||
26 | +DEF_HELPER_6(vminu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
27 | +DEF_HELPER_6(vminu_vv_d, void, ptr, ptr, ptr, ptr, env, i32) | ||
28 | +DEF_HELPER_6(vmin_vv_b, void, ptr, ptr, ptr, ptr, env, i32) | ||
29 | +DEF_HELPER_6(vmin_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | ||
30 | +DEF_HELPER_6(vmin_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
31 | +DEF_HELPER_6(vmin_vv_d, void, ptr, ptr, ptr, ptr, env, i32) | ||
32 | +DEF_HELPER_6(vmaxu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) | ||
33 | +DEF_HELPER_6(vmaxu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | ||
34 | +DEF_HELPER_6(vmaxu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
35 | +DEF_HELPER_6(vmaxu_vv_d, void, ptr, ptr, ptr, ptr, env, i32) | ||
36 | +DEF_HELPER_6(vmax_vv_b, void, ptr, ptr, ptr, ptr, env, i32) | ||
37 | +DEF_HELPER_6(vmax_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | ||
38 | +DEF_HELPER_6(vmax_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
39 | +DEF_HELPER_6(vmax_vv_d, void, ptr, ptr, ptr, ptr, env, i32) | ||
40 | +DEF_HELPER_6(vminu_vx_b, void, ptr, ptr, tl, ptr, env, i32) | ||
41 | +DEF_HELPER_6(vminu_vx_h, void, ptr, ptr, tl, ptr, env, i32) | ||
42 | +DEF_HELPER_6(vminu_vx_w, void, ptr, ptr, tl, ptr, env, i32) | ||
43 | +DEF_HELPER_6(vminu_vx_d, void, ptr, ptr, tl, ptr, env, i32) | ||
44 | +DEF_HELPER_6(vmin_vx_b, void, ptr, ptr, tl, ptr, env, i32) | ||
45 | +DEF_HELPER_6(vmin_vx_h, void, ptr, ptr, tl, ptr, env, i32) | ||
46 | +DEF_HELPER_6(vmin_vx_w, void, ptr, ptr, tl, ptr, env, i32) | ||
47 | +DEF_HELPER_6(vmin_vx_d, void, ptr, ptr, tl, ptr, env, i32) | ||
48 | +DEF_HELPER_6(vmaxu_vx_b, void, ptr, ptr, tl, ptr, env, i32) | ||
49 | +DEF_HELPER_6(vmaxu_vx_h, void, ptr, ptr, tl, ptr, env, i32) | ||
50 | +DEF_HELPER_6(vmaxu_vx_w, void, ptr, ptr, tl, ptr, env, i32) | ||
51 | +DEF_HELPER_6(vmaxu_vx_d, void, ptr, ptr, tl, ptr, env, i32) | ||
52 | +DEF_HELPER_6(vmax_vx_b, void, ptr, ptr, tl, ptr, env, i32) | ||
53 | +DEF_HELPER_6(vmax_vx_h, void, ptr, ptr, tl, ptr, env, i32) | ||
54 | +DEF_HELPER_6(vmax_vx_w, void, ptr, ptr, tl, ptr, env, i32) | ||
55 | +DEF_HELPER_6(vmax_vx_d, void, ptr, ptr, tl, ptr, env, i32) | ||
56 | diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode | ||
57 | index XXXXXXX..XXXXXXX 100644 | ||
58 | --- a/target/riscv/insn32.decode | ||
59 | +++ b/target/riscv/insn32.decode | ||
60 | @@ -XXX,XX +XXX,XX @@ vmsgtu_vx 011110 . ..... ..... 100 ..... 1010111 @r_vm | ||
61 | vmsgtu_vi 011110 . ..... ..... 011 ..... 1010111 @r_vm | ||
62 | vmsgt_vx 011111 . ..... ..... 100 ..... 1010111 @r_vm | ||
63 | vmsgt_vi 011111 . ..... ..... 011 ..... 1010111 @r_vm | ||
64 | +vminu_vv 000100 . ..... ..... 000 ..... 1010111 @r_vm | ||
65 | +vminu_vx 000100 . ..... ..... 100 ..... 1010111 @r_vm | ||
66 | +vmin_vv 000101 . ..... ..... 000 ..... 1010111 @r_vm | ||
67 | +vmin_vx 000101 . ..... ..... 100 ..... 1010111 @r_vm | ||
68 | +vmaxu_vv 000110 . ..... ..... 000 ..... 1010111 @r_vm | ||
69 | +vmaxu_vx 000110 . ..... ..... 100 ..... 1010111 @r_vm | ||
70 | +vmax_vv 000111 . ..... ..... 000 ..... 1010111 @r_vm | ||
71 | +vmax_vx 000111 . ..... ..... 100 ..... 1010111 @r_vm | ||
72 | |||
73 | vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm | ||
74 | vsetvl 1000000 ..... ..... 111 ..... 1010111 @r | ||
75 | diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c | ||
76 | index XXXXXXX..XXXXXXX 100644 | ||
77 | --- a/target/riscv/insn_trans/trans_rvv.inc.c | ||
78 | +++ b/target/riscv/insn_trans/trans_rvv.inc.c | ||
79 | @@ -XXX,XX +XXX,XX @@ GEN_OPIVI_TRANS(vmsleu_vi, 1, vmsleu_vx, opivx_cmp_check) | ||
80 | GEN_OPIVI_TRANS(vmsle_vi, 0, vmsle_vx, opivx_cmp_check) | ||
81 | GEN_OPIVI_TRANS(vmsgtu_vi, 1, vmsgtu_vx, opivx_cmp_check) | ||
82 | GEN_OPIVI_TRANS(vmsgt_vi, 0, vmsgt_vx, opivx_cmp_check) | ||
83 | + | ||
84 | +/* Vector Integer Min/Max Instructions */ | ||
85 | +GEN_OPIVV_GVEC_TRANS(vminu_vv, umin) | ||
86 | +GEN_OPIVV_GVEC_TRANS(vmin_vv, smin) | ||
87 | +GEN_OPIVV_GVEC_TRANS(vmaxu_vv, umax) | ||
88 | +GEN_OPIVV_GVEC_TRANS(vmax_vv, smax) | ||
89 | +GEN_OPIVX_TRANS(vminu_vx, opivx_check) | ||
90 | +GEN_OPIVX_TRANS(vmin_vx, opivx_check) | ||
91 | +GEN_OPIVX_TRANS(vmaxu_vx, opivx_check) | ||
92 | +GEN_OPIVX_TRANS(vmax_vx, opivx_check) | ||
93 | diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c | ||
94 | index XXXXXXX..XXXXXXX 100644 | ||
95 | --- a/target/riscv/vector_helper.c | ||
96 | +++ b/target/riscv/vector_helper.c | ||
97 | @@ -XXX,XX +XXX,XX @@ GEN_VEXT_AMO(vamomaxuw_v_w, uint32_t, uint32_t, idx_w, clearl) | ||
98 | #define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t | ||
99 | #define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t | ||
100 | #define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t | ||
101 | +#define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t | ||
102 | +#define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t | ||
103 | +#define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t | ||
104 | +#define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t | ||
105 | |||
106 | /* operation of two vector elements */ | ||
107 | typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i); | ||
108 | @@ -XXX,XX +XXX,XX @@ GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t, H1, DO_MSGT) | ||
109 | GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT) | ||
110 | GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT) | ||
111 | GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT) | ||
112 | + | ||
113 | +/* Vector Integer Min/Max Instructions */ | ||
114 | +RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN) | ||
115 | +RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, H2, DO_MIN) | ||
116 | +RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN) | ||
117 | +RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN) | ||
118 | +RVVCALL(OPIVV2, vmin_vv_b, OP_SSS_B, H1, H1, H1, DO_MIN) | ||
119 | +RVVCALL(OPIVV2, vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN) | ||
120 | +RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN) | ||
121 | +RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN) | ||
122 | +RVVCALL(OPIVV2, vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX) | ||
123 | +RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX) | ||
124 | +RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX) | ||
125 | +RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX) | ||
126 | +RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX) | ||
127 | +RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX) | ||
128 | +RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX) | ||
129 | +RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX) | ||
130 | +GEN_VEXT_VV(vminu_vv_b, 1, 1, clearb) | ||
131 | +GEN_VEXT_VV(vminu_vv_h, 2, 2, clearh) | ||
132 | +GEN_VEXT_VV(vminu_vv_w, 4, 4, clearl) | ||
133 | +GEN_VEXT_VV(vminu_vv_d, 8, 8, clearq) | ||
134 | +GEN_VEXT_VV(vmin_vv_b, 1, 1, clearb) | ||
135 | +GEN_VEXT_VV(vmin_vv_h, 2, 2, clearh) | ||
136 | +GEN_VEXT_VV(vmin_vv_w, 4, 4, clearl) | ||
137 | +GEN_VEXT_VV(vmin_vv_d, 8, 8, clearq) | ||
138 | +GEN_VEXT_VV(vmaxu_vv_b, 1, 1, clearb) | ||
139 | +GEN_VEXT_VV(vmaxu_vv_h, 2, 2, clearh) | ||
140 | +GEN_VEXT_VV(vmaxu_vv_w, 4, 4, clearl) | ||
141 | +GEN_VEXT_VV(vmaxu_vv_d, 8, 8, clearq) | ||
142 | +GEN_VEXT_VV(vmax_vv_b, 1, 1, clearb) | ||
143 | +GEN_VEXT_VV(vmax_vv_h, 2, 2, clearh) | ||
144 | +GEN_VEXT_VV(vmax_vv_w, 4, 4, clearl) | ||
145 | +GEN_VEXT_VV(vmax_vv_d, 8, 8, clearq) | ||
146 | + | ||
147 | +RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN) | ||
148 | +RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN) | ||
149 | +RVVCALL(OPIVX2, vminu_vx_w, OP_UUU_W, H4, H4, DO_MIN) | ||
150 | +RVVCALL(OPIVX2, vminu_vx_d, OP_UUU_D, H8, H8, DO_MIN) | ||
151 | +RVVCALL(OPIVX2, vmin_vx_b, OP_SSS_B, H1, H1, DO_MIN) | ||
152 | +RVVCALL(OPIVX2, vmin_vx_h, OP_SSS_H, H2, H2, DO_MIN) | ||
153 | +RVVCALL(OPIVX2, vmin_vx_w, OP_SSS_W, H4, H4, DO_MIN) | ||
154 | +RVVCALL(OPIVX2, vmin_vx_d, OP_SSS_D, H8, H8, DO_MIN) | ||
155 | +RVVCALL(OPIVX2, vmaxu_vx_b, OP_UUU_B, H1, H1, DO_MAX) | ||
156 | +RVVCALL(OPIVX2, vmaxu_vx_h, OP_UUU_H, H2, H2, DO_MAX) | ||
157 | +RVVCALL(OPIVX2, vmaxu_vx_w, OP_UUU_W, H4, H4, DO_MAX) | ||
158 | +RVVCALL(OPIVX2, vmaxu_vx_d, OP_UUU_D, H8, H8, DO_MAX) | ||
159 | +RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX) | ||
160 | +RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX) | ||
161 | +RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX) | ||
162 | +RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX) | ||
163 | +GEN_VEXT_VX(vminu_vx_b, 1, 1, clearb) | ||
164 | +GEN_VEXT_VX(vminu_vx_h, 2, 2, clearh) | ||
165 | +GEN_VEXT_VX(vminu_vx_w, 4, 4, clearl) | ||
166 | +GEN_VEXT_VX(vminu_vx_d, 8, 8, clearq) | ||
167 | +GEN_VEXT_VX(vmin_vx_b, 1, 1, clearb) | ||
168 | +GEN_VEXT_VX(vmin_vx_h, 2, 2, clearh) | ||
169 | +GEN_VEXT_VX(vmin_vx_w, 4, 4, clearl) | ||
170 | +GEN_VEXT_VX(vmin_vx_d, 8, 8, clearq) | ||
171 | +GEN_VEXT_VX(vmaxu_vx_b, 1, 1, clearb) | ||
172 | +GEN_VEXT_VX(vmaxu_vx_h, 2, 2, clearh) | ||
173 | +GEN_VEXT_VX(vmaxu_vx_w, 4, 4, clearl) | ||
174 | +GEN_VEXT_VX(vmaxu_vx_d, 8, 8, clearq) | ||
175 | +GEN_VEXT_VX(vmax_vx_b, 1, 1, clearb) | ||
176 | +GEN_VEXT_VX(vmax_vx_h, 2, 2, clearh) | ||
177 | +GEN_VEXT_VX(vmax_vx_w, 4, 4, clearl) | ||
178 | +GEN_VEXT_VX(vmax_vx_d, 8, 8, clearq) | ||
179 | -- | ||
180 | 2.27.0 | ||
181 | |||
182 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: LIU Zhiwei <zhiwei_liu@c-sky.com> | ||
2 | 1 | ||
3 | Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> | ||
4 | Reviewed-by: Alistair Francis <alistair.francis@wdc.com> | ||
5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | Message-id: 20200623215920.2594-19-zhiwei_liu@c-sky.com | ||
7 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | ||
8 | --- | ||
9 | target/riscv/helper.h | 33 +++++ | ||
10 | target/riscv/insn32.decode | 8 ++ | ||
11 | target/riscv/insn_trans/trans_rvv.inc.c | 10 ++ | ||
12 | target/riscv/vector_helper.c | 163 ++++++++++++++++++++++++ | ||
13 | 4 files changed, 214 insertions(+) | ||
14 | |||
15 | diff --git a/target/riscv/helper.h b/target/riscv/helper.h | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/target/riscv/helper.h | ||
18 | +++ b/target/riscv/helper.h | ||
19 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vmax_vx_b, void, ptr, ptr, tl, ptr, env, i32) | ||
20 | DEF_HELPER_6(vmax_vx_h, void, ptr, ptr, tl, ptr, env, i32) | ||
21 | DEF_HELPER_6(vmax_vx_w, void, ptr, ptr, tl, ptr, env, i32) | ||
22 | DEF_HELPER_6(vmax_vx_d, void, ptr, ptr, tl, ptr, env, i32) | ||
23 | + | ||
24 | +DEF_HELPER_6(vmul_vv_b, void, ptr, ptr, ptr, ptr, env, i32) | ||
25 | +DEF_HELPER_6(vmul_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | ||
26 | +DEF_HELPER_6(vmul_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
27 | +DEF_HELPER_6(vmul_vv_d, void, ptr, ptr, ptr, ptr, env, i32) | ||
28 | +DEF_HELPER_6(vmulh_vv_b, void, ptr, ptr, ptr, ptr, env, i32) | ||
29 | +DEF_HELPER_6(vmulh_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | ||
30 | +DEF_HELPER_6(vmulh_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
31 | +DEF_HELPER_6(vmulh_vv_d, void, ptr, ptr, ptr, ptr, env, i32) | ||
32 | +DEF_HELPER_6(vmulhu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) | ||
33 | +DEF_HELPER_6(vmulhu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | ||
34 | +DEF_HELPER_6(vmulhu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
35 | +DEF_HELPER_6(vmulhu_vv_d, void, ptr, ptr, ptr, ptr, env, i32) | ||
36 | +DEF_HELPER_6(vmulhsu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) | ||
37 | +DEF_HELPER_6(vmulhsu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | ||
38 | +DEF_HELPER_6(vmulhsu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
39 | +DEF_HELPER_6(vmulhsu_vv_d, void, ptr, ptr, ptr, ptr, env, i32) | ||
40 | +DEF_HELPER_6(vmul_vx_b, void, ptr, ptr, tl, ptr, env, i32) | ||
41 | +DEF_HELPER_6(vmul_vx_h, void, ptr, ptr, tl, ptr, env, i32) | ||
42 | +DEF_HELPER_6(vmul_vx_w, void, ptr, ptr, tl, ptr, env, i32) | ||
43 | +DEF_HELPER_6(vmul_vx_d, void, ptr, ptr, tl, ptr, env, i32) | ||
44 | +DEF_HELPER_6(vmulh_vx_b, void, ptr, ptr, tl, ptr, env, i32) | ||
45 | +DEF_HELPER_6(vmulh_vx_h, void, ptr, ptr, tl, ptr, env, i32) | ||
46 | +DEF_HELPER_6(vmulh_vx_w, void, ptr, ptr, tl, ptr, env, i32) | ||
47 | +DEF_HELPER_6(vmulh_vx_d, void, ptr, ptr, tl, ptr, env, i32) | ||
48 | +DEF_HELPER_6(vmulhu_vx_b, void, ptr, ptr, tl, ptr, env, i32) | ||
49 | +DEF_HELPER_6(vmulhu_vx_h, void, ptr, ptr, tl, ptr, env, i32) | ||
50 | +DEF_HELPER_6(vmulhu_vx_w, void, ptr, ptr, tl, ptr, env, i32) | ||
51 | +DEF_HELPER_6(vmulhu_vx_d, void, ptr, ptr, tl, ptr, env, i32) | ||
52 | +DEF_HELPER_6(vmulhsu_vx_b, void, ptr, ptr, tl, ptr, env, i32) | ||
53 | +DEF_HELPER_6(vmulhsu_vx_h, void, ptr, ptr, tl, ptr, env, i32) | ||
54 | +DEF_HELPER_6(vmulhsu_vx_w, void, ptr, ptr, tl, ptr, env, i32) | ||
55 | +DEF_HELPER_6(vmulhsu_vx_d, void, ptr, ptr, tl, ptr, env, i32) | ||
56 | diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode | ||
57 | index XXXXXXX..XXXXXXX 100644 | ||
58 | --- a/target/riscv/insn32.decode | ||
59 | +++ b/target/riscv/insn32.decode | ||
60 | @@ -XXX,XX +XXX,XX @@ vmaxu_vv 000110 . ..... ..... 000 ..... 1010111 @r_vm | ||
61 | vmaxu_vx 000110 . ..... ..... 100 ..... 1010111 @r_vm | ||
62 | vmax_vv 000111 . ..... ..... 000 ..... 1010111 @r_vm | ||
63 | vmax_vx 000111 . ..... ..... 100 ..... 1010111 @r_vm | ||
64 | +vmul_vv 100101 . ..... ..... 010 ..... 1010111 @r_vm | ||
65 | +vmul_vx 100101 . ..... ..... 110 ..... 1010111 @r_vm | ||
66 | +vmulh_vv 100111 . ..... ..... 010 ..... 1010111 @r_vm | ||
67 | +vmulh_vx 100111 . ..... ..... 110 ..... 1010111 @r_vm | ||
68 | +vmulhu_vv 100100 . ..... ..... 010 ..... 1010111 @r_vm | ||
69 | +vmulhu_vx 100100 . ..... ..... 110 ..... 1010111 @r_vm | ||
70 | +vmulhsu_vv 100110 . ..... ..... 010 ..... 1010111 @r_vm | ||
71 | +vmulhsu_vx 100110 . ..... ..... 110 ..... 1010111 @r_vm | ||
72 | |||
73 | vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm | ||
74 | vsetvl 1000000 ..... ..... 111 ..... 1010111 @r | ||
75 | diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c | ||
76 | index XXXXXXX..XXXXXXX 100644 | ||
77 | --- a/target/riscv/insn_trans/trans_rvv.inc.c | ||
78 | +++ b/target/riscv/insn_trans/trans_rvv.inc.c | ||
79 | @@ -XXX,XX +XXX,XX @@ GEN_OPIVX_TRANS(vminu_vx, opivx_check) | ||
80 | GEN_OPIVX_TRANS(vmin_vx, opivx_check) | ||
81 | GEN_OPIVX_TRANS(vmaxu_vx, opivx_check) | ||
82 | GEN_OPIVX_TRANS(vmax_vx, opivx_check) | ||
83 | + | ||
84 | +/* Vector Single-Width Integer Multiply Instructions */ | ||
85 | +GEN_OPIVV_GVEC_TRANS(vmul_vv, mul) | ||
86 | +GEN_OPIVV_TRANS(vmulh_vv, opivv_check) | ||
87 | +GEN_OPIVV_TRANS(vmulhu_vv, opivv_check) | ||
88 | +GEN_OPIVV_TRANS(vmulhsu_vv, opivv_check) | ||
89 | +GEN_OPIVX_GVEC_TRANS(vmul_vx, muls) | ||
90 | +GEN_OPIVX_TRANS(vmulh_vx, opivx_check) | ||
91 | +GEN_OPIVX_TRANS(vmulhu_vx, opivx_check) | ||
92 | +GEN_OPIVX_TRANS(vmulhsu_vx, opivx_check) | ||
93 | diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c | ||
94 | index XXXXXXX..XXXXXXX 100644 | ||
95 | --- a/target/riscv/vector_helper.c | ||
96 | +++ b/target/riscv/vector_helper.c | ||
97 | @@ -XXX,XX +XXX,XX @@ GEN_VEXT_AMO(vamomaxuw_v_w, uint32_t, uint32_t, idx_w, clearl) | ||
98 | #define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t | ||
99 | #define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t | ||
100 | #define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t | ||
101 | +#define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t | ||
102 | +#define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t | ||
103 | +#define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t | ||
104 | +#define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t | ||
105 | |||
106 | /* operation of two vector elements */ | ||
107 | typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i); | ||
108 | @@ -XXX,XX +XXX,XX @@ GEN_VEXT_VX(vmax_vx_b, 1, 1, clearb) | ||
109 | GEN_VEXT_VX(vmax_vx_h, 2, 2, clearh) | ||
110 | GEN_VEXT_VX(vmax_vx_w, 4, 4, clearl) | ||
111 | GEN_VEXT_VX(vmax_vx_d, 8, 8, clearq) | ||
112 | + | ||
113 | +/* Vector Single-Width Integer Multiply Instructions */ | ||
114 | +#define DO_MUL(N, M) (N * M) | ||
115 | +RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL) | ||
116 | +RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL) | ||
117 | +RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL) | ||
118 | +RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL) | ||
119 | +GEN_VEXT_VV(vmul_vv_b, 1, 1, clearb) | ||
120 | +GEN_VEXT_VV(vmul_vv_h, 2, 2, clearh) | ||
121 | +GEN_VEXT_VV(vmul_vv_w, 4, 4, clearl) | ||
122 | +GEN_VEXT_VV(vmul_vv_d, 8, 8, clearq) | ||
123 | + | ||
124 | +static int8_t do_mulh_b(int8_t s2, int8_t s1) | ||
125 | +{ | ||
126 | + return (int16_t)s2 * (int16_t)s1 >> 8; | ||
127 | +} | ||
128 | + | ||
129 | +static int16_t do_mulh_h(int16_t s2, int16_t s1) | ||
130 | +{ | ||
131 | + return (int32_t)s2 * (int32_t)s1 >> 16; | ||
132 | +} | ||
133 | + | ||
134 | +static int32_t do_mulh_w(int32_t s2, int32_t s1) | ||
135 | +{ | ||
136 | + return (int64_t)s2 * (int64_t)s1 >> 32; | ||
137 | +} | ||
138 | + | ||
139 | +static int64_t do_mulh_d(int64_t s2, int64_t s1) | ||
140 | +{ | ||
141 | + uint64_t hi_64, lo_64; | ||
142 | + | ||
143 | + muls64(&lo_64, &hi_64, s1, s2); | ||
144 | + return hi_64; | ||
145 | +} | ||
146 | + | ||
147 | +static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1) | ||
148 | +{ | ||
149 | + return (uint16_t)s2 * (uint16_t)s1 >> 8; | ||
150 | +} | ||
151 | + | ||
152 | +static uint16_t do_mulhu_h(uint16_t s2, uint16_t s1) | ||
153 | +{ | ||
154 | + return (uint32_t)s2 * (uint32_t)s1 >> 16; | ||
155 | +} | ||
156 | + | ||
157 | +static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1) | ||
158 | +{ | ||
159 | + return (uint64_t)s2 * (uint64_t)s1 >> 32; | ||
160 | +} | ||
161 | + | ||
162 | +static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1) | ||
163 | +{ | ||
164 | + uint64_t hi_64, lo_64; | ||
165 | + | ||
166 | + mulu64(&lo_64, &hi_64, s2, s1); | ||
167 | + return hi_64; | ||
168 | +} | ||
169 | + | ||
170 | +static int8_t do_mulhsu_b(int8_t s2, uint8_t s1) | ||
171 | +{ | ||
172 | + return (int16_t)s2 * (uint16_t)s1 >> 8; | ||
173 | +} | ||
174 | + | ||
175 | +static int16_t do_mulhsu_h(int16_t s2, uint16_t s1) | ||
176 | +{ | ||
177 | + return (int32_t)s2 * (uint32_t)s1 >> 16; | ||
178 | +} | ||
179 | + | ||
180 | +static int32_t do_mulhsu_w(int32_t s2, uint32_t s1) | ||
181 | +{ | ||
182 | + return (int64_t)s2 * (uint64_t)s1 >> 32; | ||
183 | +} | ||
184 | + | ||
185 | +/* | ||
186 | + * Let A = signed operand, | ||
187 | + * B = unsigned operand | ||
188 | + * P = mulu64(A, B), unsigned product | ||
189 | + * | ||
190 | + * LET X = 2 ** 64 - A, 2's complement of A | ||
191 | + * SP = signed product | ||
192 | + * THEN | ||
193 | + * IF A < 0 | ||
194 | + * SP = -X * B | ||
195 | + * = -(2 ** 64 - A) * B | ||
196 | + * = A * B - 2 ** 64 * B | ||
197 | + * = P - 2 ** 64 * B | ||
198 | + * ELSE | ||
199 | + * SP = P | ||
200 | + * THEN | ||
201 | + * HI_P -= (A < 0 ? B : 0) | ||
202 | + */ | ||
203 | + | ||
204 | +static int64_t do_mulhsu_d(int64_t s2, uint64_t s1) | ||
205 | +{ | ||
206 | + uint64_t hi_64, lo_64; | ||
207 | + | ||
208 | + mulu64(&lo_64, &hi_64, s2, s1); | ||
209 | + | ||
210 | + hi_64 -= s2 < 0 ? s1 : 0; | ||
211 | + return hi_64; | ||
212 | +} | ||
213 | + | ||
214 | +RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b) | ||
215 | +RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h) | ||
216 | +RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w) | ||
217 | +RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d) | ||
218 | +RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b) | ||
219 | +RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h) | ||
220 | +RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w) | ||
221 | +RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d) | ||
222 | +RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b) | ||
223 | +RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h) | ||
224 | +RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w) | ||
225 | +RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d) | ||
226 | +GEN_VEXT_VV(vmulh_vv_b, 1, 1, clearb) | ||
227 | +GEN_VEXT_VV(vmulh_vv_h, 2, 2, clearh) | ||
228 | +GEN_VEXT_VV(vmulh_vv_w, 4, 4, clearl) | ||
229 | +GEN_VEXT_VV(vmulh_vv_d, 8, 8, clearq) | ||
230 | +GEN_VEXT_VV(vmulhu_vv_b, 1, 1, clearb) | ||
231 | +GEN_VEXT_VV(vmulhu_vv_h, 2, 2, clearh) | ||
232 | +GEN_VEXT_VV(vmulhu_vv_w, 4, 4, clearl) | ||
233 | +GEN_VEXT_VV(vmulhu_vv_d, 8, 8, clearq) | ||
234 | +GEN_VEXT_VV(vmulhsu_vv_b, 1, 1, clearb) | ||
235 | +GEN_VEXT_VV(vmulhsu_vv_h, 2, 2, clearh) | ||
236 | +GEN_VEXT_VV(vmulhsu_vv_w, 4, 4, clearl) | ||
237 | +GEN_VEXT_VV(vmulhsu_vv_d, 8, 8, clearq) | ||
238 | + | ||
239 | +RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL) | ||
240 | +RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL) | ||
241 | +RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL) | ||
242 | +RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL) | ||
243 | +RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b) | ||
244 | +RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h) | ||
245 | +RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, H4, H4, do_mulh_w) | ||
246 | +RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d) | ||
247 | +RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b) | ||
248 | +RVVCALL(OPIVX2, vmulhu_vx_h, OP_UUU_H, H2, H2, do_mulhu_h) | ||
249 | +RVVCALL(OPIVX2, vmulhu_vx_w, OP_UUU_W, H4, H4, do_mulhu_w) | ||
250 | +RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d) | ||
251 | +RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b) | ||
252 | +RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h) | ||
253 | +RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w) | ||
254 | +RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d) | ||
255 | +GEN_VEXT_VX(vmul_vx_b, 1, 1, clearb) | ||
256 | +GEN_VEXT_VX(vmul_vx_h, 2, 2, clearh) | ||
257 | +GEN_VEXT_VX(vmul_vx_w, 4, 4, clearl) | ||
258 | +GEN_VEXT_VX(vmul_vx_d, 8, 8, clearq) | ||
259 | +GEN_VEXT_VX(vmulh_vx_b, 1, 1, clearb) | ||
260 | +GEN_VEXT_VX(vmulh_vx_h, 2, 2, clearh) | ||
261 | +GEN_VEXT_VX(vmulh_vx_w, 4, 4, clearl) | ||
262 | +GEN_VEXT_VX(vmulh_vx_d, 8, 8, clearq) | ||
263 | +GEN_VEXT_VX(vmulhu_vx_b, 1, 1, clearb) | ||
264 | +GEN_VEXT_VX(vmulhu_vx_h, 2, 2, clearh) | ||
265 | +GEN_VEXT_VX(vmulhu_vx_w, 4, 4, clearl) | ||
266 | +GEN_VEXT_VX(vmulhu_vx_d, 8, 8, clearq) | ||
267 | +GEN_VEXT_VX(vmulhsu_vx_b, 1, 1, clearb) | ||
268 | +GEN_VEXT_VX(vmulhsu_vx_h, 2, 2, clearh) | ||
269 | +GEN_VEXT_VX(vmulhsu_vx_w, 4, 4, clearl) | ||
270 | +GEN_VEXT_VX(vmulhsu_vx_d, 8, 8, clearq) | ||
271 | -- | ||
272 | 2.27.0 | ||
273 | |||
274 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: LIU Zhiwei <zhiwei_liu@c-sky.com> | ||
2 | 1 | ||
3 | Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> | ||
4 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
5 | Reviewed-by: Alistair Francis <alistair.francis@wdc.com> | ||
6 | Message-id: 20200623215920.2594-20-zhiwei_liu@c-sky.com | ||
7 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | ||
8 | --- | ||
9 | target/riscv/helper.h | 33 +++++++++++ | ||
10 | target/riscv/insn32.decode | 8 +++ | ||
11 | target/riscv/insn_trans/trans_rvv.inc.c | 10 ++++ | ||
12 | target/riscv/vector_helper.c | 74 +++++++++++++++++++++++++ | ||
13 | 4 files changed, 125 insertions(+) | ||
14 | |||
15 | diff --git a/target/riscv/helper.h b/target/riscv/helper.h | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/target/riscv/helper.h | ||
18 | +++ b/target/riscv/helper.h | ||
19 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vmulhsu_vx_b, void, ptr, ptr, tl, ptr, env, i32) | ||
20 | DEF_HELPER_6(vmulhsu_vx_h, void, ptr, ptr, tl, ptr, env, i32) | ||
21 | DEF_HELPER_6(vmulhsu_vx_w, void, ptr, ptr, tl, ptr, env, i32) | ||
22 | DEF_HELPER_6(vmulhsu_vx_d, void, ptr, ptr, tl, ptr, env, i32) | ||
23 | + | ||
24 | +DEF_HELPER_6(vdivu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) | ||
25 | +DEF_HELPER_6(vdivu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | ||
26 | +DEF_HELPER_6(vdivu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
27 | +DEF_HELPER_6(vdivu_vv_d, void, ptr, ptr, ptr, ptr, env, i32) | ||
28 | +DEF_HELPER_6(vdiv_vv_b, void, ptr, ptr, ptr, ptr, env, i32) | ||
29 | +DEF_HELPER_6(vdiv_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | ||
30 | +DEF_HELPER_6(vdiv_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
31 | +DEF_HELPER_6(vdiv_vv_d, void, ptr, ptr, ptr, ptr, env, i32) | ||
32 | +DEF_HELPER_6(vremu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) | ||
33 | +DEF_HELPER_6(vremu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | ||
34 | +DEF_HELPER_6(vremu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
35 | +DEF_HELPER_6(vremu_vv_d, void, ptr, ptr, ptr, ptr, env, i32) | ||
36 | +DEF_HELPER_6(vrem_vv_b, void, ptr, ptr, ptr, ptr, env, i32) | ||
37 | +DEF_HELPER_6(vrem_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | ||
38 | +DEF_HELPER_6(vrem_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
39 | +DEF_HELPER_6(vrem_vv_d, void, ptr, ptr, ptr, ptr, env, i32) | ||
40 | +DEF_HELPER_6(vdivu_vx_b, void, ptr, ptr, tl, ptr, env, i32) | ||
41 | +DEF_HELPER_6(vdivu_vx_h, void, ptr, ptr, tl, ptr, env, i32) | ||
42 | +DEF_HELPER_6(vdivu_vx_w, void, ptr, ptr, tl, ptr, env, i32) | ||
43 | +DEF_HELPER_6(vdivu_vx_d, void, ptr, ptr, tl, ptr, env, i32) | ||
44 | +DEF_HELPER_6(vdiv_vx_b, void, ptr, ptr, tl, ptr, env, i32) | ||
45 | +DEF_HELPER_6(vdiv_vx_h, void, ptr, ptr, tl, ptr, env, i32) | ||
46 | +DEF_HELPER_6(vdiv_vx_w, void, ptr, ptr, tl, ptr, env, i32) | ||
47 | +DEF_HELPER_6(vdiv_vx_d, void, ptr, ptr, tl, ptr, env, i32) | ||
48 | +DEF_HELPER_6(vremu_vx_b, void, ptr, ptr, tl, ptr, env, i32) | ||
49 | +DEF_HELPER_6(vremu_vx_h, void, ptr, ptr, tl, ptr, env, i32) | ||
50 | +DEF_HELPER_6(vremu_vx_w, void, ptr, ptr, tl, ptr, env, i32) | ||
51 | +DEF_HELPER_6(vremu_vx_d, void, ptr, ptr, tl, ptr, env, i32) | ||
52 | +DEF_HELPER_6(vrem_vx_b, void, ptr, ptr, tl, ptr, env, i32) | ||
53 | +DEF_HELPER_6(vrem_vx_h, void, ptr, ptr, tl, ptr, env, i32) | ||
54 | +DEF_HELPER_6(vrem_vx_w, void, ptr, ptr, tl, ptr, env, i32) | ||
55 | +DEF_HELPER_6(vrem_vx_d, void, ptr, ptr, tl, ptr, env, i32) | ||
56 | diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode | ||
57 | index XXXXXXX..XXXXXXX 100644 | ||
58 | --- a/target/riscv/insn32.decode | ||
59 | +++ b/target/riscv/insn32.decode | ||
60 | @@ -XXX,XX +XXX,XX @@ vmulhu_vv 100100 . ..... ..... 010 ..... 1010111 @r_vm | ||
61 | vmulhu_vx 100100 . ..... ..... 110 ..... 1010111 @r_vm | ||
62 | vmulhsu_vv 100110 . ..... ..... 010 ..... 1010111 @r_vm | ||
63 | vmulhsu_vx 100110 . ..... ..... 110 ..... 1010111 @r_vm | ||
64 | +vdivu_vv 100000 . ..... ..... 010 ..... 1010111 @r_vm | ||
65 | +vdivu_vx 100000 . ..... ..... 110 ..... 1010111 @r_vm | ||
66 | +vdiv_vv 100001 . ..... ..... 010 ..... 1010111 @r_vm | ||
67 | +vdiv_vx 100001 . ..... ..... 110 ..... 1010111 @r_vm | ||
68 | +vremu_vv 100010 . ..... ..... 010 ..... 1010111 @r_vm | ||
69 | +vremu_vx 100010 . ..... ..... 110 ..... 1010111 @r_vm | ||
70 | +vrem_vv 100011 . ..... ..... 010 ..... 1010111 @r_vm | ||
71 | +vrem_vx 100011 . ..... ..... 110 ..... 1010111 @r_vm | ||
72 | |||
73 | vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm | ||
74 | vsetvl 1000000 ..... ..... 111 ..... 1010111 @r | ||
75 | diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c | ||
76 | index XXXXXXX..XXXXXXX 100644 | ||
77 | --- a/target/riscv/insn_trans/trans_rvv.inc.c | ||
78 | +++ b/target/riscv/insn_trans/trans_rvv.inc.c | ||
79 | @@ -XXX,XX +XXX,XX @@ GEN_OPIVX_GVEC_TRANS(vmul_vx, muls) | ||
80 | GEN_OPIVX_TRANS(vmulh_vx, opivx_check) | ||
81 | GEN_OPIVX_TRANS(vmulhu_vx, opivx_check) | ||
82 | GEN_OPIVX_TRANS(vmulhsu_vx, opivx_check) | ||
83 | + | ||
84 | +/* Vector Integer Divide Instructions */ | ||
85 | +GEN_OPIVV_TRANS(vdivu_vv, opivv_check) | ||
86 | +GEN_OPIVV_TRANS(vdiv_vv, opivv_check) | ||
87 | +GEN_OPIVV_TRANS(vremu_vv, opivv_check) | ||
88 | +GEN_OPIVV_TRANS(vrem_vv, opivv_check) | ||
89 | +GEN_OPIVX_TRANS(vdivu_vx, opivx_check) | ||
90 | +GEN_OPIVX_TRANS(vdiv_vx, opivx_check) | ||
91 | +GEN_OPIVX_TRANS(vremu_vx, opivx_check) | ||
92 | +GEN_OPIVX_TRANS(vrem_vx, opivx_check) | ||
93 | diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c | ||
94 | index XXXXXXX..XXXXXXX 100644 | ||
95 | --- a/target/riscv/vector_helper.c | ||
96 | +++ b/target/riscv/vector_helper.c | ||
97 | @@ -XXX,XX +XXX,XX @@ GEN_VEXT_VX(vmulhsu_vx_b, 1, 1, clearb) | ||
98 | GEN_VEXT_VX(vmulhsu_vx_h, 2, 2, clearh) | ||
99 | GEN_VEXT_VX(vmulhsu_vx_w, 4, 4, clearl) | ||
100 | GEN_VEXT_VX(vmulhsu_vx_d, 8, 8, clearq) | ||
101 | + | ||
102 | +/* Vector Integer Divide Instructions */ | ||
103 | +#define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M) | ||
104 | +#define DO_REMU(N, M) (unlikely(M == 0) ? N : N % M) | ||
105 | +#define DO_DIV(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) :\ | ||
106 | + unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M) | ||
107 | +#define DO_REM(N, M) (unlikely(M == 0) ? N :\ | ||
108 | + unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 0 : N % M) | ||
109 | + | ||
110 | +RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU) | ||
111 | +RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU) | ||
112 | +RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU) | ||
113 | +RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU) | ||
114 | +RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV) | ||
115 | +RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV) | ||
116 | +RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV) | ||
117 | +RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV) | ||
118 | +RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU) | ||
119 | +RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU) | ||
120 | +RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU) | ||
121 | +RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU) | ||
122 | +RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM) | ||
123 | +RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM) | ||
124 | +RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM) | ||
125 | +RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM) | ||
126 | +GEN_VEXT_VV(vdivu_vv_b, 1, 1, clearb) | ||
127 | +GEN_VEXT_VV(vdivu_vv_h, 2, 2, clearh) | ||
128 | +GEN_VEXT_VV(vdivu_vv_w, 4, 4, clearl) | ||
129 | +GEN_VEXT_VV(vdivu_vv_d, 8, 8, clearq) | ||
130 | +GEN_VEXT_VV(vdiv_vv_b, 1, 1, clearb) | ||
131 | +GEN_VEXT_VV(vdiv_vv_h, 2, 2, clearh) | ||
132 | +GEN_VEXT_VV(vdiv_vv_w, 4, 4, clearl) | ||
133 | +GEN_VEXT_VV(vdiv_vv_d, 8, 8, clearq) | ||
134 | +GEN_VEXT_VV(vremu_vv_b, 1, 1, clearb) | ||
135 | +GEN_VEXT_VV(vremu_vv_h, 2, 2, clearh) | ||
136 | +GEN_VEXT_VV(vremu_vv_w, 4, 4, clearl) | ||
137 | +GEN_VEXT_VV(vremu_vv_d, 8, 8, clearq) | ||
138 | +GEN_VEXT_VV(vrem_vv_b, 1, 1, clearb) | ||
139 | +GEN_VEXT_VV(vrem_vv_h, 2, 2, clearh) | ||
140 | +GEN_VEXT_VV(vrem_vv_w, 4, 4, clearl) | ||
141 | +GEN_VEXT_VV(vrem_vv_d, 8, 8, clearq) | ||
142 | + | ||
143 | +RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU) | ||
144 | +RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU) | ||
145 | +RVVCALL(OPIVX2, vdivu_vx_w, OP_UUU_W, H4, H4, DO_DIVU) | ||
146 | +RVVCALL(OPIVX2, vdivu_vx_d, OP_UUU_D, H8, H8, DO_DIVU) | ||
147 | +RVVCALL(OPIVX2, vdiv_vx_b, OP_SSS_B, H1, H1, DO_DIV) | ||
148 | +RVVCALL(OPIVX2, vdiv_vx_h, OP_SSS_H, H2, H2, DO_DIV) | ||
149 | +RVVCALL(OPIVX2, vdiv_vx_w, OP_SSS_W, H4, H4, DO_DIV) | ||
150 | +RVVCALL(OPIVX2, vdiv_vx_d, OP_SSS_D, H8, H8, DO_DIV) | ||
151 | +RVVCALL(OPIVX2, vremu_vx_b, OP_UUU_B, H1, H1, DO_REMU) | ||
152 | +RVVCALL(OPIVX2, vremu_vx_h, OP_UUU_H, H2, H2, DO_REMU) | ||
153 | +RVVCALL(OPIVX2, vremu_vx_w, OP_UUU_W, H4, H4, DO_REMU) | ||
154 | +RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU) | ||
155 | +RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM) | ||
156 | +RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM) | ||
157 | +RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM) | ||
158 | +RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM) | ||
159 | +GEN_VEXT_VX(vdivu_vx_b, 1, 1, clearb) | ||
160 | +GEN_VEXT_VX(vdivu_vx_h, 2, 2, clearh) | ||
161 | +GEN_VEXT_VX(vdivu_vx_w, 4, 4, clearl) | ||
162 | +GEN_VEXT_VX(vdivu_vx_d, 8, 8, clearq) | ||
163 | +GEN_VEXT_VX(vdiv_vx_b, 1, 1, clearb) | ||
164 | +GEN_VEXT_VX(vdiv_vx_h, 2, 2, clearh) | ||
165 | +GEN_VEXT_VX(vdiv_vx_w, 4, 4, clearl) | ||
166 | +GEN_VEXT_VX(vdiv_vx_d, 8, 8, clearq) | ||
167 | +GEN_VEXT_VX(vremu_vx_b, 1, 1, clearb) | ||
168 | +GEN_VEXT_VX(vremu_vx_h, 2, 2, clearh) | ||
169 | +GEN_VEXT_VX(vremu_vx_w, 4, 4, clearl) | ||
170 | +GEN_VEXT_VX(vremu_vx_d, 8, 8, clearq) | ||
171 | +GEN_VEXT_VX(vrem_vx_b, 1, 1, clearb) | ||
172 | +GEN_VEXT_VX(vrem_vx_h, 2, 2, clearh) | ||
173 | +GEN_VEXT_VX(vrem_vx_w, 4, 4, clearl) | ||
174 | +GEN_VEXT_VX(vrem_vx_d, 8, 8, clearq) | ||
175 | -- | ||
176 | 2.27.0 | ||
177 | |||
178 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: LIU Zhiwei <zhiwei_liu@c-sky.com> | ||
2 | 1 | ||
3 | Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> | ||
4 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
5 | Reviewed-by: Alistair Francis <alistair.francis@wdc.com> | ||
6 | Message-id: 20200623215920.2594-21-zhiwei_liu@c-sky.com | ||
7 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | ||
8 | --- | ||
9 | target/riscv/helper.h | 19 +++++++++ | ||
10 | target/riscv/insn32.decode | 6 +++ | ||
11 | target/riscv/insn_trans/trans_rvv.inc.c | 8 ++++ | ||
12 | target/riscv/vector_helper.c | 51 +++++++++++++++++++++++++ | ||
13 | 4 files changed, 84 insertions(+) | ||
14 | |||
15 | diff --git a/target/riscv/helper.h b/target/riscv/helper.h | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/target/riscv/helper.h | ||
18 | +++ b/target/riscv/helper.h | ||
19 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vrem_vx_b, void, ptr, ptr, tl, ptr, env, i32) | ||
20 | DEF_HELPER_6(vrem_vx_h, void, ptr, ptr, tl, ptr, env, i32) | ||
21 | DEF_HELPER_6(vrem_vx_w, void, ptr, ptr, tl, ptr, env, i32) | ||
22 | DEF_HELPER_6(vrem_vx_d, void, ptr, ptr, tl, ptr, env, i32) | ||
23 | + | ||
24 | +DEF_HELPER_6(vwmul_vv_b, void, ptr, ptr, ptr, ptr, env, i32) | ||
25 | +DEF_HELPER_6(vwmul_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | ||
26 | +DEF_HELPER_6(vwmul_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
27 | +DEF_HELPER_6(vwmulu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) | ||
28 | +DEF_HELPER_6(vwmulu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | ||
29 | +DEF_HELPER_6(vwmulu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
30 | +DEF_HELPER_6(vwmulsu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) | ||
31 | +DEF_HELPER_6(vwmulsu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | ||
32 | +DEF_HELPER_6(vwmulsu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
33 | +DEF_HELPER_6(vwmul_vx_b, void, ptr, ptr, tl, ptr, env, i32) | ||
34 | +DEF_HELPER_6(vwmul_vx_h, void, ptr, ptr, tl, ptr, env, i32) | ||
35 | +DEF_HELPER_6(vwmul_vx_w, void, ptr, ptr, tl, ptr, env, i32) | ||
36 | +DEF_HELPER_6(vwmulu_vx_b, void, ptr, ptr, tl, ptr, env, i32) | ||
37 | +DEF_HELPER_6(vwmulu_vx_h, void, ptr, ptr, tl, ptr, env, i32) | ||
38 | +DEF_HELPER_6(vwmulu_vx_w, void, ptr, ptr, tl, ptr, env, i32) | ||
39 | +DEF_HELPER_6(vwmulsu_vx_b, void, ptr, ptr, tl, ptr, env, i32) | ||
40 | +DEF_HELPER_6(vwmulsu_vx_h, void, ptr, ptr, tl, ptr, env, i32) | ||
41 | +DEF_HELPER_6(vwmulsu_vx_w, void, ptr, ptr, tl, ptr, env, i32) | ||
42 | diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode | ||
43 | index XXXXXXX..XXXXXXX 100644 | ||
44 | --- a/target/riscv/insn32.decode | ||
45 | +++ b/target/riscv/insn32.decode | ||
46 | @@ -XXX,XX +XXX,XX @@ vremu_vv 100010 . ..... ..... 010 ..... 1010111 @r_vm | ||
47 | vremu_vx 100010 . ..... ..... 110 ..... 1010111 @r_vm | ||
48 | vrem_vv 100011 . ..... ..... 010 ..... 1010111 @r_vm | ||
49 | vrem_vx 100011 . ..... ..... 110 ..... 1010111 @r_vm | ||
50 | +vwmulu_vv 111000 . ..... ..... 010 ..... 1010111 @r_vm | ||
51 | +vwmulu_vx 111000 . ..... ..... 110 ..... 1010111 @r_vm | ||
52 | +vwmulsu_vv 111010 . ..... ..... 010 ..... 1010111 @r_vm | ||
53 | +vwmulsu_vx 111010 . ..... ..... 110 ..... 1010111 @r_vm | ||
54 | +vwmul_vv 111011 . ..... ..... 010 ..... 1010111 @r_vm | ||
55 | +vwmul_vx 111011 . ..... ..... 110 ..... 1010111 @r_vm | ||
56 | |||
57 | vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm | ||
58 | vsetvl 1000000 ..... ..... 111 ..... 1010111 @r | ||
59 | diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c | ||
60 | index XXXXXXX..XXXXXXX 100644 | ||
61 | --- a/target/riscv/insn_trans/trans_rvv.inc.c | ||
62 | +++ b/target/riscv/insn_trans/trans_rvv.inc.c | ||
63 | @@ -XXX,XX +XXX,XX @@ GEN_OPIVX_TRANS(vdivu_vx, opivx_check) | ||
64 | GEN_OPIVX_TRANS(vdiv_vx, opivx_check) | ||
65 | GEN_OPIVX_TRANS(vremu_vx, opivx_check) | ||
66 | GEN_OPIVX_TRANS(vrem_vx, opivx_check) | ||
67 | + | ||
68 | +/* Vector Widening Integer Multiply Instructions */ | ||
69 | +GEN_OPIVV_WIDEN_TRANS(vwmul_vv, opivv_widen_check) | ||
70 | +GEN_OPIVV_WIDEN_TRANS(vwmulu_vv, opivv_widen_check) | ||
71 | +GEN_OPIVV_WIDEN_TRANS(vwmulsu_vv, opivv_widen_check) | ||
72 | +GEN_OPIVX_WIDEN_TRANS(vwmul_vx) | ||
73 | +GEN_OPIVX_WIDEN_TRANS(vwmulu_vx) | ||
74 | +GEN_OPIVX_WIDEN_TRANS(vwmulsu_vx) | ||
75 | diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c | ||
76 | index XXXXXXX..XXXXXXX 100644 | ||
77 | --- a/target/riscv/vector_helper.c | ||
78 | +++ b/target/riscv/vector_helper.c | ||
79 | @@ -XXX,XX +XXX,XX @@ GEN_VEXT_AMO(vamomaxuw_v_w, uint32_t, uint32_t, idx_w, clearl) | ||
80 | #define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t | ||
81 | #define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t | ||
82 | #define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t | ||
83 | +#define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t | ||
84 | +#define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t | ||
85 | +#define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t | ||
86 | +#define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t | ||
87 | +#define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t | ||
88 | +#define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t | ||
89 | +#define WOP_SUS_B int16_t, uint8_t, int8_t, uint16_t, int16_t | ||
90 | +#define WOP_SUS_H int32_t, uint16_t, int16_t, uint32_t, int32_t | ||
91 | +#define WOP_SUS_W int64_t, uint32_t, int32_t, uint64_t, int64_t | ||
92 | +#define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t | ||
93 | +#define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t | ||
94 | +#define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t | ||
95 | |||
96 | /* operation of two vector elements */ | ||
97 | typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i); | ||
98 | @@ -XXX,XX +XXX,XX @@ GEN_VEXT_VX(vrem_vx_b, 1, 1, clearb) | ||
99 | GEN_VEXT_VX(vrem_vx_h, 2, 2, clearh) | ||
100 | GEN_VEXT_VX(vrem_vx_w, 4, 4, clearl) | ||
101 | GEN_VEXT_VX(vrem_vx_d, 8, 8, clearq) | ||
102 | + | ||
103 | +/* Vector Widening Integer Multiply Instructions */ | ||
104 | +RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL) | ||
105 | +RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, DO_MUL) | ||
106 | +RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, DO_MUL) | ||
107 | +RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL) | ||
108 | +RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL) | ||
109 | +RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL) | ||
110 | +RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL) | ||
111 | +RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL) | ||
112 | +RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL) | ||
113 | +GEN_VEXT_VV(vwmul_vv_b, 1, 2, clearh) | ||
114 | +GEN_VEXT_VV(vwmul_vv_h, 2, 4, clearl) | ||
115 | +GEN_VEXT_VV(vwmul_vv_w, 4, 8, clearq) | ||
116 | +GEN_VEXT_VV(vwmulu_vv_b, 1, 2, clearh) | ||
117 | +GEN_VEXT_VV(vwmulu_vv_h, 2, 4, clearl) | ||
118 | +GEN_VEXT_VV(vwmulu_vv_w, 4, 8, clearq) | ||
119 | +GEN_VEXT_VV(vwmulsu_vv_b, 1, 2, clearh) | ||
120 | +GEN_VEXT_VV(vwmulsu_vv_h, 2, 4, clearl) | ||
121 | +GEN_VEXT_VV(vwmulsu_vv_w, 4, 8, clearq) | ||
122 | + | ||
123 | +RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL) | ||
124 | +RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL) | ||
125 | +RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL) | ||
126 | +RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL) | ||
127 | +RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL) | ||
128 | +RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL) | ||
129 | +RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL) | ||
130 | +RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL) | ||
131 | +RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL) | ||
132 | +GEN_VEXT_VX(vwmul_vx_b, 1, 2, clearh) | ||
133 | +GEN_VEXT_VX(vwmul_vx_h, 2, 4, clearl) | ||
134 | +GEN_VEXT_VX(vwmul_vx_w, 4, 8, clearq) | ||
135 | +GEN_VEXT_VX(vwmulu_vx_b, 1, 2, clearh) | ||
136 | +GEN_VEXT_VX(vwmulu_vx_h, 2, 4, clearl) | ||
137 | +GEN_VEXT_VX(vwmulu_vx_w, 4, 8, clearq) | ||
138 | +GEN_VEXT_VX(vwmulsu_vx_b, 1, 2, clearh) | ||
139 | +GEN_VEXT_VX(vwmulsu_vx_h, 2, 4, clearl) | ||
140 | +GEN_VEXT_VX(vwmulsu_vx_w, 4, 8, clearq) | ||
141 | -- | ||
142 | 2.27.0 | ||
143 | |||
144 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: LIU Zhiwei <zhiwei_liu@c-sky.com> | ||
2 | 1 | ||
3 | Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> | ||
4 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
5 | Reviewed-by: Alistair Francis <alistair.francis@wdc.com> | ||
6 | Message-id: 20200623215920.2594-22-zhiwei_liu@c-sky.com | ||
7 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | ||
8 | --- | ||
9 | target/riscv/helper.h | 33 ++++++++++ | ||
10 | target/riscv/insn32.decode | 8 +++ | ||
11 | target/riscv/insn_trans/trans_rvv.inc.c | 10 +++ | ||
12 | target/riscv/vector_helper.c | 88 +++++++++++++++++++++++++ | ||
13 | 4 files changed, 139 insertions(+) | ||
14 | |||
15 | diff --git a/target/riscv/helper.h b/target/riscv/helper.h | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/target/riscv/helper.h | ||
18 | +++ b/target/riscv/helper.h | ||
19 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vwmulu_vx_w, void, ptr, ptr, tl, ptr, env, i32) | ||
20 | DEF_HELPER_6(vwmulsu_vx_b, void, ptr, ptr, tl, ptr, env, i32) | ||
21 | DEF_HELPER_6(vwmulsu_vx_h, void, ptr, ptr, tl, ptr, env, i32) | ||
22 | DEF_HELPER_6(vwmulsu_vx_w, void, ptr, ptr, tl, ptr, env, i32) | ||
23 | + | ||
24 | +DEF_HELPER_6(vmacc_vv_b, void, ptr, ptr, ptr, ptr, env, i32) | ||
25 | +DEF_HELPER_6(vmacc_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | ||
26 | +DEF_HELPER_6(vmacc_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
27 | +DEF_HELPER_6(vmacc_vv_d, void, ptr, ptr, ptr, ptr, env, i32) | ||
28 | +DEF_HELPER_6(vnmsac_vv_b, void, ptr, ptr, ptr, ptr, env, i32) | ||
29 | +DEF_HELPER_6(vnmsac_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | ||
30 | +DEF_HELPER_6(vnmsac_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
31 | +DEF_HELPER_6(vnmsac_vv_d, void, ptr, ptr, ptr, ptr, env, i32) | ||
32 | +DEF_HELPER_6(vmadd_vv_b, void, ptr, ptr, ptr, ptr, env, i32) | ||
33 | +DEF_HELPER_6(vmadd_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | ||
34 | +DEF_HELPER_6(vmadd_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
35 | +DEF_HELPER_6(vmadd_vv_d, void, ptr, ptr, ptr, ptr, env, i32) | ||
36 | +DEF_HELPER_6(vnmsub_vv_b, void, ptr, ptr, ptr, ptr, env, i32) | ||
37 | +DEF_HELPER_6(vnmsub_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | ||
38 | +DEF_HELPER_6(vnmsub_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
39 | +DEF_HELPER_6(vnmsub_vv_d, void, ptr, ptr, ptr, ptr, env, i32) | ||
40 | +DEF_HELPER_6(vmacc_vx_b, void, ptr, ptr, tl, ptr, env, i32) | ||
41 | +DEF_HELPER_6(vmacc_vx_h, void, ptr, ptr, tl, ptr, env, i32) | ||
42 | +DEF_HELPER_6(vmacc_vx_w, void, ptr, ptr, tl, ptr, env, i32) | ||
43 | +DEF_HELPER_6(vmacc_vx_d, void, ptr, ptr, tl, ptr, env, i32) | ||
44 | +DEF_HELPER_6(vnmsac_vx_b, void, ptr, ptr, tl, ptr, env, i32) | ||
45 | +DEF_HELPER_6(vnmsac_vx_h, void, ptr, ptr, tl, ptr, env, i32) | ||
46 | +DEF_HELPER_6(vnmsac_vx_w, void, ptr, ptr, tl, ptr, env, i32) | ||
47 | +DEF_HELPER_6(vnmsac_vx_d, void, ptr, ptr, tl, ptr, env, i32) | ||
48 | +DEF_HELPER_6(vmadd_vx_b, void, ptr, ptr, tl, ptr, env, i32) | ||
49 | +DEF_HELPER_6(vmadd_vx_h, void, ptr, ptr, tl, ptr, env, i32) | ||
50 | +DEF_HELPER_6(vmadd_vx_w, void, ptr, ptr, tl, ptr, env, i32) | ||
51 | +DEF_HELPER_6(vmadd_vx_d, void, ptr, ptr, tl, ptr, env, i32) | ||
52 | +DEF_HELPER_6(vnmsub_vx_b, void, ptr, ptr, tl, ptr, env, i32) | ||
53 | +DEF_HELPER_6(vnmsub_vx_h, void, ptr, ptr, tl, ptr, env, i32) | ||
54 | +DEF_HELPER_6(vnmsub_vx_w, void, ptr, ptr, tl, ptr, env, i32) | ||
55 | +DEF_HELPER_6(vnmsub_vx_d, void, ptr, ptr, tl, ptr, env, i32) | ||
56 | diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode | ||
57 | index XXXXXXX..XXXXXXX 100644 | ||
58 | --- a/target/riscv/insn32.decode | ||
59 | +++ b/target/riscv/insn32.decode | ||
60 | @@ -XXX,XX +XXX,XX @@ vwmulsu_vv 111010 . ..... ..... 010 ..... 1010111 @r_vm | ||
61 | vwmulsu_vx 111010 . ..... ..... 110 ..... 1010111 @r_vm | ||
62 | vwmul_vv 111011 . ..... ..... 010 ..... 1010111 @r_vm | ||
63 | vwmul_vx 111011 . ..... ..... 110 ..... 1010111 @r_vm | ||
64 | +vmacc_vv 101101 . ..... ..... 010 ..... 1010111 @r_vm | ||
65 | +vmacc_vx 101101 . ..... ..... 110 ..... 1010111 @r_vm | ||
66 | +vnmsac_vv 101111 . ..... ..... 010 ..... 1010111 @r_vm | ||
67 | +vnmsac_vx 101111 . ..... ..... 110 ..... 1010111 @r_vm | ||
68 | +vmadd_vv 101001 . ..... ..... 010 ..... 1010111 @r_vm | ||
69 | +vmadd_vx 101001 . ..... ..... 110 ..... 1010111 @r_vm | ||
70 | +vnmsub_vv 101011 . ..... ..... 010 ..... 1010111 @r_vm | ||
71 | +vnmsub_vx 101011 . ..... ..... 110 ..... 1010111 @r_vm | ||
72 | |||
73 | vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm | ||
74 | vsetvl 1000000 ..... ..... 111 ..... 1010111 @r | ||
75 | diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c | ||
76 | index XXXXXXX..XXXXXXX 100644 | ||
77 | --- a/target/riscv/insn_trans/trans_rvv.inc.c | ||
78 | +++ b/target/riscv/insn_trans/trans_rvv.inc.c | ||
79 | @@ -XXX,XX +XXX,XX @@ GEN_OPIVV_WIDEN_TRANS(vwmulsu_vv, opivv_widen_check) | ||
80 | GEN_OPIVX_WIDEN_TRANS(vwmul_vx) | ||
81 | GEN_OPIVX_WIDEN_TRANS(vwmulu_vx) | ||
82 | GEN_OPIVX_WIDEN_TRANS(vwmulsu_vx) | ||
83 | + | ||
84 | +/* Vector Single-Width Integer Multiply-Add Instructions */ | ||
85 | +GEN_OPIVV_TRANS(vmacc_vv, opivv_check) | ||
86 | +GEN_OPIVV_TRANS(vnmsac_vv, opivv_check) | ||
87 | +GEN_OPIVV_TRANS(vmadd_vv, opivv_check) | ||
88 | +GEN_OPIVV_TRANS(vnmsub_vv, opivv_check) | ||
89 | +GEN_OPIVX_TRANS(vmacc_vx, opivx_check) | ||
90 | +GEN_OPIVX_TRANS(vnmsac_vx, opivx_check) | ||
91 | +GEN_OPIVX_TRANS(vmadd_vx, opivx_check) | ||
92 | +GEN_OPIVX_TRANS(vnmsub_vx, opivx_check) | ||
93 | diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c | ||
94 | index XXXXXXX..XXXXXXX 100644 | ||
95 | --- a/target/riscv/vector_helper.c | ||
96 | +++ b/target/riscv/vector_helper.c | ||
97 | @@ -XXX,XX +XXX,XX @@ GEN_VEXT_VX(vwmulu_vx_w, 4, 8, clearq) | ||
98 | GEN_VEXT_VX(vwmulsu_vx_b, 1, 2, clearh) | ||
99 | GEN_VEXT_VX(vwmulsu_vx_h, 2, 4, clearl) | ||
100 | GEN_VEXT_VX(vwmulsu_vx_w, 4, 8, clearq) | ||
101 | + | ||
102 | +/* Vector Single-Width Integer Multiply-Add Instructions */ | ||
103 | +#define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ | ||
104 | +static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \ | ||
105 | +{ \ | ||
106 | + TX1 s1 = *((T1 *)vs1 + HS1(i)); \ | ||
107 | + TX2 s2 = *((T2 *)vs2 + HS2(i)); \ | ||
108 | + TD d = *((TD *)vd + HD(i)); \ | ||
109 | + *((TD *)vd + HD(i)) = OP(s2, s1, d); \ | ||
110 | +} | ||
111 | + | ||
112 | +#define DO_MACC(N, M, D) (M * N + D) | ||
113 | +#define DO_NMSAC(N, M, D) (-(M * N) + D) | ||
114 | +#define DO_MADD(N, M, D) (M * D + N) | ||
115 | +#define DO_NMSUB(N, M, D) (-(M * D) + N) | ||
116 | +RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC) | ||
117 | +RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC) | ||
118 | +RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, DO_MACC) | ||
119 | +RVVCALL(OPIVV3, vmacc_vv_d, OP_SSS_D, H8, H8, H8, DO_MACC) | ||
120 | +RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC) | ||
121 | +RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC) | ||
122 | +RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC) | ||
123 | +RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC) | ||
124 | +RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, H1, H1, DO_MADD) | ||
125 | +RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD) | ||
126 | +RVVCALL(OPIVV3, vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD) | ||
127 | +RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD) | ||
128 | +RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB) | ||
129 | +RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB) | ||
130 | +RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB) | ||
131 | +RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB) | ||
132 | +GEN_VEXT_VV(vmacc_vv_b, 1, 1, clearb) | ||
133 | +GEN_VEXT_VV(vmacc_vv_h, 2, 2, clearh) | ||
134 | +GEN_VEXT_VV(vmacc_vv_w, 4, 4, clearl) | ||
135 | +GEN_VEXT_VV(vmacc_vv_d, 8, 8, clearq) | ||
136 | +GEN_VEXT_VV(vnmsac_vv_b, 1, 1, clearb) | ||
137 | +GEN_VEXT_VV(vnmsac_vv_h, 2, 2, clearh) | ||
138 | +GEN_VEXT_VV(vnmsac_vv_w, 4, 4, clearl) | ||
139 | +GEN_VEXT_VV(vnmsac_vv_d, 8, 8, clearq) | ||
140 | +GEN_VEXT_VV(vmadd_vv_b, 1, 1, clearb) | ||
141 | +GEN_VEXT_VV(vmadd_vv_h, 2, 2, clearh) | ||
142 | +GEN_VEXT_VV(vmadd_vv_w, 4, 4, clearl) | ||
143 | +GEN_VEXT_VV(vmadd_vv_d, 8, 8, clearq) | ||
144 | +GEN_VEXT_VV(vnmsub_vv_b, 1, 1, clearb) | ||
145 | +GEN_VEXT_VV(vnmsub_vv_h, 2, 2, clearh) | ||
146 | +GEN_VEXT_VV(vnmsub_vv_w, 4, 4, clearl) | ||
147 | +GEN_VEXT_VV(vnmsub_vv_d, 8, 8, clearq) | ||
148 | + | ||
149 | +#define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ | ||
150 | +static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \ | ||
151 | +{ \ | ||
152 | + TX2 s2 = *((T2 *)vs2 + HS2(i)); \ | ||
153 | + TD d = *((TD *)vd + HD(i)); \ | ||
154 | + *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d); \ | ||
155 | +} | ||
156 | + | ||
157 | +RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC) | ||
158 | +RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC) | ||
159 | +RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC) | ||
160 | +RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC) | ||
161 | +RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC) | ||
162 | +RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC) | ||
163 | +RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC) | ||
164 | +RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC) | ||
165 | +RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD) | ||
166 | +RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD) | ||
167 | +RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD) | ||
168 | +RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD) | ||
169 | +RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB) | ||
170 | +RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB) | ||
171 | +RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB) | ||
172 | +RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB) | ||
173 | +GEN_VEXT_VX(vmacc_vx_b, 1, 1, clearb) | ||
174 | +GEN_VEXT_VX(vmacc_vx_h, 2, 2, clearh) | ||
175 | +GEN_VEXT_VX(vmacc_vx_w, 4, 4, clearl) | ||
176 | +GEN_VEXT_VX(vmacc_vx_d, 8, 8, clearq) | ||
177 | +GEN_VEXT_VX(vnmsac_vx_b, 1, 1, clearb) | ||
178 | +GEN_VEXT_VX(vnmsac_vx_h, 2, 2, clearh) | ||
179 | +GEN_VEXT_VX(vnmsac_vx_w, 4, 4, clearl) | ||
180 | +GEN_VEXT_VX(vnmsac_vx_d, 8, 8, clearq) | ||
181 | +GEN_VEXT_VX(vmadd_vx_b, 1, 1, clearb) | ||
182 | +GEN_VEXT_VX(vmadd_vx_h, 2, 2, clearh) | ||
183 | +GEN_VEXT_VX(vmadd_vx_w, 4, 4, clearl) | ||
184 | +GEN_VEXT_VX(vmadd_vx_d, 8, 8, clearq) | ||
185 | +GEN_VEXT_VX(vnmsub_vx_b, 1, 1, clearb) | ||
186 | +GEN_VEXT_VX(vnmsub_vx_h, 2, 2, clearh) | ||
187 | +GEN_VEXT_VX(vnmsub_vx_w, 4, 4, clearl) | ||
188 | +GEN_VEXT_VX(vnmsub_vx_d, 8, 8, clearq) | ||
189 | -- | ||
190 | 2.27.0 | ||
191 | |||
192 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: LIU Zhiwei <zhiwei_liu@c-sky.com> | ||
2 | 1 | ||
3 | Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> | ||
4 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
5 | Reviewed-by: Alistair Francis <alistair.francis@wdc.com> | ||
6 | Message-id: 20200623215920.2594-23-zhiwei_liu@c-sky.com | ||
7 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | ||
8 | --- | ||
9 | target/riscv/helper.h | 22 ++++++++++++ | ||
10 | target/riscv/insn32.decode | 7 ++++ | ||
11 | target/riscv/insn_trans/trans_rvv.inc.c | 9 +++++ | ||
12 | target/riscv/vector_helper.c | 45 +++++++++++++++++++++++++ | ||
13 | 4 files changed, 83 insertions(+) | ||
14 | |||
15 | diff --git a/target/riscv/helper.h b/target/riscv/helper.h | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/target/riscv/helper.h | ||
18 | +++ b/target/riscv/helper.h | ||
19 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vnmsub_vx_b, void, ptr, ptr, tl, ptr, env, i32) | ||
20 | DEF_HELPER_6(vnmsub_vx_h, void, ptr, ptr, tl, ptr, env, i32) | ||
21 | DEF_HELPER_6(vnmsub_vx_w, void, ptr, ptr, tl, ptr, env, i32) | ||
22 | DEF_HELPER_6(vnmsub_vx_d, void, ptr, ptr, tl, ptr, env, i32) | ||
23 | + | ||
24 | +DEF_HELPER_6(vwmaccu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) | ||
25 | +DEF_HELPER_6(vwmaccu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | ||
26 | +DEF_HELPER_6(vwmaccu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
27 | +DEF_HELPER_6(vwmacc_vv_b, void, ptr, ptr, ptr, ptr, env, i32) | ||
28 | +DEF_HELPER_6(vwmacc_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | ||
29 | +DEF_HELPER_6(vwmacc_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
30 | +DEF_HELPER_6(vwmaccsu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) | ||
31 | +DEF_HELPER_6(vwmaccsu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | ||
32 | +DEF_HELPER_6(vwmaccsu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
33 | +DEF_HELPER_6(vwmaccu_vx_b, void, ptr, ptr, tl, ptr, env, i32) | ||
34 | +DEF_HELPER_6(vwmaccu_vx_h, void, ptr, ptr, tl, ptr, env, i32) | ||
35 | +DEF_HELPER_6(vwmaccu_vx_w, void, ptr, ptr, tl, ptr, env, i32) | ||
36 | +DEF_HELPER_6(vwmacc_vx_b, void, ptr, ptr, tl, ptr, env, i32) | ||
37 | +DEF_HELPER_6(vwmacc_vx_h, void, ptr, ptr, tl, ptr, env, i32) | ||
38 | +DEF_HELPER_6(vwmacc_vx_w, void, ptr, ptr, tl, ptr, env, i32) | ||
39 | +DEF_HELPER_6(vwmaccsu_vx_b, void, ptr, ptr, tl, ptr, env, i32) | ||
40 | +DEF_HELPER_6(vwmaccsu_vx_h, void, ptr, ptr, tl, ptr, env, i32) | ||
41 | +DEF_HELPER_6(vwmaccsu_vx_w, void, ptr, ptr, tl, ptr, env, i32) | ||
42 | +DEF_HELPER_6(vwmaccus_vx_b, void, ptr, ptr, tl, ptr, env, i32) | ||
43 | +DEF_HELPER_6(vwmaccus_vx_h, void, ptr, ptr, tl, ptr, env, i32) | ||
44 | +DEF_HELPER_6(vwmaccus_vx_w, void, ptr, ptr, tl, ptr, env, i32) | ||
45 | diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode | ||
46 | index XXXXXXX..XXXXXXX 100644 | ||
47 | --- a/target/riscv/insn32.decode | ||
48 | +++ b/target/riscv/insn32.decode | ||
49 | @@ -XXX,XX +XXX,XX @@ vmadd_vv 101001 . ..... ..... 010 ..... 1010111 @r_vm | ||
50 | vmadd_vx 101001 . ..... ..... 110 ..... 1010111 @r_vm | ||
51 | vnmsub_vv 101011 . ..... ..... 010 ..... 1010111 @r_vm | ||
52 | vnmsub_vx 101011 . ..... ..... 110 ..... 1010111 @r_vm | ||
53 | +vwmaccu_vv 111100 . ..... ..... 010 ..... 1010111 @r_vm | ||
54 | +vwmaccu_vx 111100 . ..... ..... 110 ..... 1010111 @r_vm | ||
55 | +vwmacc_vv 111101 . ..... ..... 010 ..... 1010111 @r_vm | ||
56 | +vwmacc_vx 111101 . ..... ..... 110 ..... 1010111 @r_vm | ||
57 | +vwmaccsu_vv 111110 . ..... ..... 010 ..... 1010111 @r_vm | ||
58 | +vwmaccsu_vx 111110 . ..... ..... 110 ..... 1010111 @r_vm | ||
59 | +vwmaccus_vx 111111 . ..... ..... 110 ..... 1010111 @r_vm | ||
60 | |||
61 | vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm | ||
62 | vsetvl 1000000 ..... ..... 111 ..... 1010111 @r | ||
63 | diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c | ||
64 | index XXXXXXX..XXXXXXX 100644 | ||
65 | --- a/target/riscv/insn_trans/trans_rvv.inc.c | ||
66 | +++ b/target/riscv/insn_trans/trans_rvv.inc.c | ||
67 | @@ -XXX,XX +XXX,XX @@ GEN_OPIVX_TRANS(vmacc_vx, opivx_check) | ||
68 | GEN_OPIVX_TRANS(vnmsac_vx, opivx_check) | ||
69 | GEN_OPIVX_TRANS(vmadd_vx, opivx_check) | ||
70 | GEN_OPIVX_TRANS(vnmsub_vx, opivx_check) | ||
71 | + | ||
72 | +/* Vector Widening Integer Multiply-Add Instructions */ | ||
73 | +GEN_OPIVV_WIDEN_TRANS(vwmaccu_vv, opivv_widen_check) | ||
74 | +GEN_OPIVV_WIDEN_TRANS(vwmacc_vv, opivv_widen_check) | ||
75 | +GEN_OPIVV_WIDEN_TRANS(vwmaccsu_vv, opivv_widen_check) | ||
76 | +GEN_OPIVX_WIDEN_TRANS(vwmaccu_vx) | ||
77 | +GEN_OPIVX_WIDEN_TRANS(vwmacc_vx) | ||
78 | +GEN_OPIVX_WIDEN_TRANS(vwmaccsu_vx) | ||
79 | +GEN_OPIVX_WIDEN_TRANS(vwmaccus_vx) | ||
80 | diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c | ||
81 | index XXXXXXX..XXXXXXX 100644 | ||
82 | --- a/target/riscv/vector_helper.c | ||
83 | +++ b/target/riscv/vector_helper.c | ||
84 | @@ -XXX,XX +XXX,XX @@ GEN_VEXT_VX(vnmsub_vx_b, 1, 1, clearb) | ||
85 | GEN_VEXT_VX(vnmsub_vx_h, 2, 2, clearh) | ||
86 | GEN_VEXT_VX(vnmsub_vx_w, 4, 4, clearl) | ||
87 | GEN_VEXT_VX(vnmsub_vx_d, 8, 8, clearq) | ||
88 | + | ||
89 | +/* Vector Widening Integer Multiply-Add Instructions */ | ||
90 | +RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC) | ||
91 | +RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC) | ||
92 | +RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC) | ||
93 | +RVVCALL(OPIVV3, vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC) | ||
94 | +RVVCALL(OPIVV3, vwmacc_vv_h, WOP_SSS_H, H4, H2, H2, DO_MACC) | ||
95 | +RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC) | ||
96 | +RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC) | ||
97 | +RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC) | ||
98 | +RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC) | ||
99 | +GEN_VEXT_VV(vwmaccu_vv_b, 1, 2, clearh) | ||
100 | +GEN_VEXT_VV(vwmaccu_vv_h, 2, 4, clearl) | ||
101 | +GEN_VEXT_VV(vwmaccu_vv_w, 4, 8, clearq) | ||
102 | +GEN_VEXT_VV(vwmacc_vv_b, 1, 2, clearh) | ||
103 | +GEN_VEXT_VV(vwmacc_vv_h, 2, 4, clearl) | ||
104 | +GEN_VEXT_VV(vwmacc_vv_w, 4, 8, clearq) | ||
105 | +GEN_VEXT_VV(vwmaccsu_vv_b, 1, 2, clearh) | ||
106 | +GEN_VEXT_VV(vwmaccsu_vv_h, 2, 4, clearl) | ||
107 | +GEN_VEXT_VV(vwmaccsu_vv_w, 4, 8, clearq) | ||
108 | + | ||
109 | +RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC) | ||
110 | +RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC) | ||
111 | +RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC) | ||
112 | +RVVCALL(OPIVX3, vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC) | ||
113 | +RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC) | ||
114 | +RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC) | ||
115 | +RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, DO_MACC) | ||
116 | +RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, DO_MACC) | ||
117 | +RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC) | ||
118 | +RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC) | ||
119 | +RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC) | ||
120 | +RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC) | ||
121 | +GEN_VEXT_VX(vwmaccu_vx_b, 1, 2, clearh) | ||
122 | +GEN_VEXT_VX(vwmaccu_vx_h, 2, 4, clearl) | ||
123 | +GEN_VEXT_VX(vwmaccu_vx_w, 4, 8, clearq) | ||
124 | +GEN_VEXT_VX(vwmacc_vx_b, 1, 2, clearh) | ||
125 | +GEN_VEXT_VX(vwmacc_vx_h, 2, 4, clearl) | ||
126 | +GEN_VEXT_VX(vwmacc_vx_w, 4, 8, clearq) | ||
127 | +GEN_VEXT_VX(vwmaccsu_vx_b, 1, 2, clearh) | ||
128 | +GEN_VEXT_VX(vwmaccsu_vx_h, 2, 4, clearl) | ||
129 | +GEN_VEXT_VX(vwmaccsu_vx_w, 4, 8, clearq) | ||
130 | +GEN_VEXT_VX(vwmaccus_vx_b, 1, 2, clearh) | ||
131 | +GEN_VEXT_VX(vwmaccus_vx_h, 2, 4, clearl) | ||
132 | +GEN_VEXT_VX(vwmaccus_vx_w, 4, 8, clearq) | ||
133 | -- | ||
134 | 2.27.0 | ||
135 | |||
136 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: LIU Zhiwei <zhiwei_liu@c-sky.com> | ||
2 | 1 | ||
3 | Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> | ||
4 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
5 | Reviewed-by: Alistair Francis <alistair.francis@wdc.com> | ||
6 | Message-id: 20200623215920.2594-28-zhiwei_liu@c-sky.com | ||
7 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | ||
8 | --- | ||
9 | target/riscv/helper.h | 22 +++ | ||
10 | target/riscv/insn32.decode | 7 + | ||
11 | target/riscv/insn_trans/trans_rvv.inc.c | 9 ++ | ||
12 | target/riscv/vector_helper.c | 205 ++++++++++++++++++++++++ | ||
13 | 4 files changed, 243 insertions(+) | ||
14 | |||
15 | diff --git a/target/riscv/helper.h b/target/riscv/helper.h | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/target/riscv/helper.h | ||
18 | +++ b/target/riscv/helper.h | ||
19 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vsmul_vx_b, void, ptr, ptr, tl, ptr, env, i32) | ||
20 | DEF_HELPER_6(vsmul_vx_h, void, ptr, ptr, tl, ptr, env, i32) | ||
21 | DEF_HELPER_6(vsmul_vx_w, void, ptr, ptr, tl, ptr, env, i32) | ||
22 | DEF_HELPER_6(vsmul_vx_d, void, ptr, ptr, tl, ptr, env, i32) | ||
23 | + | ||
24 | +DEF_HELPER_6(vwsmaccu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) | ||
25 | +DEF_HELPER_6(vwsmaccu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | ||
26 | +DEF_HELPER_6(vwsmaccu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
27 | +DEF_HELPER_6(vwsmacc_vv_b, void, ptr, ptr, ptr, ptr, env, i32) | ||
28 | +DEF_HELPER_6(vwsmacc_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | ||
29 | +DEF_HELPER_6(vwsmacc_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
30 | +DEF_HELPER_6(vwsmaccsu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) | ||
31 | +DEF_HELPER_6(vwsmaccsu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | ||
32 | +DEF_HELPER_6(vwsmaccsu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
33 | +DEF_HELPER_6(vwsmaccu_vx_b, void, ptr, ptr, tl, ptr, env, i32) | ||
34 | +DEF_HELPER_6(vwsmaccu_vx_h, void, ptr, ptr, tl, ptr, env, i32) | ||
35 | +DEF_HELPER_6(vwsmaccu_vx_w, void, ptr, ptr, tl, ptr, env, i32) | ||
36 | +DEF_HELPER_6(vwsmacc_vx_b, void, ptr, ptr, tl, ptr, env, i32) | ||
37 | +DEF_HELPER_6(vwsmacc_vx_h, void, ptr, ptr, tl, ptr, env, i32) | ||
38 | +DEF_HELPER_6(vwsmacc_vx_w, void, ptr, ptr, tl, ptr, env, i32) | ||
39 | +DEF_HELPER_6(vwsmaccsu_vx_b, void, ptr, ptr, tl, ptr, env, i32) | ||
40 | +DEF_HELPER_6(vwsmaccsu_vx_h, void, ptr, ptr, tl, ptr, env, i32) | ||
41 | +DEF_HELPER_6(vwsmaccsu_vx_w, void, ptr, ptr, tl, ptr, env, i32) | ||
42 | +DEF_HELPER_6(vwsmaccus_vx_b, void, ptr, ptr, tl, ptr, env, i32) | ||
43 | +DEF_HELPER_6(vwsmaccus_vx_h, void, ptr, ptr, tl, ptr, env, i32) | ||
44 | +DEF_HELPER_6(vwsmaccus_vx_w, void, ptr, ptr, tl, ptr, env, i32) | ||
45 | diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode | ||
46 | index XXXXXXX..XXXXXXX 100644 | ||
47 | --- a/target/riscv/insn32.decode | ||
48 | +++ b/target/riscv/insn32.decode | ||
49 | @@ -XXX,XX +XXX,XX @@ vasub_vv 100110 . ..... ..... 000 ..... 1010111 @r_vm | ||
50 | vasub_vx 100110 . ..... ..... 100 ..... 1010111 @r_vm | ||
51 | vsmul_vv 100111 . ..... ..... 000 ..... 1010111 @r_vm | ||
52 | vsmul_vx 100111 . ..... ..... 100 ..... 1010111 @r_vm | ||
53 | +vwsmaccu_vv 111100 . ..... ..... 000 ..... 1010111 @r_vm | ||
54 | +vwsmaccu_vx 111100 . ..... ..... 100 ..... 1010111 @r_vm | ||
55 | +vwsmacc_vv 111101 . ..... ..... 000 ..... 1010111 @r_vm | ||
56 | +vwsmacc_vx 111101 . ..... ..... 100 ..... 1010111 @r_vm | ||
57 | +vwsmaccsu_vv 111110 . ..... ..... 000 ..... 1010111 @r_vm | ||
58 | +vwsmaccsu_vx 111110 . ..... ..... 100 ..... 1010111 @r_vm | ||
59 | +vwsmaccus_vx 111111 . ..... ..... 100 ..... 1010111 @r_vm | ||
60 | |||
61 | vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm | ||
62 | vsetvl 1000000 ..... ..... 111 ..... 1010111 @r | ||
63 | diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c | ||
64 | index XXXXXXX..XXXXXXX 100644 | ||
65 | --- a/target/riscv/insn_trans/trans_rvv.inc.c | ||
66 | +++ b/target/riscv/insn_trans/trans_rvv.inc.c | ||
67 | @@ -XXX,XX +XXX,XX @@ GEN_OPIVI_TRANS(vaadd_vi, 0, vaadd_vx, opivx_check) | ||
68 | /* Vector Single-Width Fractional Multiply with Rounding and Saturation */ | ||
69 | GEN_OPIVV_TRANS(vsmul_vv, opivv_check) | ||
70 | GEN_OPIVX_TRANS(vsmul_vx, opivx_check) | ||
71 | + | ||
72 | +/* Vector Widening Saturating Scaled Multiply-Add */ | ||
73 | +GEN_OPIVV_WIDEN_TRANS(vwsmaccu_vv, opivv_widen_check) | ||
74 | +GEN_OPIVV_WIDEN_TRANS(vwsmacc_vv, opivv_widen_check) | ||
75 | +GEN_OPIVV_WIDEN_TRANS(vwsmaccsu_vv, opivv_widen_check) | ||
76 | +GEN_OPIVX_WIDEN_TRANS(vwsmaccu_vx) | ||
77 | +GEN_OPIVX_WIDEN_TRANS(vwsmacc_vx) | ||
78 | +GEN_OPIVX_WIDEN_TRANS(vwsmaccsu_vx) | ||
79 | +GEN_OPIVX_WIDEN_TRANS(vwsmaccus_vx) | ||
80 | diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c | ||
81 | index XXXXXXX..XXXXXXX 100644 | ||
82 | --- a/target/riscv/vector_helper.c | ||
83 | +++ b/target/riscv/vector_helper.c | ||
84 | @@ -XXX,XX +XXX,XX @@ GEN_VEXT_VX_RM(vsmul_vx_b, 1, 1, clearb) | ||
85 | GEN_VEXT_VX_RM(vsmul_vx_h, 2, 2, clearh) | ||
86 | GEN_VEXT_VX_RM(vsmul_vx_w, 4, 4, clearl) | ||
87 | GEN_VEXT_VX_RM(vsmul_vx_d, 8, 8, clearq) | ||
88 | + | ||
89 | +/* Vector Widening Saturating Scaled Multiply-Add */ | ||
90 | +static inline uint16_t | ||
91 | +vwsmaccu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b, | ||
92 | + uint16_t c) | ||
93 | +{ | ||
94 | + uint8_t round; | ||
95 | + uint16_t res = (uint16_t)a * b; | ||
96 | + | ||
97 | + round = get_round(vxrm, res, 4); | ||
98 | + res = (res >> 4) + round; | ||
99 | + return saddu16(env, vxrm, c, res); | ||
100 | +} | ||
101 | + | ||
102 | +static inline uint32_t | ||
103 | +vwsmaccu16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b, | ||
104 | + uint32_t c) | ||
105 | +{ | ||
106 | + uint8_t round; | ||
107 | + uint32_t res = (uint32_t)a * b; | ||
108 | + | ||
109 | + round = get_round(vxrm, res, 8); | ||
110 | + res = (res >> 8) + round; | ||
111 | + return saddu32(env, vxrm, c, res); | ||
112 | +} | ||
113 | + | ||
114 | +static inline uint64_t | ||
115 | +vwsmaccu32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b, | ||
116 | + uint64_t c) | ||
117 | +{ | ||
118 | + uint8_t round; | ||
119 | + uint64_t res = (uint64_t)a * b; | ||
120 | + | ||
121 | + round = get_round(vxrm, res, 16); | ||
122 | + res = (res >> 16) + round; | ||
123 | + return saddu64(env, vxrm, c, res); | ||
124 | +} | ||
125 | + | ||
126 | +#define OPIVV3_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ | ||
127 | +static inline void \ | ||
128 | +do_##NAME(void *vd, void *vs1, void *vs2, int i, \ | ||
129 | + CPURISCVState *env, int vxrm) \ | ||
130 | +{ \ | ||
131 | + TX1 s1 = *((T1 *)vs1 + HS1(i)); \ | ||
132 | + TX2 s2 = *((T2 *)vs2 + HS2(i)); \ | ||
133 | + TD d = *((TD *)vd + HD(i)); \ | ||
134 | + *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1, d); \ | ||
135 | +} | ||
136 | + | ||
137 | +RVVCALL(OPIVV3_RM, vwsmaccu_vv_b, WOP_UUU_B, H2, H1, H1, vwsmaccu8) | ||
138 | +RVVCALL(OPIVV3_RM, vwsmaccu_vv_h, WOP_UUU_H, H4, H2, H2, vwsmaccu16) | ||
139 | +RVVCALL(OPIVV3_RM, vwsmaccu_vv_w, WOP_UUU_W, H8, H4, H4, vwsmaccu32) | ||
140 | +GEN_VEXT_VV_RM(vwsmaccu_vv_b, 1, 2, clearh) | ||
141 | +GEN_VEXT_VV_RM(vwsmaccu_vv_h, 2, 4, clearl) | ||
142 | +GEN_VEXT_VV_RM(vwsmaccu_vv_w, 4, 8, clearq) | ||
143 | + | ||
144 | +#define OPIVX3_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ | ||
145 | +static inline void \ | ||
146 | +do_##NAME(void *vd, target_long s1, void *vs2, int i, \ | ||
147 | + CPURISCVState *env, int vxrm) \ | ||
148 | +{ \ | ||
149 | + TX2 s2 = *((T2 *)vs2 + HS2(i)); \ | ||
150 | + TD d = *((TD *)vd + HD(i)); \ | ||
151 | + *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1, d); \ | ||
152 | +} | ||
153 | + | ||
154 | +RVVCALL(OPIVX3_RM, vwsmaccu_vx_b, WOP_UUU_B, H2, H1, vwsmaccu8) | ||
155 | +RVVCALL(OPIVX3_RM, vwsmaccu_vx_h, WOP_UUU_H, H4, H2, vwsmaccu16) | ||
156 | +RVVCALL(OPIVX3_RM, vwsmaccu_vx_w, WOP_UUU_W, H8, H4, vwsmaccu32) | ||
157 | +GEN_VEXT_VX_RM(vwsmaccu_vx_b, 1, 2, clearh) | ||
158 | +GEN_VEXT_VX_RM(vwsmaccu_vx_h, 2, 4, clearl) | ||
159 | +GEN_VEXT_VX_RM(vwsmaccu_vx_w, 4, 8, clearq) | ||
160 | + | ||
161 | +static inline int16_t | ||
162 | +vwsmacc8(CPURISCVState *env, int vxrm, int8_t a, int8_t b, int16_t c) | ||
163 | +{ | ||
164 | + uint8_t round; | ||
165 | + int16_t res = (int16_t)a * b; | ||
166 | + | ||
167 | + round = get_round(vxrm, res, 4); | ||
168 | + res = (res >> 4) + round; | ||
169 | + return sadd16(env, vxrm, c, res); | ||
170 | +} | ||
171 | + | ||
172 | +static inline int32_t | ||
173 | +vwsmacc16(CPURISCVState *env, int vxrm, int16_t a, int16_t b, int32_t c) | ||
174 | +{ | ||
175 | + uint8_t round; | ||
176 | + int32_t res = (int32_t)a * b; | ||
177 | + | ||
178 | + round = get_round(vxrm, res, 8); | ||
179 | + res = (res >> 8) + round; | ||
180 | + return sadd32(env, vxrm, c, res); | ||
181 | + | ||
182 | +} | ||
183 | + | ||
184 | +static inline int64_t | ||
185 | +vwsmacc32(CPURISCVState *env, int vxrm, int32_t a, int32_t b, int64_t c) | ||
186 | +{ | ||
187 | + uint8_t round; | ||
188 | + int64_t res = (int64_t)a * b; | ||
189 | + | ||
190 | + round = get_round(vxrm, res, 16); | ||
191 | + res = (res >> 16) + round; | ||
192 | + return sadd64(env, vxrm, c, res); | ||
193 | +} | ||
194 | + | ||
195 | +RVVCALL(OPIVV3_RM, vwsmacc_vv_b, WOP_SSS_B, H2, H1, H1, vwsmacc8) | ||
196 | +RVVCALL(OPIVV3_RM, vwsmacc_vv_h, WOP_SSS_H, H4, H2, H2, vwsmacc16) | ||
197 | +RVVCALL(OPIVV3_RM, vwsmacc_vv_w, WOP_SSS_W, H8, H4, H4, vwsmacc32) | ||
198 | +GEN_VEXT_VV_RM(vwsmacc_vv_b, 1, 2, clearh) | ||
199 | +GEN_VEXT_VV_RM(vwsmacc_vv_h, 2, 4, clearl) | ||
200 | +GEN_VEXT_VV_RM(vwsmacc_vv_w, 4, 8, clearq) | ||
201 | +RVVCALL(OPIVX3_RM, vwsmacc_vx_b, WOP_SSS_B, H2, H1, vwsmacc8) | ||
202 | +RVVCALL(OPIVX3_RM, vwsmacc_vx_h, WOP_SSS_H, H4, H2, vwsmacc16) | ||
203 | +RVVCALL(OPIVX3_RM, vwsmacc_vx_w, WOP_SSS_W, H8, H4, vwsmacc32) | ||
204 | +GEN_VEXT_VX_RM(vwsmacc_vx_b, 1, 2, clearh) | ||
205 | +GEN_VEXT_VX_RM(vwsmacc_vx_h, 2, 4, clearl) | ||
206 | +GEN_VEXT_VX_RM(vwsmacc_vx_w, 4, 8, clearq) | ||
207 | + | ||
208 | +static inline int16_t | ||
209 | +vwsmaccsu8(CPURISCVState *env, int vxrm, uint8_t a, int8_t b, int16_t c) | ||
210 | +{ | ||
211 | + uint8_t round; | ||
212 | + int16_t res = a * (int16_t)b; | ||
213 | + | ||
214 | + round = get_round(vxrm, res, 4); | ||
215 | + res = (res >> 4) + round; | ||
216 | + return ssub16(env, vxrm, c, res); | ||
217 | +} | ||
218 | + | ||
219 | +static inline int32_t | ||
220 | +vwsmaccsu16(CPURISCVState *env, int vxrm, uint16_t a, int16_t b, uint32_t c) | ||
221 | +{ | ||
222 | + uint8_t round; | ||
223 | + int32_t res = a * (int32_t)b; | ||
224 | + | ||
225 | + round = get_round(vxrm, res, 8); | ||
226 | + res = (res >> 8) + round; | ||
227 | + return ssub32(env, vxrm, c, res); | ||
228 | +} | ||
229 | + | ||
230 | +static inline int64_t | ||
231 | +vwsmaccsu32(CPURISCVState *env, int vxrm, uint32_t a, int32_t b, int64_t c) | ||
232 | +{ | ||
233 | + uint8_t round; | ||
234 | + int64_t res = a * (int64_t)b; | ||
235 | + | ||
236 | + round = get_round(vxrm, res, 16); | ||
237 | + res = (res >> 16) + round; | ||
238 | + return ssub64(env, vxrm, c, res); | ||
239 | +} | ||
240 | + | ||
241 | +RVVCALL(OPIVV3_RM, vwsmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, vwsmaccsu8) | ||
242 | +RVVCALL(OPIVV3_RM, vwsmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, vwsmaccsu16) | ||
243 | +RVVCALL(OPIVV3_RM, vwsmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, vwsmaccsu32) | ||
244 | +GEN_VEXT_VV_RM(vwsmaccsu_vv_b, 1, 2, clearh) | ||
245 | +GEN_VEXT_VV_RM(vwsmaccsu_vv_h, 2, 4, clearl) | ||
246 | +GEN_VEXT_VV_RM(vwsmaccsu_vv_w, 4, 8, clearq) | ||
247 | +RVVCALL(OPIVX3_RM, vwsmaccsu_vx_b, WOP_SSU_B, H2, H1, vwsmaccsu8) | ||
248 | +RVVCALL(OPIVX3_RM, vwsmaccsu_vx_h, WOP_SSU_H, H4, H2, vwsmaccsu16) | ||
249 | +RVVCALL(OPIVX3_RM, vwsmaccsu_vx_w, WOP_SSU_W, H8, H4, vwsmaccsu32) | ||
250 | +GEN_VEXT_VX_RM(vwsmaccsu_vx_b, 1, 2, clearh) | ||
251 | +GEN_VEXT_VX_RM(vwsmaccsu_vx_h, 2, 4, clearl) | ||
252 | +GEN_VEXT_VX_RM(vwsmaccsu_vx_w, 4, 8, clearq) | ||
253 | + | ||
254 | +static inline int16_t | ||
255 | +vwsmaccus8(CPURISCVState *env, int vxrm, int8_t a, uint8_t b, int16_t c) | ||
256 | +{ | ||
257 | + uint8_t round; | ||
258 | + int16_t res = (int16_t)a * b; | ||
259 | + | ||
260 | + round = get_round(vxrm, res, 4); | ||
261 | + res = (res >> 4) + round; | ||
262 | + return ssub16(env, vxrm, c, res); | ||
263 | +} | ||
264 | + | ||
265 | +static inline int32_t | ||
266 | +vwsmaccus16(CPURISCVState *env, int vxrm, int16_t a, uint16_t b, int32_t c) | ||
267 | +{ | ||
268 | + uint8_t round; | ||
269 | + int32_t res = (int32_t)a * b; | ||
270 | + | ||
271 | + round = get_round(vxrm, res, 8); | ||
272 | + res = (res >> 8) + round; | ||
273 | + return ssub32(env, vxrm, c, res); | ||
274 | +} | ||
275 | + | ||
276 | +static inline int64_t | ||
277 | +vwsmaccus32(CPURISCVState *env, int vxrm, int32_t a, uint32_t b, int64_t c) | ||
278 | +{ | ||
279 | + uint8_t round; | ||
280 | + int64_t res = (int64_t)a * b; | ||
281 | + | ||
282 | + round = get_round(vxrm, res, 16); | ||
283 | + res = (res >> 16) + round; | ||
284 | + return ssub64(env, vxrm, c, res); | ||
285 | +} | ||
286 | + | ||
287 | +RVVCALL(OPIVX3_RM, vwsmaccus_vx_b, WOP_SUS_B, H2, H1, vwsmaccus8) | ||
288 | +RVVCALL(OPIVX3_RM, vwsmaccus_vx_h, WOP_SUS_H, H4, H2, vwsmaccus16) | ||
289 | +RVVCALL(OPIVX3_RM, vwsmaccus_vx_w, WOP_SUS_W, H8, H4, vwsmaccus32) | ||
290 | +GEN_VEXT_VX_RM(vwsmaccus_vx_b, 1, 2, clearh) | ||
291 | +GEN_VEXT_VX_RM(vwsmaccus_vx_h, 2, 4, clearl) | ||
292 | +GEN_VEXT_VX_RM(vwsmaccus_vx_w, 4, 8, clearq) | ||
293 | -- | ||
294 | 2.27.0 | ||
295 | |||
296 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: LIU Zhiwei <zhiwei_liu@c-sky.com> | ||
2 | 1 | ||
3 | Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> | ||
4 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
5 | Reviewed-by: Alistair Francis <alistair.francis@wdc.com> | ||
6 | Message-id: 20200623215920.2594-29-zhiwei_liu@c-sky.com | ||
7 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | ||
8 | --- | ||
9 | target/riscv/helper.h | 17 ++++ | ||
10 | target/riscv/insn32.decode | 6 ++ | ||
11 | target/riscv/insn_trans/trans_rvv.inc.c | 8 ++ | ||
12 | target/riscv/vector_helper.c | 117 ++++++++++++++++++++++++ | ||
13 | 4 files changed, 148 insertions(+) | ||
14 | |||
15 | diff --git a/target/riscv/helper.h b/target/riscv/helper.h | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/target/riscv/helper.h | ||
18 | +++ b/target/riscv/helper.h | ||
19 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vwsmaccsu_vx_w, void, ptr, ptr, tl, ptr, env, i32) | ||
20 | DEF_HELPER_6(vwsmaccus_vx_b, void, ptr, ptr, tl, ptr, env, i32) | ||
21 | DEF_HELPER_6(vwsmaccus_vx_h, void, ptr, ptr, tl, ptr, env, i32) | ||
22 | DEF_HELPER_6(vwsmaccus_vx_w, void, ptr, ptr, tl, ptr, env, i32) | ||
23 | + | ||
24 | +DEF_HELPER_6(vssrl_vv_b, void, ptr, ptr, ptr, ptr, env, i32) | ||
25 | +DEF_HELPER_6(vssrl_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | ||
26 | +DEF_HELPER_6(vssrl_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
27 | +DEF_HELPER_6(vssrl_vv_d, void, ptr, ptr, ptr, ptr, env, i32) | ||
28 | +DEF_HELPER_6(vssra_vv_b, void, ptr, ptr, ptr, ptr, env, i32) | ||
29 | +DEF_HELPER_6(vssra_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | ||
30 | +DEF_HELPER_6(vssra_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
31 | +DEF_HELPER_6(vssra_vv_d, void, ptr, ptr, ptr, ptr, env, i32) | ||
32 | +DEF_HELPER_6(vssrl_vx_b, void, ptr, ptr, tl, ptr, env, i32) | ||
33 | +DEF_HELPER_6(vssrl_vx_h, void, ptr, ptr, tl, ptr, env, i32) | ||
34 | +DEF_HELPER_6(vssrl_vx_w, void, ptr, ptr, tl, ptr, env, i32) | ||
35 | +DEF_HELPER_6(vssrl_vx_d, void, ptr, ptr, tl, ptr, env, i32) | ||
36 | +DEF_HELPER_6(vssra_vx_b, void, ptr, ptr, tl, ptr, env, i32) | ||
37 | +DEF_HELPER_6(vssra_vx_h, void, ptr, ptr, tl, ptr, env, i32) | ||
38 | +DEF_HELPER_6(vssra_vx_w, void, ptr, ptr, tl, ptr, env, i32) | ||
39 | +DEF_HELPER_6(vssra_vx_d, void, ptr, ptr, tl, ptr, env, i32) | ||
40 | diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode | ||
41 | index XXXXXXX..XXXXXXX 100644 | ||
42 | --- a/target/riscv/insn32.decode | ||
43 | +++ b/target/riscv/insn32.decode | ||
44 | @@ -XXX,XX +XXX,XX @@ vwsmacc_vx 111101 . ..... ..... 100 ..... 1010111 @r_vm | ||
45 | vwsmaccsu_vv 111110 . ..... ..... 000 ..... 1010111 @r_vm | ||
46 | vwsmaccsu_vx 111110 . ..... ..... 100 ..... 1010111 @r_vm | ||
47 | vwsmaccus_vx 111111 . ..... ..... 100 ..... 1010111 @r_vm | ||
48 | +vssrl_vv 101010 . ..... ..... 000 ..... 1010111 @r_vm | ||
49 | +vssrl_vx 101010 . ..... ..... 100 ..... 1010111 @r_vm | ||
50 | +vssrl_vi 101010 . ..... ..... 011 ..... 1010111 @r_vm | ||
51 | +vssra_vv 101011 . ..... ..... 000 ..... 1010111 @r_vm | ||
52 | +vssra_vx 101011 . ..... ..... 100 ..... 1010111 @r_vm | ||
53 | +vssra_vi 101011 . ..... ..... 011 ..... 1010111 @r_vm | ||
54 | |||
55 | vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm | ||
56 | vsetvl 1000000 ..... ..... 111 ..... 1010111 @r | ||
57 | diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c | ||
58 | index XXXXXXX..XXXXXXX 100644 | ||
59 | --- a/target/riscv/insn_trans/trans_rvv.inc.c | ||
60 | +++ b/target/riscv/insn_trans/trans_rvv.inc.c | ||
61 | @@ -XXX,XX +XXX,XX @@ GEN_OPIVX_WIDEN_TRANS(vwsmaccu_vx) | ||
62 | GEN_OPIVX_WIDEN_TRANS(vwsmacc_vx) | ||
63 | GEN_OPIVX_WIDEN_TRANS(vwsmaccsu_vx) | ||
64 | GEN_OPIVX_WIDEN_TRANS(vwsmaccus_vx) | ||
65 | + | ||
66 | +/* Vector Single-Width Scaling Shift Instructions */ | ||
67 | +GEN_OPIVV_TRANS(vssrl_vv, opivv_check) | ||
68 | +GEN_OPIVV_TRANS(vssra_vv, opivv_check) | ||
69 | +GEN_OPIVX_TRANS(vssrl_vx, opivx_check) | ||
70 | +GEN_OPIVX_TRANS(vssra_vx, opivx_check) | ||
71 | +GEN_OPIVI_TRANS(vssrl_vi, 1, vssrl_vx, opivx_check) | ||
72 | +GEN_OPIVI_TRANS(vssra_vi, 0, vssra_vx, opivx_check) | ||
73 | diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c | ||
74 | index XXXXXXX..XXXXXXX 100644 | ||
75 | --- a/target/riscv/vector_helper.c | ||
76 | +++ b/target/riscv/vector_helper.c | ||
77 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVX3_RM, vwsmaccus_vx_w, WOP_SUS_W, H8, H4, vwsmaccus32) | ||
78 | GEN_VEXT_VX_RM(vwsmaccus_vx_b, 1, 2, clearh) | ||
79 | GEN_VEXT_VX_RM(vwsmaccus_vx_h, 2, 4, clearl) | ||
80 | GEN_VEXT_VX_RM(vwsmaccus_vx_w, 4, 8, clearq) | ||
81 | + | ||
82 | +/* Vector Single-Width Scaling Shift Instructions */ | ||
83 | +static inline uint8_t | ||
84 | +vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) | ||
85 | +{ | ||
86 | + uint8_t round, shift = b & 0x7; | ||
87 | + uint8_t res; | ||
88 | + | ||
89 | + round = get_round(vxrm, a, shift); | ||
90 | + res = (a >> shift) + round; | ||
91 | + return res; | ||
92 | +} | ||
93 | +static inline uint16_t | ||
94 | +vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b) | ||
95 | +{ | ||
96 | + uint8_t round, shift = b & 0xf; | ||
97 | + uint16_t res; | ||
98 | + | ||
99 | + round = get_round(vxrm, a, shift); | ||
100 | + res = (a >> shift) + round; | ||
101 | + return res; | ||
102 | +} | ||
103 | +static inline uint32_t | ||
104 | +vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b) | ||
105 | +{ | ||
106 | + uint8_t round, shift = b & 0x1f; | ||
107 | + uint32_t res; | ||
108 | + | ||
109 | + round = get_round(vxrm, a, shift); | ||
110 | + res = (a >> shift) + round; | ||
111 | + return res; | ||
112 | +} | ||
113 | +static inline uint64_t | ||
114 | +vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b) | ||
115 | +{ | ||
116 | + uint8_t round, shift = b & 0x3f; | ||
117 | + uint64_t res; | ||
118 | + | ||
119 | + round = get_round(vxrm, a, shift); | ||
120 | + res = (a >> shift) + round; | ||
121 | + return res; | ||
122 | +} | ||
123 | +RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8) | ||
124 | +RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16) | ||
125 | +RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32) | ||
126 | +RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64) | ||
127 | +GEN_VEXT_VV_RM(vssrl_vv_b, 1, 1, clearb) | ||
128 | +GEN_VEXT_VV_RM(vssrl_vv_h, 2, 2, clearh) | ||
129 | +GEN_VEXT_VV_RM(vssrl_vv_w, 4, 4, clearl) | ||
130 | +GEN_VEXT_VV_RM(vssrl_vv_d, 8, 8, clearq) | ||
131 | + | ||
132 | +RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8) | ||
133 | +RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16) | ||
134 | +RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32) | ||
135 | +RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64) | ||
136 | +GEN_VEXT_VX_RM(vssrl_vx_b, 1, 1, clearb) | ||
137 | +GEN_VEXT_VX_RM(vssrl_vx_h, 2, 2, clearh) | ||
138 | +GEN_VEXT_VX_RM(vssrl_vx_w, 4, 4, clearl) | ||
139 | +GEN_VEXT_VX_RM(vssrl_vx_d, 8, 8, clearq) | ||
140 | + | ||
141 | +static inline int8_t | ||
142 | +vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) | ||
143 | +{ | ||
144 | + uint8_t round, shift = b & 0x7; | ||
145 | + int8_t res; | ||
146 | + | ||
147 | + round = get_round(vxrm, a, shift); | ||
148 | + res = (a >> shift) + round; | ||
149 | + return res; | ||
150 | +} | ||
151 | +static inline int16_t | ||
152 | +vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) | ||
153 | +{ | ||
154 | + uint8_t round, shift = b & 0xf; | ||
155 | + int16_t res; | ||
156 | + | ||
157 | + round = get_round(vxrm, a, shift); | ||
158 | + res = (a >> shift) + round; | ||
159 | + return res; | ||
160 | +} | ||
161 | +static inline int32_t | ||
162 | +vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) | ||
163 | +{ | ||
164 | + uint8_t round, shift = b & 0x1f; | ||
165 | + int32_t res; | ||
166 | + | ||
167 | + round = get_round(vxrm, a, shift); | ||
168 | + res = (a >> shift) + round; | ||
169 | + return res; | ||
170 | +} | ||
171 | +static inline int64_t | ||
172 | +vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) | ||
173 | +{ | ||
174 | + uint8_t round, shift = b & 0x3f; | ||
175 | + int64_t res; | ||
176 | + | ||
177 | + round = get_round(vxrm, a, shift); | ||
178 | + res = (a >> shift) + round; | ||
179 | + return res; | ||
180 | +} | ||
181 | +RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8) | ||
182 | +RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16) | ||
183 | +RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32) | ||
184 | +RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64) | ||
185 | +GEN_VEXT_VV_RM(vssra_vv_b, 1, 1, clearb) | ||
186 | +GEN_VEXT_VV_RM(vssra_vv_h, 2, 2, clearh) | ||
187 | +GEN_VEXT_VV_RM(vssra_vv_w, 4, 4, clearl) | ||
188 | +GEN_VEXT_VV_RM(vssra_vv_d, 8, 8, clearq) | ||
189 | + | ||
190 | +RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8) | ||
191 | +RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16) | ||
192 | +RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32) | ||
193 | +RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64) | ||
194 | +GEN_VEXT_VX_RM(vssra_vx_b, 1, 1, clearb) | ||
195 | +GEN_VEXT_VX_RM(vssra_vx_h, 2, 2, clearh) | ||
196 | +GEN_VEXT_VX_RM(vssra_vx_w, 4, 4, clearl) | ||
197 | +GEN_VEXT_VX_RM(vssra_vx_d, 8, 8, clearq) | ||
198 | -- | ||
199 | 2.27.0 | ||
200 | |||
201 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: LIU Zhiwei <zhiwei_liu@c-sky.com> | ||
2 | 1 | ||
3 | Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> | ||
4 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
5 | Reviewed-by: Alistair Francis <alistair.francis@wdc.com> | ||
6 | Message-id: 20200623215920.2594-30-zhiwei_liu@c-sky.com | ||
7 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | ||
8 | --- | ||
9 | target/riscv/helper.h | 13 +++ | ||
10 | target/riscv/insn32.decode | 6 + | ||
11 | target/riscv/insn_trans/trans_rvv.inc.c | 8 ++ | ||
12 | target/riscv/vector_helper.c | 141 ++++++++++++++++++++++++ | ||
13 | 4 files changed, 168 insertions(+) | ||
14 | |||
15 | diff --git a/target/riscv/helper.h b/target/riscv/helper.h | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/target/riscv/helper.h | ||
18 | +++ b/target/riscv/helper.h | ||
19 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vssra_vx_b, void, ptr, ptr, tl, ptr, env, i32) | ||
20 | DEF_HELPER_6(vssra_vx_h, void, ptr, ptr, tl, ptr, env, i32) | ||
21 | DEF_HELPER_6(vssra_vx_w, void, ptr, ptr, tl, ptr, env, i32) | ||
22 | DEF_HELPER_6(vssra_vx_d, void, ptr, ptr, tl, ptr, env, i32) | ||
23 | + | ||
24 | +DEF_HELPER_6(vnclip_vv_b, void, ptr, ptr, ptr, ptr, env, i32) | ||
25 | +DEF_HELPER_6(vnclip_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | ||
26 | +DEF_HELPER_6(vnclip_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
27 | +DEF_HELPER_6(vnclipu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) | ||
28 | +DEF_HELPER_6(vnclipu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | ||
29 | +DEF_HELPER_6(vnclipu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
30 | +DEF_HELPER_6(vnclipu_vx_b, void, ptr, ptr, tl, ptr, env, i32) | ||
31 | +DEF_HELPER_6(vnclipu_vx_h, void, ptr, ptr, tl, ptr, env, i32) | ||
32 | +DEF_HELPER_6(vnclipu_vx_w, void, ptr, ptr, tl, ptr, env, i32) | ||
33 | +DEF_HELPER_6(vnclip_vx_b, void, ptr, ptr, tl, ptr, env, i32) | ||
34 | +DEF_HELPER_6(vnclip_vx_h, void, ptr, ptr, tl, ptr, env, i32) | ||
35 | +DEF_HELPER_6(vnclip_vx_w, void, ptr, ptr, tl, ptr, env, i32) | ||
36 | diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode | ||
37 | index XXXXXXX..XXXXXXX 100644 | ||
38 | --- a/target/riscv/insn32.decode | ||
39 | +++ b/target/riscv/insn32.decode | ||
40 | @@ -XXX,XX +XXX,XX @@ vssrl_vi 101010 . ..... ..... 011 ..... 1010111 @r_vm | ||
41 | vssra_vv 101011 . ..... ..... 000 ..... 1010111 @r_vm | ||
42 | vssra_vx 101011 . ..... ..... 100 ..... 1010111 @r_vm | ||
43 | vssra_vi 101011 . ..... ..... 011 ..... 1010111 @r_vm | ||
44 | +vnclipu_vv 101110 . ..... ..... 000 ..... 1010111 @r_vm | ||
45 | +vnclipu_vx 101110 . ..... ..... 100 ..... 1010111 @r_vm | ||
46 | +vnclipu_vi 101110 . ..... ..... 011 ..... 1010111 @r_vm | ||
47 | +vnclip_vv 101111 . ..... ..... 000 ..... 1010111 @r_vm | ||
48 | +vnclip_vx 101111 . ..... ..... 100 ..... 1010111 @r_vm | ||
49 | +vnclip_vi 101111 . ..... ..... 011 ..... 1010111 @r_vm | ||
50 | |||
51 | vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm | ||
52 | vsetvl 1000000 ..... ..... 111 ..... 1010111 @r | ||
53 | diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c | ||
54 | index XXXXXXX..XXXXXXX 100644 | ||
55 | --- a/target/riscv/insn_trans/trans_rvv.inc.c | ||
56 | +++ b/target/riscv/insn_trans/trans_rvv.inc.c | ||
57 | @@ -XXX,XX +XXX,XX @@ GEN_OPIVX_TRANS(vssrl_vx, opivx_check) | ||
58 | GEN_OPIVX_TRANS(vssra_vx, opivx_check) | ||
59 | GEN_OPIVI_TRANS(vssrl_vi, 1, vssrl_vx, opivx_check) | ||
60 | GEN_OPIVI_TRANS(vssra_vi, 0, vssra_vx, opivx_check) | ||
61 | + | ||
62 | +/* Vector Narrowing Fixed-Point Clip Instructions */ | ||
63 | +GEN_OPIVV_NARROW_TRANS(vnclipu_vv) | ||
64 | +GEN_OPIVV_NARROW_TRANS(vnclip_vv) | ||
65 | +GEN_OPIVX_NARROW_TRANS(vnclipu_vx) | ||
66 | +GEN_OPIVX_NARROW_TRANS(vnclip_vx) | ||
67 | +GEN_OPIVI_NARROW_TRANS(vnclipu_vi, 1, vnclipu_vx) | ||
68 | +GEN_OPIVI_NARROW_TRANS(vnclip_vi, 1, vnclip_vx) | ||
69 | diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c | ||
70 | index XXXXXXX..XXXXXXX 100644 | ||
71 | --- a/target/riscv/vector_helper.c | ||
72 | +++ b/target/riscv/vector_helper.c | ||
73 | @@ -XXX,XX +XXX,XX @@ GEN_VEXT_AMO(vamomaxuw_v_w, uint32_t, uint32_t, idx_w, clearl) | ||
74 | #define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t | ||
75 | #define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t | ||
76 | #define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t | ||
77 | +#define NOP_SSS_B int8_t, int8_t, int16_t, int8_t, int16_t | ||
78 | +#define NOP_SSS_H int16_t, int16_t, int32_t, int16_t, int32_t | ||
79 | +#define NOP_SSS_W int32_t, int32_t, int64_t, int32_t, int64_t | ||
80 | +#define NOP_UUU_B uint8_t, uint8_t, uint16_t, uint8_t, uint16_t | ||
81 | +#define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t | ||
82 | +#define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t | ||
83 | |||
84 | /* operation of two vector elements */ | ||
85 | typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i); | ||
86 | @@ -XXX,XX +XXX,XX @@ vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) | ||
87 | res = (a >> shift) + round; | ||
88 | return res; | ||
89 | } | ||
90 | + | ||
91 | RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8) | ||
92 | RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16) | ||
93 | RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32) | ||
94 | @@ -XXX,XX +XXX,XX @@ GEN_VEXT_VX_RM(vssra_vx_b, 1, 1, clearb) | ||
95 | GEN_VEXT_VX_RM(vssra_vx_h, 2, 2, clearh) | ||
96 | GEN_VEXT_VX_RM(vssra_vx_w, 4, 4, clearl) | ||
97 | GEN_VEXT_VX_RM(vssra_vx_d, 8, 8, clearq) | ||
98 | + | ||
99 | +/* Vector Narrowing Fixed-Point Clip Instructions */ | ||
100 | +static inline int8_t | ||
101 | +vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b) | ||
102 | +{ | ||
103 | + uint8_t round, shift = b & 0xf; | ||
104 | + int16_t res; | ||
105 | + | ||
106 | + round = get_round(vxrm, a, shift); | ||
107 | + res = (a >> shift) + round; | ||
108 | + if (res > INT8_MAX) { | ||
109 | + env->vxsat = 0x1; | ||
110 | + return INT8_MAX; | ||
111 | + } else if (res < INT8_MIN) { | ||
112 | + env->vxsat = 0x1; | ||
113 | + return INT8_MIN; | ||
114 | + } else { | ||
115 | + return res; | ||
116 | + } | ||
117 | +} | ||
118 | + | ||
119 | +static inline int16_t | ||
120 | +vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b) | ||
121 | +{ | ||
122 | + uint8_t round, shift = b & 0x1f; | ||
123 | + int32_t res; | ||
124 | + | ||
125 | + round = get_round(vxrm, a, shift); | ||
126 | + res = (a >> shift) + round; | ||
127 | + if (res > INT16_MAX) { | ||
128 | + env->vxsat = 0x1; | ||
129 | + return INT16_MAX; | ||
130 | + } else if (res < INT16_MIN) { | ||
131 | + env->vxsat = 0x1; | ||
132 | + return INT16_MIN; | ||
133 | + } else { | ||
134 | + return res; | ||
135 | + } | ||
136 | +} | ||
137 | + | ||
138 | +static inline int32_t | ||
139 | +vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b) | ||
140 | +{ | ||
141 | + uint8_t round, shift = b & 0x3f; | ||
142 | + int64_t res; | ||
143 | + | ||
144 | + round = get_round(vxrm, a, shift); | ||
145 | + res = (a >> shift) + round; | ||
146 | + if (res > INT32_MAX) { | ||
147 | + env->vxsat = 0x1; | ||
148 | + return INT32_MAX; | ||
149 | + } else if (res < INT32_MIN) { | ||
150 | + env->vxsat = 0x1; | ||
151 | + return INT32_MIN; | ||
152 | + } else { | ||
153 | + return res; | ||
154 | + } | ||
155 | +} | ||
156 | + | ||
157 | +RVVCALL(OPIVV2_RM, vnclip_vv_b, NOP_SSS_B, H1, H2, H1, vnclip8) | ||
158 | +RVVCALL(OPIVV2_RM, vnclip_vv_h, NOP_SSS_H, H2, H4, H2, vnclip16) | ||
159 | +RVVCALL(OPIVV2_RM, vnclip_vv_w, NOP_SSS_W, H4, H8, H4, vnclip32) | ||
160 | +GEN_VEXT_VV_RM(vnclip_vv_b, 1, 1, clearb) | ||
161 | +GEN_VEXT_VV_RM(vnclip_vv_h, 2, 2, clearh) | ||
162 | +GEN_VEXT_VV_RM(vnclip_vv_w, 4, 4, clearl) | ||
163 | + | ||
164 | +RVVCALL(OPIVX2_RM, vnclip_vx_b, NOP_SSS_B, H1, H2, vnclip8) | ||
165 | +RVVCALL(OPIVX2_RM, vnclip_vx_h, NOP_SSS_H, H2, H4, vnclip16) | ||
166 | +RVVCALL(OPIVX2_RM, vnclip_vx_w, NOP_SSS_W, H4, H8, vnclip32) | ||
167 | +GEN_VEXT_VX_RM(vnclip_vx_b, 1, 1, clearb) | ||
168 | +GEN_VEXT_VX_RM(vnclip_vx_h, 2, 2, clearh) | ||
169 | +GEN_VEXT_VX_RM(vnclip_vx_w, 4, 4, clearl) | ||
170 | + | ||
171 | +static inline uint8_t | ||
172 | +vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b) | ||
173 | +{ | ||
174 | + uint8_t round, shift = b & 0xf; | ||
175 | + uint16_t res; | ||
176 | + | ||
177 | + round = get_round(vxrm, a, shift); | ||
178 | + res = (a >> shift) + round; | ||
179 | + if (res > UINT8_MAX) { | ||
180 | + env->vxsat = 0x1; | ||
181 | + return UINT8_MAX; | ||
182 | + } else { | ||
183 | + return res; | ||
184 | + } | ||
185 | +} | ||
186 | + | ||
187 | +static inline uint16_t | ||
188 | +vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b) | ||
189 | +{ | ||
190 | + uint8_t round, shift = b & 0x1f; | ||
191 | + uint32_t res; | ||
192 | + | ||
193 | + round = get_round(vxrm, a, shift); | ||
194 | + res = (a >> shift) + round; | ||
195 | + if (res > UINT16_MAX) { | ||
196 | + env->vxsat = 0x1; | ||
197 | + return UINT16_MAX; | ||
198 | + } else { | ||
199 | + return res; | ||
200 | + } | ||
201 | +} | ||
202 | + | ||
203 | +static inline uint32_t | ||
204 | +vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b) | ||
205 | +{ | ||
206 | + uint8_t round, shift = b & 0x3f; /* shift amount is the low 6 bits of b */ | ||
207 | + uint64_t res; /* unsigned: a signed temp turned a >= 2^63 (shift 0) negative and skipped the clip */ | ||
208 | + /* Shift a right, add the get_round() increment for vxrm, then saturate to 32 bits. */ | ||
209 | + round = get_round(vxrm, a, shift); | ||
210 | + res = (a >> shift) + round; | ||
211 | + if (res > UINT32_MAX) { | ||
212 | + env->vxsat = 0x1; /* record that the result was clipped */ | ||
213 | + return UINT32_MAX; | ||
214 | + } else { | ||
215 | + return res; | ||
216 | + } | ||
217 | +} | ||
218 | + | ||
219 | +RVVCALL(OPIVV2_RM, vnclipu_vv_b, NOP_UUU_B, H1, H2, H1, vnclipu8) | ||
220 | +RVVCALL(OPIVV2_RM, vnclipu_vv_h, NOP_UUU_H, H2, H4, H2, vnclipu16) | ||
221 | +RVVCALL(OPIVV2_RM, vnclipu_vv_w, NOP_UUU_W, H4, H8, H4, vnclipu32) | ||
222 | +GEN_VEXT_VV_RM(vnclipu_vv_b, 1, 1, clearb) | ||
223 | +GEN_VEXT_VV_RM(vnclipu_vv_h, 2, 2, clearh) | ||
224 | +GEN_VEXT_VV_RM(vnclipu_vv_w, 4, 4, clearl) | ||
225 | + | ||
226 | +RVVCALL(OPIVX2_RM, vnclipu_vx_b, NOP_UUU_B, H1, H2, vnclipu8) | ||
227 | +RVVCALL(OPIVX2_RM, vnclipu_vx_h, NOP_UUU_H, H2, H4, vnclipu16) | ||
228 | +RVVCALL(OPIVX2_RM, vnclipu_vx_w, NOP_UUU_W, H4, H8, vnclipu32) | ||
229 | +GEN_VEXT_VX_RM(vnclipu_vx_b, 1, 1, clearb) | ||
230 | +GEN_VEXT_VX_RM(vnclipu_vx_h, 2, 2, clearh) | ||
231 | +GEN_VEXT_VX_RM(vnclipu_vx_w, 4, 4, clearl) | ||
232 | -- | ||
233 | 2.27.0 | ||
234 | |||
235 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: LIU Zhiwei <zhiwei_liu@c-sky.com> | ||
2 | 1 | ||
3 | Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> | ||
4 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
5 | Reviewed-by: Alistair Francis <alistair.francis@wdc.com> | ||
6 | Message-id: 20200623215920.2594-32-zhiwei_liu@c-sky.com | ||
7 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | ||
8 | --- | ||
9 | target/riscv/helper.h | 17 +++ | ||
10 | target/riscv/insn32.decode | 8 ++ | ||
11 | target/riscv/insn_trans/trans_rvv.inc.c | 149 ++++++++++++++++++++++++ | ||
12 | target/riscv/vector_helper.c | 83 +++++++++++++ | ||
13 | 4 files changed, 257 insertions(+) | ||
14 | |||
15 | diff --git a/target/riscv/helper.h b/target/riscv/helper.h | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/target/riscv/helper.h | ||
18 | +++ b/target/riscv/helper.h | ||
19 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vfsub_vf_d, void, ptr, ptr, i64, ptr, env, i32) | ||
20 | DEF_HELPER_6(vfrsub_vf_h, void, ptr, ptr, i64, ptr, env, i32) | ||
21 | DEF_HELPER_6(vfrsub_vf_w, void, ptr, ptr, i64, ptr, env, i32) | ||
22 | DEF_HELPER_6(vfrsub_vf_d, void, ptr, ptr, i64, ptr, env, i32) | ||
23 | + | ||
24 | +DEF_HELPER_6(vfwadd_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | ||
25 | +DEF_HELPER_6(vfwadd_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
26 | +DEF_HELPER_6(vfwsub_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | ||
27 | +DEF_HELPER_6(vfwsub_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
28 | +DEF_HELPER_6(vfwadd_wv_h, void, ptr, ptr, ptr, ptr, env, i32) | ||
29 | +DEF_HELPER_6(vfwadd_wv_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
30 | +DEF_HELPER_6(vfwsub_wv_h, void, ptr, ptr, ptr, ptr, env, i32) | ||
31 | +DEF_HELPER_6(vfwsub_wv_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
32 | +DEF_HELPER_6(vfwadd_vf_h, void, ptr, ptr, i64, ptr, env, i32) | ||
33 | +DEF_HELPER_6(vfwadd_vf_w, void, ptr, ptr, i64, ptr, env, i32) | ||
34 | +DEF_HELPER_6(vfwsub_vf_h, void, ptr, ptr, i64, ptr, env, i32) | ||
35 | +DEF_HELPER_6(vfwsub_vf_w, void, ptr, ptr, i64, ptr, env, i32) | ||
36 | +DEF_HELPER_6(vfwadd_wf_h, void, ptr, ptr, i64, ptr, env, i32) | ||
37 | +DEF_HELPER_6(vfwadd_wf_w, void, ptr, ptr, i64, ptr, env, i32) | ||
38 | +DEF_HELPER_6(vfwsub_wf_h, void, ptr, ptr, i64, ptr, env, i32) | ||
39 | +DEF_HELPER_6(vfwsub_wf_w, void, ptr, ptr, i64, ptr, env, i32) | ||
40 | diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode | ||
41 | index XXXXXXX..XXXXXXX 100644 | ||
42 | --- a/target/riscv/insn32.decode | ||
43 | +++ b/target/riscv/insn32.decode | ||
44 | @@ -XXX,XX +XXX,XX @@ vfadd_vf 000000 . ..... ..... 101 ..... 1010111 @r_vm | ||
45 | vfsub_vv 000010 . ..... ..... 001 ..... 1010111 @r_vm | ||
46 | vfsub_vf 000010 . ..... ..... 101 ..... 1010111 @r_vm | ||
47 | vfrsub_vf 100111 . ..... ..... 101 ..... 1010111 @r_vm | ||
48 | +vfwadd_vv 110000 . ..... ..... 001 ..... 1010111 @r_vm | ||
49 | +vfwadd_vf 110000 . ..... ..... 101 ..... 1010111 @r_vm | ||
50 | +vfwadd_wv 110100 . ..... ..... 001 ..... 1010111 @r_vm | ||
51 | +vfwadd_wf 110100 . ..... ..... 101 ..... 1010111 @r_vm | ||
52 | +vfwsub_vv 110010 . ..... ..... 001 ..... 1010111 @r_vm | ||
53 | +vfwsub_vf 110010 . ..... ..... 101 ..... 1010111 @r_vm | ||
54 | +vfwsub_wv 110110 . ..... ..... 001 ..... 1010111 @r_vm | ||
55 | +vfwsub_wf 110110 . ..... ..... 101 ..... 1010111 @r_vm | ||
56 | |||
57 | vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm | ||
58 | vsetvl 1000000 ..... ..... 111 ..... 1010111 @r | ||
59 | diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c | ||
60 | index XXXXXXX..XXXXXXX 100644 | ||
61 | --- a/target/riscv/insn_trans/trans_rvv.inc.c | ||
62 | +++ b/target/riscv/insn_trans/trans_rvv.inc.c | ||
63 | @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ | ||
64 | GEN_OPFVF_TRANS(vfadd_vf, opfvf_check) | ||
65 | GEN_OPFVF_TRANS(vfsub_vf, opfvf_check) | ||
66 | GEN_OPFVF_TRANS(vfrsub_vf, opfvf_check) | ||
67 | + | ||
68 | +/* Vector Widening Floating-Point Add/Subtract Instructions */ | ||
69 | +static bool opfvv_widen_check(DisasContext *s, arg_rmrr *a) | ||
70 | +{ | ||
71 | + return (vext_check_isa_ill(s) && | ||
72 | + vext_check_overlap_mask(s, a->rd, a->vm, true) && | ||
73 | + vext_check_reg(s, a->rd, true) && | ||
74 | + vext_check_reg(s, a->rs2, false) && | ||
75 | + vext_check_reg(s, a->rs1, false) && | ||
76 | + vext_check_overlap_group(a->rd, 2 << s->lmul, a->rs2, | ||
77 | + 1 << s->lmul) && | ||
78 | + vext_check_overlap_group(a->rd, 2 << s->lmul, a->rs1, | ||
79 | + 1 << s->lmul) && | ||
80 | + (s->lmul < 0x3) && (s->sew < 0x3) && (s->sew != 0)); | ||
81 | +} | ||
82 | + | ||
83 | +/* OPFVV with WIDEN */ | ||
84 | +#define GEN_OPFVV_WIDEN_TRANS(NAME, CHECK) \ | ||
85 | +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ | ||
86 | +{ \ | ||
87 | + if (CHECK(s, a)) { \ | ||
88 | + uint32_t data = 0; \ | ||
89 | + static gen_helper_gvec_4_ptr * const fns[2] = { \ | ||
90 | + gen_helper_##NAME##_h, gen_helper_##NAME##_w, \ | ||
91 | + }; \ | ||
92 | + TCGLabel *over = gen_new_label(); \ | ||
93 | + gen_set_rm(s, 7); \ | ||
94 | + tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ | ||
95 | + \ | ||
96 | + data = FIELD_DP32(data, VDATA, MLEN, s->mlen); \ | ||
97 | + data = FIELD_DP32(data, VDATA, VM, a->vm); \ | ||
98 | + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ | ||
99 | + tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ | ||
100 | + vreg_ofs(s, a->rs1), \ | ||
101 | + vreg_ofs(s, a->rs2), cpu_env, 0, \ | ||
102 | + s->vlen / 8, data, fns[s->sew - 1]); \ | ||
103 | + gen_set_label(over); \ | ||
104 | + return true; \ | ||
105 | + } \ | ||
106 | + return false; \ | ||
107 | +} | ||
108 | + | ||
109 | +GEN_OPFVV_WIDEN_TRANS(vfwadd_vv, opfvv_widen_check) | ||
110 | +GEN_OPFVV_WIDEN_TRANS(vfwsub_vv, opfvv_widen_check) | ||
111 | + | ||
112 | +static bool opfvf_widen_check(DisasContext *s, arg_rmrr *a) | ||
113 | +{ | ||
114 | + return (vext_check_isa_ill(s) && | ||
115 | + vext_check_overlap_mask(s, a->rd, a->vm, true) && | ||
116 | + vext_check_reg(s, a->rd, true) && | ||
117 | + vext_check_reg(s, a->rs2, false) && | ||
118 | + vext_check_overlap_group(a->rd, 2 << s->lmul, a->rs2, | ||
119 | + 1 << s->lmul) && | ||
120 | + (s->lmul < 0x3) && (s->sew < 0x3) && (s->sew != 0)); | ||
121 | +} | ||
122 | + | ||
123 | +/* OPFVF with WIDEN */ | ||
124 | +#define GEN_OPFVF_WIDEN_TRANS(NAME) \ | ||
125 | +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ | ||
126 | +{ \ | ||
127 | + if (opfvf_widen_check(s, a)) { \ | ||
128 | + uint32_t data = 0; \ | ||
129 | + static gen_helper_opfvf *const fns[2] = { \ | ||
130 | + gen_helper_##NAME##_h, gen_helper_##NAME##_w, \ | ||
131 | + }; \ | ||
132 | + gen_set_rm(s, 7); \ | ||
133 | + data = FIELD_DP32(data, VDATA, MLEN, s->mlen); \ | ||
134 | + data = FIELD_DP32(data, VDATA, VM, a->vm); \ | ||
135 | + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ | ||
136 | + return opfvf_trans(a->rd, a->rs1, a->rs2, data, \ | ||
137 | + fns[s->sew - 1], s); \ | ||
138 | + } \ | ||
139 | + return false; \ | ||
140 | +} | ||
141 | + | ||
142 | +GEN_OPFVF_WIDEN_TRANS(vfwadd_vf) | ||
143 | +GEN_OPFVF_WIDEN_TRANS(vfwsub_vf) | ||
144 | + | ||
145 | +static bool opfwv_widen_check(DisasContext *s, arg_rmrr *a) | ||
146 | +{ | ||
147 | + return (vext_check_isa_ill(s) && | ||
148 | + vext_check_overlap_mask(s, a->rd, a->vm, true) && | ||
149 | + vext_check_reg(s, a->rd, true) && | ||
150 | + vext_check_reg(s, a->rs2, true) && | ||
151 | + vext_check_reg(s, a->rs1, false) && | ||
152 | + vext_check_overlap_group(a->rd, 2 << s->lmul, a->rs1, | ||
153 | + 1 << s->lmul) && | ||
154 | + (s->lmul < 0x3) && (s->sew < 0x3) && (s->sew != 0)); | ||
155 | +} | ||
156 | + | ||
157 | +/* WIDEN OPFVV with WIDEN */ | ||
158 | +#define GEN_OPFWV_WIDEN_TRANS(NAME) \ | ||
159 | +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ | ||
160 | +{ \ | ||
161 | + if (opfwv_widen_check(s, a)) { \ | ||
162 | + uint32_t data = 0; \ | ||
163 | + static gen_helper_gvec_4_ptr * const fns[2] = { \ | ||
164 | + gen_helper_##NAME##_h, gen_helper_##NAME##_w, \ | ||
165 | + }; \ | ||
166 | + TCGLabel *over = gen_new_label(); \ | ||
167 | + gen_set_rm(s, 7); \ | ||
168 | + tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ | ||
169 | + \ | ||
170 | + data = FIELD_DP32(data, VDATA, MLEN, s->mlen); \ | ||
171 | + data = FIELD_DP32(data, VDATA, VM, a->vm); \ | ||
172 | + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ | ||
173 | + tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ | ||
174 | + vreg_ofs(s, a->rs1), \ | ||
175 | + vreg_ofs(s, a->rs2), cpu_env, 0, \ | ||
176 | + s->vlen / 8, data, fns[s->sew - 1]); \ | ||
177 | + gen_set_label(over); \ | ||
178 | + return true; \ | ||
179 | + } \ | ||
180 | + return false; \ | ||
181 | +} | ||
182 | + | ||
183 | +GEN_OPFWV_WIDEN_TRANS(vfwadd_wv) | ||
184 | +GEN_OPFWV_WIDEN_TRANS(vfwsub_wv) | ||
185 | + | ||
186 | +static bool opfwf_widen_check(DisasContext *s, arg_rmrr *a) | ||
187 | +{ | ||
188 | + return (vext_check_isa_ill(s) && | ||
189 | + vext_check_overlap_mask(s, a->rd, a->vm, true) && | ||
190 | + vext_check_reg(s, a->rd, true) && | ||
191 | + vext_check_reg(s, a->rs2, true) && | ||
192 | + (s->lmul < 0x3) && (s->sew < 0x3) && (s->sew != 0)); | ||
193 | +} | ||
194 | + | ||
195 | +/* WIDEN OPFVF with WIDEN */ | ||
196 | +#define GEN_OPFWF_WIDEN_TRANS(NAME) \ | ||
197 | +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ | ||
198 | +{ \ | ||
199 | + if (opfwf_widen_check(s, a)) { \ | ||
200 | + uint32_t data = 0; \ | ||
201 | + static gen_helper_opfvf *const fns[2] = { \ | ||
202 | + gen_helper_##NAME##_h, gen_helper_##NAME##_w, \ | ||
203 | + }; \ | ||
204 | + gen_set_rm(s, 7); \ | ||
205 | + data = FIELD_DP32(data, VDATA, MLEN, s->mlen); \ | ||
206 | + data = FIELD_DP32(data, VDATA, VM, a->vm); \ | ||
207 | + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ | ||
208 | + return opfvf_trans(a->rd, a->rs1, a->rs2, data, \ | ||
209 | + fns[s->sew - 1], s); \ | ||
210 | + } \ | ||
211 | + return false; \ | ||
212 | +} | ||
213 | + | ||
214 | +GEN_OPFWF_WIDEN_TRANS(vfwadd_wf) | ||
215 | +GEN_OPFWF_WIDEN_TRANS(vfwsub_wf) | ||
216 | diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c | ||
217 | index XXXXXXX..XXXXXXX 100644 | ||
218 | --- a/target/riscv/vector_helper.c | ||
219 | +++ b/target/riscv/vector_helper.c | ||
220 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub) | ||
221 | GEN_VEXT_VF(vfrsub_vf_h, 2, 2, clearh) | ||
222 | GEN_VEXT_VF(vfrsub_vf_w, 4, 4, clearl) | ||
223 | GEN_VEXT_VF(vfrsub_vf_d, 8, 8, clearq) | ||
224 | + | ||
225 | +/* Vector Widening Floating-Point Add/Subtract Instructions */ | ||
226 | +static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s) | ||
227 | +{ | ||
228 | + return float32_add(float16_to_float32(a, true, s), | ||
229 | + float16_to_float32(b, true, s), s); | ||
230 | +} | ||
231 | + | ||
232 | +static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s) | ||
233 | +{ | ||
234 | + return float64_add(float32_to_float64(a, s), | ||
235 | + float32_to_float64(b, s), s); | ||
236 | + | ||
237 | +} | ||
238 | + | ||
239 | +RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16) | ||
240 | +RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32) | ||
241 | +GEN_VEXT_VV_ENV(vfwadd_vv_h, 2, 4, clearl) | ||
242 | +GEN_VEXT_VV_ENV(vfwadd_vv_w, 4, 8, clearq) | ||
243 | +RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16) | ||
244 | +RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32) | ||
245 | +GEN_VEXT_VF(vfwadd_vf_h, 2, 4, clearl) | ||
246 | +GEN_VEXT_VF(vfwadd_vf_w, 4, 8, clearq) | ||
247 | + | ||
248 | +static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s) | ||
249 | +{ | ||
250 | + return float32_sub(float16_to_float32(a, true, s), | ||
251 | + float16_to_float32(b, true, s), s); | ||
252 | +} | ||
253 | + | ||
254 | +static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s) | ||
255 | +{ | ||
256 | + return float64_sub(float32_to_float64(a, s), | ||
257 | + float32_to_float64(b, s), s); | ||
258 | + | ||
259 | +} | ||
260 | + | ||
261 | +RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16) | ||
262 | +RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32) | ||
263 | +GEN_VEXT_VV_ENV(vfwsub_vv_h, 2, 4, clearl) | ||
264 | +GEN_VEXT_VV_ENV(vfwsub_vv_w, 4, 8, clearq) | ||
265 | +RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16) | ||
266 | +RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32) | ||
267 | +GEN_VEXT_VF(vfwsub_vf_h, 2, 4, clearl) | ||
268 | +GEN_VEXT_VF(vfwsub_vf_w, 4, 8, clearq) | ||
269 | + | ||
270 | +static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s) | ||
271 | +{ | ||
272 | + return float32_add(a, float16_to_float32(b, true, s), s); | ||
273 | +} | ||
274 | + | ||
275 | +static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s) | ||
276 | +{ | ||
277 | + return float64_add(a, float32_to_float64(b, s), s); | ||
278 | +} | ||
279 | + | ||
280 | +RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16) | ||
281 | +RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32) | ||
282 | +GEN_VEXT_VV_ENV(vfwadd_wv_h, 2, 4, clearl) | ||
283 | +GEN_VEXT_VV_ENV(vfwadd_wv_w, 4, 8, clearq) | ||
284 | +RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16) | ||
285 | +RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32) | ||
286 | +GEN_VEXT_VF(vfwadd_wf_h, 2, 4, clearl) | ||
287 | +GEN_VEXT_VF(vfwadd_wf_w, 4, 8, clearq) | ||
288 | + | ||
289 | +static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s) | ||
290 | +{ | ||
291 | + return float32_sub(a, float16_to_float32(b, true, s), s); | ||
292 | +} | ||
293 | + | ||
294 | +static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s) | ||
295 | +{ | ||
296 | + return float64_sub(a, float32_to_float64(b, s), s); | ||
297 | +} | ||
298 | + | ||
299 | +RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16) | ||
300 | +RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32) | ||
301 | +GEN_VEXT_VV_ENV(vfwsub_wv_h, 2, 4, clearl) | ||
302 | +GEN_VEXT_VV_ENV(vfwsub_wv_w, 4, 8, clearq) | ||
303 | +RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16) | ||
304 | +RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32) | ||
305 | +GEN_VEXT_VF(vfwsub_wf_h, 2, 4, clearl) | ||
306 | +GEN_VEXT_VF(vfwsub_wf_w, 4, 8, clearq) | ||
307 | -- | ||
308 | 2.27.0 | ||
309 | |||
310 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: LIU Zhiwei <zhiwei_liu@c-sky.com> | ||
2 | 1 | ||
3 | Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> | ||
4 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
5 | Reviewed-by: Alistair Francis <alistair.francis@wdc.com> | ||
6 | Message-id: 20200623215920.2594-33-zhiwei_liu@c-sky.com | ||
7 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | ||
8 | --- | ||
9 | target/riscv/helper.h | 16 ++++++++ | ||
10 | target/riscv/insn32.decode | 5 +++ | ||
11 | target/riscv/insn_trans/trans_rvv.inc.c | 7 ++++ | ||
12 | target/riscv/vector_helper.c | 49 +++++++++++++++++++++++++ | ||
13 | 4 files changed, 77 insertions(+) | ||
14 | |||
15 | diff --git a/target/riscv/helper.h b/target/riscv/helper.h | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/target/riscv/helper.h | ||
18 | +++ b/target/riscv/helper.h | ||
19 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vfwadd_wf_h, void, ptr, ptr, i64, ptr, env, i32) | ||
20 | DEF_HELPER_6(vfwadd_wf_w, void, ptr, ptr, i64, ptr, env, i32) | ||
21 | DEF_HELPER_6(vfwsub_wf_h, void, ptr, ptr, i64, ptr, env, i32) | ||
22 | DEF_HELPER_6(vfwsub_wf_w, void, ptr, ptr, i64, ptr, env, i32) | ||
23 | + | ||
24 | +DEF_HELPER_6(vfmul_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | ||
25 | +DEF_HELPER_6(vfmul_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
26 | +DEF_HELPER_6(vfmul_vv_d, void, ptr, ptr, ptr, ptr, env, i32) | ||
27 | +DEF_HELPER_6(vfdiv_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | ||
28 | +DEF_HELPER_6(vfdiv_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
29 | +DEF_HELPER_6(vfdiv_vv_d, void, ptr, ptr, ptr, ptr, env, i32) | ||
30 | +DEF_HELPER_6(vfmul_vf_h, void, ptr, ptr, i64, ptr, env, i32) | ||
31 | +DEF_HELPER_6(vfmul_vf_w, void, ptr, ptr, i64, ptr, env, i32) | ||
32 | +DEF_HELPER_6(vfmul_vf_d, void, ptr, ptr, i64, ptr, env, i32) | ||
33 | +DEF_HELPER_6(vfdiv_vf_h, void, ptr, ptr, i64, ptr, env, i32) | ||
34 | +DEF_HELPER_6(vfdiv_vf_w, void, ptr, ptr, i64, ptr, env, i32) | ||
35 | +DEF_HELPER_6(vfdiv_vf_d, void, ptr, ptr, i64, ptr, env, i32) | ||
36 | +DEF_HELPER_6(vfrdiv_vf_h, void, ptr, ptr, i64, ptr, env, i32) | ||
37 | +DEF_HELPER_6(vfrdiv_vf_w, void, ptr, ptr, i64, ptr, env, i32) | ||
38 | +DEF_HELPER_6(vfrdiv_vf_d, void, ptr, ptr, i64, ptr, env, i32) | ||
39 | diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode | ||
40 | index XXXXXXX..XXXXXXX 100644 | ||
41 | --- a/target/riscv/insn32.decode | ||
42 | +++ b/target/riscv/insn32.decode | ||
43 | @@ -XXX,XX +XXX,XX @@ vfwsub_vv 110010 . ..... ..... 001 ..... 1010111 @r_vm | ||
44 | vfwsub_vf 110010 . ..... ..... 101 ..... 1010111 @r_vm | ||
45 | vfwsub_wv 110110 . ..... ..... 001 ..... 1010111 @r_vm | ||
46 | vfwsub_wf 110110 . ..... ..... 101 ..... 1010111 @r_vm | ||
47 | +vfmul_vv 100100 . ..... ..... 001 ..... 1010111 @r_vm | ||
48 | +vfmul_vf 100100 . ..... ..... 101 ..... 1010111 @r_vm | ||
49 | +vfdiv_vv 100000 . ..... ..... 001 ..... 1010111 @r_vm | ||
50 | +vfdiv_vf 100000 . ..... ..... 101 ..... 1010111 @r_vm | ||
51 | +vfrdiv_vf 100001 . ..... ..... 101 ..... 1010111 @r_vm | ||
52 | |||
53 | vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm | ||
54 | vsetvl 1000000 ..... ..... 111 ..... 1010111 @r | ||
55 | diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c | ||
56 | index XXXXXXX..XXXXXXX 100644 | ||
57 | --- a/target/riscv/insn_trans/trans_rvv.inc.c | ||
58 | +++ b/target/riscv/insn_trans/trans_rvv.inc.c | ||
59 | @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ | ||
60 | |||
61 | GEN_OPFWF_WIDEN_TRANS(vfwadd_wf) | ||
62 | GEN_OPFWF_WIDEN_TRANS(vfwsub_wf) | ||
63 | + | ||
64 | +/* Vector Single-Width Floating-Point Multiply/Divide Instructions */ | ||
65 | +GEN_OPFVV_TRANS(vfmul_vv, opfvv_check) | ||
66 | +GEN_OPFVV_TRANS(vfdiv_vv, opfvv_check) | ||
67 | +GEN_OPFVF_TRANS(vfmul_vf, opfvf_check) | ||
68 | +GEN_OPFVF_TRANS(vfdiv_vf, opfvf_check) | ||
69 | +GEN_OPFVF_TRANS(vfrdiv_vf, opfvf_check) | ||
70 | diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c | ||
71 | index XXXXXXX..XXXXXXX 100644 | ||
72 | --- a/target/riscv/vector_helper.c | ||
73 | +++ b/target/riscv/vector_helper.c | ||
74 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16) | ||
75 | RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32) | ||
76 | GEN_VEXT_VF(vfwsub_wf_h, 2, 4, clearl) | ||
77 | GEN_VEXT_VF(vfwsub_wf_w, 4, 8, clearq) | ||
78 | + | ||
79 | +/* Vector Single-Width Floating-Point Multiply/Divide Instructions */ | ||
80 | +RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul) | ||
81 | +RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul) | ||
82 | +RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul) | ||
83 | +GEN_VEXT_VV_ENV(vfmul_vv_h, 2, 2, clearh) | ||
84 | +GEN_VEXT_VV_ENV(vfmul_vv_w, 4, 4, clearl) | ||
85 | +GEN_VEXT_VV_ENV(vfmul_vv_d, 8, 8, clearq) | ||
86 | +RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul) | ||
87 | +RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul) | ||
88 | +RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul) | ||
89 | +GEN_VEXT_VF(vfmul_vf_h, 2, 2, clearh) | ||
90 | +GEN_VEXT_VF(vfmul_vf_w, 4, 4, clearl) | ||
91 | +GEN_VEXT_VF(vfmul_vf_d, 8, 8, clearq) | ||
92 | + | ||
93 | +RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div) | ||
94 | +RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div) | ||
95 | +RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div) | ||
96 | +GEN_VEXT_VV_ENV(vfdiv_vv_h, 2, 2, clearh) | ||
97 | +GEN_VEXT_VV_ENV(vfdiv_vv_w, 4, 4, clearl) | ||
98 | +GEN_VEXT_VV_ENV(vfdiv_vv_d, 8, 8, clearq) | ||
99 | +RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div) | ||
100 | +RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div) | ||
101 | +RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div) | ||
102 | +GEN_VEXT_VF(vfdiv_vf_h, 2, 2, clearh) | ||
103 | +GEN_VEXT_VF(vfdiv_vf_w, 4, 4, clearl) | ||
104 | +GEN_VEXT_VF(vfdiv_vf_d, 8, 8, clearq) | ||
105 | + | ||
106 | +static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s) | ||
107 | +{ | ||
108 | + return float16_div(b, a, s); | ||
109 | +} | ||
110 | + | ||
111 | +static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s) | ||
112 | +{ | ||
113 | + return float32_div(b, a, s); | ||
114 | +} | ||
115 | + | ||
116 | +static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s) | ||
117 | +{ | ||
118 | + return float64_div(b, a, s); | ||
119 | +} | ||
120 | + | ||
121 | +RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv) | ||
122 | +RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv) | ||
123 | +RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv) | ||
124 | +GEN_VEXT_VF(vfrdiv_vf_h, 2, 2, clearh) | ||
125 | +GEN_VEXT_VF(vfrdiv_vf_w, 4, 4, clearl) | ||
126 | +GEN_VEXT_VF(vfrdiv_vf_d, 8, 8, clearq) | ||
127 | -- | ||
128 | 2.27.0 | ||
129 | |||
130 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: LIU Zhiwei <zhiwei_liu@c-sky.com> | ||
2 | 1 | ||
3 | Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> | ||
4 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
5 | Reviewed-by: Alistair Francis <alistair.francis@wdc.com> | ||
6 | Message-id: 20200623215920.2594-34-zhiwei_liu@c-sky.com | ||
7 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | ||
8 | --- | ||
9 | target/riscv/helper.h | 5 +++++ | ||
10 | target/riscv/insn32.decode | 2 ++ | ||
11 | target/riscv/insn_trans/trans_rvv.inc.c | 4 ++++ | ||
12 | target/riscv/vector_helper.c | 22 ++++++++++++++++++++++ | ||
13 | 4 files changed, 33 insertions(+) | ||
14 | |||
15 | diff --git a/target/riscv/helper.h b/target/riscv/helper.h | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/target/riscv/helper.h | ||
18 | +++ b/target/riscv/helper.h | ||
19 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vfdiv_vf_d, void, ptr, ptr, i64, ptr, env, i32) | ||
20 | DEF_HELPER_6(vfrdiv_vf_h, void, ptr, ptr, i64, ptr, env, i32) | ||
21 | DEF_HELPER_6(vfrdiv_vf_w, void, ptr, ptr, i64, ptr, env, i32) | ||
22 | DEF_HELPER_6(vfrdiv_vf_d, void, ptr, ptr, i64, ptr, env, i32) | ||
23 | + | ||
24 | +DEF_HELPER_6(vfwmul_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | ||
25 | +DEF_HELPER_6(vfwmul_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
26 | +DEF_HELPER_6(vfwmul_vf_h, void, ptr, ptr, i64, ptr, env, i32) | ||
27 | +DEF_HELPER_6(vfwmul_vf_w, void, ptr, ptr, i64, ptr, env, i32) | ||
28 | diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode | ||
29 | index XXXXXXX..XXXXXXX 100644 | ||
30 | --- a/target/riscv/insn32.decode | ||
31 | +++ b/target/riscv/insn32.decode | ||
32 | @@ -XXX,XX +XXX,XX @@ vfmul_vf 100100 . ..... ..... 101 ..... 1010111 @r_vm | ||
33 | vfdiv_vv 100000 . ..... ..... 001 ..... 1010111 @r_vm | ||
34 | vfdiv_vf 100000 . ..... ..... 101 ..... 1010111 @r_vm | ||
35 | vfrdiv_vf 100001 . ..... ..... 101 ..... 1010111 @r_vm | ||
36 | +vfwmul_vv 111000 . ..... ..... 001 ..... 1010111 @r_vm | ||
37 | +vfwmul_vf 111000 . ..... ..... 101 ..... 1010111 @r_vm | ||
38 | |||
39 | vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm | ||
40 | vsetvl 1000000 ..... ..... 111 ..... 1010111 @r | ||
41 | diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c | ||
42 | index XXXXXXX..XXXXXXX 100644 | ||
43 | --- a/target/riscv/insn_trans/trans_rvv.inc.c | ||
44 | +++ b/target/riscv/insn_trans/trans_rvv.inc.c | ||
45 | @@ -XXX,XX +XXX,XX @@ GEN_OPFVV_TRANS(vfdiv_vv, opfvv_check) | ||
46 | GEN_OPFVF_TRANS(vfmul_vf, opfvf_check) | ||
47 | GEN_OPFVF_TRANS(vfdiv_vf, opfvf_check) | ||
48 | GEN_OPFVF_TRANS(vfrdiv_vf, opfvf_check) | ||
49 | + | ||
50 | +/* Vector Widening Floating-Point Multiply */ | ||
51 | +GEN_OPFVV_WIDEN_TRANS(vfwmul_vv, opfvv_widen_check) | ||
52 | +GEN_OPFVF_WIDEN_TRANS(vfwmul_vf) | ||
53 | diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c | ||
54 | index XXXXXXX..XXXXXXX 100644 | ||
55 | --- a/target/riscv/vector_helper.c | ||
56 | +++ b/target/riscv/vector_helper.c | ||
57 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv) | ||
58 | GEN_VEXT_VF(vfrdiv_vf_h, 2, 2, clearh) | ||
59 | GEN_VEXT_VF(vfrdiv_vf_w, 4, 4, clearl) | ||
60 | GEN_VEXT_VF(vfrdiv_vf_d, 8, 8, clearq) | ||
61 | + | ||
62 | +/* Vector Widening Floating-Point Multiply */ | ||
63 | +static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s) | ||
64 | +{ | ||
65 | + return float32_mul(float16_to_float32(a, true, s), | ||
66 | + float16_to_float32(b, true, s), s); | ||
67 | +} | ||
68 | + | ||
69 | +static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s) | ||
70 | +{ | ||
71 | + return float64_mul(float32_to_float64(a, s), | ||
72 | + float32_to_float64(b, s), s); | ||
73 | + | ||
74 | +} | ||
75 | +RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16) | ||
76 | +RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32) | ||
77 | +GEN_VEXT_VV_ENV(vfwmul_vv_h, 2, 4, clearl) | ||
78 | +GEN_VEXT_VV_ENV(vfwmul_vv_w, 4, 8, clearq) | ||
79 | +RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16) | ||
80 | +RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32) | ||
81 | +GEN_VEXT_VF(vfwmul_vf_h, 2, 4, clearl) | ||
82 | +GEN_VEXT_VF(vfwmul_vf_w, 4, 8, clearq) | ||
83 | -- | ||
84 | 2.27.0 | ||
85 | |||
86 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: LIU Zhiwei <zhiwei_liu@c-sky.com> | ||
2 | 1 | ||
3 | Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> | ||
4 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
5 | Reviewed-by: Alistair Francis <alistair.francis@wdc.com> | ||
6 | Message-id: 20200623215920.2594-35-zhiwei_liu@c-sky.com | ||
7 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | ||
8 | --- | ||
9 | target/riscv/helper.h | 49 +++++ | ||
10 | target/riscv/insn32.decode | 16 ++ | ||
11 | target/riscv/insn_trans/trans_rvv.inc.c | 18 ++ | ||
12 | target/riscv/vector_helper.c | 251 ++++++++++++++++++++++++ | ||
13 | 4 files changed, 334 insertions(+) | ||
14 | |||
15 | diff --git a/target/riscv/helper.h b/target/riscv/helper.h | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/target/riscv/helper.h | ||
18 | +++ b/target/riscv/helper.h | ||
19 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vfwmul_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | ||
20 | DEF_HELPER_6(vfwmul_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
21 | DEF_HELPER_6(vfwmul_vf_h, void, ptr, ptr, i64, ptr, env, i32) | ||
22 | DEF_HELPER_6(vfwmul_vf_w, void, ptr, ptr, i64, ptr, env, i32) | ||
23 | + | ||
24 | +DEF_HELPER_6(vfmacc_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | ||
25 | +DEF_HELPER_6(vfmacc_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
26 | +DEF_HELPER_6(vfmacc_vv_d, void, ptr, ptr, ptr, ptr, env, i32) | ||
27 | +DEF_HELPER_6(vfnmacc_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | ||
28 | +DEF_HELPER_6(vfnmacc_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
29 | +DEF_HELPER_6(vfnmacc_vv_d, void, ptr, ptr, ptr, ptr, env, i32) | ||
30 | +DEF_HELPER_6(vfmsac_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | ||
31 | +DEF_HELPER_6(vfmsac_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
32 | +DEF_HELPER_6(vfmsac_vv_d, void, ptr, ptr, ptr, ptr, env, i32) | ||
33 | +DEF_HELPER_6(vfnmsac_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | ||
34 | +DEF_HELPER_6(vfnmsac_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
35 | +DEF_HELPER_6(vfnmsac_vv_d, void, ptr, ptr, ptr, ptr, env, i32) | ||
36 | +DEF_HELPER_6(vfmadd_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | ||
37 | +DEF_HELPER_6(vfmadd_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
38 | +DEF_HELPER_6(vfmadd_vv_d, void, ptr, ptr, ptr, ptr, env, i32) | ||
39 | +DEF_HELPER_6(vfnmadd_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | ||
40 | +DEF_HELPER_6(vfnmadd_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
41 | +DEF_HELPER_6(vfnmadd_vv_d, void, ptr, ptr, ptr, ptr, env, i32) | ||
42 | +DEF_HELPER_6(vfmsub_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | ||
43 | +DEF_HELPER_6(vfmsub_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
44 | +DEF_HELPER_6(vfmsub_vv_d, void, ptr, ptr, ptr, ptr, env, i32) | ||
45 | +DEF_HELPER_6(vfnmsub_vv_h, void, ptr, ptr, ptr, ptr, env, i32) | ||
46 | +DEF_HELPER_6(vfnmsub_vv_w, void, ptr, ptr, ptr, ptr, env, i32) | ||
47 | +DEF_HELPER_6(vfnmsub_vv_d, void, ptr, ptr, ptr, ptr, env, i32) | ||
48 | +DEF_HELPER_6(vfmacc_vf_h, void, ptr, ptr, i64, ptr, env, i32) | ||
49 | +DEF_HELPER_6(vfmacc_vf_w, void, ptr, ptr, i64, ptr, env, i32) | ||
50 | +DEF_HELPER_6(vfmacc_vf_d, void, ptr, ptr, i64, ptr, env, i32) | ||
51 | +DEF_HELPER_6(vfnmacc_vf_h, void, ptr, ptr, i64, ptr, env, i32) | ||
52 | +DEF_HELPER_6(vfnmacc_vf_w, void, ptr, ptr, i64, ptr, env, i32) | ||
53 | +DEF_HELPER_6(vfnmacc_vf_d, void, ptr, ptr, i64, ptr, env, i32) | ||
54 | +DEF_HELPER_6(vfmsac_vf_h, void, ptr, ptr, i64, ptr, env, i32) | ||
55 | +DEF_HELPER_6(vfmsac_vf_w, void, ptr, ptr, i64, ptr, env, i32) | ||
56 | +DEF_HELPER_6(vfmsac_vf_d, void, ptr, ptr, i64, ptr, env, i32) | ||
57 | +DEF_HELPER_6(vfnmsac_vf_h, void, ptr, ptr, i64, ptr, env, i32) | ||
58 | +DEF_HELPER_6(vfnmsac_vf_w, void, ptr, ptr, i64, ptr, env, i32) | ||
59 | +DEF_HELPER_6(vfnmsac_vf_d, void, ptr, ptr, i64, ptr, env, i32) | ||
60 | +DEF_HELPER_6(vfmadd_vf_h, void, ptr, ptr, i64, ptr, env, i32) | ||
61 | +DEF_HELPER_6(vfmadd_vf_w, void, ptr, ptr, i64, ptr, env, i32) | ||
62 | +DEF_HELPER_6(vfmadd_vf_d, void, ptr, ptr, i64, ptr, env, i32) | ||
63 | +DEF_HELPER_6(vfnmadd_vf_h, void, ptr, ptr, i64, ptr, env, i32) | ||
64 | +DEF_HELPER_6(vfnmadd_vf_w, void, ptr, ptr, i64, ptr, env, i32) | ||
65 | +DEF_HELPER_6(vfnmadd_vf_d, void, ptr, ptr, i64, ptr, env, i32) | ||
66 | +DEF_HELPER_6(vfmsub_vf_h, void, ptr, ptr, i64, ptr, env, i32) | ||
67 | +DEF_HELPER_6(vfmsub_vf_w, void, ptr, ptr, i64, ptr, env, i32) | ||
68 | +DEF_HELPER_6(vfmsub_vf_d, void, ptr, ptr, i64, ptr, env, i32) | ||
69 | +DEF_HELPER_6(vfnmsub_vf_h, void, ptr, ptr, i64, ptr, env, i32) | ||
70 | +DEF_HELPER_6(vfnmsub_vf_w, void, ptr, ptr, i64, ptr, env, i32) | ||
71 | +DEF_HELPER_6(vfnmsub_vf_d, void, ptr, ptr, i64, ptr, env, i32) | ||
72 | diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode | ||
73 | index XXXXXXX..XXXXXXX 100644 | ||
74 | --- a/target/riscv/insn32.decode | ||
75 | +++ b/target/riscv/insn32.decode | ||
76 | @@ -XXX,XX +XXX,XX @@ vfdiv_vf 100000 . ..... ..... 101 ..... 1010111 @r_vm | ||
77 | vfrdiv_vf 100001 . ..... ..... 101 ..... 1010111 @r_vm | ||
78 | vfwmul_vv 111000 . ..... ..... 001 ..... 1010111 @r_vm | ||
79 | vfwmul_vf 111000 . ..... ..... 101 ..... 1010111 @r_vm | ||
80 | +vfmacc_vv 101100 . ..... ..... 001 ..... 1010111 @r_vm | ||
81 | +vfnmacc_vv 101101 . ..... ..... 001 ..... 1010111 @r_vm | ||
82 | +vfnmacc_vf 101101 . ..... ..... 101 ..... 1010111 @r_vm | ||
83 | +vfmacc_vf 101100 . ..... ..... 101 ..... 1010111 @r_vm | ||
84 | +vfmsac_vv 101110 . ..... ..... 001 ..... 1010111 @r_vm | ||
85 | +vfmsac_vf 101110 . ..... ..... 101 ..... 1010111 @r_vm | ||
86 | +vfnmsac_vv 101111 . ..... ..... 001 ..... 1010111 @r_vm | ||
87 | +vfnmsac_vf 101111 . ..... ..... 101 ..... 1010111 @r_vm | ||
88 | +vfmadd_vv 101000 . ..... ..... 001 ..... 1010111 @r_vm | ||
89 | +vfmadd_vf 101000 . ..... ..... 101 ..... 1010111 @r_vm | ||
90 | +vfnmadd_vv 101001 . ..... ..... 001 ..... 1010111 @r_vm | ||
91 | +vfnmadd_vf 101001 . ..... ..... 101 ..... 1010111 @r_vm | ||
92 | +vfmsub_vv 101010 . ..... ..... 001 ..... 1010111 @r_vm | ||
93 | +vfmsub_vf 101010 . ..... ..... 101 ..... 1010111 @r_vm | ||
94 | +vfnmsub_vv 101011 . ..... ..... 001 ..... 1010111 @r_vm | ||
95 | +vfnmsub_vf 101011 . ..... ..... 101 ..... 1010111 @r_vm | ||
96 | |||
97 | vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm | ||
98 | vsetvl 1000000 ..... ..... 111 ..... 1010111 @r | ||
99 | diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c | ||
100 | index XXXXXXX..XXXXXXX 100644 | ||
101 | --- a/target/riscv/insn_trans/trans_rvv.inc.c | ||
102 | +++ b/target/riscv/insn_trans/trans_rvv.inc.c | ||
103 | @@ -XXX,XX +XXX,XX @@ GEN_OPFVF_TRANS(vfrdiv_vf, opfvf_check) | ||
104 | /* Vector Widening Floating-Point Multiply */ | ||
105 | GEN_OPFVV_WIDEN_TRANS(vfwmul_vv, opfvv_widen_check) | ||
106 | GEN_OPFVF_WIDEN_TRANS(vfwmul_vf) | ||
107 | + | ||
108 | +/* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */ | ||
109 | +GEN_OPFVV_TRANS(vfmacc_vv, opfvv_check) | ||
110 | +GEN_OPFVV_TRANS(vfnmacc_vv, opfvv_check) | ||
111 | +GEN_OPFVV_TRANS(vfmsac_vv, opfvv_check) | ||
112 | +GEN_OPFVV_TRANS(vfnmsac_vv, opfvv_check) | ||
113 | +GEN_OPFVV_TRANS(vfmadd_vv, opfvv_check) | ||
114 | +GEN_OPFVV_TRANS(vfnmadd_vv, opfvv_check) | ||
115 | +GEN_OPFVV_TRANS(vfmsub_vv, opfvv_check) | ||
116 | +GEN_OPFVV_TRANS(vfnmsub_vv, opfvv_check) | ||
117 | +GEN_OPFVF_TRANS(vfmacc_vf, opfvf_check) | ||
118 | +GEN_OPFVF_TRANS(vfnmacc_vf, opfvf_check) | ||
119 | +GEN_OPFVF_TRANS(vfmsac_vf, opfvf_check) | ||
120 | +GEN_OPFVF_TRANS(vfnmsac_vf, opfvf_check) | ||
121 | +GEN_OPFVF_TRANS(vfmadd_vf, opfvf_check) | ||
122 | +GEN_OPFVF_TRANS(vfnmadd_vf, opfvf_check) | ||
123 | +GEN_OPFVF_TRANS(vfmsub_vf, opfvf_check) | ||
124 | +GEN_OPFVF_TRANS(vfnmsub_vf, opfvf_check) | ||
125 | diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c | ||
126 | index XXXXXXX..XXXXXXX 100644 | ||
127 | --- a/target/riscv/vector_helper.c | ||
128 | +++ b/target/riscv/vector_helper.c | ||
129 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16) | ||
130 | RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32) | ||
131 | GEN_VEXT_VF(vfwmul_vf_h, 2, 4, clearl) | ||
132 | GEN_VEXT_VF(vfwmul_vf_w, 4, 8, clearq) | ||
133 | + | ||
134 | +/* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */ | ||
135 | +#define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ | ||
136 | +static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ | ||
137 | + CPURISCVState *env) \ | ||
138 | +{ \ | ||
139 | + TX1 s1 = *((T1 *)vs1 + HS1(i)); \ | ||
140 | + TX2 s2 = *((T2 *)vs2 + HS2(i)); \ | ||
141 | + TD d = *((TD *)vd + HD(i)); \ | ||
142 | + *((TD *)vd + HD(i)) = OP(s2, s1, d, &env->fp_status); \ | ||
143 | +} | ||
144 | + | ||
145 | +static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) | ||
146 | +{ | ||
147 | + return float16_muladd(a, b, d, 0, s); | ||
148 | +} | ||
149 | + | ||
150 | +static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) | ||
151 | +{ | ||
152 | + return float32_muladd(a, b, d, 0, s); | ||
153 | +} | ||
154 | + | ||
155 | +static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) | ||
156 | +{ | ||
157 | + return float64_muladd(a, b, d, 0, s); | ||
158 | +} | ||
159 | + | ||
160 | +RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16) | ||
161 | +RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32) | ||
162 | +RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64) | ||
163 | +GEN_VEXT_VV_ENV(vfmacc_vv_h, 2, 2, clearh) | ||
164 | +GEN_VEXT_VV_ENV(vfmacc_vv_w, 4, 4, clearl) | ||
165 | +GEN_VEXT_VV_ENV(vfmacc_vv_d, 8, 8, clearq) | ||
166 | + | ||
167 | +#define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ | ||
168 | +static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ | ||
169 | + CPURISCVState *env) \ | ||
170 | +{ \ | ||
171 | + TX2 s2 = *((T2 *)vs2 + HS2(i)); \ | ||
172 | + TD d = *((TD *)vd + HD(i)); \ | ||
173 | + *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d, &env->fp_status);\ | ||
174 | +} | ||
175 | + | ||
176 | +RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16) | ||
177 | +RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32) | ||
178 | +RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64) | ||
179 | +GEN_VEXT_VF(vfmacc_vf_h, 2, 2, clearh) | ||
180 | +GEN_VEXT_VF(vfmacc_vf_w, 4, 4, clearl) | ||
181 | +GEN_VEXT_VF(vfmacc_vf_d, 8, 8, clearq) | ||
182 | + | ||
183 | +static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) | ||
184 | +{ | ||
185 | + return float16_muladd(a, b, d, | ||
186 | + float_muladd_negate_c | float_muladd_negate_product, s); | ||
187 | +} | ||
188 | + | ||
189 | +static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) | ||
190 | +{ | ||
191 | + return float32_muladd(a, b, d, | ||
192 | + float_muladd_negate_c | float_muladd_negate_product, s); | ||
193 | +} | ||
194 | + | ||
195 | +static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) | ||
196 | +{ | ||
197 | + return float64_muladd(a, b, d, | ||
198 | + float_muladd_negate_c | float_muladd_negate_product, s); | ||
199 | +} | ||
200 | + | ||
201 | +RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16) | ||
202 | +RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32) | ||
203 | +RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64) | ||
204 | +GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2, 2, clearh) | ||
205 | +GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4, 4, clearl) | ||
206 | +GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8, 8, clearq) | ||
207 | +RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16) | ||
208 | +RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32) | ||
209 | +RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64) | ||
210 | +GEN_VEXT_VF(vfnmacc_vf_h, 2, 2, clearh) | ||
211 | +GEN_VEXT_VF(vfnmacc_vf_w, 4, 4, clearl) | ||
212 | +GEN_VEXT_VF(vfnmacc_vf_d, 8, 8, clearq) | ||
213 | + | ||
214 | +static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) | ||
215 | +{ | ||
216 | + return float16_muladd(a, b, d, float_muladd_negate_c, s); | ||
217 | +} | ||
218 | + | ||
219 | +static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) | ||
220 | +{ | ||
221 | + return float32_muladd(a, b, d, float_muladd_negate_c, s); | ||
222 | +} | ||
223 | + | ||
224 | +static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) | ||
225 | +{ | ||
226 | + return float64_muladd(a, b, d, float_muladd_negate_c, s); | ||
227 | +} | ||
228 | + | ||
229 | +RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16) | ||
230 | +RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32) | ||
231 | +RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64) | ||
232 | +GEN_VEXT_VV_ENV(vfmsac_vv_h, 2, 2, clearh) | ||
233 | +GEN_VEXT_VV_ENV(vfmsac_vv_w, 4, 4, clearl) | ||
234 | +GEN_VEXT_VV_ENV(vfmsac_vv_d, 8, 8, clearq) | ||
235 | +RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16) | ||
236 | +RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32) | ||
237 | +RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64) | ||
238 | +GEN_VEXT_VF(vfmsac_vf_h, 2, 2, clearh) | ||
239 | +GEN_VEXT_VF(vfmsac_vf_w, 4, 4, clearl) | ||
240 | +GEN_VEXT_VF(vfmsac_vf_d, 8, 8, clearq) | ||
241 | + | ||
242 | +static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) | ||
243 | +{ | ||
244 | + return float16_muladd(a, b, d, float_muladd_negate_product, s); | ||
245 | +} | ||
246 | + | ||
247 | +static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) | ||
248 | +{ | ||
249 | + return float32_muladd(a, b, d, float_muladd_negate_product, s); | ||
250 | +} | ||
251 | + | ||
252 | +static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) | ||
253 | +{ | ||
254 | + return float64_muladd(a, b, d, float_muladd_negate_product, s); | ||
255 | +} | ||
256 | + | ||
257 | +RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16) | ||
258 | +RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32) | ||
259 | +RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64) | ||
260 | +GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2, 2, clearh) | ||
261 | +GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4, 4, clearl) | ||
262 | +GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8, 8, clearq) | ||
263 | +RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16) | ||
264 | +RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32) | ||
265 | +RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64) | ||
266 | +GEN_VEXT_VF(vfnmsac_vf_h, 2, 2, clearh) | ||
267 | +GEN_VEXT_VF(vfnmsac_vf_w, 4, 4, clearl) | ||
268 | +GEN_VEXT_VF(vfnmsac_vf_d, 8, 8, clearq) | ||
269 | + | ||
270 | +static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) | ||
271 | +{ | ||
272 | + return float16_muladd(d, b, a, 0, s); | ||
273 | +} | ||
274 | + | ||
275 | +static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s) | ||
276 | +{ | ||
277 | + return float32_muladd(d, b, a, 0, s); | ||
278 | +} | ||
279 | + | ||
280 | +static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) | ||
281 | +{ | ||
282 | + return float64_muladd(d, b, a, 0, s); | ||
283 | +} | ||
284 | + | ||
285 | +RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16) | ||
286 | +RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32) | ||
287 | +RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64) | ||
288 | +GEN_VEXT_VV_ENV(vfmadd_vv_h, 2, 2, clearh) | ||
289 | +GEN_VEXT_VV_ENV(vfmadd_vv_w, 4, 4, clearl) | ||
290 | +GEN_VEXT_VV_ENV(vfmadd_vv_d, 8, 8, clearq) | ||
291 | +RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16) | ||
292 | +RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32) | ||
293 | +RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64) | ||
294 | +GEN_VEXT_VF(vfmadd_vf_h, 2, 2, clearh) | ||
295 | +GEN_VEXT_VF(vfmadd_vf_w, 4, 4, clearl) | ||
296 | +GEN_VEXT_VF(vfmadd_vf_d, 8, 8, clearq) | ||
297 | + | ||
298 | +static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) | ||
299 | +{ | ||
300 | + return float16_muladd(d, b, a, | ||
301 | + float_muladd_negate_c | float_muladd_negate_product, s); | ||
302 | +} | ||
303 | + | ||
304 | +static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s) | ||
305 | +{ | ||
306 | + return float32_muladd(d, b, a, | ||
307 | + float_muladd_negate_c | float_muladd_negate_product, s); | ||
308 | +} | ||
309 | + | ||
310 | +static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) | ||
311 | +{ | ||
312 | + return float64_muladd(d, b, a, | ||
313 | + float_muladd_negate_c | float_muladd_negate_product, s); | ||
314 | +} | ||
315 | + | ||
316 | +RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16) | ||
317 | +RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32) | ||
318 | +RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64) | ||
319 | +GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2, 2, clearh) | ||
320 | +GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4, 4, clearl) | ||
321 | +GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8, 8, clearq) | ||
322 | +RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16) | ||
323 | +RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32) | ||
324 | +RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64) | ||
325 | +GEN_VEXT_VF(vfnmadd_vf_h, 2, 2, clearh) | ||
326 | +GEN_VEXT_VF(vfnmadd_vf_w, 4, 4, clearl) | ||
327 | +GEN_VEXT_VF(vfnmadd_vf_d, 8, 8, clearq) | ||
328 | + | ||
329 | +static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) | ||
330 | +{ | ||
331 | + return float16_muladd(d, b, a, float_muladd_negate_c, s); | ||
332 | +} | ||
333 | + | ||
334 | +static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s) | ||
335 | +{ | ||
336 | + return float32_muladd(d, b, a, float_muladd_negate_c, s); | ||
337 | +} | ||
338 | + | ||
339 | +static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) | ||
340 | +{ | ||
341 | + return float64_muladd(d, b, a, float_muladd_negate_c, s); | ||
342 | +} | ||
343 | + | ||
344 | +RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16) | ||
345 | +RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32) | ||
346 | +RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64) | ||
347 | +GEN_VEXT_VV_ENV(vfmsub_vv_h, 2, 2, clearh) | ||
348 | +GEN_VEXT_VV_ENV(vfmsub_vv_w, 4, 4, clearl) | ||
349 | +GEN_VEXT_VV_ENV(vfmsub_vv_d, 8, 8, clearq) | ||
350 | +RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16) | ||
351 | +RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32) | ||
352 | +RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64) | ||
353 | +GEN_VEXT_VF(vfmsub_vf_h, 2, 2, clearh) | ||
354 | +GEN_VEXT_VF(vfmsub_vf_w, 4, 4, clearl) | ||
355 | +GEN_VEXT_VF(vfmsub_vf_d, 8, 8, clearq) | ||
356 | + | ||
357 | +static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) | ||
358 | +{ | ||
359 | + return float16_muladd(d, b, a, float_muladd_negate_product, s); | ||
360 | +} | ||
361 | + | ||
362 | +static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s) | ||
363 | +{ | ||
364 | + return float32_muladd(d, b, a, float_muladd_negate_product, s); | ||
365 | +} | ||
366 | + | ||
367 | +static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) | ||
368 | +{ | ||
369 | + return float64_muladd(d, b, a, float_muladd_negate_product, s); | ||
370 | +} | ||
371 | + | ||
372 | +RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16) | ||
373 | +RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32) | ||
374 | +RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64) | ||
375 | +GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2, 2, clearh) | ||
376 | +GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4, 4, clearl) | ||
377 | +GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8, 8, clearq) | ||
378 | +RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16) | ||
379 | +RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32) | ||
380 | +RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64) | ||
381 | +GEN_VEXT_VF(vfnmsub_vf_h, 2, 2, clearh) | ||
382 | +GEN_VEXT_VF(vfnmsub_vf_w, 4, 4, clearl) | ||
383 | +GEN_VEXT_VF(vfnmsub_vf_d, 8, 8, clearq) | ||
384 | -- | ||
385 | 2.27.0 | ||
386 | |||
387 | diff view generated by jsdifflib |