From: Alistair Francis <alistair.francis@wdc.com>

The following changes since commit 9cc1bf1ebca550f8d90f967ccd2b6d2e00e81387:

  Merge tag 'pull-xen-20220609' of https://xenbits.xen.org/git-http/people/aperard/qemu-dm into staging (2022-06-09 08:25:17 -0700)

are available in the Git repository at:

  git@github.com:alistair23/qemu.git tags/pull-riscv-to-apply-20220610

for you to fetch changes up to 07314158f6aa4d2589520c194a7531b9364a8d54:

  target/riscv: trans_rvv: Avoid assert for RV32 and e64 (2022-06-10 09:42:12 +1000)

----------------------------------------------------------------
Fourth RISC-V PR for QEMU 7.1

* Update MAINTAINERS
* Add support for the Zmmul extension
* Fix FDT errors when supplying a device tree from the command line for the virt machine
* Avoid overflowing the addr_config buffer in the SiFive PLIC
* Support -device loader addresses above 2GB
* Correctly wake from WFI on VS-level external interrupts
* Fixes for RV128 support
* Support the Vector extension tail agnostic behavior of setting tail elements' bits to all 1s
* Don't expose the CPU properties on named CPUs
* Fix a Vector extension assert for RV32

----------------------------------------------------------------
Alistair Francis (4):
      MAINTAINERS: Cover hw/core/uboot_image.h within Generic Loader section
      hw/intc: sifive_plic: Avoid overflowing the addr_config buffer
      target/riscv: Don't expose the CPU properties on named CPUs
      target/riscv: trans_rvv: Avoid assert for RV32 and e64

Andrew Bresticker (1):
      target/riscv: Wake on VS-level external interrupts

Atish Patra (1):
      hw/riscv: virt: Generate fw_cfg DT node correctly

Frédéric Pétrot (1):
      target/riscv/debug.c: keep experimental rv128 support working

Jamie Iles (1):
      hw/core/loader: return image sizes as ssize_t

Weiwei Li (1):
      target/riscv: add support for zmmul extension v0.1

eopXD (16):
      target/riscv: rvv: Prune redundant ESZ, DSZ parameter passed
      target/riscv: rvv: Prune redundant access_type parameter passed
      target/riscv: rvv: Rename ambiguous esz
      target/riscv: rvv: Early exit when vstart >= vl
      target/riscv: rvv: Add tail agnostic for vv instructions
      target/riscv: rvv: Add tail agnostic for vector load / store instructions
      target/riscv: rvv: Add tail agnostic for vx, vvm, vxm instructions
      target/riscv: rvv: Add tail agnostic for vector integer shift instructions
      target/riscv: rvv: Add tail agnostic for vector integer comparison instructions
      target/riscv: rvv: Add tail agnostic for vector integer merge and move instructions
      target/riscv: rvv: Add tail agnostic for vector fix-point arithmetic instructions
      target/riscv: rvv: Add tail agnostic for vector floating-point instructions
      target/riscv: rvv: Add tail agnostic for vector reduction instructions
      target/riscv: rvv: Add tail agnostic for vector mask instructions
      target/riscv: rvv: Add tail agnostic for vector permutation instructions
      target/riscv: rvv: Add option 'rvv_ta_all_1s' to enable optional tail agnostic behavior

 include/hw/loader.h                     |   55 +-
 target/riscv/cpu.h                      |    4 +
 target/riscv/internals.h                |    6 +-
 hw/arm/armv7m.c                         |    2 +-
 hw/arm/boot.c                           |    8 +-
 hw/core/generic-loader.c                |    2 +-
 hw/core/loader.c                        |   81 +-
 hw/i386/x86.c                           |    2 +-
 hw/intc/sifive_plic.c                   |   19 +-
 hw/riscv/boot.c                         |    5 +-
 hw/riscv/virt.c                         |   28 +-
 target/riscv/cpu.c                      |   68 +-
 target/riscv/cpu_helper.c               |    4 +-
 target/riscv/debug.c                    |    2 +
 target/riscv/translate.c                |    4 +
 target/riscv/vector_helper.c            | 1588 +++++++++++++++++------------
 target/riscv/insn_trans/trans_rvm.c.inc |   18 +-
 target/riscv/insn_trans/trans_rvv.c.inc |  106 ++-
 MAINTAINERS                             |    1 +
 19 files changed, 1244 insertions(+), 759 deletions(-)
From: Alistair Francis <alistair.francis@wdc.com>

Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-Id: <20220509091339.26016-1-alistair.francis@wdc.com>
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index XXXXXXX..XXXXXXX 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -XXX,XX +XXX,XX @@ Generic Loader
 M: Alistair Francis <alistair@alistair23.me>
 S: Maintained
 F: hw/core/generic-loader.c
+F: hw/core/uboot_image.h
 F: include/hw/core/generic-loader.h
 F: docs/system/generic-loader.rst

--
2.36.1
From: Weiwei Li <liweiwei@iscas.ac.cn>

Add support for the zmmul extension v0.1. This extension includes all
multiplication operations from the M extension but not the divide ops.

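As an illustration (an assumed invocation, not part of the original
commit message): the property added below is the experimental
"x-zmmul" CPU flag, so the extension can presumably be tried with
something like the following, where the machine and kernel image are
placeholders:

    qemu-system-riscv64 -M virt -cpu rv64,x-zmmul=true -nographic -kernel Image

With only zmmul enabled, mul/mulh/mulhsu/mulhu (and mulw/muld on wider
targets) are accepted, while div/rem still require the full M
extension.
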
Signed-off-by: Weiwei Li <liweiwei@iscas.ac.cn>
Signed-off-by: Junqiang Wang <wangjunqiang@iscas.ac.cn>
Reviewed-by: Víctor Colombo <victor.colombo@eldorado.org.br>
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
Message-Id: <20220531030732.3850-1-liweiwei@iscas.ac.cn>
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
---
 target/riscv/cpu.h                      |  1 +
 target/riscv/cpu.c                      |  7 +++++++
 target/riscv/insn_trans/trans_rvm.c.inc | 18 ++++++++++++------
 3 files changed, 20 insertions(+), 6 deletions(-)

diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -XXX,XX +XXX,XX @@ struct RISCVCPUConfig {
     bool ext_zhinxmin;
     bool ext_zve32f;
     bool ext_zve64f;
+    bool ext_zmmul;

     uint32_t mvendorid;
     uint64_t marchid;
diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -XXX,XX +XXX,XX @@ static void riscv_cpu_realize(DeviceState *dev, Error **errp)
         cpu->cfg.ext_ifencei = true;
     }

+    if (cpu->cfg.ext_m && cpu->cfg.ext_zmmul) {
+        warn_report("Zmmul will override M");
+        cpu->cfg.ext_m = false;
+    }
+
     if (cpu->cfg.ext_i && cpu->cfg.ext_e) {
         error_setg(errp,
                    "I and E extensions are incompatible");
@@ -XXX,XX +XXX,XX @@ static Property riscv_cpu_properties[] = {

     /* These are experimental so mark with 'x-' */
     DEFINE_PROP_BOOL("x-j", RISCVCPU, cfg.ext_j, false),
+    DEFINE_PROP_BOOL("x-zmmul", RISCVCPU, cfg.ext_zmmul, false),
     /* ePMP 0.9.3 */
     DEFINE_PROP_BOOL("x-epmp", RISCVCPU, cfg.epmp, false),
     DEFINE_PROP_BOOL("x-aia", RISCVCPU, cfg.aia, false),
@@ -XXX,XX +XXX,XX @@ static void riscv_isa_string_ext(RISCVCPU *cpu, char **isa_str, int max_str_len)
     struct isa_ext_data isa_edata_arr[] = {
         ISA_EDATA_ENTRY(zicsr, ext_icsr),
         ISA_EDATA_ENTRY(zifencei, ext_ifencei),
+        ISA_EDATA_ENTRY(zmmul, ext_zmmul),
         ISA_EDATA_ENTRY(zfh, ext_zfh),
         ISA_EDATA_ENTRY(zfhmin, ext_zfhmin),
         ISA_EDATA_ENTRY(zfinx, ext_zfinx),
diff --git a/target/riscv/insn_trans/trans_rvm.c.inc b/target/riscv/insn_trans/trans_rvm.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/insn_trans/trans_rvm.c.inc
+++ b/target/riscv/insn_trans/trans_rvm.c.inc
@@ -XXX,XX +XXX,XX @@
  * this program.  If not, see <http://www.gnu.org/licenses/>.
  */

+#define REQUIRE_M_OR_ZMMUL(ctx) do {                      \
+    if (!ctx->cfg_ptr->ext_zmmul && !has_ext(ctx, RVM)) { \
+        return false;                                     \
+    }                                                     \
+} while (0)
+
 static void gen_mulhu_i128(TCGv r2, TCGv r3, TCGv al, TCGv ah, TCGv bl, TCGv bh)
 {
     TCGv tmpl = tcg_temp_new();
@@ -XXX,XX +XXX,XX @@ static void gen_mul_i128(TCGv rl, TCGv rh,

 static bool trans_mul(DisasContext *ctx, arg_mul *a)
 {
-    REQUIRE_EXT(ctx, RVM);
+    REQUIRE_M_OR_ZMMUL(ctx);
     return gen_arith(ctx, a, EXT_NONE, tcg_gen_mul_tl, gen_mul_i128);
 }

@@ -XXX,XX +XXX,XX @@ static void gen_mulh_w(TCGv ret, TCGv s1, TCGv s2)

 static bool trans_mulh(DisasContext *ctx, arg_mulh *a)
 {
-    REQUIRE_EXT(ctx, RVM);
+    REQUIRE_M_OR_ZMMUL(ctx);
     return gen_arith_per_ol(ctx, a, EXT_SIGN, gen_mulh, gen_mulh_w,
                             gen_mulh_i128);
 }
@@ -XXX,XX +XXX,XX @@ static void gen_mulhsu_w(TCGv ret, TCGv arg1, TCGv arg2)

 static bool trans_mulhsu(DisasContext *ctx, arg_mulhsu *a)
 {
-    REQUIRE_EXT(ctx, RVM);
+    REQUIRE_M_OR_ZMMUL(ctx);
     return gen_arith_per_ol(ctx, a, EXT_NONE, gen_mulhsu, gen_mulhsu_w,
                             gen_mulhsu_i128);
 }
@@ -XXX,XX +XXX,XX @@ static void gen_mulhu(TCGv ret, TCGv s1, TCGv s2)

 static bool trans_mulhu(DisasContext *ctx, arg_mulhu *a)
 {
-    REQUIRE_EXT(ctx, RVM);
+    REQUIRE_M_OR_ZMMUL(ctx);
     /* gen_mulh_w works for either sign as input. */
     return gen_arith_per_ol(ctx, a, EXT_ZERO, gen_mulhu, gen_mulh_w,
                             gen_mulhu_i128);
@@ -XXX,XX +XXX,XX @@ static bool trans_remu(DisasContext *ctx, arg_remu *a)
 static bool trans_mulw(DisasContext *ctx, arg_mulw *a)
 {
     REQUIRE_64_OR_128BIT(ctx);
-    REQUIRE_EXT(ctx, RVM);
+    REQUIRE_M_OR_ZMMUL(ctx);
     ctx->ol = MXL_RV32;
     return gen_arith(ctx, a, EXT_NONE, tcg_gen_mul_tl, NULL);
 }
@@ -XXX,XX +XXX,XX @@ static bool trans_remuw(DisasContext *ctx, arg_remuw *a)
 static bool trans_muld(DisasContext *ctx, arg_muld *a)
 {
     REQUIRE_128BIT(ctx);
-    REQUIRE_EXT(ctx, RVM);
+    REQUIRE_M_OR_ZMMUL(ctx);
     ctx->ol = MXL_RV64;
     return gen_arith(ctx, a, EXT_SIGN, tcg_gen_mul_tl, NULL);
 }
--
2.36.1
From: Atish Patra <atishp@rivosinc.com>

The fw_cfg DT node is generated after create_fdt() without checking
whether the DT is being loaded from the command line. This results in
an FDT_ERR_EXISTS error if a dtb is loaded from the command line.

Generate the fw_cfg node only if the DT is not loaded from the
command line.

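For reference, an invocation of the shape that used to fail (file
names here are placeholders, not from the patch):

    qemu-system-riscv64 -M virt -dtb custom-virt.dtb -nographic -kernel Image

Previously, QEMU would still try to generate the /fw-cfg@... node in
the user-supplied device tree and hit FDT_ERR_EXISTS.
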
Signed-off-by: Atish Patra <atishp@rivosinc.com>
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
Message-Id: <20220526203500.847165-1-atishp@rivosinc.com>
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
---
 hw/riscv/virt.c | 28 ++++++++++++++++++----------
 1 file changed, 18 insertions(+), 10 deletions(-)

diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/riscv/virt.c
+++ b/hw/riscv/virt.c
@@ -XXX,XX +XXX,XX @@ static void create_fdt_flash(RISCVVirtState *s, const MemMapEntry *memmap)
     g_free(name);
 }

+static void create_fdt_fw_cfg(RISCVVirtState *s, const MemMapEntry *memmap)
+{
+    char *nodename;
+    MachineState *mc = MACHINE(s);
+    hwaddr base = memmap[VIRT_FW_CFG].base;
+    hwaddr size = memmap[VIRT_FW_CFG].size;
+
+    nodename = g_strdup_printf("/fw-cfg@%" PRIx64, base);
+    qemu_fdt_add_subnode(mc->fdt, nodename);
+    qemu_fdt_setprop_string(mc->fdt, nodename,
+                            "compatible", "qemu,fw-cfg-mmio");
+    qemu_fdt_setprop_sized_cells(mc->fdt, nodename, "reg",
+                                 2, base, 2, size);
+    qemu_fdt_setprop(mc->fdt, nodename, "dma-coherent", NULL, 0);
+    g_free(nodename);
+}
+
 static void create_fdt(RISCVVirtState *s, const MemMapEntry *memmap,
                        uint64_t mem_size, const char *cmdline, bool is_32_bit)
 {
@@ -XXX,XX +XXX,XX @@ static void create_fdt(RISCVVirtState *s, const MemMapEntry *memmap,
     create_fdt_rtc(s, memmap, irq_mmio_phandle);

     create_fdt_flash(s, memmap);
+    create_fdt_fw_cfg(s, memmap);

 update_bootargs:
     if (cmdline && *cmdline) {
@@ -XXX,XX +XXX,XX @@ static inline DeviceState *gpex_pcie_init(MemoryRegion *sys_mem,
 static FWCfgState *create_fw_cfg(const MachineState *mc)
 {
     hwaddr base = virt_memmap[VIRT_FW_CFG].base;
-    hwaddr size = virt_memmap[VIRT_FW_CFG].size;
     FWCfgState *fw_cfg;
-    char *nodename;

     fw_cfg = fw_cfg_init_mem_wide(base + 8, base, 8, base + 16,
                                   &address_space_memory);
     fw_cfg_add_i16(fw_cfg, FW_CFG_NB_CPUS, (uint16_t)mc->smp.cpus);

-    nodename = g_strdup_printf("/fw-cfg@%" PRIx64, base);
-    qemu_fdt_add_subnode(mc->fdt, nodename);
-    qemu_fdt_setprop_string(mc->fdt, nodename,
-                            "compatible", "qemu,fw-cfg-mmio");
-    qemu_fdt_setprop_sized_cells(mc->fdt, nodename, "reg",
-                                 2, base, 2, size);
-    qemu_fdt_setprop(mc->fdt, nodename, "dma-coherent", NULL, 0);
-    g_free(nodename);
     return fw_cfg;
 }

--
2.36.1
From: Alistair Francis <alistair.francis@wdc.com>

Since commit ad40be27 "target/riscv: Support start kernel directly by
KVM" we have been overflowing the addr_config buffer on "M,MS..."
configurations, as reported in
https://gitlab.com/qemu-project/qemu/-/issues/1050.

This commit changes the loop in sifive_plic_create() from iterating
over the number of harts to iterating over addr_config instead. The
addr_config is built from the hart_config and contains interrupt
details for all harts, so we can no longer iterate past the end of
addr_config.

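To make the overflow concrete (my reading of the hart_config format,
not text from the original commit): a config such as "M,MS" describes
two harts but three interrupt contexts, so the per-context array is
larger than the hart count:

    /* hart_config "M,MS" => num_harts == 2, plic->num_addrs == 3 */
    /* addr_config[0]: hartid 0, PLICMode_M                       */
    /* addr_config[1]: hartid 1, PLICMode_M                       */
    /* addr_config[2]: hartid 1, PLICMode_S                       */

The old loop walked the harts while bumping a separate addr_config
index inside the body, which could step past the final entry;
iterating directly up to plic->num_addrs, as below, cannot.
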
Fixes: ad40be27084536 ("target/riscv: Support start kernel directly by KVM")
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1050
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
Reviewed-by: Mingwang Li <limingwang@huawei.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-Id: <20220601013631.196854-1-alistair.francis@opensource.wdc.com>
---
 hw/intc/sifive_plic.c | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/hw/intc/sifive_plic.c b/hw/intc/sifive_plic.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/intc/sifive_plic.c
+++ b/hw/intc/sifive_plic.c
@@ -XXX,XX +XXX,XX @@ DeviceState *sifive_plic_create(hwaddr addr, char *hart_config,
     uint32_t context_stride, uint32_t aperture_size)
 {
     DeviceState *dev = qdev_new(TYPE_SIFIVE_PLIC);
-    int i, j = 0;
+    int i;
     SiFivePLICState *plic;

     assert(enable_stride == (enable_stride & -enable_stride));
@@ -XXX,XX +XXX,XX @@ DeviceState *sifive_plic_create(hwaddr addr, char *hart_config,
     sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, addr);

     plic = SIFIVE_PLIC(dev);
-    for (i = 0; i < num_harts; i++) {
-        CPUState *cpu = qemu_get_cpu(hartid_base + i);

-        if (plic->addr_config[j].mode == PLICMode_M) {
-            j++;
-            qdev_connect_gpio_out(dev, num_harts + i,
+    for (i = 0; i < plic->num_addrs; i++) {
+        int cpu_num = plic->addr_config[i].hartid;
+        CPUState *cpu = qemu_get_cpu(hartid_base + cpu_num);
+
+        if (plic->addr_config[i].mode == PLICMode_M) {
+            qdev_connect_gpio_out(dev, num_harts + cpu_num,
                                   qdev_get_gpio_in(DEVICE(cpu), IRQ_M_EXT));
         }
-
-        if (plic->addr_config[j].mode == PLICMode_S) {
-            j++;
-            qdev_connect_gpio_out(dev, i,
+        if (plic->addr_config[i].mode == PLICMode_S) {
+            qdev_connect_gpio_out(dev, cpu_num,
                                   qdev_get_gpio_in(DEVICE(cpu), IRQ_S_EXT));
         }
     }
--
2.36.1
From: Jamie Iles <jamie@nuviainc.com>

Various loader functions return an int, which limits images to 2GB.
That is fine for things like a BIOS or kernel image, but if we want
to be able to load memory images or large ramdisks, any file over 2GB
would silently fail to load.

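A minimal standalone illustration of the failure mode (not QEMU code):

    #include <stdio.h>

    int main(void)
    {
        long long real_size = 3LL * 1024 * 1024 * 1024;  /* 3 GiB ramdisk */
        int as_int = (int)real_size;   /* implementation-defined narrowing */

        /* On common LP64 hosts this prints -1073741824, so a
         * "size < 0" error check rejects a perfectly good file. */
        printf("%d\n", as_int);
        return 0;
    }

Returning ssize_t end to end keeps such sizes representable while
still leaving -1 available as the error value.
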
Cc: Luc Michel <lmichel@kalray.eu>
Signed-off-by: Jamie Iles <jamie@nuviainc.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Reviewed-by: Luc Michel <lmichel@kalray.eu>
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
Message-Id: <20211111141141.3295094-2-jamie@nuviainc.com>
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
---
 include/hw/loader.h      | 55 +++++++++++++--------------
 hw/arm/armv7m.c          |  2 +-
 hw/arm/boot.c            |  8 ++--
 hw/core/generic-loader.c |  2 +-
 hw/core/loader.c         | 81 +++++++++++++++++++++-------------------
 hw/i386/x86.c            |  2 +-
 hw/riscv/boot.c          |  5 ++-
 7 files changed, 80 insertions(+), 75 deletions(-)

diff --git a/include/hw/loader.h b/include/hw/loader.h
index XXXXXXX..XXXXXXX 100644
--- a/include/hw/loader.h
+++ b/include/hw/loader.h
@@ -XXX,XX +XXX,XX @@ ssize_t load_image_size(const char *filename, void *addr, size_t size);
  *
  * Returns the size of the loaded image on success, -1 otherwise.
  */
-int load_image_targphys_as(const char *filename,
-                           hwaddr addr, uint64_t max_sz, AddressSpace *as);
+ssize_t load_image_targphys_as(const char *filename,
+                               hwaddr addr, uint64_t max_sz, AddressSpace *as);

 /**load_targphys_hex_as:
  * @filename: Path to the .hex file
@@ -XXX,XX +XXX,XX @@ int load_image_targphys_as(const char *filename,
  *
  * Returns the size of the loaded .hex file on success, -1 otherwise.
  */
-int load_targphys_hex_as(const char *filename, hwaddr *entry, AddressSpace *as);
+ssize_t load_targphys_hex_as(const char *filename, hwaddr *entry,
+                             AddressSpace *as);

 /** load_image_targphys:
  * Same as load_image_targphys_as(), but doesn't allow the caller to specify
  * an AddressSpace.
  */
-int load_image_targphys(const char *filename, hwaddr,
-                        uint64_t max_sz);
+ssize_t load_image_targphys(const char *filename, hwaddr,
+                            uint64_t max_sz);

 /**
  * load_image_mr: load an image into a memory region
@@ -XXX,XX +XXX,XX @@ int load_image_targphys(const char *filename, hwaddr,
  * If the file is larger than the memory region's size the call will fail.
  * Returns -1 on failure, or the size of the file.
  */
-int load_image_mr(const char *filename, MemoryRegion *mr);
+ssize_t load_image_mr(const char *filename, MemoryRegion *mr);

 /* This is the limit on the maximum uncompressed image size that
  * load_image_gzipped_buffer() and load_image_gzipped() will read. It prevents
@@ -XXX,XX +XXX,XX @@ int load_image_mr(const char *filename, MemoryRegion *mr);
  */
 #define LOAD_IMAGE_MAX_GUNZIP_BYTES (256 << 20)

-int load_image_gzipped_buffer(const char *filename, uint64_t max_sz,
-                              uint8_t **buffer);
-int load_image_gzipped(const char *filename, hwaddr addr, uint64_t max_sz);
+ssize_t load_image_gzipped_buffer(const char *filename, uint64_t max_sz,
+                                  uint8_t **buffer);
+ssize_t load_image_gzipped(const char *filename, hwaddr addr, uint64_t max_sz);

 #define ELF_LOAD_FAILED       -1
 #define ELF_LOAD_NOT_ELF      -2
@@ -XXX,XX +XXX,XX @@ ssize_t load_elf(const char *filename,
  */
 void load_elf_hdr(const char *filename, void *hdr, bool *is64, Error **errp);

-int load_aout(const char *filename, hwaddr addr, int max_sz,
-              int bswap_needed, hwaddr target_page_size);
+ssize_t load_aout(const char *filename, hwaddr addr, int max_sz,
+                  int bswap_needed, hwaddr target_page_size);

 #define LOAD_UIMAGE_LOADADDR_INVALID (-1)

@@ -XXX,XX +XXX,XX @@ int load_aout(const char *filename, hwaddr addr, int max_sz,
  *
  * Returns the size of the loaded image on success, -1 otherwise.
  */
-int load_uimage_as(const char *filename, hwaddr *ep,
-                   hwaddr *loadaddr, int *is_linux,
-                   uint64_t (*translate_fn)(void *, uint64_t),
-                   void *translate_opaque, AddressSpace *as);
+ssize_t load_uimage_as(const char *filename, hwaddr *ep,
+                       hwaddr *loadaddr, int *is_linux,
+                       uint64_t (*translate_fn)(void *, uint64_t),
+                       void *translate_opaque, AddressSpace *as);

 /** load_uimage:
  * Same as load_uimage_as(), but doesn't allow the caller to specify an
  * AddressSpace.
  */
-int load_uimage(const char *filename, hwaddr *ep,
-                hwaddr *loadaddr, int *is_linux,
-                uint64_t (*translate_fn)(void *, uint64_t),
-                void *translate_opaque);
+ssize_t load_uimage(const char *filename, hwaddr *ep,
+                    hwaddr *loadaddr, int *is_linux,
+                    uint64_t (*translate_fn)(void *, uint64_t),
+                    void *translate_opaque);

 /**
  * load_ramdisk_as:
@@ -XXX,XX +XXX,XX @@ int load_uimage(const char *filename, hwaddr *ep,
  *
  * Returns the size of the loaded image on success, -1 otherwise.
  */
-int load_ramdisk_as(const char *filename, hwaddr addr, uint64_t max_sz,
-                    AddressSpace *as);
+ssize_t load_ramdisk_as(const char *filename, hwaddr addr, uint64_t max_sz,
+                        AddressSpace *as);

 /**
  * load_ramdisk:
  * Same as load_ramdisk_as(), but doesn't allow the caller to specify
  * an AddressSpace.
  */
-int load_ramdisk(const char *filename, hwaddr addr, uint64_t max_sz);
+ssize_t load_ramdisk(const char *filename, hwaddr addr, uint64_t max_sz);

 ssize_t gunzip(void *dst, size_t dstlen, uint8_t *src, size_t srclen);

@@ -XXX,XX +XXX,XX @@ void pstrcpy_targphys(const char *name,
 extern bool option_rom_has_mr;
 extern bool rom_file_has_mr;

-int rom_add_file(const char *file, const char *fw_dir,
-                 hwaddr addr, int32_t bootindex,
-                 bool option_rom, MemoryRegion *mr, AddressSpace *as);
+ssize_t rom_add_file(const char *file, const char *fw_dir,
+                     hwaddr addr, int32_t bootindex,
+                     bool option_rom, MemoryRegion *mr, AddressSpace *as);
 MemoryRegion *rom_add_blob(const char *name, const void *blob, size_t len,
                            size_t max_len, hwaddr addr,
                            const char *fw_file_name,
@@ -XXX,XX +XXX,XX @@ void hmp_info_roms(Monitor *mon, const QDict *qdict);
 #define rom_add_blob_fixed_as(_f, _b, _l, _a, _as)      \
     rom_add_blob(_f, _b, _l, _l, _a, NULL, NULL, NULL, _as, true)

-int rom_add_vga(const char *file);
-int rom_add_option(const char *file, int32_t bootindex);
+ssize_t rom_add_vga(const char *file);
+ssize_t rom_add_option(const char *file, int32_t bootindex);

 /* This is the usual maximum in uboot, so if a uImage overflows this, it would
  * overflow on real hardware too. */
diff --git a/hw/arm/armv7m.c b/hw/arm/armv7m.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/arm/armv7m.c
+++ b/hw/arm/armv7m.c
@@ -XXX,XX +XXX,XX @@ static void armv7m_reset(void *opaque)

 void armv7m_load_kernel(ARMCPU *cpu, const char *kernel_filename, int mem_size)
 {
-    int image_size;
+    ssize_t image_size;
     uint64_t entry;
     int big_endian;
     AddressSpace *as;
diff --git a/hw/arm/boot.c b/hw/arm/boot.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/arm/boot.c
+++ b/hw/arm/boot.c
@@ -XXX,XX +XXX,XX @@ static int do_arm_linux_init(Object *obj, void *opaque)
     return 0;
 }

-static int64_t arm_load_elf(struct arm_boot_info *info, uint64_t *pentry,
+static ssize_t arm_load_elf(struct arm_boot_info *info, uint64_t *pentry,
                             uint64_t *lowaddr, uint64_t *highaddr,
                             int elf_machine, AddressSpace *as)
 {
@@ -XXX,XX +XXX,XX @@ static int64_t arm_load_elf(struct arm_boot_info *info, uint64_t *pentry,
     } elf_header;
     int data_swab = 0;
     bool big_endian;
-    int64_t ret = -1;
+    ssize_t ret = -1;
     Error *err = NULL;


@@ -XXX,XX +XXX,XX @@ static void arm_setup_direct_kernel_boot(ARMCPU *cpu,
     /* Set up for a direct boot of a kernel image file. */
     CPUState *cs;
     AddressSpace *as = arm_boot_address_space(cpu, info);
-    int kernel_size;
+    ssize_t kernel_size;
     int initrd_size;
     int is_linux = 0;
     uint64_t elf_entry;
@@ -XXX,XX +XXX,XX @@ static void arm_setup_direct_kernel_boot(ARMCPU *cpu,

     if (kernel_size > info->ram_size) {
         error_report("kernel '%s' is too large to fit in RAM "
-                     "(kernel size %d, RAM size %" PRId64 ")",
+                     "(kernel size %zd, RAM size %" PRId64 ")",
                      info->kernel_filename, kernel_size, info->ram_size);
         exit(1);
     }
diff --git a/hw/core/generic-loader.c b/hw/core/generic-loader.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/core/generic-loader.c
+++ b/hw/core/generic-loader.c
@@ -XXX,XX +XXX,XX @@ static void generic_loader_realize(DeviceState *dev, Error **errp)
     GenericLoaderState *s = GENERIC_LOADER(dev);
     hwaddr entry;
     int big_endian;
-    int size = 0;
+    ssize_t size = 0;

     s->set_pc = false;

diff --git a/hw/core/loader.c b/hw/core/loader.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/core/loader.c
+++ b/hw/core/loader.c
@@ -XXX,XX +XXX,XX @@ ssize_t read_targphys(const char *name,
     return did;
 }

-int load_image_targphys(const char *filename,
-                        hwaddr addr, uint64_t max_sz)
+ssize_t load_image_targphys(const char *filename,
+                            hwaddr addr, uint64_t max_sz)
 {
     return load_image_targphys_as(filename, addr, max_sz, NULL);
 }

 /* return the size or -1 if error */
-int load_image_targphys_as(const char *filename,
-                           hwaddr addr, uint64_t max_sz, AddressSpace *as)
+ssize_t load_image_targphys_as(const char *filename,
+                               hwaddr addr, uint64_t max_sz, AddressSpace *as)
 {
-    int size;
+    ssize_t size;

     size = get_image_size(filename);
     if (size < 0 || size > max_sz) {
@@ -XXX,XX +XXX,XX @@ int load_image_targphys_as(const char *filename,
     return size;
 }

-int load_image_mr(const char *filename, MemoryRegion *mr)
+ssize_t load_image_mr(const char *filename, MemoryRegion *mr)
 {
-    int size;
+    ssize_t size;

     if (!memory_access_is_direct(mr, false)) {
         /* Can only load an image into RAM or ROM */
@@ -XXX,XX +XXX,XX @@ static void bswap_ahdr(struct exec *e)
     : (_N_SEGMENT_ROUND (_N_TXTENDADDR(x, target_page_size), target_page_size)))


-int load_aout(const char *filename, hwaddr addr, int max_sz,
-              int bswap_needed, hwaddr target_page_size)
+ssize_t load_aout(const char *filename, hwaddr addr, int max_sz,
+                  int bswap_needed, hwaddr target_page_size)
 {
     int fd;
     ssize_t size, ret;
@@ -XXX,XX +XXX,XX @@ toosmall:
 }

 /* Load a U-Boot image.  */
-static int load_uboot_image(const char *filename, hwaddr *ep, hwaddr *loadaddr,
-                            int *is_linux, uint8_t image_type,
-                            uint64_t (*translate_fn)(void *, uint64_t),
-                            void *translate_opaque, AddressSpace *as)
+static ssize_t load_uboot_image(const char *filename, hwaddr *ep,
+                                hwaddr *loadaddr, int *is_linux,
+                                uint8_t image_type,
+                                uint64_t (*translate_fn)(void *, uint64_t),
+                                void *translate_opaque, AddressSpace *as)
 {
     int fd;
-    int size;
+    ssize_t size;
     hwaddr address;
     uboot_image_header_t h;
     uboot_image_header_t *hdr = &h;
@@ -XXX,XX +XXX,XX @@ out:
     return ret;
 }

-int load_uimage(const char *filename, hwaddr *ep, hwaddr *loadaddr,
-                int *is_linux,
-                uint64_t (*translate_fn)(void *, uint64_t),
-                void *translate_opaque)
+ssize_t load_uimage(const char *filename, hwaddr *ep, hwaddr *loadaddr,
+                    int *is_linux,
+                    uint64_t (*translate_fn)(void *, uint64_t),
+                    void *translate_opaque)
 {
     return load_uboot_image(filename, ep, loadaddr, is_linux, IH_TYPE_KERNEL,
                             translate_fn, translate_opaque, NULL);
 }

-int load_uimage_as(const char *filename, hwaddr *ep, hwaddr *loadaddr,
-                   int *is_linux,
-                   uint64_t (*translate_fn)(void *, uint64_t),
-                   void *translate_opaque, AddressSpace *as)
+ssize_t load_uimage_as(const char *filename, hwaddr *ep, hwaddr *loadaddr,
+                       int *is_linux,
+                       uint64_t (*translate_fn)(void *, uint64_t),
+                       void *translate_opaque, AddressSpace *as)
 {
     return load_uboot_image(filename, ep, loadaddr, is_linux, IH_TYPE_KERNEL,
                             translate_fn, translate_opaque, as);
 }

 /* Load a ramdisk.  */
-int load_ramdisk(const char *filename, hwaddr addr, uint64_t max_sz)
+ssize_t load_ramdisk(const char *filename, hwaddr addr, uint64_t max_sz)
 {
     return load_ramdisk_as(filename, addr, max_sz, NULL);
 }

-int load_ramdisk_as(const char *filename, hwaddr addr, uint64_t max_sz,
-                    AddressSpace *as)
+ssize_t load_ramdisk_as(const char *filename, hwaddr addr, uint64_t max_sz,
+                        AddressSpace *as)
 {
     return load_uboot_image(filename, NULL, &addr, NULL, IH_TYPE_RAMDISK,
                             NULL, NULL, as);
 }

 /* Load a gzip-compressed kernel to a dynamically allocated buffer. */
-int load_image_gzipped_buffer(const char *filename, uint64_t max_sz,
-                              uint8_t **buffer)
+ssize_t load_image_gzipped_buffer(const char *filename, uint64_t max_sz,
+                                  uint8_t **buffer)
 {
     uint8_t *compressed_data = NULL;
     uint8_t *data = NULL;
@@ -XXX,XX +XXX,XX @@ int load_image_gzipped_buffer(const char *filename, uint64_t max_sz,
 }

 /* Load a gzip-compressed kernel. */
-int load_image_gzipped(const char *filename, hwaddr addr, uint64_t max_sz)
+ssize_t load_image_gzipped(const char *filename, hwaddr addr, uint64_t max_sz)
 {
-    int bytes;
+    ssize_t bytes;
     uint8_t *data;

     bytes = load_image_gzipped_buffer(filename, max_sz, &data);
@@ -XXX,XX +XXX,XX @@ static void *rom_set_mr(Rom *rom, Object *owner, const char *name, bool ro)
     return data;
 }

-int rom_add_file(const char *file, const char *fw_dir,
-                 hwaddr addr, int32_t bootindex,
-                 bool option_rom, MemoryRegion *mr,
-                 AddressSpace *as)
+ssize_t rom_add_file(const char *file, const char *fw_dir,
+                     hwaddr addr, int32_t bootindex,
+                     bool option_rom, MemoryRegion *mr,
+                     AddressSpace *as)
 {
     MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine());
     Rom *rom;
-    int rc, fd = -1;
+    ssize_t rc;
+    int fd = -1;
     char devpath[100];

     if (as && mr) {
@@ -XXX,XX +XXX,XX @@ int rom_add_file(const char *file, const char *fw_dir,
     lseek(fd, 0, SEEK_SET);
     rc = read(fd, rom->data, rom->datasize);
     if (rc != rom->datasize) {
-        fprintf(stderr, "rom: file %-20s: read error: rc=%d (expected %zd)\n",
+        fprintf(stderr, "rom: file %-20s: read error: rc=%zd (expected %zd)\n",
                 rom->name, rc, rom->datasize);
         goto err;
     }
@@ -XXX,XX +XXX,XX @@ int rom_add_elf_program(const char *name, GMappedFile *mapped_file, void *data,
     return 0;
 }

-int rom_add_vga(const char *file)
+ssize_t rom_add_vga(const char *file)
 {
     return rom_add_file(file, "vgaroms", 0, -1, true, NULL, NULL);
 }

-int rom_add_option(const char *file, int32_t bootindex)
+ssize_t rom_add_option(const char *file, int32_t bootindex)
 {
     return rom_add_file(file, "genroms", 0, bootindex, true, NULL, NULL);
 }
@@ -XXX,XX +XXX,XX @@ out:
 }

 /* return size or -1 if error */
-int load_targphys_hex_as(const char *filename, hwaddr *entry, AddressSpace *as)
+ssize_t load_targphys_hex_as(const char *filename, hwaddr *entry,
+                             AddressSpace *as)
 {
     gsize hex_blob_size;
     gchar *hex_blob;
-    int total_size = 0;
+    ssize_t total_size = 0;

     if (!g_file_get_contents(filename, &hex_blob, &hex_blob_size, NULL)) {
         return -1;
diff --git a/hw/i386/x86.c b/hw/i386/x86.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/i386/x86.c
+++ b/hw/i386/x86.c
@@ -XXX,XX +XXX,XX @@ void x86_bios_rom_init(MachineState *ms, const char *default_firmware,
     char *filename;
     MemoryRegion *bios, *isa_bios;
     int bios_size, isa_bios_size;
-    int ret;
+    ssize_t ret;

     /* BIOS load */
     bios_name = ms->firmware ?: default_firmware;
diff --git a/hw/riscv/boot.c b/hw/riscv/boot.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/riscv/boot.c
+++ b/hw/riscv/boot.c
@@ -XXX,XX +XXX,XX @@ target_ulong riscv_load_firmware(const char *firmware_filename,
                                  hwaddr firmware_load_addr,
                                  symbol_fn_t sym_cb)
 {
-    uint64_t firmware_entry, firmware_size, firmware_end;
+    uint64_t firmware_entry, firmware_end;
+    ssize_t firmware_size;

     if (load_elf_ram_sym(firmware_filename, NULL, NULL, NULL,
                          &firmware_entry, NULL, &firmware_end, NULL,
@@ -XXX,XX +XXX,XX @@ target_ulong riscv_load_kernel(const char *kernel_filename,
 hwaddr riscv_load_initrd(const char *filename, uint64_t mem_size,
                          uint64_t kernel_entry, hwaddr *start)
 {
-    int size;
+    ssize_t size;

     /*
      * We want to put the initrd far enough into RAM that when the
--
2.36.1
From: Andrew Bresticker <abrestic@rivosinc.com>

Whether or not VSEIP is pending isn't reflected in env->mip and must
instead be determined from hstatus.vgein and hgeip. As a result, a
CPU in WFI won't wake on a VSEIP, which violates the WFI behavior
specified in the privileged ISA. Just use riscv_cpu_all_pending()
instead, which already accounts for VSEIP.

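A condensed sketch of the resulting wake-up check (pieced together
from the hunks below; the final masking with mie is my assumption,
since the full body of riscv_cpu_all_pending() is not shown here):

    /* sketch only -- see target/riscv/cpu_helper.c for the real code */
    uint32_t gein   = get_field(env->hstatus, HSTATUS_VGEIN);
    uint64_t vsgein = (env->hgeip & (1ULL << gein)) ? MIP_VSEIP : 0;
    bool has_work   = ((env->mip | vsgein) & env->mie) != 0;  /* assumed */

The old check, (env->mip & env->mie) != 0, never sees the
hgeip-derived VSEIP bit.
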
Signed-off-by: Andrew Bresticker <abrestic@rivosinc.com>
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
Message-Id: <20220531210544.181322-1-abrestic@rivosinc.com>
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
---
 target/riscv/cpu.h        | 1 +
 target/riscv/cpu.c        | 2 +-
 target/riscv/cpu_helper.c | 2 +-
 3 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -XXX,XX +XXX,XX @@ int riscv_cpu_gdb_read_register(CPUState *cpu, GByteArray *buf, int reg);
 int riscv_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg);
 int riscv_cpu_hviprio_index2irq(int index, int *out_irq, int *out_rdzero);
 uint8_t riscv_cpu_default_priority(int irq);
+uint64_t riscv_cpu_all_pending(CPURISCVState *env);
 int riscv_cpu_mirq_pending(CPURISCVState *env);
 int riscv_cpu_sirq_pending(CPURISCVState *env);
 int riscv_cpu_vsirq_pending(CPURISCVState *env);
diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -XXX,XX +XXX,XX @@ static bool riscv_cpu_has_work(CPUState *cs)
      * Definition of the WFI instruction requires it to ignore the privilege
      * mode and delegation registers, but respect individual enables
      */
-    return (env->mip & env->mie) != 0;
+    return riscv_cpu_all_pending(env) != 0;
 #else
     return true;
 #endif
diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/cpu_helper.c
+++ b/target/riscv/cpu_helper.c
@@ -XXX,XX +XXX,XX @@ static int riscv_cpu_pending_to_irq(CPURISCVState *env,
     return best_irq;
 }

-static uint64_t riscv_cpu_all_pending(CPURISCVState *env)
+uint64_t riscv_cpu_all_pending(CPURISCVState *env)
 {
     uint32_t gein = get_field(env->hstatus, HSTATUS_VGEIN);
     uint64_t vsgein = (env->hgeip & (1ULL << gein)) ? MIP_VSEIP : 0;
--
2.36.1
From: Frédéric Pétrot <frederic.petrot@univ-grenoble-alpes.fr>

Add an MXL_RV128 case in two switches so that no error is triggered
when using the -cpu x-rv128 option.

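For instance (an illustrative invocation, not from the patch itself),
the experimental 128-bit CPU is selected with:

    qemu-system-riscv64 -M virt -cpu x-rv128 -nographic

and the debug-trigger switches below previously fell through to their
default case for this CPU and reported an error.
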
Signed-off-by: Frédéric Pétrot <frederic.petrot@univ-grenoble-alpes.fr>
Acked-by: Alistair Francis <alistair.francis@wdc.com>
Reviewed-by: Bin Meng <bmeng.cn@gmail.com>
Message-Id: <20220602155246.38837-1-frederic.petrot@univ-grenoble-alpes.fr>
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
---
 target/riscv/debug.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/target/riscv/debug.c b/target/riscv/debug.c
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/debug.c
+++ b/target/riscv/debug.c
@@ -XXX,XX +XXX,XX @@ static inline target_ulong trigger_type(CPURISCVState *env,
         tdata1 = RV32_TYPE(type);
         break;
     case MXL_RV64:
+    case MXL_RV128:
         tdata1 = RV64_TYPE(type);
         break;
     default:
@@ -XXX,XX +XXX,XX @@ static target_ulong tdata1_validate(CPURISCVState *env, target_ulong val,
         tdata1 = RV32_TYPE(t);
         break;
     case MXL_RV64:
+    case MXL_RV128:
         type = extract64(val, 60, 4);
         dmode = extract64(val, 59, 1);
         tdata1 = RV64_TYPE(t);
--
2.36.1
1 | From: eopXD <yueh.ting.chen@gmail.com> | ||
1 | 2 | ||
3 | No functional change intended in this commit. | ||
4 | |||
5 | Signed-off-by: eop Chen <eop.chen@sifive.com> | ||
6 | Reviewed-by: Frank Chang <frank.chang@sifive.com> | ||
7 | Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn> | ||
8 | Reviewed-by: Alistair Francis <alistair.francis@wdc.com> | ||
9 | Message-Id: <165449614532.19704.7000832880482980398-1@git.sr.ht> | ||
10 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | ||
11 | --- | ||
12 | target/riscv/vector_helper.c | 1132 +++++++++++++++++----------------- | ||
13 | 1 file changed, 565 insertions(+), 567 deletions(-) | ||
14 | |||
15 | diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/target/riscv/vector_helper.c | ||
18 | +++ b/target/riscv/vector_helper.c | ||
19 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB) | ||
20 | |||
21 | static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2, | ||
22 | CPURISCVState *env, uint32_t desc, | ||
23 | - uint32_t esz, uint32_t dsz, | ||
24 | opivv2_fn *fn) | ||
25 | { | ||
26 | uint32_t vm = vext_vm(desc); | ||
27 | @@ -XXX,XX +XXX,XX @@ static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2, | ||
28 | } | ||
29 | |||
30 | /* generate the helpers for OPIVV */ | ||
31 | -#define GEN_VEXT_VV(NAME, ESZ, DSZ) \ | ||
32 | +#define GEN_VEXT_VV(NAME) \ | ||
33 | void HELPER(NAME)(void *vd, void *v0, void *vs1, \ | ||
34 | void *vs2, CPURISCVState *env, \ | ||
35 | uint32_t desc) \ | ||
36 | { \ | ||
37 | - do_vext_vv(vd, v0, vs1, vs2, env, desc, ESZ, DSZ, \ | ||
38 | + do_vext_vv(vd, v0, vs1, vs2, env, desc, \ | ||
39 | do_##NAME); \ | ||
40 | } | ||
41 | |||
42 | -GEN_VEXT_VV(vadd_vv_b, 1, 1) | ||
43 | -GEN_VEXT_VV(vadd_vv_h, 2, 2) | ||
44 | -GEN_VEXT_VV(vadd_vv_w, 4, 4) | ||
45 | -GEN_VEXT_VV(vadd_vv_d, 8, 8) | ||
46 | -GEN_VEXT_VV(vsub_vv_b, 1, 1) | ||
47 | -GEN_VEXT_VV(vsub_vv_h, 2, 2) | ||
48 | -GEN_VEXT_VV(vsub_vv_w, 4, 4) | ||
49 | -GEN_VEXT_VV(vsub_vv_d, 8, 8) | ||
50 | +GEN_VEXT_VV(vadd_vv_b) | ||
51 | +GEN_VEXT_VV(vadd_vv_h) | ||
52 | +GEN_VEXT_VV(vadd_vv_w) | ||
53 | +GEN_VEXT_VV(vadd_vv_d) | ||
54 | +GEN_VEXT_VV(vsub_vv_b) | ||
55 | +GEN_VEXT_VV(vsub_vv_h) | ||
56 | +GEN_VEXT_VV(vsub_vv_w) | ||
57 | +GEN_VEXT_VV(vsub_vv_d) | ||
58 | |||
59 | typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i); | ||
60 | |||
61 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB) | ||
62 | |||
63 | static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2, | ||
64 | CPURISCVState *env, uint32_t desc, | ||
65 | - uint32_t esz, uint32_t dsz, | ||
66 | opivx2_fn fn) | ||
67 | { | ||
68 | uint32_t vm = vext_vm(desc); | ||
69 | @@ -XXX,XX +XXX,XX @@ static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2, | ||
70 | } | ||
71 | |||
72 | /* generate the helpers for OPIVX */ | ||
73 | -#define GEN_VEXT_VX(NAME, ESZ, DSZ) \ | ||
74 | +#define GEN_VEXT_VX(NAME) \ | ||
75 | void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ | ||
76 | void *vs2, CPURISCVState *env, \ | ||
77 | uint32_t desc) \ | ||
78 | { \ | ||
79 | - do_vext_vx(vd, v0, s1, vs2, env, desc, ESZ, DSZ, \ | ||
80 | + do_vext_vx(vd, v0, s1, vs2, env, desc, \ | ||
81 | do_##NAME); \ | ||
82 | } | ||
83 | |||
84 | -GEN_VEXT_VX(vadd_vx_b, 1, 1) | ||
85 | -GEN_VEXT_VX(vadd_vx_h, 2, 2) | ||
86 | -GEN_VEXT_VX(vadd_vx_w, 4, 4) | ||
87 | -GEN_VEXT_VX(vadd_vx_d, 8, 8) | ||
88 | -GEN_VEXT_VX(vsub_vx_b, 1, 1) | ||
89 | -GEN_VEXT_VX(vsub_vx_h, 2, 2) | ||
90 | -GEN_VEXT_VX(vsub_vx_w, 4, 4) | ||
91 | -GEN_VEXT_VX(vsub_vx_d, 8, 8) | ||
92 | -GEN_VEXT_VX(vrsub_vx_b, 1, 1) | ||
93 | -GEN_VEXT_VX(vrsub_vx_h, 2, 2) | ||
94 | -GEN_VEXT_VX(vrsub_vx_w, 4, 4) | ||
95 | -GEN_VEXT_VX(vrsub_vx_d, 8, 8) | ||
96 | +GEN_VEXT_VX(vadd_vx_b) | ||
97 | +GEN_VEXT_VX(vadd_vx_h) | ||
98 | +GEN_VEXT_VX(vadd_vx_w) | ||
99 | +GEN_VEXT_VX(vadd_vx_d) | ||
100 | +GEN_VEXT_VX(vsub_vx_b) | ||
101 | +GEN_VEXT_VX(vsub_vx_h) | ||
102 | +GEN_VEXT_VX(vsub_vx_w) | ||
103 | +GEN_VEXT_VX(vsub_vx_d) | ||
104 | +GEN_VEXT_VX(vrsub_vx_b) | ||
105 | +GEN_VEXT_VX(vrsub_vx_h) | ||
106 | +GEN_VEXT_VX(vrsub_vx_w) | ||
107 | +GEN_VEXT_VX(vrsub_vx_d) | ||
108 | |||
109 | void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc) | ||
110 | { | ||
111 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD) | ||
112 | RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB) | ||
113 | RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB) | ||
114 | RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB) | ||
115 | -GEN_VEXT_VV(vwaddu_vv_b, 1, 2) | ||
116 | -GEN_VEXT_VV(vwaddu_vv_h, 2, 4) | ||
117 | -GEN_VEXT_VV(vwaddu_vv_w, 4, 8) | ||
118 | -GEN_VEXT_VV(vwsubu_vv_b, 1, 2) | ||
119 | -GEN_VEXT_VV(vwsubu_vv_h, 2, 4) | ||
120 | -GEN_VEXT_VV(vwsubu_vv_w, 4, 8) | ||
121 | -GEN_VEXT_VV(vwadd_vv_b, 1, 2) | ||
122 | -GEN_VEXT_VV(vwadd_vv_h, 2, 4) | ||
123 | -GEN_VEXT_VV(vwadd_vv_w, 4, 8) | ||
124 | -GEN_VEXT_VV(vwsub_vv_b, 1, 2) | ||
125 | -GEN_VEXT_VV(vwsub_vv_h, 2, 4) | ||
126 | -GEN_VEXT_VV(vwsub_vv_w, 4, 8) | ||
127 | -GEN_VEXT_VV(vwaddu_wv_b, 1, 2) | ||
128 | -GEN_VEXT_VV(vwaddu_wv_h, 2, 4) | ||
129 | -GEN_VEXT_VV(vwaddu_wv_w, 4, 8) | ||
130 | -GEN_VEXT_VV(vwsubu_wv_b, 1, 2) | ||
131 | -GEN_VEXT_VV(vwsubu_wv_h, 2, 4) | ||
132 | -GEN_VEXT_VV(vwsubu_wv_w, 4, 8) | ||
133 | -GEN_VEXT_VV(vwadd_wv_b, 1, 2) | ||
134 | -GEN_VEXT_VV(vwadd_wv_h, 2, 4) | ||
135 | -GEN_VEXT_VV(vwadd_wv_w, 4, 8) | ||
136 | -GEN_VEXT_VV(vwsub_wv_b, 1, 2) | ||
137 | -GEN_VEXT_VV(vwsub_wv_h, 2, 4) | ||
138 | -GEN_VEXT_VV(vwsub_wv_w, 4, 8) | ||
139 | +GEN_VEXT_VV(vwaddu_vv_b) | ||
140 | +GEN_VEXT_VV(vwaddu_vv_h) | ||
141 | +GEN_VEXT_VV(vwaddu_vv_w) | ||
142 | +GEN_VEXT_VV(vwsubu_vv_b) | ||
143 | +GEN_VEXT_VV(vwsubu_vv_h) | ||
144 | +GEN_VEXT_VV(vwsubu_vv_w) | ||
145 | +GEN_VEXT_VV(vwadd_vv_b) | ||
146 | +GEN_VEXT_VV(vwadd_vv_h) | ||
147 | +GEN_VEXT_VV(vwadd_vv_w) | ||
148 | +GEN_VEXT_VV(vwsub_vv_b) | ||
149 | +GEN_VEXT_VV(vwsub_vv_h) | ||
150 | +GEN_VEXT_VV(vwsub_vv_w) | ||
151 | +GEN_VEXT_VV(vwaddu_wv_b) | ||
152 | +GEN_VEXT_VV(vwaddu_wv_h) | ||
153 | +GEN_VEXT_VV(vwaddu_wv_w) | ||
154 | +GEN_VEXT_VV(vwsubu_wv_b) | ||
155 | +GEN_VEXT_VV(vwsubu_wv_h) | ||
156 | +GEN_VEXT_VV(vwsubu_wv_w) | ||
157 | +GEN_VEXT_VV(vwadd_wv_b) | ||
158 | +GEN_VEXT_VV(vwadd_wv_h) | ||
159 | +GEN_VEXT_VV(vwadd_wv_w) | ||
160 | +GEN_VEXT_VV(vwsub_wv_b) | ||
161 | +GEN_VEXT_VV(vwsub_wv_h) | ||
162 | +GEN_VEXT_VV(vwsub_wv_w) | ||
163 | |||
164 | RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD) | ||
165 | RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD) | ||
166 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD) | ||
167 | RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB) | ||
168 | RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB) | ||
169 | RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB) | ||
170 | -GEN_VEXT_VX(vwaddu_vx_b, 1, 2) | ||
171 | -GEN_VEXT_VX(vwaddu_vx_h, 2, 4) | ||
172 | -GEN_VEXT_VX(vwaddu_vx_w, 4, 8) | ||
173 | -GEN_VEXT_VX(vwsubu_vx_b, 1, 2) | ||
174 | -GEN_VEXT_VX(vwsubu_vx_h, 2, 4) | ||
175 | -GEN_VEXT_VX(vwsubu_vx_w, 4, 8) | ||
176 | -GEN_VEXT_VX(vwadd_vx_b, 1, 2) | ||
177 | -GEN_VEXT_VX(vwadd_vx_h, 2, 4) | ||
178 | -GEN_VEXT_VX(vwadd_vx_w, 4, 8) | ||
179 | -GEN_VEXT_VX(vwsub_vx_b, 1, 2) | ||
180 | -GEN_VEXT_VX(vwsub_vx_h, 2, 4) | ||
181 | -GEN_VEXT_VX(vwsub_vx_w, 4, 8) | ||
182 | -GEN_VEXT_VX(vwaddu_wx_b, 1, 2) | ||
183 | -GEN_VEXT_VX(vwaddu_wx_h, 2, 4) | ||
184 | -GEN_VEXT_VX(vwaddu_wx_w, 4, 8) | ||
185 | -GEN_VEXT_VX(vwsubu_wx_b, 1, 2) | ||
186 | -GEN_VEXT_VX(vwsubu_wx_h, 2, 4) | ||
187 | -GEN_VEXT_VX(vwsubu_wx_w, 4, 8) | ||
188 | -GEN_VEXT_VX(vwadd_wx_b, 1, 2) | ||
189 | -GEN_VEXT_VX(vwadd_wx_h, 2, 4) | ||
190 | -GEN_VEXT_VX(vwadd_wx_w, 4, 8) | ||
191 | -GEN_VEXT_VX(vwsub_wx_b, 1, 2) | ||
192 | -GEN_VEXT_VX(vwsub_wx_h, 2, 4) | ||
193 | -GEN_VEXT_VX(vwsub_wx_w, 4, 8) | ||
194 | +GEN_VEXT_VX(vwaddu_vx_b) | ||
195 | +GEN_VEXT_VX(vwaddu_vx_h) | ||
196 | +GEN_VEXT_VX(vwaddu_vx_w) | ||
197 | +GEN_VEXT_VX(vwsubu_vx_b) | ||
198 | +GEN_VEXT_VX(vwsubu_vx_h) | ||
199 | +GEN_VEXT_VX(vwsubu_vx_w) | ||
200 | +GEN_VEXT_VX(vwadd_vx_b) | ||
201 | +GEN_VEXT_VX(vwadd_vx_h) | ||
202 | +GEN_VEXT_VX(vwadd_vx_w) | ||
203 | +GEN_VEXT_VX(vwsub_vx_b) | ||
204 | +GEN_VEXT_VX(vwsub_vx_h) | ||
205 | +GEN_VEXT_VX(vwsub_vx_w) | ||
206 | +GEN_VEXT_VX(vwaddu_wx_b) | ||
207 | +GEN_VEXT_VX(vwaddu_wx_h) | ||
208 | +GEN_VEXT_VX(vwaddu_wx_w) | ||
209 | +GEN_VEXT_VX(vwsubu_wx_b) | ||
210 | +GEN_VEXT_VX(vwsubu_wx_h) | ||
211 | +GEN_VEXT_VX(vwsubu_wx_w) | ||
212 | +GEN_VEXT_VX(vwadd_wx_b) | ||
213 | +GEN_VEXT_VX(vwadd_wx_h) | ||
214 | +GEN_VEXT_VX(vwadd_wx_w) | ||
215 | +GEN_VEXT_VX(vwsub_wx_b) | ||
216 | +GEN_VEXT_VX(vwsub_wx_h) | ||
217 | +GEN_VEXT_VX(vwsub_wx_w) | ||
218 | |||
219 | /* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */ | ||
220 | #define DO_VADC(N, M, C) (N + M + C) | ||
221 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR) | ||
222 | RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR) | ||
223 | RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR) | ||
224 | RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR) | ||
225 | -GEN_VEXT_VV(vand_vv_b, 1, 1) | ||
226 | -GEN_VEXT_VV(vand_vv_h, 2, 2) | ||
227 | -GEN_VEXT_VV(vand_vv_w, 4, 4) | ||
228 | -GEN_VEXT_VV(vand_vv_d, 8, 8) | ||
229 | -GEN_VEXT_VV(vor_vv_b, 1, 1) | ||
230 | -GEN_VEXT_VV(vor_vv_h, 2, 2) | ||
231 | -GEN_VEXT_VV(vor_vv_w, 4, 4) | ||
232 | -GEN_VEXT_VV(vor_vv_d, 8, 8) | ||
233 | -GEN_VEXT_VV(vxor_vv_b, 1, 1) | ||
234 | -GEN_VEXT_VV(vxor_vv_h, 2, 2) | ||
235 | -GEN_VEXT_VV(vxor_vv_w, 4, 4) | ||
236 | -GEN_VEXT_VV(vxor_vv_d, 8, 8) | ||
237 | +GEN_VEXT_VV(vand_vv_b) | ||
238 | +GEN_VEXT_VV(vand_vv_h) | ||
239 | +GEN_VEXT_VV(vand_vv_w) | ||
240 | +GEN_VEXT_VV(vand_vv_d) | ||
241 | +GEN_VEXT_VV(vor_vv_b) | ||
242 | +GEN_VEXT_VV(vor_vv_h) | ||
243 | +GEN_VEXT_VV(vor_vv_w) | ||
244 | +GEN_VEXT_VV(vor_vv_d) | ||
245 | +GEN_VEXT_VV(vxor_vv_b) | ||
246 | +GEN_VEXT_VV(vxor_vv_h) | ||
247 | +GEN_VEXT_VV(vxor_vv_w) | ||
248 | +GEN_VEXT_VV(vxor_vv_d) | ||
249 | |||
250 | RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND) | ||
251 | RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND) | ||
@@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR)
RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR)
RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR)
RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR)
-GEN_VEXT_VX(vand_vx_b, 1, 1)
-GEN_VEXT_VX(vand_vx_h, 2, 2)
-GEN_VEXT_VX(vand_vx_w, 4, 4)
-GEN_VEXT_VX(vand_vx_d, 8, 8)
-GEN_VEXT_VX(vor_vx_b, 1, 1)
-GEN_VEXT_VX(vor_vx_h, 2, 2)
-GEN_VEXT_VX(vor_vx_w, 4, 4)
-GEN_VEXT_VX(vor_vx_d, 8, 8)
-GEN_VEXT_VX(vxor_vx_b, 1, 1)
-GEN_VEXT_VX(vxor_vx_h, 2, 2)
-GEN_VEXT_VX(vxor_vx_w, 4, 4)
-GEN_VEXT_VX(vxor_vx_d, 8, 8)
+GEN_VEXT_VX(vand_vx_b)
+GEN_VEXT_VX(vand_vx_h)
+GEN_VEXT_VX(vand_vx_w)
+GEN_VEXT_VX(vand_vx_d)
+GEN_VEXT_VX(vor_vx_b)
+GEN_VEXT_VX(vor_vx_h)
+GEN_VEXT_VX(vor_vx_w)
+GEN_VEXT_VX(vor_vx_d)
+GEN_VEXT_VX(vxor_vx_b)
+GEN_VEXT_VX(vxor_vx_h)
+GEN_VEXT_VX(vxor_vx_w)
+GEN_VEXT_VX(vxor_vx_d)

/* Vector Single-Width Bit Shift Instructions */
#define DO_SLL(N, M) (N << (M))
@@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX)
-GEN_VEXT_VV(vminu_vv_b, 1, 1)
-GEN_VEXT_VV(vminu_vv_h, 2, 2)
-GEN_VEXT_VV(vminu_vv_w, 4, 4)
-GEN_VEXT_VV(vminu_vv_d, 8, 8)
-GEN_VEXT_VV(vmin_vv_b, 1, 1)
-GEN_VEXT_VV(vmin_vv_h, 2, 2)
-GEN_VEXT_VV(vmin_vv_w, 4, 4)
-GEN_VEXT_VV(vmin_vv_d, 8, 8)
-GEN_VEXT_VV(vmaxu_vv_b, 1, 1)
-GEN_VEXT_VV(vmaxu_vv_h, 2, 2)
-GEN_VEXT_VV(vmaxu_vv_w, 4, 4)
-GEN_VEXT_VV(vmaxu_vv_d, 8, 8)
-GEN_VEXT_VV(vmax_vv_b, 1, 1)
-GEN_VEXT_VV(vmax_vv_h, 2, 2)
-GEN_VEXT_VV(vmax_vv_w, 4, 4)
-GEN_VEXT_VV(vmax_vv_d, 8, 8)
+GEN_VEXT_VV(vminu_vv_b)
+GEN_VEXT_VV(vminu_vv_h)
+GEN_VEXT_VV(vminu_vv_w)
+GEN_VEXT_VV(vminu_vv_d)
+GEN_VEXT_VV(vmin_vv_b)
+GEN_VEXT_VV(vmin_vv_h)
+GEN_VEXT_VV(vmin_vv_w)
+GEN_VEXT_VV(vmin_vv_d)
+GEN_VEXT_VV(vmaxu_vv_b)
+GEN_VEXT_VV(vmaxu_vv_h)
+GEN_VEXT_VV(vmaxu_vv_w)
+GEN_VEXT_VV(vmaxu_vv_d)
+GEN_VEXT_VV(vmax_vv_b)
+GEN_VEXT_VV(vmax_vv_h)
+GEN_VEXT_VV(vmax_vv_w)
+GEN_VEXT_VV(vmax_vv_d)

RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN)
RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN)
@@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX)
RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX)
RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX)
RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX)
-GEN_VEXT_VX(vminu_vx_b, 1, 1)
-GEN_VEXT_VX(vminu_vx_h, 2, 2)
-GEN_VEXT_VX(vminu_vx_w, 4, 4)
-GEN_VEXT_VX(vminu_vx_d, 8, 8)
-GEN_VEXT_VX(vmin_vx_b, 1, 1)
-GEN_VEXT_VX(vmin_vx_h, 2, 2)
-GEN_VEXT_VX(vmin_vx_w, 4, 4)
-GEN_VEXT_VX(vmin_vx_d, 8, 8)
-GEN_VEXT_VX(vmaxu_vx_b, 1, 1)
-GEN_VEXT_VX(vmaxu_vx_h, 2, 2)
-GEN_VEXT_VX(vmaxu_vx_w, 4, 4)
-GEN_VEXT_VX(vmaxu_vx_d, 8, 8)
-GEN_VEXT_VX(vmax_vx_b, 1, 1)
-GEN_VEXT_VX(vmax_vx_h, 2, 2)
-GEN_VEXT_VX(vmax_vx_w, 4, 4)
-GEN_VEXT_VX(vmax_vx_d, 8, 8)
+GEN_VEXT_VX(vminu_vx_b)
+GEN_VEXT_VX(vminu_vx_h)
+GEN_VEXT_VX(vminu_vx_w)
+GEN_VEXT_VX(vminu_vx_d)
+GEN_VEXT_VX(vmin_vx_b)
+GEN_VEXT_VX(vmin_vx_h)
+GEN_VEXT_VX(vmin_vx_w)
+GEN_VEXT_VX(vmin_vx_d)
+GEN_VEXT_VX(vmaxu_vx_b)
+GEN_VEXT_VX(vmaxu_vx_h)
+GEN_VEXT_VX(vmaxu_vx_w)
+GEN_VEXT_VX(vmaxu_vx_d)
+GEN_VEXT_VX(vmax_vx_b)
+GEN_VEXT_VX(vmax_vx_h)
+GEN_VEXT_VX(vmax_vx_w)
+GEN_VEXT_VX(vmax_vx_d)

/* Vector Single-Width Integer Multiply Instructions */
#define DO_MUL(N, M) (N * M)
@@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL)
RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL)
RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL)
RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL)
-GEN_VEXT_VV(vmul_vv_b, 1, 1)
-GEN_VEXT_VV(vmul_vv_h, 2, 2)
-GEN_VEXT_VV(vmul_vv_w, 4, 4)
-GEN_VEXT_VV(vmul_vv_d, 8, 8)
+GEN_VEXT_VV(vmul_vv_b)
+GEN_VEXT_VV(vmul_vv_h)
+GEN_VEXT_VV(vmul_vv_w)
+GEN_VEXT_VV(vmul_vv_d)

static int8_t do_mulh_b(int8_t s2, int8_t s1)
{
@@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b)
RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h)
RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w)
RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d)
-GEN_VEXT_VV(vmulh_vv_b, 1, 1)
-GEN_VEXT_VV(vmulh_vv_h, 2, 2)
-GEN_VEXT_VV(vmulh_vv_w, 4, 4)
-GEN_VEXT_VV(vmulh_vv_d, 8, 8)
-GEN_VEXT_VV(vmulhu_vv_b, 1, 1)
-GEN_VEXT_VV(vmulhu_vv_h, 2, 2)
-GEN_VEXT_VV(vmulhu_vv_w, 4, 4)
-GEN_VEXT_VV(vmulhu_vv_d, 8, 8)
-GEN_VEXT_VV(vmulhsu_vv_b, 1, 1)
-GEN_VEXT_VV(vmulhsu_vv_h, 2, 2)
-GEN_VEXT_VV(vmulhsu_vv_w, 4, 4)
-GEN_VEXT_VV(vmulhsu_vv_d, 8, 8)
+GEN_VEXT_VV(vmulh_vv_b)
+GEN_VEXT_VV(vmulh_vv_h)
+GEN_VEXT_VV(vmulh_vv_w)
+GEN_VEXT_VV(vmulh_vv_d)
+GEN_VEXT_VV(vmulhu_vv_b)
+GEN_VEXT_VV(vmulhu_vv_h)
+GEN_VEXT_VV(vmulhu_vv_w)
+GEN_VEXT_VV(vmulhu_vv_d)
+GEN_VEXT_VV(vmulhsu_vv_b)
+GEN_VEXT_VV(vmulhsu_vv_h)
+GEN_VEXT_VV(vmulhsu_vv_w)
+GEN_VEXT_VV(vmulhsu_vv_d)

RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL)
RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL)
@@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b)
RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h)
RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w)
RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d)
-GEN_VEXT_VX(vmul_vx_b, 1, 1)
-GEN_VEXT_VX(vmul_vx_h, 2, 2)
-GEN_VEXT_VX(vmul_vx_w, 4, 4)
-GEN_VEXT_VX(vmul_vx_d, 8, 8)
-GEN_VEXT_VX(vmulh_vx_b, 1, 1)
-GEN_VEXT_VX(vmulh_vx_h, 2, 2)
-GEN_VEXT_VX(vmulh_vx_w, 4, 4)
-GEN_VEXT_VX(vmulh_vx_d, 8, 8)
-GEN_VEXT_VX(vmulhu_vx_b, 1, 1)
-GEN_VEXT_VX(vmulhu_vx_h, 2, 2)
-GEN_VEXT_VX(vmulhu_vx_w, 4, 4)
-GEN_VEXT_VX(vmulhu_vx_d, 8, 8)
-GEN_VEXT_VX(vmulhsu_vx_b, 1, 1)
-GEN_VEXT_VX(vmulhsu_vx_h, 2, 2)
-GEN_VEXT_VX(vmulhsu_vx_w, 4, 4)
-GEN_VEXT_VX(vmulhsu_vx_d, 8, 8)
+GEN_VEXT_VX(vmul_vx_b)
+GEN_VEXT_VX(vmul_vx_h)
+GEN_VEXT_VX(vmul_vx_w)
+GEN_VEXT_VX(vmul_vx_d)
+GEN_VEXT_VX(vmulh_vx_b)
+GEN_VEXT_VX(vmulh_vx_h)
+GEN_VEXT_VX(vmulh_vx_w)
+GEN_VEXT_VX(vmulh_vx_d)
+GEN_VEXT_VX(vmulhu_vx_b)
+GEN_VEXT_VX(vmulhu_vx_h)
+GEN_VEXT_VX(vmulhu_vx_w)
+GEN_VEXT_VX(vmulhu_vx_d)
+GEN_VEXT_VX(vmulhsu_vx_b)
+GEN_VEXT_VX(vmulhsu_vx_h)
+GEN_VEXT_VX(vmulhsu_vx_w)
+GEN_VEXT_VX(vmulhsu_vx_d)

/* Vector Integer Divide Instructions */
#define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M)
@@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM)
RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM)
RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM)
RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM)
-GEN_VEXT_VV(vdivu_vv_b, 1, 1)
-GEN_VEXT_VV(vdivu_vv_h, 2, 2)
-GEN_VEXT_VV(vdivu_vv_w, 4, 4)
-GEN_VEXT_VV(vdivu_vv_d, 8, 8)
-GEN_VEXT_VV(vdiv_vv_b, 1, 1)
-GEN_VEXT_VV(vdiv_vv_h, 2, 2)
-GEN_VEXT_VV(vdiv_vv_w, 4, 4)
-GEN_VEXT_VV(vdiv_vv_d, 8, 8)
-GEN_VEXT_VV(vremu_vv_b, 1, 1)
-GEN_VEXT_VV(vremu_vv_h, 2, 2)
-GEN_VEXT_VV(vremu_vv_w, 4, 4)
-GEN_VEXT_VV(vremu_vv_d, 8, 8)
-GEN_VEXT_VV(vrem_vv_b, 1, 1)
-GEN_VEXT_VV(vrem_vv_h, 2, 2)
-GEN_VEXT_VV(vrem_vv_w, 4, 4)
-GEN_VEXT_VV(vrem_vv_d, 8, 8)
+GEN_VEXT_VV(vdivu_vv_b)
+GEN_VEXT_VV(vdivu_vv_h)
+GEN_VEXT_VV(vdivu_vv_w)
+GEN_VEXT_VV(vdivu_vv_d)
+GEN_VEXT_VV(vdiv_vv_b)
+GEN_VEXT_VV(vdiv_vv_h)
+GEN_VEXT_VV(vdiv_vv_w)
+GEN_VEXT_VV(vdiv_vv_d)
+GEN_VEXT_VV(vremu_vv_b)
+GEN_VEXT_VV(vremu_vv_h)
+GEN_VEXT_VV(vremu_vv_w)
+GEN_VEXT_VV(vremu_vv_d)
+GEN_VEXT_VV(vrem_vv_b)
+GEN_VEXT_VV(vrem_vv_h)
+GEN_VEXT_VV(vrem_vv_w)
+GEN_VEXT_VV(vrem_vv_d)

RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU)
RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU)
@@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM)
RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM)
RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM)
RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM)
-GEN_VEXT_VX(vdivu_vx_b, 1, 1)
-GEN_VEXT_VX(vdivu_vx_h, 2, 2)
-GEN_VEXT_VX(vdivu_vx_w, 4, 4)
-GEN_VEXT_VX(vdivu_vx_d, 8, 8)
-GEN_VEXT_VX(vdiv_vx_b, 1, 1)
-GEN_VEXT_VX(vdiv_vx_h, 2, 2)
-GEN_VEXT_VX(vdiv_vx_w, 4, 4)
-GEN_VEXT_VX(vdiv_vx_d, 8, 8)
-GEN_VEXT_VX(vremu_vx_b, 1, 1)
-GEN_VEXT_VX(vremu_vx_h, 2, 2)
-GEN_VEXT_VX(vremu_vx_w, 4, 4)
-GEN_VEXT_VX(vremu_vx_d, 8, 8)
-GEN_VEXT_VX(vrem_vx_b, 1, 1)
-GEN_VEXT_VX(vrem_vx_h, 2, 2)
-GEN_VEXT_VX(vrem_vx_w, 4, 4)
-GEN_VEXT_VX(vrem_vx_d, 8, 8)
+GEN_VEXT_VX(vdivu_vx_b)
+GEN_VEXT_VX(vdivu_vx_h)
+GEN_VEXT_VX(vdivu_vx_w)
+GEN_VEXT_VX(vdivu_vx_d)
+GEN_VEXT_VX(vdiv_vx_b)
+GEN_VEXT_VX(vdiv_vx_h)
+GEN_VEXT_VX(vdiv_vx_w)
+GEN_VEXT_VX(vdiv_vx_d)
+GEN_VEXT_VX(vremu_vx_b)
+GEN_VEXT_VX(vremu_vx_h)
+GEN_VEXT_VX(vremu_vx_w)
+GEN_VEXT_VX(vremu_vx_d)
+GEN_VEXT_VX(vrem_vx_b)
+GEN_VEXT_VX(vrem_vx_h)
+GEN_VEXT_VX(vrem_vx_w)
+GEN_VEXT_VX(vrem_vx_d)

/* Vector Widening Integer Multiply Instructions */
RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL)
@@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL)
RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL)
RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL)
RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL)
-GEN_VEXT_VV(vwmul_vv_b, 1, 2)
-GEN_VEXT_VV(vwmul_vv_h, 2, 4)
-GEN_VEXT_VV(vwmul_vv_w, 4, 8)
-GEN_VEXT_VV(vwmulu_vv_b, 1, 2)
-GEN_VEXT_VV(vwmulu_vv_h, 2, 4)
-GEN_VEXT_VV(vwmulu_vv_w, 4, 8)
-GEN_VEXT_VV(vwmulsu_vv_b, 1, 2)
-GEN_VEXT_VV(vwmulsu_vv_h, 2, 4)
-GEN_VEXT_VV(vwmulsu_vv_w, 4, 8)
+GEN_VEXT_VV(vwmul_vv_b)
+GEN_VEXT_VV(vwmul_vv_h)
+GEN_VEXT_VV(vwmul_vv_w)
+GEN_VEXT_VV(vwmulu_vv_b)
+GEN_VEXT_VV(vwmulu_vv_h)
+GEN_VEXT_VV(vwmulu_vv_w)
+GEN_VEXT_VV(vwmulsu_vv_b)
+GEN_VEXT_VV(vwmulsu_vv_h)
+GEN_VEXT_VV(vwmulsu_vv_w)

RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL)
RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL)
@@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL)
RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL)
RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL)
RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL)
-GEN_VEXT_VX(vwmul_vx_b, 1, 2)
-GEN_VEXT_VX(vwmul_vx_h, 2, 4)
-GEN_VEXT_VX(vwmul_vx_w, 4, 8)
-GEN_VEXT_VX(vwmulu_vx_b, 1, 2)
-GEN_VEXT_VX(vwmulu_vx_h, 2, 4)
-GEN_VEXT_VX(vwmulu_vx_w, 4, 8)
-GEN_VEXT_VX(vwmulsu_vx_b, 1, 2)
-GEN_VEXT_VX(vwmulsu_vx_h, 2, 4)
-GEN_VEXT_VX(vwmulsu_vx_w, 4, 8)
+GEN_VEXT_VX(vwmul_vx_b)
+GEN_VEXT_VX(vwmul_vx_h)
+GEN_VEXT_VX(vwmul_vx_w)
+GEN_VEXT_VX(vwmulu_vx_b)
+GEN_VEXT_VX(vwmulu_vx_h)
+GEN_VEXT_VX(vwmulu_vx_w)
+GEN_VEXT_VX(vwmulsu_vx_b)
+GEN_VEXT_VX(vwmulsu_vx_h)
+GEN_VEXT_VX(vwmulsu_vx_w)

/* Vector Single-Width Integer Multiply-Add Instructions */
#define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
@@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB)
RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB)
RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB)
RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB)
-GEN_VEXT_VV(vmacc_vv_b, 1, 1)
-GEN_VEXT_VV(vmacc_vv_h, 2, 2)
-GEN_VEXT_VV(vmacc_vv_w, 4, 4)
-GEN_VEXT_VV(vmacc_vv_d, 8, 8)
-GEN_VEXT_VV(vnmsac_vv_b, 1, 1)
-GEN_VEXT_VV(vnmsac_vv_h, 2, 2)
-GEN_VEXT_VV(vnmsac_vv_w, 4, 4)
-GEN_VEXT_VV(vnmsac_vv_d, 8, 8)
-GEN_VEXT_VV(vmadd_vv_b, 1, 1)
-GEN_VEXT_VV(vmadd_vv_h, 2, 2)
-GEN_VEXT_VV(vmadd_vv_w, 4, 4)
-GEN_VEXT_VV(vmadd_vv_d, 8, 8)
-GEN_VEXT_VV(vnmsub_vv_b, 1, 1)
-GEN_VEXT_VV(vnmsub_vv_h, 2, 2)
-GEN_VEXT_VV(vnmsub_vv_w, 4, 4)
-GEN_VEXT_VV(vnmsub_vv_d, 8, 8)
+GEN_VEXT_VV(vmacc_vv_b)
+GEN_VEXT_VV(vmacc_vv_h)
+GEN_VEXT_VV(vmacc_vv_w)
+GEN_VEXT_VV(vmacc_vv_d)
+GEN_VEXT_VV(vnmsac_vv_b)
+GEN_VEXT_VV(vnmsac_vv_h)
+GEN_VEXT_VV(vnmsac_vv_w)
+GEN_VEXT_VV(vnmsac_vv_d)
+GEN_VEXT_VV(vmadd_vv_b)
+GEN_VEXT_VV(vmadd_vv_h)
+GEN_VEXT_VV(vmadd_vv_w)
+GEN_VEXT_VV(vmadd_vv_d)
+GEN_VEXT_VV(vnmsub_vv_b)
+GEN_VEXT_VV(vnmsub_vv_h)
+GEN_VEXT_VV(vnmsub_vv_w)
+GEN_VEXT_VV(vnmsub_vv_d)

#define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \
@@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB)
RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB)
RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB)
RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB)
-GEN_VEXT_VX(vmacc_vx_b, 1, 1)
-GEN_VEXT_VX(vmacc_vx_h, 2, 2)
-GEN_VEXT_VX(vmacc_vx_w, 4, 4)
-GEN_VEXT_VX(vmacc_vx_d, 8, 8)
-GEN_VEXT_VX(vnmsac_vx_b, 1, 1)
-GEN_VEXT_VX(vnmsac_vx_h, 2, 2)
-GEN_VEXT_VX(vnmsac_vx_w, 4, 4)
-GEN_VEXT_VX(vnmsac_vx_d, 8, 8)
-GEN_VEXT_VX(vmadd_vx_b, 1, 1)
-GEN_VEXT_VX(vmadd_vx_h, 2, 2)
-GEN_VEXT_VX(vmadd_vx_w, 4, 4)
-GEN_VEXT_VX(vmadd_vx_d, 8, 8)
-GEN_VEXT_VX(vnmsub_vx_b, 1, 1)
-GEN_VEXT_VX(vnmsub_vx_h, 2, 2)
-GEN_VEXT_VX(vnmsub_vx_w, 4, 4)
-GEN_VEXT_VX(vnmsub_vx_d, 8, 8)
+GEN_VEXT_VX(vmacc_vx_b)
+GEN_VEXT_VX(vmacc_vx_h)
+GEN_VEXT_VX(vmacc_vx_w)
+GEN_VEXT_VX(vmacc_vx_d)
+GEN_VEXT_VX(vnmsac_vx_b)
+GEN_VEXT_VX(vnmsac_vx_h)
+GEN_VEXT_VX(vnmsac_vx_w)
+GEN_VEXT_VX(vnmsac_vx_d)
+GEN_VEXT_VX(vmadd_vx_b)
+GEN_VEXT_VX(vmadd_vx_h)
+GEN_VEXT_VX(vmadd_vx_w)
+GEN_VEXT_VX(vmadd_vx_d)
+GEN_VEXT_VX(vnmsub_vx_b)
+GEN_VEXT_VX(vnmsub_vx_h)
+GEN_VEXT_VX(vnmsub_vx_w)
+GEN_VEXT_VX(vnmsub_vx_d)

/* Vector Widening Integer Multiply-Add Instructions */
RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC)
@@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC)
RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC)
RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC)
RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC)
-GEN_VEXT_VV(vwmaccu_vv_b, 1, 2)
-GEN_VEXT_VV(vwmaccu_vv_h, 2, 4)
-GEN_VEXT_VV(vwmaccu_vv_w, 4, 8)
-GEN_VEXT_VV(vwmacc_vv_b, 1, 2)
-GEN_VEXT_VV(vwmacc_vv_h, 2, 4)
-GEN_VEXT_VV(vwmacc_vv_w, 4, 8)
-GEN_VEXT_VV(vwmaccsu_vv_b, 1, 2)
-GEN_VEXT_VV(vwmaccsu_vv_h, 2, 4)
-GEN_VEXT_VV(vwmaccsu_vv_w, 4, 8)
+GEN_VEXT_VV(vwmaccu_vv_b)
+GEN_VEXT_VV(vwmaccu_vv_h)
+GEN_VEXT_VV(vwmaccu_vv_w)
+GEN_VEXT_VV(vwmacc_vv_b)
+GEN_VEXT_VV(vwmacc_vv_h)
+GEN_VEXT_VV(vwmacc_vv_w)
+GEN_VEXT_VV(vwmaccsu_vv_b)
+GEN_VEXT_VV(vwmaccsu_vv_h)
+GEN_VEXT_VV(vwmaccsu_vv_w)

RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC)
RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC)
@@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC)
RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC)
RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC)
RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC)
-GEN_VEXT_VX(vwmaccu_vx_b, 1, 2)
-GEN_VEXT_VX(vwmaccu_vx_h, 2, 4)
-GEN_VEXT_VX(vwmaccu_vx_w, 4, 8)
-GEN_VEXT_VX(vwmacc_vx_b, 1, 2)
-GEN_VEXT_VX(vwmacc_vx_h, 2, 4)
-GEN_VEXT_VX(vwmacc_vx_w, 4, 8)
-GEN_VEXT_VX(vwmaccsu_vx_b, 1, 2)
-GEN_VEXT_VX(vwmaccsu_vx_h, 2, 4)
-GEN_VEXT_VX(vwmaccsu_vx_w, 4, 8)
-GEN_VEXT_VX(vwmaccus_vx_b, 1, 2)
-GEN_VEXT_VX(vwmaccus_vx_h, 2, 4)
-GEN_VEXT_VX(vwmaccus_vx_w, 4, 8)
+GEN_VEXT_VX(vwmaccu_vx_b)
+GEN_VEXT_VX(vwmaccu_vx_h)
+GEN_VEXT_VX(vwmaccu_vx_w)
+GEN_VEXT_VX(vwmacc_vx_b)
+GEN_VEXT_VX(vwmacc_vx_h)
+GEN_VEXT_VX(vwmacc_vx_w)
+GEN_VEXT_VX(vwmaccsu_vx_b)
+GEN_VEXT_VX(vwmaccsu_vx_h)
+GEN_VEXT_VX(vwmaccsu_vx_w)
+GEN_VEXT_VX(vwmaccus_vx_b)
+GEN_VEXT_VX(vwmaccus_vx_h)
+GEN_VEXT_VX(vwmaccus_vx_w)

/* Vector Integer Merge and Move Instructions */
#define GEN_VEXT_VMV_VV(NAME, ETYPE, H) \
@@ -XXX,XX +XXX,XX @@ vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2,
static inline void
vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2,
             CPURISCVState *env,
-             uint32_t desc, uint32_t esz, uint32_t dsz,
+             uint32_t desc,
             opivv2_rm_fn *fn)
{
    uint32_t vm = vext_vm(desc);
@@ -XXX,XX +XXX,XX @@ vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2,
}

/* generate helpers for fixed point instructions with OPIVV format */
-#define GEN_VEXT_VV_RM(NAME, ESZ, DSZ) \
+#define GEN_VEXT_VV_RM(NAME) \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
                  CPURISCVState *env, uint32_t desc) \
{ \
-    vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, ESZ, DSZ, \
+    vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, \
                 do_##NAME); \
}

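To make the pruning concrete: expanding the new one-argument form for the first invocation in the next hunk, GEN_VEXT_VV_RM(vsaddu_vv_b), yields

    void HELPER(vsaddu_vv_b)(void *vd, void *v0, void *vs1, void *vs2,
                             CPURISCVState *env, uint32_t desc)
    {
        /* vext_vv_rm_2() no longer takes the element/data sizes, so
         * the old trailing "1, 1" arguments disappear at every call site */
        vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, do_vsaddu_vv_b);
    }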
@@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8)
RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16)
RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32)
RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64)
-GEN_VEXT_VV_RM(vsaddu_vv_b, 1, 1)
-GEN_VEXT_VV_RM(vsaddu_vv_h, 2, 2)
-GEN_VEXT_VV_RM(vsaddu_vv_w, 4, 4)
-GEN_VEXT_VV_RM(vsaddu_vv_d, 8, 8)
+GEN_VEXT_VV_RM(vsaddu_vv_b)
+GEN_VEXT_VV_RM(vsaddu_vv_h)
+GEN_VEXT_VV_RM(vsaddu_vv_w)
+GEN_VEXT_VV_RM(vsaddu_vv_d)

typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i,
                          CPURISCVState *env, int vxrm);
@@ -XXX,XX +XXX,XX @@ vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2,
static inline void
vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2,
             CPURISCVState *env,
-             uint32_t desc, uint32_t esz, uint32_t dsz,
+             uint32_t desc,
             opivx2_rm_fn *fn)
{
    uint32_t vm = vext_vm(desc);
@@ -XXX,XX +XXX,XX @@ vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2,
}

/* generate helpers for fixed point instructions with OPIVX format */
-#define GEN_VEXT_VX_RM(NAME, ESZ, DSZ) \
+#define GEN_VEXT_VX_RM(NAME) \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
                  void *vs2, CPURISCVState *env, uint32_t desc) \
{ \
-    vext_vx_rm_2(vd, v0, s1, vs2, env, desc, ESZ, DSZ, \
+    vext_vx_rm_2(vd, v0, s1, vs2, env, desc, \
                 do_##NAME); \
}

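The OPIVX generator is symmetric; GEN_VEXT_VX_RM(vsaddu_vx_b) from the hunk below expands to

    void HELPER(vsaddu_vx_b)(void *vd, void *v0, target_ulong s1,
                             void *vs2, CPURISCVState *env, uint32_t desc)
    {
        /* same pruning as the VV form: only the per-element worker is
         * forwarded, the ESZ/DSZ constants are gone */
        vext_vx_rm_2(vd, v0, s1, vs2, env, desc, do_vsaddu_vx_b);
    }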
@@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8)
RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16)
RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32)
RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64)
-GEN_VEXT_VX_RM(vsaddu_vx_b, 1, 1)
-GEN_VEXT_VX_RM(vsaddu_vx_h, 2, 2)
-GEN_VEXT_VX_RM(vsaddu_vx_w, 4, 4)
-GEN_VEXT_VX_RM(vsaddu_vx_d, 8, 8)
+GEN_VEXT_VX_RM(vsaddu_vx_b)
+GEN_VEXT_VX_RM(vsaddu_vx_h)
+GEN_VEXT_VX_RM(vsaddu_vx_w)
+GEN_VEXT_VX_RM(vsaddu_vx_d)

static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
{
@@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8)
RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16)
RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32)
RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64)
-GEN_VEXT_VV_RM(vsadd_vv_b, 1, 1)
-GEN_VEXT_VV_RM(vsadd_vv_h, 2, 2)
-GEN_VEXT_VV_RM(vsadd_vv_w, 4, 4)
-GEN_VEXT_VV_RM(vsadd_vv_d, 8, 8)
+GEN_VEXT_VV_RM(vsadd_vv_b)
+GEN_VEXT_VV_RM(vsadd_vv_h)
+GEN_VEXT_VV_RM(vsadd_vv_w)
+GEN_VEXT_VV_RM(vsadd_vv_d)

RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8)
RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16)
RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32)
RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64)
-GEN_VEXT_VX_RM(vsadd_vx_b, 1, 1)
-GEN_VEXT_VX_RM(vsadd_vx_h, 2, 2)
-GEN_VEXT_VX_RM(vsadd_vx_w, 4, 4)
-GEN_VEXT_VX_RM(vsadd_vx_d, 8, 8)
+GEN_VEXT_VX_RM(vsadd_vx_b)
+GEN_VEXT_VX_RM(vsadd_vx_h)
+GEN_VEXT_VX_RM(vsadd_vx_w)
+GEN_VEXT_VX_RM(vsadd_vx_d)

static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
{
@@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8)
RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16)
RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32)
RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64)
-GEN_VEXT_VV_RM(vssubu_vv_b, 1, 1)
-GEN_VEXT_VV_RM(vssubu_vv_h, 2, 2)
-GEN_VEXT_VV_RM(vssubu_vv_w, 4, 4)
-GEN_VEXT_VV_RM(vssubu_vv_d, 8, 8)
+GEN_VEXT_VV_RM(vssubu_vv_b)
+GEN_VEXT_VV_RM(vssubu_vv_h)
+GEN_VEXT_VV_RM(vssubu_vv_w)
+GEN_VEXT_VV_RM(vssubu_vv_d)

RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8)
RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16)
RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32)
RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64)
-GEN_VEXT_VX_RM(vssubu_vx_b, 1, 1)
-GEN_VEXT_VX_RM(vssubu_vx_h, 2, 2)
-GEN_VEXT_VX_RM(vssubu_vx_w, 4, 4)
-GEN_VEXT_VX_RM(vssubu_vx_d, 8, 8)
+GEN_VEXT_VX_RM(vssubu_vx_b)
+GEN_VEXT_VX_RM(vssubu_vx_h)
+GEN_VEXT_VX_RM(vssubu_vx_w)
+GEN_VEXT_VX_RM(vssubu_vx_d)

static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
{
@@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8)
RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16)
RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32)
RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64)
-GEN_VEXT_VV_RM(vssub_vv_b, 1, 1)
-GEN_VEXT_VV_RM(vssub_vv_h, 2, 2)
-GEN_VEXT_VV_RM(vssub_vv_w, 4, 4)
-GEN_VEXT_VV_RM(vssub_vv_d, 8, 8)
+GEN_VEXT_VV_RM(vssub_vv_b)
+GEN_VEXT_VV_RM(vssub_vv_h)
+GEN_VEXT_VV_RM(vssub_vv_w)
+GEN_VEXT_VV_RM(vssub_vv_d)

RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8)
RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16)
RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32)
RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64)
-GEN_VEXT_VX_RM(vssub_vx_b, 1, 1)
-GEN_VEXT_VX_RM(vssub_vx_h, 2, 2)
-GEN_VEXT_VX_RM(vssub_vx_w, 4, 4)
-GEN_VEXT_VX_RM(vssub_vx_d, 8, 8)
+GEN_VEXT_VX_RM(vssub_vx_b)
+GEN_VEXT_VX_RM(vssub_vx_h)
+GEN_VEXT_VX_RM(vssub_vx_w)
+GEN_VEXT_VX_RM(vssub_vx_d)

/* Vector Single-Width Averaging Add and Subtract */
static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift)
@@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32)
RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32)
RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32)
RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64)
-GEN_VEXT_VV_RM(vaadd_vv_b, 1, 1)
-GEN_VEXT_VV_RM(vaadd_vv_h, 2, 2)
-GEN_VEXT_VV_RM(vaadd_vv_w, 4, 4)
-GEN_VEXT_VV_RM(vaadd_vv_d, 8, 8)
+GEN_VEXT_VV_RM(vaadd_vv_b)
+GEN_VEXT_VV_RM(vaadd_vv_h)
+GEN_VEXT_VV_RM(vaadd_vv_w)
+GEN_VEXT_VV_RM(vaadd_vv_d)

RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32)
RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32)
RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32)
RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64)
-GEN_VEXT_VX_RM(vaadd_vx_b, 1, 1)
-GEN_VEXT_VX_RM(vaadd_vx_h, 2, 2)
-GEN_VEXT_VX_RM(vaadd_vx_w, 4, 4)
-GEN_VEXT_VX_RM(vaadd_vx_d, 8, 8)
+GEN_VEXT_VX_RM(vaadd_vx_b)
+GEN_VEXT_VX_RM(vaadd_vx_h)
+GEN_VEXT_VX_RM(vaadd_vx_w)
+GEN_VEXT_VX_RM(vaadd_vx_d)

static inline uint32_t aaddu32(CPURISCVState *env, int vxrm,
                               uint32_t a, uint32_t b)
@@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2_RM, vaaddu_vv_b, OP_UUU_B, H1, H1, H1, aaddu32)
RVVCALL(OPIVV2_RM, vaaddu_vv_h, OP_UUU_H, H2, H2, H2, aaddu32)
RVVCALL(OPIVV2_RM, vaaddu_vv_w, OP_UUU_W, H4, H4, H4, aaddu32)
RVVCALL(OPIVV2_RM, vaaddu_vv_d, OP_UUU_D, H8, H8, H8, aaddu64)
-GEN_VEXT_VV_RM(vaaddu_vv_b, 1, 1)
-GEN_VEXT_VV_RM(vaaddu_vv_h, 2, 2)
-GEN_VEXT_VV_RM(vaaddu_vv_w, 4, 4)
-GEN_VEXT_VV_RM(vaaddu_vv_d, 8, 8)
+GEN_VEXT_VV_RM(vaaddu_vv_b)
+GEN_VEXT_VV_RM(vaaddu_vv_h)
+GEN_VEXT_VV_RM(vaaddu_vv_w)
+GEN_VEXT_VV_RM(vaaddu_vv_d)

RVVCALL(OPIVX2_RM, vaaddu_vx_b, OP_UUU_B, H1, H1, aaddu32)
RVVCALL(OPIVX2_RM, vaaddu_vx_h, OP_UUU_H, H2, H2, aaddu32)
RVVCALL(OPIVX2_RM, vaaddu_vx_w, OP_UUU_W, H4, H4, aaddu32)
RVVCALL(OPIVX2_RM, vaaddu_vx_d, OP_UUU_D, H8, H8, aaddu64)
-GEN_VEXT_VX_RM(vaaddu_vx_b, 1, 1)
-GEN_VEXT_VX_RM(vaaddu_vx_h, 2, 2)
-GEN_VEXT_VX_RM(vaaddu_vx_w, 4, 4)
-GEN_VEXT_VX_RM(vaaddu_vx_d, 8, 8)
+GEN_VEXT_VX_RM(vaaddu_vx_b)
+GEN_VEXT_VX_RM(vaaddu_vx_h)
+GEN_VEXT_VX_RM(vaaddu_vx_w)
+GEN_VEXT_VX_RM(vaaddu_vx_d)

static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
{
@@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32)
RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32)
RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32)
RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64)
-GEN_VEXT_VV_RM(vasub_vv_b, 1, 1)
-GEN_VEXT_VV_RM(vasub_vv_h, 2, 2)
-GEN_VEXT_VV_RM(vasub_vv_w, 4, 4)
-GEN_VEXT_VV_RM(vasub_vv_d, 8, 8)
+GEN_VEXT_VV_RM(vasub_vv_b)
+GEN_VEXT_VV_RM(vasub_vv_h)
+GEN_VEXT_VV_RM(vasub_vv_w)
+GEN_VEXT_VV_RM(vasub_vv_d)

RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32)
RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32)
RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32)
RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64)
-GEN_VEXT_VX_RM(vasub_vx_b, 1, 1)
-GEN_VEXT_VX_RM(vasub_vx_h, 2, 2)
-GEN_VEXT_VX_RM(vasub_vx_w, 4, 4)
-GEN_VEXT_VX_RM(vasub_vx_d, 8, 8)
+GEN_VEXT_VX_RM(vasub_vx_b)
+GEN_VEXT_VX_RM(vasub_vx_h)
+GEN_VEXT_VX_RM(vasub_vx_w)
+GEN_VEXT_VX_RM(vasub_vx_d)

static inline uint32_t asubu32(CPURISCVState *env, int vxrm,
                               uint32_t a, uint32_t b)
@@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2_RM, vasubu_vv_b, OP_UUU_B, H1, H1, H1, asubu32)
RVVCALL(OPIVV2_RM, vasubu_vv_h, OP_UUU_H, H2, H2, H2, asubu32)
RVVCALL(OPIVV2_RM, vasubu_vv_w, OP_UUU_W, H4, H4, H4, asubu32)
RVVCALL(OPIVV2_RM, vasubu_vv_d, OP_UUU_D, H8, H8, H8, asubu64)
-GEN_VEXT_VV_RM(vasubu_vv_b, 1, 1)
-GEN_VEXT_VV_RM(vasubu_vv_h, 2, 2)
-GEN_VEXT_VV_RM(vasubu_vv_w, 4, 4)
-GEN_VEXT_VV_RM(vasubu_vv_d, 8, 8)
+GEN_VEXT_VV_RM(vasubu_vv_b)
+GEN_VEXT_VV_RM(vasubu_vv_h)
+GEN_VEXT_VV_RM(vasubu_vv_w)
+GEN_VEXT_VV_RM(vasubu_vv_d)

RVVCALL(OPIVX2_RM, vasubu_vx_b, OP_UUU_B, H1, H1, asubu32)
RVVCALL(OPIVX2_RM, vasubu_vx_h, OP_UUU_H, H2, H2, asubu32)
RVVCALL(OPIVX2_RM, vasubu_vx_w, OP_UUU_W, H4, H4, asubu32)
RVVCALL(OPIVX2_RM, vasubu_vx_d, OP_UUU_D, H8, H8, asubu64)
-GEN_VEXT_VX_RM(vasubu_vx_b, 1, 1)
-GEN_VEXT_VX_RM(vasubu_vx_h, 2, 2)
-GEN_VEXT_VX_RM(vasubu_vx_w, 4, 4)
-GEN_VEXT_VX_RM(vasubu_vx_d, 8, 8)
+GEN_VEXT_VX_RM(vasubu_vx_b)
+GEN_VEXT_VX_RM(vasubu_vx_h)
+GEN_VEXT_VX_RM(vasubu_vx_w)
+GEN_VEXT_VX_RM(vasubu_vx_d)

/* Vector Single-Width Fractional Multiply with Rounding and Saturation */
static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
@@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8)
RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16)
RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32)
RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64)
-GEN_VEXT_VV_RM(vsmul_vv_b, 1, 1)
-GEN_VEXT_VV_RM(vsmul_vv_h, 2, 2)
-GEN_VEXT_VV_RM(vsmul_vv_w, 4, 4)
-GEN_VEXT_VV_RM(vsmul_vv_d, 8, 8)
+GEN_VEXT_VV_RM(vsmul_vv_b)
+GEN_VEXT_VV_RM(vsmul_vv_h)
+GEN_VEXT_VV_RM(vsmul_vv_w)
+GEN_VEXT_VV_RM(vsmul_vv_d)

RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8)
RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16)
RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32)
RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64)
-GEN_VEXT_VX_RM(vsmul_vx_b, 1, 1)
-GEN_VEXT_VX_RM(vsmul_vx_h, 2, 2)
-GEN_VEXT_VX_RM(vsmul_vx_w, 4, 4)
-GEN_VEXT_VX_RM(vsmul_vx_d, 8, 8)
+GEN_VEXT_VX_RM(vsmul_vx_b)
+GEN_VEXT_VX_RM(vsmul_vx_h)
+GEN_VEXT_VX_RM(vsmul_vx_w)
+GEN_VEXT_VX_RM(vsmul_vx_d)

/* Vector Single-Width Scaling Shift Instructions */
static inline uint8_t
@@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8)
RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16)
RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32)
RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64)
-GEN_VEXT_VV_RM(vssrl_vv_b, 1, 1)
-GEN_VEXT_VV_RM(vssrl_vv_h, 2, 2)
-GEN_VEXT_VV_RM(vssrl_vv_w, 4, 4)
-GEN_VEXT_VV_RM(vssrl_vv_d, 8, 8)
+GEN_VEXT_VV_RM(vssrl_vv_b)
+GEN_VEXT_VV_RM(vssrl_vv_h)
+GEN_VEXT_VV_RM(vssrl_vv_w)
+GEN_VEXT_VV_RM(vssrl_vv_d)

RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8)
RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16)
RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32)
RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64)
-GEN_VEXT_VX_RM(vssrl_vx_b, 1, 1)
-GEN_VEXT_VX_RM(vssrl_vx_h, 2, 2)
-GEN_VEXT_VX_RM(vssrl_vx_w, 4, 4)
-GEN_VEXT_VX_RM(vssrl_vx_d, 8, 8)
+GEN_VEXT_VX_RM(vssrl_vx_b)
+GEN_VEXT_VX_RM(vssrl_vx_h)
+GEN_VEXT_VX_RM(vssrl_vx_w)
+GEN_VEXT_VX_RM(vssrl_vx_d)

static inline int8_t
vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
@@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8)
RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16)
RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32)
RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64)
-GEN_VEXT_VV_RM(vssra_vv_b, 1, 1)
-GEN_VEXT_VV_RM(vssra_vv_h, 2, 2)
-GEN_VEXT_VV_RM(vssra_vv_w, 4, 4)
-GEN_VEXT_VV_RM(vssra_vv_d, 8, 8)
+GEN_VEXT_VV_RM(vssra_vv_b)
+GEN_VEXT_VV_RM(vssra_vv_h)
+GEN_VEXT_VV_RM(vssra_vv_w)
+GEN_VEXT_VV_RM(vssra_vv_d)

RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8)
RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16)
RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32)
RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64)
-GEN_VEXT_VX_RM(vssra_vx_b, 1, 1)
-GEN_VEXT_VX_RM(vssra_vx_h, 2, 2)
-GEN_VEXT_VX_RM(vssra_vx_w, 4, 4)
-GEN_VEXT_VX_RM(vssra_vx_d, 8, 8)
+GEN_VEXT_VX_RM(vssra_vx_b)
+GEN_VEXT_VX_RM(vssra_vx_h)
+GEN_VEXT_VX_RM(vssra_vx_w)
+GEN_VEXT_VX_RM(vssra_vx_d)

/* Vector Narrowing Fixed-Point Clip Instructions */
static inline int8_t
@@ -XXX,XX +XXX,XX @@ vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b)
RVVCALL(OPIVV2_RM, vnclip_wv_b, NOP_SSS_B, H1, H2, H1, vnclip8)
RVVCALL(OPIVV2_RM, vnclip_wv_h, NOP_SSS_H, H2, H4, H2, vnclip16)
RVVCALL(OPIVV2_RM, vnclip_wv_w, NOP_SSS_W, H4, H8, H4, vnclip32)
-GEN_VEXT_VV_RM(vnclip_wv_b, 1, 1)
-GEN_VEXT_VV_RM(vnclip_wv_h, 2, 2)
-GEN_VEXT_VV_RM(vnclip_wv_w, 4, 4)
+GEN_VEXT_VV_RM(vnclip_wv_b)
+GEN_VEXT_VV_RM(vnclip_wv_h)
+GEN_VEXT_VV_RM(vnclip_wv_w)

RVVCALL(OPIVX2_RM, vnclip_wx_b, NOP_SSS_B, H1, H2, vnclip8)
RVVCALL(OPIVX2_RM, vnclip_wx_h, NOP_SSS_H, H2, H4, vnclip16)
RVVCALL(OPIVX2_RM, vnclip_wx_w, NOP_SSS_W, H4, H8, vnclip32)
-GEN_VEXT_VX_RM(vnclip_wx_b, 1, 1)
-GEN_VEXT_VX_RM(vnclip_wx_h, 2, 2)
-GEN_VEXT_VX_RM(vnclip_wx_w, 4, 4)
+GEN_VEXT_VX_RM(vnclip_wx_b)
+GEN_VEXT_VX_RM(vnclip_wx_h)
+GEN_VEXT_VX_RM(vnclip_wx_w)

static inline uint8_t
vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b)
@@ -XXX,XX +XXX,XX @@ vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b)
RVVCALL(OPIVV2_RM, vnclipu_wv_b, NOP_UUU_B, H1, H2, H1, vnclipu8)
RVVCALL(OPIVV2_RM, vnclipu_wv_h, NOP_UUU_H, H2, H4, H2, vnclipu16)
RVVCALL(OPIVV2_RM, vnclipu_wv_w, NOP_UUU_W, H4, H8, H4, vnclipu32)
-GEN_VEXT_VV_RM(vnclipu_wv_b, 1, 1)
-GEN_VEXT_VV_RM(vnclipu_wv_h, 2, 2)
-GEN_VEXT_VV_RM(vnclipu_wv_w, 4, 4)
+GEN_VEXT_VV_RM(vnclipu_wv_b)
+GEN_VEXT_VV_RM(vnclipu_wv_h)
+GEN_VEXT_VV_RM(vnclipu_wv_w)

RVVCALL(OPIVX2_RM, vnclipu_wx_b, NOP_UUU_B, H1, H2, vnclipu8)
RVVCALL(OPIVX2_RM, vnclipu_wx_h, NOP_UUU_H, H2, H4, vnclipu16)
RVVCALL(OPIVX2_RM, vnclipu_wx_w, NOP_UUU_W, H4, H8, vnclipu32)
-GEN_VEXT_VX_RM(vnclipu_wx_b, 1, 1)
-GEN_VEXT_VX_RM(vnclipu_wx_h, 2, 2)
-GEN_VEXT_VX_RM(vnclipu_wx_w, 4, 4)
+GEN_VEXT_VX_RM(vnclipu_wx_b)
+GEN_VEXT_VX_RM(vnclipu_wx_h)
+GEN_VEXT_VX_RM(vnclipu_wx_w)

/*
*** Vector Float Point Arithmetic Instructions
@@ -XXX,XX +XXX,XX @@ static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \
    *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status); \
}

-#define GEN_VEXT_VV_ENV(NAME, ESZ, DSZ) \
+#define GEN_VEXT_VV_ENV(NAME) \
void HELPER(NAME)(void *vd, void *v0, void *vs1, \
                  void *vs2, CPURISCVState *env, \
                  uint32_t desc) \
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, \
RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add)
RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add)
RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add)
-GEN_VEXT_VV_ENV(vfadd_vv_h, 2, 2)
-GEN_VEXT_VV_ENV(vfadd_vv_w, 4, 4)
-GEN_VEXT_VV_ENV(vfadd_vv_d, 8, 8)
+GEN_VEXT_VV_ENV(vfadd_vv_h)
+GEN_VEXT_VV_ENV(vfadd_vv_w)
+GEN_VEXT_VV_ENV(vfadd_vv_d)

#define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
@@ -XXX,XX +XXX,XX @@ static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
    *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, &env->fp_status);\
}

-#define GEN_VEXT_VF(NAME, ESZ, DSZ) \
+#define GEN_VEXT_VF(NAME) \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \
                  void *vs2, CPURISCVState *env, \
                  uint32_t desc) \
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \
RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add)
RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add)
RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add)
-GEN_VEXT_VF(vfadd_vf_h, 2, 2)
-GEN_VEXT_VF(vfadd_vf_w, 4, 4)
-GEN_VEXT_VF(vfadd_vf_d, 8, 8)
+GEN_VEXT_VF(vfadd_vf_h)
+GEN_VEXT_VF(vfadd_vf_w)
+GEN_VEXT_VF(vfadd_vf_d)

RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub)
RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub)
RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub)
-GEN_VEXT_VV_ENV(vfsub_vv_h, 2, 2)
-GEN_VEXT_VV_ENV(vfsub_vv_w, 4, 4)
-GEN_VEXT_VV_ENV(vfsub_vv_d, 8, 8)
+GEN_VEXT_VV_ENV(vfsub_vv_h)
+GEN_VEXT_VV_ENV(vfsub_vv_w)
+GEN_VEXT_VV_ENV(vfsub_vv_d)
RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub)
RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub)
RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub)
-GEN_VEXT_VF(vfsub_vf_h, 2, 2)
-GEN_VEXT_VF(vfsub_vf_w, 4, 4)
-GEN_VEXT_VF(vfsub_vf_d, 8, 8)
+GEN_VEXT_VF(vfsub_vf_h)
+GEN_VEXT_VF(vfsub_vf_w)
+GEN_VEXT_VF(vfsub_vf_d)

static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s)
{
@@ -XXX,XX +XXX,XX @@ static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s)
RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub)
RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub)
RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub)
-GEN_VEXT_VF(vfrsub_vf_h, 2, 2)
-GEN_VEXT_VF(vfrsub_vf_w, 4, 4)
-GEN_VEXT_VF(vfrsub_vf_d, 8, 8)
+GEN_VEXT_VF(vfrsub_vf_h)
+GEN_VEXT_VF(vfrsub_vf_w)
+GEN_VEXT_VF(vfrsub_vf_d)

/* Vector Widening Floating-Point Add/Subtract Instructions */
static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s)
@@ -XXX,XX +XXX,XX @@ static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s)

RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16)
RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32)
-GEN_VEXT_VV_ENV(vfwadd_vv_h, 2, 4)
-GEN_VEXT_VV_ENV(vfwadd_vv_w, 4, 8)
+GEN_VEXT_VV_ENV(vfwadd_vv_h)
+GEN_VEXT_VV_ENV(vfwadd_vv_w)
RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16)
RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32)
-GEN_VEXT_VF(vfwadd_vf_h, 2, 4)
-GEN_VEXT_VF(vfwadd_vf_w, 4, 8)
+GEN_VEXT_VF(vfwadd_vf_h)
+GEN_VEXT_VF(vfwadd_vf_w)

static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s)
{
@@ -XXX,XX +XXX,XX @@ static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s)

RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16)
RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32)
-GEN_VEXT_VV_ENV(vfwsub_vv_h, 2, 4)
-GEN_VEXT_VV_ENV(vfwsub_vv_w, 4, 8)
+GEN_VEXT_VV_ENV(vfwsub_vv_h)
+GEN_VEXT_VV_ENV(vfwsub_vv_w)
RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16)
RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32)
-GEN_VEXT_VF(vfwsub_vf_h, 2, 4)
-GEN_VEXT_VF(vfwsub_vf_w, 4, 8)
+GEN_VEXT_VF(vfwsub_vf_h)
+GEN_VEXT_VF(vfwsub_vf_w)

static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s)
{
@@ -XXX,XX +XXX,XX @@ static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s)

RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16)
RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32)
-GEN_VEXT_VV_ENV(vfwadd_wv_h, 2, 4)
-GEN_VEXT_VV_ENV(vfwadd_wv_w, 4, 8)
+GEN_VEXT_VV_ENV(vfwadd_wv_h)
+GEN_VEXT_VV_ENV(vfwadd_wv_w)
RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16)
RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32)
-GEN_VEXT_VF(vfwadd_wf_h, 2, 4)
-GEN_VEXT_VF(vfwadd_wf_w, 4, 8)
+GEN_VEXT_VF(vfwadd_wf_h)
+GEN_VEXT_VF(vfwadd_wf_w)

static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s)
{
@@ -XXX,XX +XXX,XX @@ static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s)

RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16)
RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32)
-GEN_VEXT_VV_ENV(vfwsub_wv_h, 2, 4)
-GEN_VEXT_VV_ENV(vfwsub_wv_w, 4, 8)
+GEN_VEXT_VV_ENV(vfwsub_wv_h)
+GEN_VEXT_VV_ENV(vfwsub_wv_w)
RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16)
RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32)
-GEN_VEXT_VF(vfwsub_wf_h, 2, 4)
-GEN_VEXT_VF(vfwsub_wf_w, 4, 8)
+GEN_VEXT_VF(vfwsub_wf_h)
+GEN_VEXT_VF(vfwsub_wf_w)

/* Vector Single-Width Floating-Point Multiply/Divide Instructions */
RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul)
RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul)
RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul)
-GEN_VEXT_VV_ENV(vfmul_vv_h, 2, 2)
-GEN_VEXT_VV_ENV(vfmul_vv_w, 4, 4)
-GEN_VEXT_VV_ENV(vfmul_vv_d, 8, 8)
+GEN_VEXT_VV_ENV(vfmul_vv_h)
+GEN_VEXT_VV_ENV(vfmul_vv_w)
+GEN_VEXT_VV_ENV(vfmul_vv_d)
RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul)
RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul)
RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul)
-GEN_VEXT_VF(vfmul_vf_h, 2, 2)
-GEN_VEXT_VF(vfmul_vf_w, 4, 4)
-GEN_VEXT_VF(vfmul_vf_d, 8, 8)
+GEN_VEXT_VF(vfmul_vf_h)
+GEN_VEXT_VF(vfmul_vf_w)
+GEN_VEXT_VF(vfmul_vf_d)

RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div)
RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div)
RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div)
-GEN_VEXT_VV_ENV(vfdiv_vv_h, 2, 2)
-GEN_VEXT_VV_ENV(vfdiv_vv_w, 4, 4)
-GEN_VEXT_VV_ENV(vfdiv_vv_d, 8, 8)
+GEN_VEXT_VV_ENV(vfdiv_vv_h)
+GEN_VEXT_VV_ENV(vfdiv_vv_w)
+GEN_VEXT_VV_ENV(vfdiv_vv_d)
RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div)
RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div)
RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div)
-GEN_VEXT_VF(vfdiv_vf_h, 2, 2)
-GEN_VEXT_VF(vfdiv_vf_w, 4, 4)
-GEN_VEXT_VF(vfdiv_vf_d, 8, 8)
+GEN_VEXT_VF(vfdiv_vf_h)
+GEN_VEXT_VF(vfdiv_vf_w)
+GEN_VEXT_VF(vfdiv_vf_d)

static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s)
{
@@ -XXX,XX +XXX,XX @@ static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s)
RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv)
RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv)
RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv)
-GEN_VEXT_VF(vfrdiv_vf_h, 2, 2)
-GEN_VEXT_VF(vfrdiv_vf_w, 4, 4)
-GEN_VEXT_VF(vfrdiv_vf_d, 8, 8)
+GEN_VEXT_VF(vfrdiv_vf_h)
+GEN_VEXT_VF(vfrdiv_vf_w)
+GEN_VEXT_VF(vfrdiv_vf_d)

/* Vector Widening Floating-Point Multiply */
static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s)
@@ -XXX,XX +XXX,XX @@ static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s)
}
RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16)
RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32)
-GEN_VEXT_VV_ENV(vfwmul_vv_h, 2, 4)
-GEN_VEXT_VV_ENV(vfwmul_vv_w, 4, 8)
+GEN_VEXT_VV_ENV(vfwmul_vv_h)
+GEN_VEXT_VV_ENV(vfwmul_vv_w)
RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16)
RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32)
-GEN_VEXT_VF(vfwmul_vf_h, 2, 4)
-GEN_VEXT_VF(vfwmul_vf_w, 4, 8)
+GEN_VEXT_VF(vfwmul_vf_h)
+GEN_VEXT_VF(vfwmul_vf_w)

/* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */
#define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
@@ -XXX,XX +XXX,XX @@ static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16)
RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32)
RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64)
-GEN_VEXT_VV_ENV(vfmacc_vv_h, 2, 2)
-GEN_VEXT_VV_ENV(vfmacc_vv_w, 4, 4)
-GEN_VEXT_VV_ENV(vfmacc_vv_d, 8, 8)
+GEN_VEXT_VV_ENV(vfmacc_vv_h)
+GEN_VEXT_VV_ENV(vfmacc_vv_w)
+GEN_VEXT_VV_ENV(vfmacc_vv_d)

#define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
@@ -XXX,XX +XXX,XX @@ static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16)
RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32)
RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64)
-GEN_VEXT_VF(vfmacc_vf_h, 2, 2)
-GEN_VEXT_VF(vfmacc_vf_w, 4, 4)
-GEN_VEXT_VF(vfmacc_vf_d, 8, 8)
+GEN_VEXT_VF(vfmacc_vf_h)
+GEN_VEXT_VF(vfmacc_vf_w)
+GEN_VEXT_VF(vfmacc_vf_d)

static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
{
@@ -XXX,XX +XXX,XX @@ static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16)
RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32)
RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64)
-GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2, 2)
-GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4, 4)
-GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8, 8)
+GEN_VEXT_VV_ENV(vfnmacc_vv_h)
+GEN_VEXT_VV_ENV(vfnmacc_vv_w)
+GEN_VEXT_VV_ENV(vfnmacc_vv_d)
RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16)
RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32)
RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64)
-GEN_VEXT_VF(vfnmacc_vf_h, 2, 2)
-GEN_VEXT_VF(vfnmacc_vf_w, 4, 4)
-GEN_VEXT_VF(vfnmacc_vf_d, 8, 8)
+GEN_VEXT_VF(vfnmacc_vf_h)
+GEN_VEXT_VF(vfnmacc_vf_w)
+GEN_VEXT_VF(vfnmacc_vf_d)

static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
{
@@ -XXX,XX +XXX,XX @@ static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16)
RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32)
RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64)
-GEN_VEXT_VV_ENV(vfmsac_vv_h, 2, 2)
-GEN_VEXT_VV_ENV(vfmsac_vv_w, 4, 4)
-GEN_VEXT_VV_ENV(vfmsac_vv_d, 8, 8)
+GEN_VEXT_VV_ENV(vfmsac_vv_h)
+GEN_VEXT_VV_ENV(vfmsac_vv_w)
+GEN_VEXT_VV_ENV(vfmsac_vv_d)
RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16)
RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32)
RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64)
-GEN_VEXT_VF(vfmsac_vf_h, 2, 2)
-GEN_VEXT_VF(vfmsac_vf_w, 4, 4)
-GEN_VEXT_VF(vfmsac_vf_d, 8, 8)
+GEN_VEXT_VF(vfmsac_vf_h)
+GEN_VEXT_VF(vfmsac_vf_w)
+GEN_VEXT_VF(vfmsac_vf_d)

static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
{
@@ -XXX,XX +XXX,XX @@ static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16)
RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32)
RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64)
-GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2, 2)
-GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4, 4)
-GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8, 8)
+GEN_VEXT_VV_ENV(vfnmsac_vv_h)
+GEN_VEXT_VV_ENV(vfnmsac_vv_w)
+GEN_VEXT_VV_ENV(vfnmsac_vv_d)
RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16)
RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32)
RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64)
-GEN_VEXT_VF(vfnmsac_vf_h, 2, 2)
-GEN_VEXT_VF(vfnmsac_vf_w, 4, 4)
-GEN_VEXT_VF(vfnmsac_vf_d, 8, 8)
+GEN_VEXT_VF(vfnmsac_vf_h)
+GEN_VEXT_VF(vfnmsac_vf_w)
+GEN_VEXT_VF(vfnmsac_vf_d)

static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
{
@@ -XXX,XX +XXX,XX @@ static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16)
RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32)
RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64)
-GEN_VEXT_VV_ENV(vfmadd_vv_h, 2, 2)
-GEN_VEXT_VV_ENV(vfmadd_vv_w, 4, 4)
-GEN_VEXT_VV_ENV(vfmadd_vv_d, 8, 8)
+GEN_VEXT_VV_ENV(vfmadd_vv_h)
+GEN_VEXT_VV_ENV(vfmadd_vv_w)
+GEN_VEXT_VV_ENV(vfmadd_vv_d)
RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16)
RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32)
RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64)
-GEN_VEXT_VF(vfmadd_vf_h, 2, 2)
-GEN_VEXT_VF(vfmadd_vf_w, 4, 4)
-GEN_VEXT_VF(vfmadd_vf_d, 8, 8)
+GEN_VEXT_VF(vfmadd_vf_h)
+GEN_VEXT_VF(vfmadd_vf_w)
+GEN_VEXT_VF(vfmadd_vf_d)

static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
{
@@ -XXX,XX +XXX,XX @@ static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16)
RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32)
RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64)
-GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2, 2)
-GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4, 4)
-GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8, 8)
+GEN_VEXT_VV_ENV(vfnmadd_vv_h)
+GEN_VEXT_VV_ENV(vfnmadd_vv_w)
+GEN_VEXT_VV_ENV(vfnmadd_vv_d)
RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16)
RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32)
RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64)
-GEN_VEXT_VF(vfnmadd_vf_h, 2, 2)
-GEN_VEXT_VF(vfnmadd_vf_w, 4, 4)
-GEN_VEXT_VF(vfnmadd_vf_d, 8, 8)
+GEN_VEXT_VF(vfnmadd_vf_h)
+GEN_VEXT_VF(vfnmadd_vf_w)
+GEN_VEXT_VF(vfnmadd_vf_d)

static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
{
@@ -XXX,XX +XXX,XX @@ static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16)
RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32)
RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64)
-GEN_VEXT_VV_ENV(vfmsub_vv_h, 2, 2)
-GEN_VEXT_VV_ENV(vfmsub_vv_w, 4, 4)
-GEN_VEXT_VV_ENV(vfmsub_vv_d, 8, 8)
+GEN_VEXT_VV_ENV(vfmsub_vv_h)
+GEN_VEXT_VV_ENV(vfmsub_vv_w)
+GEN_VEXT_VV_ENV(vfmsub_vv_d)
RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16)
RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32)
RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64)
-GEN_VEXT_VF(vfmsub_vf_h, 2, 2)
-GEN_VEXT_VF(vfmsub_vf_w, 4, 4)
-GEN_VEXT_VF(vfmsub_vf_d, 8, 8)
+GEN_VEXT_VF(vfmsub_vf_h)
+GEN_VEXT_VF(vfmsub_vf_w)
+GEN_VEXT_VF(vfmsub_vf_d)

static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
{
@@ -XXX,XX +XXX,XX @@ static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16)
RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32)
RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64)
-GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2, 2)
-GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4, 4)
-GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8, 8)
+GEN_VEXT_VV_ENV(vfnmsub_vv_h)
+GEN_VEXT_VV_ENV(vfnmsub_vv_w)
+GEN_VEXT_VV_ENV(vfnmsub_vv_d)
RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16)
RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32)
RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64)
-GEN_VEXT_VF(vfnmsub_vf_h, 2, 2)
-GEN_VEXT_VF(vfnmsub_vf_w, 4, 4)
-GEN_VEXT_VF(vfnmsub_vf_d, 8, 8)
+GEN_VEXT_VF(vfnmsub_vf_h)
+GEN_VEXT_VF(vfnmsub_vf_w)
+GEN_VEXT_VF(vfnmsub_vf_d)

/* Vector Widening Floating-Point Fused Multiply-Add Instructions */
static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
@@ -XXX,XX +XXX,XX @@ static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)

RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16)
RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32)
-GEN_VEXT_VV_ENV(vfwmacc_vv_h, 2, 4)
-GEN_VEXT_VV_ENV(vfwmacc_vv_w, 4, 8)
+GEN_VEXT_VV_ENV(vfwmacc_vv_h)
+GEN_VEXT_VV_ENV(vfwmacc_vv_w)
RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16)
RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32)
-GEN_VEXT_VF(vfwmacc_vf_h, 2, 4)
-GEN_VEXT_VF(vfwmacc_vf_w, 4, 8)
+GEN_VEXT_VF(vfwmacc_vf_h)
+GEN_VEXT_VF(vfwmacc_vf_w)

static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
{
@@ -XXX,XX +XXX,XX @@ static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)

RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16)
RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32)
-GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 2, 4)
-GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 4, 8)
+GEN_VEXT_VV_ENV(vfwnmacc_vv_h)
+GEN_VEXT_VV_ENV(vfwnmacc_vv_w)
RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16)
RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32)
-GEN_VEXT_VF(vfwnmacc_vf_h, 2, 4)
-GEN_VEXT_VF(vfwnmacc_vf_w, 4, 8)
+GEN_VEXT_VF(vfwnmacc_vf_h)
+GEN_VEXT_VF(vfwnmacc_vf_w)

static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
{
@@ -XXX,XX +XXX,XX @@ static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)

RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16)
RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32)
-GEN_VEXT_VV_ENV(vfwmsac_vv_h, 2, 4)
-GEN_VEXT_VV_ENV(vfwmsac_vv_w, 4, 8)
+GEN_VEXT_VV_ENV(vfwmsac_vv_h)
+GEN_VEXT_VV_ENV(vfwmsac_vv_w)
RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16)
RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32)
-GEN_VEXT_VF(vfwmsac_vf_h, 2, 4)
-GEN_VEXT_VF(vfwmsac_vf_w, 4, 8)
+GEN_VEXT_VF(vfwmsac_vf_h)
+GEN_VEXT_VF(vfwmsac_vf_w)

static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
{
@@ -XXX,XX +XXX,XX @@ static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
1555 | |||
1556 | RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16) | ||
1557 | RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32) | ||
1558 | -GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 2, 4) | ||
1559 | -GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 4, 8) | ||
1560 | +GEN_VEXT_VV_ENV(vfwnmsac_vv_h) | ||
1561 | +GEN_VEXT_VV_ENV(vfwnmsac_vv_w) | ||
1562 | RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16) | ||
1563 | RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32) | ||
1564 | -GEN_VEXT_VF(vfwnmsac_vf_h, 2, 4) | ||
1565 | -GEN_VEXT_VF(vfwnmsac_vf_w, 4, 8) | ||
1566 | +GEN_VEXT_VF(vfwnmsac_vf_h) | ||
1567 | +GEN_VEXT_VF(vfwnmsac_vf_w) | ||
1568 | |||
1569 | /* Vector Floating-Point Square-Root Instruction */ | ||
1570 | /* (TD, T2, TX2) */ | ||
1571 | @@ -XXX,XX +XXX,XX @@ static void do_##NAME(void *vd, void *vs2, int i, \ | ||
1572 | *((TD *)vd + HD(i)) = OP(s2, &env->fp_status); \ | ||
1573 | } | ||
1574 | |||
1575 | -#define GEN_VEXT_V_ENV(NAME, ESZ, DSZ) \ | ||
1576 | +#define GEN_VEXT_V_ENV(NAME) \ | ||
1577 | void HELPER(NAME)(void *vd, void *v0, void *vs2, \ | ||
1578 | CPURISCVState *env, uint32_t desc) \ | ||
1579 | { \ | ||
1580 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs2, \ | ||
1581 | RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt) | ||
1582 | RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt) | ||
1583 | RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt) | ||
1584 | -GEN_VEXT_V_ENV(vfsqrt_v_h, 2, 2) | ||
1585 | -GEN_VEXT_V_ENV(vfsqrt_v_w, 4, 4) | ||
1586 | -GEN_VEXT_V_ENV(vfsqrt_v_d, 8, 8) | ||
1587 | +GEN_VEXT_V_ENV(vfsqrt_v_h) | ||
1588 | +GEN_VEXT_V_ENV(vfsqrt_v_w) | ||
1589 | +GEN_VEXT_V_ENV(vfsqrt_v_d) | ||
1590 | |||
1591 | /* | ||
1592 | * Vector Floating-Point Reciprocal Square-Root Estimate Instruction | ||
1593 | @@ -XXX,XX +XXX,XX @@ static float64 frsqrt7_d(float64 f, float_status *s) | ||
1594 | RVVCALL(OPFVV1, vfrsqrt7_v_h, OP_UU_H, H2, H2, frsqrt7_h) | ||
1595 | RVVCALL(OPFVV1, vfrsqrt7_v_w, OP_UU_W, H4, H4, frsqrt7_s) | ||
1596 | RVVCALL(OPFVV1, vfrsqrt7_v_d, OP_UU_D, H8, H8, frsqrt7_d) | ||
1597 | -GEN_VEXT_V_ENV(vfrsqrt7_v_h, 2, 2) | ||
1598 | -GEN_VEXT_V_ENV(vfrsqrt7_v_w, 4, 4) | ||
1599 | -GEN_VEXT_V_ENV(vfrsqrt7_v_d, 8, 8) | ||
1600 | +GEN_VEXT_V_ENV(vfrsqrt7_v_h) | ||
1601 | +GEN_VEXT_V_ENV(vfrsqrt7_v_w) | ||
1602 | +GEN_VEXT_V_ENV(vfrsqrt7_v_d) | ||
1603 | |||
1604 | /* | ||
1605 | * Vector Floating-Point Reciprocal Estimate Instruction | ||
1606 | @@ -XXX,XX +XXX,XX @@ static float64 frec7_d(float64 f, float_status *s) | ||
1607 | RVVCALL(OPFVV1, vfrec7_v_h, OP_UU_H, H2, H2, frec7_h) | ||
1608 | RVVCALL(OPFVV1, vfrec7_v_w, OP_UU_W, H4, H4, frec7_s) | ||
1609 | RVVCALL(OPFVV1, vfrec7_v_d, OP_UU_D, H8, H8, frec7_d) | ||
1610 | -GEN_VEXT_V_ENV(vfrec7_v_h, 2, 2) | ||
1611 | -GEN_VEXT_V_ENV(vfrec7_v_w, 4, 4) | ||
1612 | -GEN_VEXT_V_ENV(vfrec7_v_d, 8, 8) | ||
1613 | +GEN_VEXT_V_ENV(vfrec7_v_h) | ||
1614 | +GEN_VEXT_V_ENV(vfrec7_v_w) | ||
1615 | +GEN_VEXT_V_ENV(vfrec7_v_d) | ||
1616 | |||
1617 | /* Vector Floating-Point MIN/MAX Instructions */ | ||
1618 | RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minimum_number) | ||
1619 | RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, float32_minimum_number) | ||
1620 | RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minimum_number) | ||
1621 | -GEN_VEXT_VV_ENV(vfmin_vv_h, 2, 2) | ||
1622 | -GEN_VEXT_VV_ENV(vfmin_vv_w, 4, 4) | ||
1623 | -GEN_VEXT_VV_ENV(vfmin_vv_d, 8, 8) | ||
1624 | +GEN_VEXT_VV_ENV(vfmin_vv_h) | ||
1625 | +GEN_VEXT_VV_ENV(vfmin_vv_w) | ||
1626 | +GEN_VEXT_VV_ENV(vfmin_vv_d) | ||
1627 | RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minimum_number) | ||
1628 | RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minimum_number) | ||
1629 | RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minimum_number) | ||
1630 | -GEN_VEXT_VF(vfmin_vf_h, 2, 2) | ||
1631 | -GEN_VEXT_VF(vfmin_vf_w, 4, 4) | ||
1632 | -GEN_VEXT_VF(vfmin_vf_d, 8, 8) | ||
1633 | +GEN_VEXT_VF(vfmin_vf_h) | ||
1634 | +GEN_VEXT_VF(vfmin_vf_w) | ||
1635 | +GEN_VEXT_VF(vfmin_vf_d) | ||
1636 | |||
1637 | RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maximum_number) | ||
1638 | RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maximum_number) | ||
1639 | RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maximum_number) | ||
1640 | -GEN_VEXT_VV_ENV(vfmax_vv_h, 2, 2) | ||
1641 | -GEN_VEXT_VV_ENV(vfmax_vv_w, 4, 4) | ||
1642 | -GEN_VEXT_VV_ENV(vfmax_vv_d, 8, 8) | ||
1643 | +GEN_VEXT_VV_ENV(vfmax_vv_h) | ||
1644 | +GEN_VEXT_VV_ENV(vfmax_vv_w) | ||
1645 | +GEN_VEXT_VV_ENV(vfmax_vv_d) | ||
1646 | RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maximum_number) | ||
1647 | RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maximum_number) | ||
1648 | RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maximum_number) | ||
1649 | -GEN_VEXT_VF(vfmax_vf_h, 2, 2) | ||
1650 | -GEN_VEXT_VF(vfmax_vf_w, 4, 4) | ||
1651 | -GEN_VEXT_VF(vfmax_vf_d, 8, 8) | ||
1652 | +GEN_VEXT_VF(vfmax_vf_h) | ||
1653 | +GEN_VEXT_VF(vfmax_vf_w) | ||
1654 | +GEN_VEXT_VF(vfmax_vf_d) | ||
1655 | |||
1656 | /* Vector Floating-Point Sign-Injection Instructions */ | ||
1657 | static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s) | ||
1658 | @@ -XXX,XX +XXX,XX @@ static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s) | ||
1659 | RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16) | ||
1660 | RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32) | ||
1661 | RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64) | ||
1662 | -GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2, 2) | ||
1663 | -GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4, 4) | ||
1664 | -GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8, 8) | ||
1665 | +GEN_VEXT_VV_ENV(vfsgnj_vv_h) | ||
1666 | +GEN_VEXT_VV_ENV(vfsgnj_vv_w) | ||
1667 | +GEN_VEXT_VV_ENV(vfsgnj_vv_d) | ||
1668 | RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16) | ||
1669 | RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32) | ||
1670 | RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64) | ||
1671 | -GEN_VEXT_VF(vfsgnj_vf_h, 2, 2) | ||
1672 | -GEN_VEXT_VF(vfsgnj_vf_w, 4, 4) | ||
1673 | -GEN_VEXT_VF(vfsgnj_vf_d, 8, 8) | ||
1674 | +GEN_VEXT_VF(vfsgnj_vf_h) | ||
1675 | +GEN_VEXT_VF(vfsgnj_vf_w) | ||
1676 | +GEN_VEXT_VF(vfsgnj_vf_d) | ||
1677 | |||
1678 | static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s) | ||
1679 | { | ||
1680 | @@ -XXX,XX +XXX,XX @@ static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s) | ||
1681 | RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16) | ||
1682 | RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32) | ||
1683 | RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64) | ||
1684 | -GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2, 2) | ||
1685 | -GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4, 4) | ||
1686 | -GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8, 8) | ||
1687 | +GEN_VEXT_VV_ENV(vfsgnjn_vv_h) | ||
1688 | +GEN_VEXT_VV_ENV(vfsgnjn_vv_w) | ||
1689 | +GEN_VEXT_VV_ENV(vfsgnjn_vv_d) | ||
1690 | RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16) | ||
1691 | RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32) | ||
1692 | RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64) | ||
1693 | -GEN_VEXT_VF(vfsgnjn_vf_h, 2, 2) | ||
1694 | -GEN_VEXT_VF(vfsgnjn_vf_w, 4, 4) | ||
1695 | -GEN_VEXT_VF(vfsgnjn_vf_d, 8, 8) | ||
1696 | +GEN_VEXT_VF(vfsgnjn_vf_h) | ||
1697 | +GEN_VEXT_VF(vfsgnjn_vf_w) | ||
1698 | +GEN_VEXT_VF(vfsgnjn_vf_d) | ||
1699 | |||
1700 | static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s) | ||
1701 | { | ||
1702 | @@ -XXX,XX +XXX,XX @@ static uint64_t fsgnjx64(uint64_t a, uint64_t b, float_status *s) | ||
1703 | RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16) | ||
1704 | RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32) | ||
1705 | RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64) | ||
1706 | -GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2, 2) | ||
1707 | -GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4, 4) | ||
1708 | -GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8, 8) | ||
1709 | +GEN_VEXT_VV_ENV(vfsgnjx_vv_h) | ||
1710 | +GEN_VEXT_VV_ENV(vfsgnjx_vv_w) | ||
1711 | +GEN_VEXT_VV_ENV(vfsgnjx_vv_d) | ||
1712 | RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16) | ||
1713 | RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32) | ||
1714 | RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64) | ||
1715 | -GEN_VEXT_VF(vfsgnjx_vf_h, 2, 2) | ||
1716 | -GEN_VEXT_VF(vfsgnjx_vf_w, 4, 4) | ||
1717 | -GEN_VEXT_VF(vfsgnjx_vf_d, 8, 8) | ||
1718 | +GEN_VEXT_VF(vfsgnjx_vf_h) | ||
1719 | +GEN_VEXT_VF(vfsgnjx_vf_w) | ||
1720 | +GEN_VEXT_VF(vfsgnjx_vf_d) | ||
1721 | |||
1722 | /* Vector Floating-Point Compare Instructions */ | ||
1723 | #define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP) \ | ||
1724 | @@ -XXX,XX +XXX,XX @@ static void do_##NAME(void *vd, void *vs2, int i) \ | ||
1725 | *((TD *)vd + HD(i)) = OP(s2); \ | ||
1726 | } | ||
1727 | |||
1728 | -#define GEN_VEXT_V(NAME, ESZ, DSZ) \ | ||
1729 | +#define GEN_VEXT_V(NAME) \ | ||
1730 | void HELPER(NAME)(void *vd, void *v0, void *vs2, \ | ||
1731 | CPURISCVState *env, uint32_t desc) \ | ||
1732 | { \ | ||
1733 | @@ -XXX,XX +XXX,XX @@ target_ulong fclass_d(uint64_t frs1) | ||
1734 | RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h) | ||
1735 | RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s) | ||
1736 | RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d) | ||
1737 | -GEN_VEXT_V(vfclass_v_h, 2, 2) | ||
1738 | -GEN_VEXT_V(vfclass_v_w, 4, 4) | ||
1739 | -GEN_VEXT_V(vfclass_v_d, 8, 8) | ||
1740 | +GEN_VEXT_V(vfclass_v_h) | ||
1741 | +GEN_VEXT_V(vfclass_v_w) | ||
1742 | +GEN_VEXT_V(vfclass_v_d) | ||
1743 | |||
1744 | /* Vector Floating-Point Merge Instruction */ | ||
1745 | #define GEN_VFMERGE_VF(NAME, ETYPE, H) \ | ||
1746 | @@ -XXX,XX +XXX,XX @@ GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8) | ||
1747 | RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16) | ||
1748 | RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32) | ||
1749 | RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64) | ||
1750 | -GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2, 2) | ||
1751 | -GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4, 4) | ||
1752 | -GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8, 8) | ||
1753 | +GEN_VEXT_V_ENV(vfcvt_xu_f_v_h) | ||
1754 | +GEN_VEXT_V_ENV(vfcvt_xu_f_v_w) | ||
1755 | +GEN_VEXT_V_ENV(vfcvt_xu_f_v_d) | ||
1756 | |||
1757 | /* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */ | ||
1758 | RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16) | ||
1759 | RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32) | ||
1760 | RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64) | ||
1761 | -GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2, 2) | ||
1762 | -GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4, 4) | ||
1763 | -GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8, 8) | ||
1764 | +GEN_VEXT_V_ENV(vfcvt_x_f_v_h) | ||
1765 | +GEN_VEXT_V_ENV(vfcvt_x_f_v_w) | ||
1766 | +GEN_VEXT_V_ENV(vfcvt_x_f_v_d) | ||
1767 | |||
1768 | /* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */ | ||
1769 | RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16) | ||
1770 | RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32) | ||
1771 | RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64) | ||
1772 | -GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2, 2) | ||
1773 | -GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4, 4) | ||
1774 | -GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8, 8) | ||
1775 | +GEN_VEXT_V_ENV(vfcvt_f_xu_v_h) | ||
1776 | +GEN_VEXT_V_ENV(vfcvt_f_xu_v_w) | ||
1777 | +GEN_VEXT_V_ENV(vfcvt_f_xu_v_d) | ||
1778 | |||
1779 | /* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */ | ||
1780 | RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16) | ||
1781 | RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32) | ||
1782 | RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64) | ||
1783 | -GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2, 2) | ||
1784 | -GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4, 4) | ||
1785 | -GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8, 8) | ||
1786 | +GEN_VEXT_V_ENV(vfcvt_f_x_v_h) | ||
1787 | +GEN_VEXT_V_ENV(vfcvt_f_x_v_w) | ||
1788 | +GEN_VEXT_V_ENV(vfcvt_f_x_v_d) | ||
1789 | |||
1790 | /* Widening Floating-Point/Integer Type-Convert Instructions */ | ||
1791 | /* (TD, T2, TX2) */ | ||
1792 | @@ -XXX,XX +XXX,XX @@ GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8, 8) | ||
1793 | /* vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer.*/ | ||
1794 | RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32) | ||
1795 | RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64) | ||
1796 | -GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 2, 4) | ||
1797 | -GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 4, 8) | ||
1798 | +GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h) | ||
1799 | +GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w) | ||
1800 | |||
1801 | /* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */ | ||
1802 | RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32) | ||
1803 | RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64) | ||
1804 | -GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 2, 4) | ||
1805 | -GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 4, 8) | ||
1806 | +GEN_VEXT_V_ENV(vfwcvt_x_f_v_h) | ||
1807 | +GEN_VEXT_V_ENV(vfwcvt_x_f_v_w) | ||
1808 | |||
1809 | /* vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float */ | ||
1810 | RVVCALL(OPFVV1, vfwcvt_f_xu_v_b, WOP_UU_B, H2, H1, uint8_to_float16) | ||
1811 | RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32) | ||
1812 | RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64) | ||
1813 | -GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b, 1, 2) | ||
1814 | -GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 2, 4) | ||
1815 | -GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 4, 8) | ||
1816 | +GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b) | ||
1817 | +GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h) | ||
1818 | +GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w) | ||
1819 | |||
1820 | /* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */ | ||
1821 | RVVCALL(OPFVV1, vfwcvt_f_x_v_b, WOP_UU_B, H2, H1, int8_to_float16) | ||
1822 | RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32) | ||
1823 | RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64) | ||
1824 | -GEN_VEXT_V_ENV(vfwcvt_f_x_v_b, 1, 2) | ||
1825 | -GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 2, 4) | ||
1826 | -GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 4, 8) | ||
1827 | +GEN_VEXT_V_ENV(vfwcvt_f_x_v_b) | ||
1828 | +GEN_VEXT_V_ENV(vfwcvt_f_x_v_h) | ||
1829 | +GEN_VEXT_V_ENV(vfwcvt_f_x_v_w) | ||
1830 | |||
1831 | /* | ||
1832 | * vfwcvt.f.f.v vd, vs2, vm | ||
1833 | @@ -XXX,XX +XXX,XX @@ static uint32_t vfwcvtffv16(uint16_t a, float_status *s) | ||
1834 | |||
1835 | RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16) | ||
1836 | RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64) | ||
1837 | -GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 2, 4) | ||
1838 | -GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 4, 8) | ||
1839 | +GEN_VEXT_V_ENV(vfwcvt_f_f_v_h) | ||
1840 | +GEN_VEXT_V_ENV(vfwcvt_f_f_v_w) | ||
1841 | |||
1842 | /* Narrowing Floating-Point/Integer Type-Convert Instructions */ | ||
1843 | /* (TD, T2, TX2) */ | ||
1844 | @@ -XXX,XX +XXX,XX @@ GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 4, 8) | ||
1845 | RVVCALL(OPFVV1, vfncvt_xu_f_w_b, NOP_UU_B, H1, H2, float16_to_uint8) | ||
1846 | RVVCALL(OPFVV1, vfncvt_xu_f_w_h, NOP_UU_H, H2, H4, float32_to_uint16) | ||
1847 | RVVCALL(OPFVV1, vfncvt_xu_f_w_w, NOP_UU_W, H4, H8, float64_to_uint32) | ||
1848 | -GEN_VEXT_V_ENV(vfncvt_xu_f_w_b, 1, 1) | ||
1849 | -GEN_VEXT_V_ENV(vfncvt_xu_f_w_h, 2, 2) | ||
1850 | -GEN_VEXT_V_ENV(vfncvt_xu_f_w_w, 4, 4) | ||
1851 | +GEN_VEXT_V_ENV(vfncvt_xu_f_w_b) | ||
1852 | +GEN_VEXT_V_ENV(vfncvt_xu_f_w_h) | ||
1853 | +GEN_VEXT_V_ENV(vfncvt_xu_f_w_w) | ||
1854 | |||
1855 | /* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */ | ||
1856 | RVVCALL(OPFVV1, vfncvt_x_f_w_b, NOP_UU_B, H1, H2, float16_to_int8) | ||
1857 | RVVCALL(OPFVV1, vfncvt_x_f_w_h, NOP_UU_H, H2, H4, float32_to_int16) | ||
1858 | RVVCALL(OPFVV1, vfncvt_x_f_w_w, NOP_UU_W, H4, H8, float64_to_int32) | ||
1859 | -GEN_VEXT_V_ENV(vfncvt_x_f_w_b, 1, 1) | ||
1860 | -GEN_VEXT_V_ENV(vfncvt_x_f_w_h, 2, 2) | ||
1861 | -GEN_VEXT_V_ENV(vfncvt_x_f_w_w, 4, 4) | ||
1862 | +GEN_VEXT_V_ENV(vfncvt_x_f_w_b) | ||
1863 | +GEN_VEXT_V_ENV(vfncvt_x_f_w_h) | ||
1864 | +GEN_VEXT_V_ENV(vfncvt_x_f_w_w) | ||
1865 | |||
1866 | /* vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float */ | ||
1867 | RVVCALL(OPFVV1, vfncvt_f_xu_w_h, NOP_UU_H, H2, H4, uint32_to_float16) | ||
1868 | RVVCALL(OPFVV1, vfncvt_f_xu_w_w, NOP_UU_W, H4, H8, uint64_to_float32) | ||
1869 | -GEN_VEXT_V_ENV(vfncvt_f_xu_w_h, 2, 2) | ||
1870 | -GEN_VEXT_V_ENV(vfncvt_f_xu_w_w, 4, 4) | ||
1871 | +GEN_VEXT_V_ENV(vfncvt_f_xu_w_h) | ||
1872 | +GEN_VEXT_V_ENV(vfncvt_f_xu_w_w) | ||
1873 | |||
1874 | /* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */ | ||
1875 | RVVCALL(OPFVV1, vfncvt_f_x_w_h, NOP_UU_H, H2, H4, int32_to_float16) | ||
1876 | RVVCALL(OPFVV1, vfncvt_f_x_w_w, NOP_UU_W, H4, H8, int64_to_float32) | ||
1877 | -GEN_VEXT_V_ENV(vfncvt_f_x_w_h, 2, 2) | ||
1878 | -GEN_VEXT_V_ENV(vfncvt_f_x_w_w, 4, 4) | ||
1879 | +GEN_VEXT_V_ENV(vfncvt_f_x_w_h) | ||
1880 | +GEN_VEXT_V_ENV(vfncvt_f_x_w_w) | ||
1881 | |||
1882 | /* vfncvt.f.f.v vd, vs2, vm # Convert double float to single-width float. */ | ||
1883 | static uint16_t vfncvtffv16(uint32_t a, float_status *s) | ||
1884 | @@ -XXX,XX +XXX,XX @@ static uint16_t vfncvtffv16(uint32_t a, float_status *s) | ||
1885 | |||
1886 | RVVCALL(OPFVV1, vfncvt_f_f_w_h, NOP_UU_H, H2, H4, vfncvtffv16) | ||
1887 | RVVCALL(OPFVV1, vfncvt_f_f_w_w, NOP_UU_W, H4, H8, float64_to_float32) | ||
1888 | -GEN_VEXT_V_ENV(vfncvt_f_f_w_h, 2, 2) | ||
1889 | -GEN_VEXT_V_ENV(vfncvt_f_f_w_w, 4, 4) | ||
1890 | +GEN_VEXT_V_ENV(vfncvt_f_f_w_h) | ||
1891 | +GEN_VEXT_V_ENV(vfncvt_f_f_w_w) | ||
1892 | |||
1893 | /* | ||
1894 | *** Vector Reduction Operations | ||
1895 | -- | ||
1896 | 2.36.1 | diff view generated by jsdifflib |
From: eopXD <yueh.ting.chen@gmail.com>

No functional change intended in this commit.

Signed-off-by: eop Chen <eop.chen@sifive.com>
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
Message-Id: <165449614532.19704.7000832880482980398-2@git.sr.ht>
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
---
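Note for reviewers: the change is purely mechanical; the MMUAccessType
argument is unused by vext_ldst_stride() and the other load/store
routines, so every caller simply drops it. Taking the
GEN_VEXT_LD_STRIDE(vlse8_v, int8_t, lde_b) instantiation below as the
example, a call ends up with this shape (sketch only):

    vext_ldst_stride(vd, v0, base, stride, env, desc, vm, lde_b,
                     ctzl(sizeof(int8_t)), GETPC());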
 target/riscv/vector_helper.c | 35 ++++++++++++++++-------------------
 1 file changed, 16 insertions(+), 19 deletions(-)

diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -XXX,XX +XXX,XX @@ vext_ldst_stride(void *vd, void *v0, target_ulong base,
                 target_ulong stride, CPURISCVState *env,
                 uint32_t desc, uint32_t vm,
                 vext_ldst_elem_fn *ldst_elem,
-                uint32_t esz, uintptr_t ra, MMUAccessType access_type)
+                uint32_t esz, uintptr_t ra)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void * v0, target_ulong base, \
{ \
    uint32_t vm = vext_vm(desc); \
    vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN, \
-                     ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD); \
+                     ctzl(sizeof(ETYPE)), GETPC()); \
}

GEN_VEXT_LD_STRIDE(vlse8_v, int8_t, lde_b)
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
{ \
    uint32_t vm = vext_vm(desc); \
    vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN, \
-                     ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_STORE); \
+                     ctzl(sizeof(ETYPE)), GETPC()); \
}

GEN_VEXT_ST_STRIDE(vsse8_v, int8_t, ste_b)
@@ -XXX,XX +XXX,XX @@ GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d)
static void
vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
             vext_ldst_elem_fn *ldst_elem, uint32_t esz, uint32_t evl,
-             uintptr_t ra, MMUAccessType access_type)
+             uintptr_t ra)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \
{ \
    uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \
    vext_ldst_stride(vd, v0, base, stride, env, desc, false, LOAD_FN, \
-                     ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD); \
+                     ctzl(sizeof(ETYPE)), GETPC()); \
} \
\
void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    vext_ldst_us(vd, base, env, desc, LOAD_FN, \
-                 ctzl(sizeof(ETYPE)), env->vl, GETPC(), MMU_DATA_LOAD); \
+                 ctzl(sizeof(ETYPE)), env->vl, GETPC()); \
}

GEN_VEXT_LD_US(vle8_v, int8_t, lde_b)
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \
{ \
    uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \
    vext_ldst_stride(vd, v0, base, stride, env, desc, false, STORE_FN, \
-                     ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_STORE); \
+                     ctzl(sizeof(ETYPE)), GETPC()); \
} \
\
void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    vext_ldst_us(vd, base, env, desc, STORE_FN, \
-                 ctzl(sizeof(ETYPE)), env->vl, GETPC(), MMU_DATA_STORE); \
+                 ctzl(sizeof(ETYPE)), env->vl, GETPC()); \
}

GEN_VEXT_ST_US(vse8_v, int8_t, ste_b)
@@ -XXX,XX +XXX,XX @@ void HELPER(vlm_v)(void *vd, void *v0, target_ulong base,
    /* evl = ceil(vl/8) */
    uint8_t evl = (env->vl + 7) >> 3;
    vext_ldst_us(vd, base, env, desc, lde_b,
-                 0, evl, GETPC(), MMU_DATA_LOAD);
+                 0, evl, GETPC());
}

void HELPER(vsm_v)(void *vd, void *v0, target_ulong base,
@@ -XXX,XX +XXX,XX @@ void HELPER(vsm_v)(void *vd, void *v0, target_ulong base,
    /* evl = ceil(vl/8) */
    uint8_t evl = (env->vl + 7) >> 3;
    vext_ldst_us(vd, base, env, desc, ste_b,
-                 0, evl, GETPC(), MMU_DATA_STORE);
+                 0, evl, GETPC());
}

/*
@@ -XXX,XX +XXX,XX @@ vext_ldst_index(void *vd, void *v0, target_ulong base,
                void *vs2, CPURISCVState *env, uint32_t desc,
                vext_get_index_addr get_index_addr,
                vext_ldst_elem_fn *ldst_elem,
-                uint32_t esz, uintptr_t ra, MMUAccessType access_type)
+                uint32_t esz, uintptr_t ra)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
                  void *vs2, CPURISCVState *env, uint32_t desc) \
{ \
    vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \
-                    LOAD_FN, ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD); \
+                    LOAD_FN, ctzl(sizeof(ETYPE)), GETPC()); \
}

GEN_VEXT_LD_INDEX(vlxei8_8_v, int8_t, idx_b, lde_b)
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
{ \
    vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \
                    STORE_FN, ctzl(sizeof(ETYPE)), \
-                    GETPC(), MMU_DATA_STORE); \
+                    GETPC()); \
}

GEN_VEXT_ST_INDEX(vsxei8_8_v, int8_t, idx_b, ste_b)
@@ -XXX,XX +XXX,XX @@ GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d)
 */
static void
vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
-                vext_ldst_elem_fn *ldst_elem, uint32_t esz, uintptr_t ra,
-                MMUAccessType access_type)
+                vext_ldst_elem_fn *ldst_elem, uint32_t esz, uintptr_t ra)
{
    uint32_t i, k, off, pos;
    uint32_t nf = vext_nf(desc);
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, target_ulong base, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    vext_ldst_whole(vd, base, env, desc, LOAD_FN, \
-                    ctzl(sizeof(ETYPE)), GETPC(), \
-                    MMU_DATA_LOAD); \
+                    ctzl(sizeof(ETYPE)), GETPC()); \
}

GEN_VEXT_LD_WHOLE(vl1re8_v, int8_t, lde_b)
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, target_ulong base, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    vext_ldst_whole(vd, base, env, desc, STORE_FN, \
-                    ctzl(sizeof(ETYPE)), GETPC(), \
-                    MMU_DATA_STORE); \
+                    ctzl(sizeof(ETYPE)), GETPC()); \
}

GEN_VEXT_ST_WHOLE(vs1r_v, int8_t, ste_b)
--
2.36.1

From: eopXD <yueh.ting.chen@gmail.com>

No functional change intended in this commit.

Signed-off-by: eop Chen <eop.chen@sifive.com>
Reviewed-by: Frank Chang <frank.chang@sifive.com>
Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn>
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
Message-Id: <165449614532.19704.7000832880482980398-3@git.sr.ht>
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
---
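Note for reviewers: the rename only makes the units explicit; the
VLMAX computation itself is untouched. A worked example with assumed
values (VLEN = 128 bits so vlenb = 16, SEW = 32 so log2_esz = 2,
LMUL = 2 so vext_lmul(desc) = 1):

    int scale = vext_lmul(desc) - log2_esz;       /* 1 - 2 = -1 */
    uint32_t vlmax = scale < 0 ? vlenb >> -scale  /* 16 >> 1 = 8 */
                               : vlenb << scale;
    /* Matches VLMAX = LMUL * VLEN / SEW = 2 * 128 / 32 = 8. */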
 target/riscv/vector_helper.c | 76 ++++++++++++++++++------------------
 1 file changed, 38 insertions(+), 38 deletions(-)

diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -XXX,XX +XXX,XX @@ static inline int32_t vext_lmul(uint32_t desc)
/*
 * Get the maximum number of elements can be operated.
 *
- * esz: log2 of element size in bytes.
+ * log2_esz: log2 of element size in bytes.
 */
-static inline uint32_t vext_max_elems(uint32_t desc, uint32_t esz)
+static inline uint32_t vext_max_elems(uint32_t desc, uint32_t log2_esz)
{
    /*
     * As simd_desc support at most 2048 bytes, the max vlen is 1024 bits.
@@ -XXX,XX +XXX,XX @@ static inline uint32_t vext_max_elems(uint32_t desc, uint32_t esz)
    uint32_t vlenb = simd_maxsz(desc);

    /* Return VLMAX */
-    int scale = vext_lmul(desc) - esz;
+    int scale = vext_lmul(desc) - log2_esz;
    return scale < 0 ? vlenb >> -scale : vlenb << scale;
}

@@ -XXX,XX +XXX,XX @@ vext_ldst_stride(void *vd, void *v0, target_ulong base,
                 target_ulong stride, CPURISCVState *env,
                 uint32_t desc, uint32_t vm,
                 vext_ldst_elem_fn *ldst_elem,
-                uint32_t esz, uintptr_t ra)
+                uint32_t log2_esz, uintptr_t ra)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
-    uint32_t max_elems = vext_max_elems(desc, esz);
+    uint32_t max_elems = vext_max_elems(desc, log2_esz);

    for (i = env->vstart; i < env->vl; i++, env->vstart++) {
        if (!vm && !vext_elem_mask(v0, i)) {
@@ -XXX,XX +XXX,XX @@ vext_ldst_stride(void *vd, void *v0, target_ulong base,

        k = 0;
        while (k < nf) {
-            target_ulong addr = base + stride * i + (k << esz);
+            target_ulong addr = base + stride * i + (k << log2_esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
            k++;
        }
@@ -XXX,XX +XXX,XX @@ GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d)
/* unmasked unit-stride load and store operation*/
static void
vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
-             vext_ldst_elem_fn *ldst_elem, uint32_t esz, uint32_t evl,
+             vext_ldst_elem_fn *ldst_elem, uint32_t log2_esz, uint32_t evl,
             uintptr_t ra)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
-    uint32_t max_elems = vext_max_elems(desc, esz);
+    uint32_t max_elems = vext_max_elems(desc, log2_esz);

    /* load bytes from guest memory */
    for (i = env->vstart; i < evl; i++, env->vstart++) {
        k = 0;
        while (k < nf) {
-            target_ulong addr = base + ((i * nf + k) << esz);
+            target_ulong addr = base + ((i * nf + k) << log2_esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
            k++;
        }
@@ -XXX,XX +XXX,XX @@ vext_ldst_index(void *vd, void *v0, target_ulong base,
                void *vs2, CPURISCVState *env, uint32_t desc,
                vext_get_index_addr get_index_addr,
                vext_ldst_elem_fn *ldst_elem,
-                uint32_t esz, uintptr_t ra)
+                uint32_t log2_esz, uintptr_t ra)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
    uint32_t vm = vext_vm(desc);
-    uint32_t max_elems = vext_max_elems(desc, esz);
+    uint32_t max_elems = vext_max_elems(desc, log2_esz);

    /* load bytes from guest memory */
    for (i = env->vstart; i < env->vl; i++, env->vstart++) {
@@ -XXX,XX +XXX,XX @@ vext_ldst_index(void *vd, void *v0, target_ulong base,

        k = 0;
        while (k < nf) {
-            abi_ptr addr = get_index_addr(base, i, vs2) + (k << esz);
+            abi_ptr addr = get_index_addr(base, i, vs2) + (k << log2_esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
            k++;
        }
@@ -XXX,XX +XXX,XX @@ static inline void
vext_ldff(void *vd, void *v0, target_ulong base,
          CPURISCVState *env, uint32_t desc,
          vext_ldst_elem_fn *ldst_elem,
-          uint32_t esz, uintptr_t ra)
+          uint32_t log2_esz, uintptr_t ra)
{
    void *host;
    uint32_t i, k, vl = 0;
    uint32_t nf = vext_nf(desc);
    uint32_t vm = vext_vm(desc);
-    uint32_t max_elems = vext_max_elems(desc, esz);
+    uint32_t max_elems = vext_max_elems(desc, log2_esz);
    target_ulong addr, offset, remain;

    /* probe every access*/
@@ -XXX,XX +XXX,XX @@ vext_ldff(void *vd, void *v0, target_ulong base,
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
-        addr = adjust_addr(env, base + i * (nf << esz));
+        addr = adjust_addr(env, base + i * (nf << log2_esz));
        if (i == 0) {
-            probe_pages(env, addr, nf << esz, ra, MMU_DATA_LOAD);
+            probe_pages(env, addr, nf << log2_esz, ra, MMU_DATA_LOAD);
        } else {
            /* if it triggers an exception, no need to check watchpoint */
-            remain = nf << esz;
+            remain = nf << log2_esz;
            while (remain > 0) {
                offset = -(addr | TARGET_PAGE_MASK);
                host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD,
@@ -XXX,XX +XXX,XX @@ ProbeSuccess:
            continue;
        }
        while (k < nf) {
-            target_ulong addr = base + ((i * nf + k) << esz);
+            target_ulong addr = base + ((i * nf + k) << log2_esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
            k++;
        }
@@ -XXX,XX +XXX,XX @@ GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d)
 */
static void
vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
-                vext_ldst_elem_fn *ldst_elem, uint32_t esz, uintptr_t ra)
+                vext_ldst_elem_fn *ldst_elem, uint32_t log2_esz, uintptr_t ra)
{
    uint32_t i, k, off, pos;
    uint32_t nf = vext_nf(desc);
    uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3;
-    uint32_t max_elems = vlenb >> esz;
+    uint32_t max_elems = vlenb >> log2_esz;

    k = env->vstart / max_elems;
    off = env->vstart % max_elems;
@@ -XXX,XX +XXX,XX @@ vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
    if (off) {
        /* load/store rest of elements of current segment pointed by vstart */
        for (pos = off; pos < max_elems; pos++, env->vstart++) {
-            target_ulong addr = base + ((pos + k * max_elems) << esz);
+            target_ulong addr = base + ((pos + k * max_elems) << log2_esz);
            ldst_elem(env, adjust_addr(env, addr), pos + k * max_elems, vd, ra);
        }
        k++;
@@ -XXX,XX +XXX,XX @@ vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
    /* load/store elements for rest of segments */
    for (; k < nf; k++) {
        for (i = 0; i < max_elems; i++, env->vstart++) {
-            target_ulong addr = base + ((i + k * max_elems) << esz);
+            target_ulong addr = base + ((i + k * max_elems) << log2_esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
        }
    }
@@ -XXX,XX +XXX,XX @@ GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2)
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4)
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8)

-#define GEN_VEXT_VSLIE1UP(ESZ, H) \
-static void vslide1up_##ESZ(void *vd, void *v0, target_ulong s1, void *vs2, \
-                            CPURISCVState *env, uint32_t desc) \
+#define GEN_VEXT_VSLIE1UP(BITWIDTH, H) \
+static void vslide1up_##BITWIDTH(void *vd, void *v0, target_ulong s1, \
+                                 void *vs2, CPURISCVState *env, uint32_t desc) \
{ \
-    typedef uint##ESZ##_t ETYPE; \
+    typedef uint##BITWIDTH##_t ETYPE; \
    uint32_t vm = vext_vm(desc); \
    uint32_t vl = env->vl; \
    uint32_t i; \
@@ -XXX,XX +XXX,XX @@ GEN_VEXT_VSLIE1UP(16, H2)
GEN_VEXT_VSLIE1UP(32, H4)
GEN_VEXT_VSLIE1UP(64, H8)

-#define GEN_VEXT_VSLIDE1UP_VX(NAME, ESZ) \
+#define GEN_VEXT_VSLIDE1UP_VX(NAME, BITWIDTH) \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc) \
{ \
-    vslide1up_##ESZ(vd, v0, s1, vs2, env, desc); \
+    vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc); \
}

/* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */
@@ -XXX,XX +XXX,XX @@ GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, 16)
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, 32)
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, 64)

-#define GEN_VEXT_VSLIDE1DOWN(ESZ, H) \
-static void vslide1down_##ESZ(void *vd, void *v0, target_ulong s1, void *vs2, \
-                              CPURISCVState *env, uint32_t desc) \
+#define GEN_VEXT_VSLIDE1DOWN(BITWIDTH, H) \
+static void vslide1down_##BITWIDTH(void *vd, void *v0, target_ulong s1, \
+                                   void *vs2, CPURISCVState *env, uint32_t desc) \
{ \
-    typedef uint##ESZ##_t ETYPE; \
+    typedef uint##BITWIDTH##_t ETYPE; \
    uint32_t vm = vext_vm(desc); \
    uint32_t vl = env->vl; \
    uint32_t i; \
@@ -XXX,XX +XXX,XX @@ GEN_VEXT_VSLIDE1DOWN(16, H2)
GEN_VEXT_VSLIDE1DOWN(32, H4)
GEN_VEXT_VSLIDE1DOWN(64, H8)

-#define GEN_VEXT_VSLIDE1DOWN_VX(NAME, ESZ) \
+#define GEN_VEXT_VSLIDE1DOWN_VX(NAME, BITWIDTH) \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc) \
{ \
-    vslide1down_##ESZ(vd, v0, s1, vs2, env, desc); \
+    vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc); \
}

/* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */
@@ -XXX,XX +XXX,XX @@ GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, 32)
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, 64)

/* Vector Floating-Point Slide Instructions */
-#define GEN_VEXT_VFSLIDE1UP_VF(NAME, ESZ) \
+#define GEN_VEXT_VFSLIDE1UP_VF(NAME, BITWIDTH) \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc) \
{ \
-    vslide1up_##ESZ(vd, v0, s1, vs2, env, desc); \
+    vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc); \
}

/* vfslide1up.vf vd, vs2, rs1, vm # vd[0]=f[rs1], vd[i+1] = vs2[i] */
@@ -XXX,XX +XXX,XX @@ GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_h, 16)
GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_w, 32)
GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_d, 64)

-#define GEN_VEXT_VFSLIDE1DOWN_VF(NAME, ESZ) \
+#define GEN_VEXT_VFSLIDE1DOWN_VF(NAME, BITWIDTH) \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc) \
{ \
-    vslide1down_##ESZ(vd, v0, s1, vs2, env, desc); \
+    vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc); \
}

/* vfslide1down.vf vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=f[rs1] */
--
2.36.1

From: eopXD <yueh.ting.chen@gmail.com>

According to v-spec (section 5.4):
  When vstart ≥ vl, there are no body elements, and no elements are
  updated in any destination vector register group, including that
  no tail elements are updated with agnostic values.

The vmsbf.m, vmsif.m, vmsof.m, viota.m and vcompress instructions
themselves require vstart to be zero, so they don't need the early
exit.

Signed-off-by: eop Chen <eop.chen@sifive.com>
Reviewed-by: Frank Chang <frank.chang@sifive.com>
Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn>
Acked-by: Alistair Francis <alistair.francis@wdc.com>
Message-Id: <165449614532.19704.7000832880482980398-4@git.sr.ht>
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
---
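Note for reviewers: every hunk below adds the same guard directly
after the existing vl == 0 branch. In scalar terms the generated code
now behaves like this sketch (an illustration, not the TCG itself):

    if (env->vl == 0 || env->vstart >= env->vl) {
        /* no body elements: write nothing, not even tail elements */
        return;
    }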
 target/riscv/insn_trans/trans_rvv.c.inc | 27 +++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -XXX,XX +XXX,XX @@ static bool ldst_us_trans(uint32_t vd, uint32_t rs1, uint32_t data,

    TCGLabel *over = gen_new_label();
    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
+    tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);

    dest = tcg_temp_new_ptr();
    mask = tcg_temp_new_ptr();
@@ -XXX,XX +XXX,XX @@ static bool ldst_stride_trans(uint32_t vd, uint32_t rs1, uint32_t rs2,

    TCGLabel *over = gen_new_label();
    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
+    tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);

    dest = tcg_temp_new_ptr();
    mask = tcg_temp_new_ptr();
@@ -XXX,XX +XXX,XX @@ static bool ldst_index_trans(uint32_t vd, uint32_t rs1, uint32_t vs2,

    TCGLabel *over = gen_new_label();
    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
+    tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);

    dest = tcg_temp_new_ptr();
    mask = tcg_temp_new_ptr();
@@ -XXX,XX +XXX,XX @@ static bool ldff_trans(uint32_t vd, uint32_t rs1, uint32_t data,

    TCGLabel *over = gen_new_label();
    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
+    tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);

    dest = tcg_temp_new_ptr();
    mask = tcg_temp_new_ptr();
@@ -XXX,XX +XXX,XX @@ do_opivv_gvec(DisasContext *s, arg_rmrr *a, GVecGen3Fn *gvec_fn,
    }

    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
+    tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);

    if (a->vm && s->vl_eq_vlmax) {
        gvec_fn(s->sew, vreg_ofs(s, a->rd),
@@ -XXX,XX +XXX,XX @@ static bool opivx_trans(uint32_t vd, uint32_t rs1, uint32_t vs2, uint32_t vm,

    TCGLabel *over = gen_new_label();
    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
+    tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);

    dest = tcg_temp_new_ptr();
    mask = tcg_temp_new_ptr();
@@ -XXX,XX +XXX,XX @@ static bool opivi_trans(uint32_t vd, uint32_t imm, uint32_t vs2, uint32_t vm,

    TCGLabel *over = gen_new_label();
    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
+    tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);

    dest = tcg_temp_new_ptr();
    mask = tcg_temp_new_ptr();
@@ -XXX,XX +XXX,XX @@ static bool do_opivv_widen(DisasContext *s, arg_rmrr *a,
    uint32_t data = 0;
    TCGLabel *over = gen_new_label();
    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
+    tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);

    data = FIELD_DP32(data, VDATA, VM, a->vm);
    data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
@@ -XXX,XX +XXX,XX @@ static bool do_opiwv_widen(DisasContext *s, arg_rmrr *a,
    uint32_t data = 0;
    TCGLabel *over = gen_new_label();
    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
+    tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);

    data = FIELD_DP32(data, VDATA, VM, a->vm);
    data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
@@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
    }; \
    TCGLabel *over = gen_new_label(); \
    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \
+    tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \
\
    data = FIELD_DP32(data, VDATA, VM, a->vm); \
    data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \
@@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
    }; \
    TCGLabel *over = gen_new_label(); \
    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \
+    tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \
\
    data = FIELD_DP32(data, VDATA, VM, a->vm); \
    data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \
@@ -XXX,XX +XXX,XX @@ static bool trans_vmv_v_v(DisasContext *s, arg_vmv_v_v *a)
    };
    TCGLabel *over = gen_new_label();
    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
+    tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);

    tcg_gen_gvec_2_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, a->rs1),
                       cpu_env, s->cfg_ptr->vlen / 8,
@@ -XXX,XX +XXX,XX @@ static bool trans_vmv_v_x(DisasContext *s, arg_vmv_v_x *a)
    TCGv s1;
    TCGLabel *over = gen_new_label();
    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
+    tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);

    s1 = get_gpr(s, a->rs1, EXT_SIGN);

@@ -XXX,XX +XXX,XX @@ static bool trans_vmv_v_i(DisasContext *s, arg_vmv_v_i *a)
    };
    TCGLabel *over = gen_new_label();
    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
+    tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);

    s1 = tcg_constant_i64(simm);
    dest = tcg_temp_new_ptr();
@@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
    TCGLabel *over = gen_new_label(); \
    gen_set_rm(s, RISCV_FRM_DYN); \
    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \
+    tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \
\
    data = FIELD_DP32(data, VDATA, VM, a->vm); \
    data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \
@@ -XXX,XX +XXX,XX @@ static bool opfvf_trans(uint32_t vd, uint32_t rs1, uint32_t vs2,

    TCGLabel *over = gen_new_label();
    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
+    tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);

    dest = tcg_temp_new_ptr();
    mask = tcg_temp_new_ptr();
@@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
    TCGLabel *over = gen_new_label(); \
    gen_set_rm(s, RISCV_FRM_DYN); \
    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \
+    tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);\
\
    data = FIELD_DP32(data, VDATA, VM, a->vm); \
    data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \
@@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
    TCGLabel *over = gen_new_label(); \
    gen_set_rm(s, RISCV_FRM_DYN); \
    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \
+    tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \
\
    data = FIELD_DP32(data, VDATA, VM, a->vm); \
    data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \
@@ -XXX,XX +XXX,XX @@ static bool do_opfv(DisasContext *s, arg_rmr *a,
    TCGLabel *over = gen_new_label();
    gen_set_rm(s, rm);
    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
+    tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);

    data = FIELD_DP32(data, VDATA, VM, a->vm);
    data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
@@ -XXX,XX +XXX,XX @@ static bool trans_vfmv_v_f(DisasContext *s, arg_vfmv_v_f *a)
    };
    TCGLabel *over = gen_new_label();
    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
+    tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);

    t1 = tcg_temp_new_i64();
    /* NaN-box f[rs1] */
@@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a) \
    TCGLabel *over = gen_new_label(); \
    gen_set_rm(s, FRM); \
    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \
+    tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \
\
    data = FIELD_DP32(data, VDATA, VM, a->vm); \
    data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \
@@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a) \
    TCGLabel *over = gen_new_label(); \
    gen_set_rm(s, RISCV_FRM_DYN); \
    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \
+    tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \
\
    data = FIELD_DP32(data, VDATA, VM, a->vm); \
    tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \
@@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a) \
    TCGLabel *over = gen_new_label(); \
    gen_set_rm(s, FRM); \
    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \
+    tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \
\
    data = FIELD_DP32(data, VDATA, VM, a->vm); \
    data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \
@@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a) \
    TCGLabel *over = gen_new_label(); \
    gen_set_rm(s, FRM); \
    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \
+    tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \
\
    data = FIELD_DP32(data, VDATA, VM, a->vm); \
    tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \
@@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_r *a) \
    gen_helper_gvec_4_ptr *fn = gen_helper_##NAME; \
    TCGLabel *over = gen_new_label(); \
    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \
+    tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \
\
    data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \
    tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \
@@ -XXX,XX +XXX,XX @@ static bool trans_vid_v(DisasContext *s, arg_vid_v *a)
    uint32_t data = 0;
    TCGLabel *over = gen_new_label();
    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
+    tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);

    data = FIELD_DP32(data, VDATA, VM, a->vm);
    data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
@@ -XXX,XX +XXX,XX @@ static bool int_ext_op(DisasContext *s, arg_rmr *a, uint8_t seq)
    gen_helper_gvec_3_ptr *fn;
    TCGLabel *over = gen_new_label();
    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
+    tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);

    static gen_helper_gvec_3_ptr * const fns[6][4] = {
        {
--
2.36.1

From: eopXD <eop.chen@sifive.com>

According to the v-spec, tail agnostic behavior can either leave the
tail elements undisturbed or set their bits to all 1s. To distinguish
between the two tail policies, QEMU should be able to simulate the
tail agnostic behavior as "set tail elements' bits to all 1s".

There are multiple possibilities for agnostic elements according to
the v-spec. The main intent of this patch set is to add an option
that can distinguish between the tail policies. Setting agnostic
elements to all 1s allows QEMU to express this.

This is the first commit regarding the optional tail agnostic
behavior. Follow-up commits will add this optional behavior for all
rvv instructions.

Signed-off-by: eop Chen <eop.chen@sifive.com>
Reviewed-by: Frank Chang <frank.chang@sifive.com>
Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn>
Acked-by: Alistair Francis <alistair.francis@wdc.com>
Message-Id: <165449614532.19704.7000832880482980398-5@git.sr.ht>
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
---
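Note for reviewers: a concrete picture of what the new rvv_ta_all_1s
option enables, with assumed values (VLEN = 128, SEW = 32, LMUL = 1,
vl = 3, so elements 0..2 are body and element 3 is tail). With
vtype.vta set and the option enabled, a helper would finish by
overwriting the tail, roughly like this sketch (total_elems stands
for the value returned by the vext_get_total_elems() helper
introduced below):

    for (uint32_t i = env->vl; i < total_elems; i++) {
        *((uint32_t *)vd + H4(i)) = UINT32_MAX;  /* tail element, all 1s */
    }

With a tail-undisturbed vtype, or without the option, those elements
keep their previous values.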
24 | target/riscv/cpu.h | 2 + | ||
25 | target/riscv/internals.h | 5 +- | ||
26 | target/riscv/cpu_helper.c | 2 + | ||
27 | target/riscv/translate.c | 2 + | ||
28 | target/riscv/vector_helper.c | 296 +++++++++++++----------- | ||
29 | target/riscv/insn_trans/trans_rvv.c.inc | 3 +- | ||
30 | 6 files changed, 178 insertions(+), 132 deletions(-) | ||
31 | |||
32 | diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h | ||
33 | index XXXXXXX..XXXXXXX 100644 | ||
34 | --- a/target/riscv/cpu.h | ||
35 | +++ b/target/riscv/cpu.h | ||
36 | @@ -XXX,XX +XXX,XX @@ struct RISCVCPUConfig { | ||
37 | bool ext_zve32f; | ||
38 | bool ext_zve64f; | ||
39 | bool ext_zmmul; | ||
40 | + bool rvv_ta_all_1s; | ||
41 | |||
42 | uint32_t mvendorid; | ||
43 | uint64_t marchid; | ||
44 | @@ -XXX,XX +XXX,XX @@ FIELD(TB_FLAGS, XL, 20, 2) | ||
45 | /* If PointerMasking should be applied */ | ||
46 | FIELD(TB_FLAGS, PM_MASK_ENABLED, 22, 1) | ||
47 | FIELD(TB_FLAGS, PM_BASE_ENABLED, 23, 1) | ||
48 | +FIELD(TB_FLAGS, VTA, 24, 1) | ||
49 | |||
50 | #ifdef TARGET_RISCV32 | ||
51 | #define riscv_cpu_mxl(env) ((void)(env), MXL_RV32) | ||
52 | diff --git a/target/riscv/internals.h b/target/riscv/internals.h | ||
53 | index XXXXXXX..XXXXXXX 100644 | ||
54 | --- a/target/riscv/internals.h | ||
55 | +++ b/target/riscv/internals.h | ||
56 | @@ -XXX,XX +XXX,XX @@ | ||
57 | /* share data between vector helpers and decode code */ | ||
58 | FIELD(VDATA, VM, 0, 1) | ||
59 | FIELD(VDATA, LMUL, 1, 3) | ||
60 | -FIELD(VDATA, NF, 4, 4) | ||
61 | -FIELD(VDATA, WD, 4, 1) | ||
62 | +FIELD(VDATA, VTA, 4, 1) | ||
63 | +FIELD(VDATA, NF, 5, 4) | ||
64 | +FIELD(VDATA, WD, 5, 1) | ||
65 | |||
66 | /* float point classify helpers */ | ||
67 | target_ulong fclass_h(uint64_t frs1); | ||
68 | diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c | ||
69 | index XXXXXXX..XXXXXXX 100644 | ||
70 | --- a/target/riscv/cpu_helper.c | ||
71 | +++ b/target/riscv/cpu_helper.c | ||
72 | @@ -XXX,XX +XXX,XX @@ void cpu_get_tb_cpu_state(CPURISCVState *env, target_ulong *pc, | ||
73 | flags = FIELD_DP32(flags, TB_FLAGS, LMUL, | ||
74 | FIELD_EX64(env->vtype, VTYPE, VLMUL)); | ||
75 | flags = FIELD_DP32(flags, TB_FLAGS, VL_EQ_VLMAX, vl_eq_vlmax); | ||
76 | + flags = FIELD_DP32(flags, TB_FLAGS, VTA, | ||
77 | + FIELD_EX64(env->vtype, VTYPE, VTA)); | ||
78 | } else { | ||
79 | flags = FIELD_DP32(flags, TB_FLAGS, VILL, 1); | ||
80 | } | ||
81 | diff --git a/target/riscv/translate.c b/target/riscv/translate.c | ||
82 | index XXXXXXX..XXXXXXX 100644 | ||
83 | --- a/target/riscv/translate.c | ||
84 | +++ b/target/riscv/translate.c | ||
85 | @@ -XXX,XX +XXX,XX @@ typedef struct DisasContext { | ||
86 | */ | ||
87 | int8_t lmul; | ||
88 | uint8_t sew; | ||
89 | + uint8_t vta; | ||
90 | target_ulong vstart; | ||
91 | bool vl_eq_vlmax; | ||
92 | uint8_t ntemp; | ||
93 | @@ -XXX,XX +XXX,XX @@ static void riscv_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) | ||
94 | ctx->vill = FIELD_EX32(tb_flags, TB_FLAGS, VILL); | ||
95 | ctx->sew = FIELD_EX32(tb_flags, TB_FLAGS, SEW); | ||
96 | ctx->lmul = sextract32(FIELD_EX32(tb_flags, TB_FLAGS, LMUL), 0, 3); | ||
97 | + ctx->vta = FIELD_EX32(tb_flags, TB_FLAGS, VTA) && cpu->cfg.rvv_ta_all_1s; | ||
98 | ctx->vstart = env->vstart; | ||
99 | ctx->vl_eq_vlmax = FIELD_EX32(tb_flags, TB_FLAGS, VL_EQ_VLMAX); | ||
100 | ctx->misa_mxl_max = env->misa_mxl_max; | ||
101 | diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c | ||
102 | index XXXXXXX..XXXXXXX 100644 | ||
103 | --- a/target/riscv/vector_helper.c | ||
104 | +++ b/target/riscv/vector_helper.c | ||
105 | @@ -XXX,XX +XXX,XX @@ static inline int32_t vext_lmul(uint32_t desc) | ||
106 | return sextract32(FIELD_EX32(simd_data(desc), VDATA, LMUL), 0, 3); | ||
107 | } | ||
108 | |||
109 | +static inline uint32_t vext_vta(uint32_t desc) | ||
110 | +{ | ||
111 | + return FIELD_EX32(simd_data(desc), VDATA, VTA); | ||
112 | +} | ||
113 | + | ||
114 | /* | ||
115 | * Get the maximum number of elements that can be operated on. | ||
116 | * | ||
117 | @@ -XXX,XX +XXX,XX @@ static inline uint32_t vext_max_elems(uint32_t desc, uint32_t log2_esz) | ||
118 | return scale < 0 ? vlenb >> -scale : vlenb << scale; | ||
119 | } | ||
120 | |||
121 | +/* | ||
122 | + * Get the total number of elements, including prestart, body and tail elements. | ||
123 | + * Note that when LMUL < 1, the tail includes the elements past VLMAX that | ||
124 | + * are held in the same vector register. | ||
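+ * For example, assuming VLEN = 128 (vlenb = 16): with esz = sew = 4 and
+ * LMUL = 1/2, emul clamps to 0 and this returns 16 / 4 = 4 total
+ * elements, while VLMAX is only 2, so elements 2 and 3 are tail.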
125 | + */ | ||
126 | +static inline uint32_t vext_get_total_elems(CPURISCVState *env, uint32_t desc, | ||
127 | + uint32_t esz) | ||
128 | +{ | ||
129 | + uint32_t vlenb = simd_maxsz(desc); | ||
130 | + uint32_t sew = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW); | ||
131 | + int8_t emul = ctzl(esz) - ctzl(sew) + vext_lmul(desc) < 0 ? 0 : | ||
132 | + ctzl(esz) - ctzl(sew) + vext_lmul(desc); | ||
133 | + return (vlenb << emul) / esz; | ||
134 | +} | ||
135 | + | ||
136 | static inline target_ulong adjust_addr(CPURISCVState *env, target_ulong addr) | ||
137 | { | ||
138 | return (addr & env->cur_pmmask) | env->cur_pmbase; | ||
139 | @@ -XXX,XX +XXX,XX @@ static void probe_pages(CPURISCVState *env, target_ulong addr, | ||
140 | } | ||
141 | } | ||
142 | |||
143 | +/* set agnostic elements to 1s */ | ||
144 | +static void vext_set_elems_1s(void *base, uint32_t is_agnostic, uint32_t cnt, | ||
145 | + uint32_t tot) | ||
146 | +{ | ||
147 | + if (is_agnostic == 0) { | ||
148 | + /* policy undisturbed */ | ||
149 | + return; | ||
150 | + } | ||
151 | + if (tot - cnt == 0) { | ||
152 | + return; | ||
153 | + } | ||
154 | + memset(base + cnt, -1, tot - cnt); | ||
155 | +} | ||
156 | + | ||
157 | static inline void vext_set_elem_mask(void *v0, int index, | ||
158 | uint8_t value) | ||
159 | { | ||
160 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB) | ||
161 | |||
162 | static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2, | ||
163 | CPURISCVState *env, uint32_t desc, | ||
164 | - opivv2_fn *fn) | ||
165 | + opivv2_fn *fn, uint32_t esz) | ||
166 | { | ||
167 | uint32_t vm = vext_vm(desc); | ||
168 | uint32_t vl = env->vl; | ||
169 | + uint32_t total_elems = vext_get_total_elems(env, desc, esz); | ||
170 | + uint32_t vta = vext_vta(desc); | ||
171 | uint32_t i; | ||
172 | |||
173 | for (i = env->vstart; i < vl; i++) { | ||
174 | @@ -XXX,XX +XXX,XX @@ static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2, | ||
175 | fn(vd, vs1, vs2, i); | ||
176 | } | ||
177 | env->vstart = 0; | ||
178 | + /* set tail elements to 1s */ | ||
179 | + vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); | ||
180 | } | ||
181 | |||
182 | /* generate the helpers for OPIVV */ | ||
183 | -#define GEN_VEXT_VV(NAME) \ | ||
184 | +#define GEN_VEXT_VV(NAME, ESZ) \ | ||
185 | void HELPER(NAME)(void *vd, void *v0, void *vs1, \ | ||
186 | void *vs2, CPURISCVState *env, \ | ||
187 | uint32_t desc) \ | ||
188 | { \ | ||
189 | do_vext_vv(vd, v0, vs1, vs2, env, desc, \ | ||
190 | - do_##NAME); \ | ||
191 | + do_##NAME, ESZ); \ | ||
192 | } | ||
193 | |||
194 | -GEN_VEXT_VV(vadd_vv_b) | ||
195 | -GEN_VEXT_VV(vadd_vv_h) | ||
196 | -GEN_VEXT_VV(vadd_vv_w) | ||
197 | -GEN_VEXT_VV(vadd_vv_d) | ||
198 | -GEN_VEXT_VV(vsub_vv_b) | ||
199 | -GEN_VEXT_VV(vsub_vv_h) | ||
200 | -GEN_VEXT_VV(vsub_vv_w) | ||
201 | -GEN_VEXT_VV(vsub_vv_d) | ||
202 | +GEN_VEXT_VV(vadd_vv_b, 1) | ||
203 | +GEN_VEXT_VV(vadd_vv_h, 2) | ||
204 | +GEN_VEXT_VV(vadd_vv_w, 4) | ||
205 | +GEN_VEXT_VV(vadd_vv_d, 8) | ||
206 | +GEN_VEXT_VV(vsub_vv_b, 1) | ||
207 | +GEN_VEXT_VV(vsub_vv_h, 2) | ||
208 | +GEN_VEXT_VV(vsub_vv_w, 4) | ||
209 | +GEN_VEXT_VV(vsub_vv_d, 8) | ||
210 | |||
211 | typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i); | ||
212 | |||
213 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD) | ||
214 | RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB) | ||
215 | RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB) | ||
216 | RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB) | ||
217 | -GEN_VEXT_VV(vwaddu_vv_b) | ||
218 | -GEN_VEXT_VV(vwaddu_vv_h) | ||
219 | -GEN_VEXT_VV(vwaddu_vv_w) | ||
220 | -GEN_VEXT_VV(vwsubu_vv_b) | ||
221 | -GEN_VEXT_VV(vwsubu_vv_h) | ||
222 | -GEN_VEXT_VV(vwsubu_vv_w) | ||
223 | -GEN_VEXT_VV(vwadd_vv_b) | ||
224 | -GEN_VEXT_VV(vwadd_vv_h) | ||
225 | -GEN_VEXT_VV(vwadd_vv_w) | ||
226 | -GEN_VEXT_VV(vwsub_vv_b) | ||
227 | -GEN_VEXT_VV(vwsub_vv_h) | ||
228 | -GEN_VEXT_VV(vwsub_vv_w) | ||
229 | -GEN_VEXT_VV(vwaddu_wv_b) | ||
230 | -GEN_VEXT_VV(vwaddu_wv_h) | ||
231 | -GEN_VEXT_VV(vwaddu_wv_w) | ||
232 | -GEN_VEXT_VV(vwsubu_wv_b) | ||
233 | -GEN_VEXT_VV(vwsubu_wv_h) | ||
234 | -GEN_VEXT_VV(vwsubu_wv_w) | ||
235 | -GEN_VEXT_VV(vwadd_wv_b) | ||
236 | -GEN_VEXT_VV(vwadd_wv_h) | ||
237 | -GEN_VEXT_VV(vwadd_wv_w) | ||
238 | -GEN_VEXT_VV(vwsub_wv_b) | ||
239 | -GEN_VEXT_VV(vwsub_wv_h) | ||
240 | -GEN_VEXT_VV(vwsub_wv_w) | ||
241 | +GEN_VEXT_VV(vwaddu_vv_b, 2) | ||
242 | +GEN_VEXT_VV(vwaddu_vv_h, 4) | ||
243 | +GEN_VEXT_VV(vwaddu_vv_w, 8) | ||
244 | +GEN_VEXT_VV(vwsubu_vv_b, 2) | ||
245 | +GEN_VEXT_VV(vwsubu_vv_h, 4) | ||
246 | +GEN_VEXT_VV(vwsubu_vv_w, 8) | ||
247 | +GEN_VEXT_VV(vwadd_vv_b, 2) | ||
248 | +GEN_VEXT_VV(vwadd_vv_h, 4) | ||
249 | +GEN_VEXT_VV(vwadd_vv_w, 8) | ||
250 | +GEN_VEXT_VV(vwsub_vv_b, 2) | ||
251 | +GEN_VEXT_VV(vwsub_vv_h, 4) | ||
252 | +GEN_VEXT_VV(vwsub_vv_w, 8) | ||
253 | +GEN_VEXT_VV(vwaddu_wv_b, 2) | ||
254 | +GEN_VEXT_VV(vwaddu_wv_h, 4) | ||
255 | +GEN_VEXT_VV(vwaddu_wv_w, 8) | ||
256 | +GEN_VEXT_VV(vwsubu_wv_b, 2) | ||
257 | +GEN_VEXT_VV(vwsubu_wv_h, 4) | ||
258 | +GEN_VEXT_VV(vwsubu_wv_w, 8) | ||
259 | +GEN_VEXT_VV(vwadd_wv_b, 2) | ||
260 | +GEN_VEXT_VV(vwadd_wv_h, 4) | ||
261 | +GEN_VEXT_VV(vwadd_wv_w, 8) | ||
262 | +GEN_VEXT_VV(vwsub_wv_b, 2) | ||
263 | +GEN_VEXT_VV(vwsub_wv_h, 4) | ||
264 | +GEN_VEXT_VV(vwsub_wv_w, 8) | ||
265 | |||
266 | RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD) | ||
267 | RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD) | ||
268 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR) | ||
269 | RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR) | ||
270 | RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR) | ||
271 | RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR) | ||
272 | -GEN_VEXT_VV(vand_vv_b) | ||
273 | -GEN_VEXT_VV(vand_vv_h) | ||
274 | -GEN_VEXT_VV(vand_vv_w) | ||
275 | -GEN_VEXT_VV(vand_vv_d) | ||
276 | -GEN_VEXT_VV(vor_vv_b) | ||
277 | -GEN_VEXT_VV(vor_vv_h) | ||
278 | -GEN_VEXT_VV(vor_vv_w) | ||
279 | -GEN_VEXT_VV(vor_vv_d) | ||
280 | -GEN_VEXT_VV(vxor_vv_b) | ||
281 | -GEN_VEXT_VV(vxor_vv_h) | ||
282 | -GEN_VEXT_VV(vxor_vv_w) | ||
283 | -GEN_VEXT_VV(vxor_vv_d) | ||
284 | +GEN_VEXT_VV(vand_vv_b, 1) | ||
285 | +GEN_VEXT_VV(vand_vv_h, 2) | ||
286 | +GEN_VEXT_VV(vand_vv_w, 4) | ||
287 | +GEN_VEXT_VV(vand_vv_d, 8) | ||
288 | +GEN_VEXT_VV(vor_vv_b, 1) | ||
289 | +GEN_VEXT_VV(vor_vv_h, 2) | ||
290 | +GEN_VEXT_VV(vor_vv_w, 4) | ||
291 | +GEN_VEXT_VV(vor_vv_d, 8) | ||
292 | +GEN_VEXT_VV(vxor_vv_b, 1) | ||
293 | +GEN_VEXT_VV(vxor_vv_h, 2) | ||
294 | +GEN_VEXT_VV(vxor_vv_w, 4) | ||
295 | +GEN_VEXT_VV(vxor_vv_d, 8) | ||
296 | |||
297 | RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND) | ||
298 | RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND) | ||
299 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX) | ||
300 | RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX) | ||
301 | RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX) | ||
302 | RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX) | ||
303 | -GEN_VEXT_VV(vminu_vv_b) | ||
304 | -GEN_VEXT_VV(vminu_vv_h) | ||
305 | -GEN_VEXT_VV(vminu_vv_w) | ||
306 | -GEN_VEXT_VV(vminu_vv_d) | ||
307 | -GEN_VEXT_VV(vmin_vv_b) | ||
308 | -GEN_VEXT_VV(vmin_vv_h) | ||
309 | -GEN_VEXT_VV(vmin_vv_w) | ||
310 | -GEN_VEXT_VV(vmin_vv_d) | ||
311 | -GEN_VEXT_VV(vmaxu_vv_b) | ||
312 | -GEN_VEXT_VV(vmaxu_vv_h) | ||
313 | -GEN_VEXT_VV(vmaxu_vv_w) | ||
314 | -GEN_VEXT_VV(vmaxu_vv_d) | ||
315 | -GEN_VEXT_VV(vmax_vv_b) | ||
316 | -GEN_VEXT_VV(vmax_vv_h) | ||
317 | -GEN_VEXT_VV(vmax_vv_w) | ||
318 | -GEN_VEXT_VV(vmax_vv_d) | ||
319 | +GEN_VEXT_VV(vminu_vv_b, 1) | ||
320 | +GEN_VEXT_VV(vminu_vv_h, 2) | ||
321 | +GEN_VEXT_VV(vminu_vv_w, 4) | ||
322 | +GEN_VEXT_VV(vminu_vv_d, 8) | ||
323 | +GEN_VEXT_VV(vmin_vv_b, 1) | ||
324 | +GEN_VEXT_VV(vmin_vv_h, 2) | ||
325 | +GEN_VEXT_VV(vmin_vv_w, 4) | ||
326 | +GEN_VEXT_VV(vmin_vv_d, 8) | ||
327 | +GEN_VEXT_VV(vmaxu_vv_b, 1) | ||
328 | +GEN_VEXT_VV(vmaxu_vv_h, 2) | ||
329 | +GEN_VEXT_VV(vmaxu_vv_w, 4) | ||
330 | +GEN_VEXT_VV(vmaxu_vv_d, 8) | ||
331 | +GEN_VEXT_VV(vmax_vv_b, 1) | ||
332 | +GEN_VEXT_VV(vmax_vv_h, 2) | ||
333 | +GEN_VEXT_VV(vmax_vv_w, 4) | ||
334 | +GEN_VEXT_VV(vmax_vv_d, 8) | ||
335 | |||
336 | RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN) | ||
337 | RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN) | ||
338 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL) | ||
339 | RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL) | ||
340 | RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL) | ||
341 | RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL) | ||
342 | -GEN_VEXT_VV(vmul_vv_b) | ||
343 | -GEN_VEXT_VV(vmul_vv_h) | ||
344 | -GEN_VEXT_VV(vmul_vv_w) | ||
345 | -GEN_VEXT_VV(vmul_vv_d) | ||
346 | +GEN_VEXT_VV(vmul_vv_b, 1) | ||
347 | +GEN_VEXT_VV(vmul_vv_h, 2) | ||
348 | +GEN_VEXT_VV(vmul_vv_w, 4) | ||
349 | +GEN_VEXT_VV(vmul_vv_d, 8) | ||
350 | |||
351 | static int8_t do_mulh_b(int8_t s2, int8_t s1) | ||
352 | { | ||
353 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b) | ||
354 | RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h) | ||
355 | RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w) | ||
356 | RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d) | ||
357 | -GEN_VEXT_VV(vmulh_vv_b) | ||
358 | -GEN_VEXT_VV(vmulh_vv_h) | ||
359 | -GEN_VEXT_VV(vmulh_vv_w) | ||
360 | -GEN_VEXT_VV(vmulh_vv_d) | ||
361 | -GEN_VEXT_VV(vmulhu_vv_b) | ||
362 | -GEN_VEXT_VV(vmulhu_vv_h) | ||
363 | -GEN_VEXT_VV(vmulhu_vv_w) | ||
364 | -GEN_VEXT_VV(vmulhu_vv_d) | ||
365 | -GEN_VEXT_VV(vmulhsu_vv_b) | ||
366 | -GEN_VEXT_VV(vmulhsu_vv_h) | ||
367 | -GEN_VEXT_VV(vmulhsu_vv_w) | ||
368 | -GEN_VEXT_VV(vmulhsu_vv_d) | ||
369 | +GEN_VEXT_VV(vmulh_vv_b, 1) | ||
370 | +GEN_VEXT_VV(vmulh_vv_h, 2) | ||
371 | +GEN_VEXT_VV(vmulh_vv_w, 4) | ||
372 | +GEN_VEXT_VV(vmulh_vv_d, 8) | ||
373 | +GEN_VEXT_VV(vmulhu_vv_b, 1) | ||
374 | +GEN_VEXT_VV(vmulhu_vv_h, 2) | ||
375 | +GEN_VEXT_VV(vmulhu_vv_w, 4) | ||
376 | +GEN_VEXT_VV(vmulhu_vv_d, 8) | ||
377 | +GEN_VEXT_VV(vmulhsu_vv_b, 1) | ||
378 | +GEN_VEXT_VV(vmulhsu_vv_h, 2) | ||
379 | +GEN_VEXT_VV(vmulhsu_vv_w, 4) | ||
380 | +GEN_VEXT_VV(vmulhsu_vv_d, 8) | ||
381 | |||
382 | RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL) | ||
383 | RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL) | ||
384 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM) | ||
385 | RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM) | ||
386 | RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM) | ||
387 | RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM) | ||
388 | -GEN_VEXT_VV(vdivu_vv_b) | ||
389 | -GEN_VEXT_VV(vdivu_vv_h) | ||
390 | -GEN_VEXT_VV(vdivu_vv_w) | ||
391 | -GEN_VEXT_VV(vdivu_vv_d) | ||
392 | -GEN_VEXT_VV(vdiv_vv_b) | ||
393 | -GEN_VEXT_VV(vdiv_vv_h) | ||
394 | -GEN_VEXT_VV(vdiv_vv_w) | ||
395 | -GEN_VEXT_VV(vdiv_vv_d) | ||
396 | -GEN_VEXT_VV(vremu_vv_b) | ||
397 | -GEN_VEXT_VV(vremu_vv_h) | ||
398 | -GEN_VEXT_VV(vremu_vv_w) | ||
399 | -GEN_VEXT_VV(vremu_vv_d) | ||
400 | -GEN_VEXT_VV(vrem_vv_b) | ||
401 | -GEN_VEXT_VV(vrem_vv_h) | ||
402 | -GEN_VEXT_VV(vrem_vv_w) | ||
403 | -GEN_VEXT_VV(vrem_vv_d) | ||
404 | +GEN_VEXT_VV(vdivu_vv_b, 1) | ||
405 | +GEN_VEXT_VV(vdivu_vv_h, 2) | ||
406 | +GEN_VEXT_VV(vdivu_vv_w, 4) | ||
407 | +GEN_VEXT_VV(vdivu_vv_d, 8) | ||
408 | +GEN_VEXT_VV(vdiv_vv_b, 1) | ||
409 | +GEN_VEXT_VV(vdiv_vv_h, 2) | ||
410 | +GEN_VEXT_VV(vdiv_vv_w, 4) | ||
411 | +GEN_VEXT_VV(vdiv_vv_d, 8) | ||
412 | +GEN_VEXT_VV(vremu_vv_b, 1) | ||
413 | +GEN_VEXT_VV(vremu_vv_h, 2) | ||
414 | +GEN_VEXT_VV(vremu_vv_w, 4) | ||
415 | +GEN_VEXT_VV(vremu_vv_d, 8) | ||
416 | +GEN_VEXT_VV(vrem_vv_b, 1) | ||
417 | +GEN_VEXT_VV(vrem_vv_h, 2) | ||
418 | +GEN_VEXT_VV(vrem_vv_w, 4) | ||
419 | +GEN_VEXT_VV(vrem_vv_d, 8) | ||
420 | |||
421 | RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU) | ||
422 | RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU) | ||
423 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL) | ||
424 | RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL) | ||
425 | RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL) | ||
426 | RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL) | ||
427 | -GEN_VEXT_VV(vwmul_vv_b) | ||
428 | -GEN_VEXT_VV(vwmul_vv_h) | ||
429 | -GEN_VEXT_VV(vwmul_vv_w) | ||
430 | -GEN_VEXT_VV(vwmulu_vv_b) | ||
431 | -GEN_VEXT_VV(vwmulu_vv_h) | ||
432 | -GEN_VEXT_VV(vwmulu_vv_w) | ||
433 | -GEN_VEXT_VV(vwmulsu_vv_b) | ||
434 | -GEN_VEXT_VV(vwmulsu_vv_h) | ||
435 | -GEN_VEXT_VV(vwmulsu_vv_w) | ||
436 | +GEN_VEXT_VV(vwmul_vv_b, 2) | ||
437 | +GEN_VEXT_VV(vwmul_vv_h, 4) | ||
438 | +GEN_VEXT_VV(vwmul_vv_w, 8) | ||
439 | +GEN_VEXT_VV(vwmulu_vv_b, 2) | ||
440 | +GEN_VEXT_VV(vwmulu_vv_h, 4) | ||
441 | +GEN_VEXT_VV(vwmulu_vv_w, 8) | ||
442 | +GEN_VEXT_VV(vwmulsu_vv_b, 2) | ||
443 | +GEN_VEXT_VV(vwmulsu_vv_h, 4) | ||
444 | +GEN_VEXT_VV(vwmulsu_vv_w, 8) | ||
445 | |||
446 | RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL) | ||
447 | RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL) | ||
448 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB) | ||
449 | RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB) | ||
450 | RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB) | ||
451 | RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB) | ||
452 | -GEN_VEXT_VV(vmacc_vv_b) | ||
453 | -GEN_VEXT_VV(vmacc_vv_h) | ||
454 | -GEN_VEXT_VV(vmacc_vv_w) | ||
455 | -GEN_VEXT_VV(vmacc_vv_d) | ||
456 | -GEN_VEXT_VV(vnmsac_vv_b) | ||
457 | -GEN_VEXT_VV(vnmsac_vv_h) | ||
458 | -GEN_VEXT_VV(vnmsac_vv_w) | ||
459 | -GEN_VEXT_VV(vnmsac_vv_d) | ||
460 | -GEN_VEXT_VV(vmadd_vv_b) | ||
461 | -GEN_VEXT_VV(vmadd_vv_h) | ||
462 | -GEN_VEXT_VV(vmadd_vv_w) | ||
463 | -GEN_VEXT_VV(vmadd_vv_d) | ||
464 | -GEN_VEXT_VV(vnmsub_vv_b) | ||
465 | -GEN_VEXT_VV(vnmsub_vv_h) | ||
466 | -GEN_VEXT_VV(vnmsub_vv_w) | ||
467 | -GEN_VEXT_VV(vnmsub_vv_d) | ||
468 | +GEN_VEXT_VV(vmacc_vv_b, 1) | ||
469 | +GEN_VEXT_VV(vmacc_vv_h, 2) | ||
470 | +GEN_VEXT_VV(vmacc_vv_w, 4) | ||
471 | +GEN_VEXT_VV(vmacc_vv_d, 8) | ||
472 | +GEN_VEXT_VV(vnmsac_vv_b, 1) | ||
473 | +GEN_VEXT_VV(vnmsac_vv_h, 2) | ||
474 | +GEN_VEXT_VV(vnmsac_vv_w, 4) | ||
475 | +GEN_VEXT_VV(vnmsac_vv_d, 8) | ||
476 | +GEN_VEXT_VV(vmadd_vv_b, 1) | ||
477 | +GEN_VEXT_VV(vmadd_vv_h, 2) | ||
478 | +GEN_VEXT_VV(vmadd_vv_w, 4) | ||
479 | +GEN_VEXT_VV(vmadd_vv_d, 8) | ||
480 | +GEN_VEXT_VV(vnmsub_vv_b, 1) | ||
481 | +GEN_VEXT_VV(vnmsub_vv_h, 2) | ||
482 | +GEN_VEXT_VV(vnmsub_vv_w, 4) | ||
483 | +GEN_VEXT_VV(vnmsub_vv_d, 8) | ||
484 | |||
485 | #define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ | ||
486 | static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \ | ||
487 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC) | ||
488 | RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC) | ||
489 | RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC) | ||
490 | RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC) | ||
491 | -GEN_VEXT_VV(vwmaccu_vv_b) | ||
492 | -GEN_VEXT_VV(vwmaccu_vv_h) | ||
493 | -GEN_VEXT_VV(vwmaccu_vv_w) | ||
494 | -GEN_VEXT_VV(vwmacc_vv_b) | ||
495 | -GEN_VEXT_VV(vwmacc_vv_h) | ||
496 | -GEN_VEXT_VV(vwmacc_vv_w) | ||
497 | -GEN_VEXT_VV(vwmaccsu_vv_b) | ||
498 | -GEN_VEXT_VV(vwmaccsu_vv_h) | ||
499 | -GEN_VEXT_VV(vwmaccsu_vv_w) | ||
500 | +GEN_VEXT_VV(vwmaccu_vv_b, 2) | ||
501 | +GEN_VEXT_VV(vwmaccu_vv_h, 4) | ||
502 | +GEN_VEXT_VV(vwmaccu_vv_w, 8) | ||
503 | +GEN_VEXT_VV(vwmacc_vv_b, 2) | ||
504 | +GEN_VEXT_VV(vwmacc_vv_h, 4) | ||
505 | +GEN_VEXT_VV(vwmacc_vv_w, 8) | ||
506 | +GEN_VEXT_VV(vwmaccsu_vv_b, 2) | ||
507 | +GEN_VEXT_VV(vwmaccsu_vv_h, 4) | ||
508 | +GEN_VEXT_VV(vwmaccsu_vv_w, 8) | ||
509 | |||
510 | RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC) | ||
511 | RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC) | ||
512 | diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc | ||
513 | index XXXXXXX..XXXXXXX 100644 | ||
514 | --- a/target/riscv/insn_trans/trans_rvv.c.inc | ||
515 | +++ b/target/riscv/insn_trans/trans_rvv.c.inc | ||
516 | @@ -XXX,XX +XXX,XX @@ do_opivv_gvec(DisasContext *s, arg_rmrr *a, GVecGen3Fn *gvec_fn, | ||
517 | tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); | ||
518 | tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); | ||
519 | |||
520 | - if (a->vm && s->vl_eq_vlmax) { | ||
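+    /*
+     * With a fractional LMUL and tail-agnostic 1s, tail elements exist
+     * within the register even when vl == vlmax (elements past VLMAX),
+     * and the whole-register gvec fast path would leave them unset, so
+     * fall back to the helper in that case.
+     */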
521 | + if (a->vm && s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) { | ||
522 | gvec_fn(s->sew, vreg_ofs(s, a->rd), | ||
523 | vreg_ofs(s, a->rs2), vreg_ofs(s, a->rs1), | ||
524 | MAXSZ(s), MAXSZ(s)); | ||
525 | @@ -XXX,XX +XXX,XX @@ do_opivv_gvec(DisasContext *s, arg_rmrr *a, GVecGen3Fn *gvec_fn, | ||
526 | |||
527 | data = FIELD_DP32(data, VDATA, VM, a->vm); | ||
528 | data = FIELD_DP32(data, VDATA, LMUL, s->lmul); | ||
529 | + data = FIELD_DP32(data, VDATA, VTA, s->vta); | ||
530 | tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), | ||
531 | vreg_ofs(s, a->rs1), vreg_ofs(s, a->rs2), | ||
532 | cpu_env, s->cfg_ptr->vlen / 8, | ||
533 | -- | ||
534 | 2.36.1 |
New patch | |||
---|---|---|---|
1 | 1 | From: eopXD <yueh.ting.chen@gmail.com> | |
2 | |||
3 | The destination register of unit-stride mask load and store | ||
4 | instructions is always written with a tail-agnostic policy. | ||
5 | |||
6 | A vector segment load / store instruction may use a fractional LMUL | ||
7 | with nf * lmul > 1. In that case, the remaining elements in the last | ||
8 | register should be treated as tail elements. | ||
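As a concrete instance of the rounding applied below, using the same
formula as the patch and an assumed VLEN of 128:

    uint32_t vlenb = 16;     /* assumed VLEN = 128 bits */
    uint32_t nf = 3;         /* e.g. a vlseg3e16.v access */
    uint32_t esz = 2;        /* SEW = 16 */
    uint32_t max_elems = 4;  /* EMUL = 1/2: half a register per field */
    /* 3 fields * 4 elements * 2 bytes = 24 bytes = 1.5 registers */
    uint32_t registers_used = ((nf * max_elems) * esz + (vlenb - 1)) / vlenb;
    /* registers_used == 2; the upper half of the second register is tail */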
9 | |||
10 | Signed-off-by: eop Chen <eop.chen@sifive.com> | ||
11 | Reviewed-by: Frank Chang <frank.chang@sifive.com> | ||
12 | Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn> | ||
13 | Acked-by: Alistair Francis <alistair.francis@wdc.com> | ||
14 | Message-Id: <165449614532.19704.7000832880482980398-6@git.sr.ht> | ||
15 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | ||
16 | --- | ||
17 | target/riscv/translate.c | 2 + | ||
18 | target/riscv/vector_helper.c | 60 +++++++++++++++++++++++++ | ||
19 | target/riscv/insn_trans/trans_rvv.c.inc | 6 +++ | ||
20 | 3 files changed, 68 insertions(+) | ||
21 | |||
22 | diff --git a/target/riscv/translate.c b/target/riscv/translate.c | ||
23 | index XXXXXXX..XXXXXXX 100644 | ||
24 | --- a/target/riscv/translate.c | ||
25 | +++ b/target/riscv/translate.c | ||
26 | @@ -XXX,XX +XXX,XX @@ typedef struct DisasContext { | ||
27 | int8_t lmul; | ||
28 | uint8_t sew; | ||
29 | uint8_t vta; | ||
30 | + bool cfg_vta_all_1s; | ||
31 | target_ulong vstart; | ||
32 | bool vl_eq_vlmax; | ||
33 | uint8_t ntemp; | ||
34 | @@ -XXX,XX +XXX,XX @@ static void riscv_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) | ||
35 | ctx->sew = FIELD_EX32(tb_flags, TB_FLAGS, SEW); | ||
36 | ctx->lmul = sextract32(FIELD_EX32(tb_flags, TB_FLAGS, LMUL), 0, 3); | ||
37 | ctx->vta = FIELD_EX32(tb_flags, TB_FLAGS, VTA) && cpu->cfg.rvv_ta_all_1s; | ||
38 | + ctx->cfg_vta_all_1s = cpu->cfg.rvv_ta_all_1s; | ||
39 | ctx->vstart = env->vstart; | ||
40 | ctx->vl_eq_vlmax = FIELD_EX32(tb_flags, TB_FLAGS, VL_EQ_VLMAX); | ||
41 | ctx->misa_mxl_max = env->misa_mxl_max; | ||
42 | diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c | ||
43 | index XXXXXXX..XXXXXXX 100644 | ||
44 | --- a/target/riscv/vector_helper.c | ||
45 | +++ b/target/riscv/vector_helper.c | ||
46 | @@ -XXX,XX +XXX,XX @@ vext_ldst_stride(void *vd, void *v0, target_ulong base, | ||
47 | uint32_t i, k; | ||
48 | uint32_t nf = vext_nf(desc); | ||
49 | uint32_t max_elems = vext_max_elems(desc, log2_esz); | ||
50 | + uint32_t esz = 1 << log2_esz; | ||
51 | + uint32_t total_elems = vext_get_total_elems(env, desc, esz); | ||
52 | + uint32_t vta = vext_vta(desc); | ||
53 | |||
54 | for (i = env->vstart; i < env->vl; i++, env->vstart++) { | ||
55 | if (!vm && !vext_elem_mask(v0, i)) { | ||
56 | @@ -XXX,XX +XXX,XX @@ vext_ldst_stride(void *vd, void *v0, target_ulong base, | ||
57 | } | ||
58 | } | ||
59 | env->vstart = 0; | ||
60 | + /* set tail elements to 1s */ | ||
61 | + for (k = 0; k < nf; ++k) { | ||
62 | + vext_set_elems_1s(vd, vta, (k * max_elems + env->vl) * esz, | ||
63 | + (k * max_elems + max_elems) * esz); | ||
64 | + } | ||
65 | + if (nf * max_elems % total_elems != 0) { | ||
66 | + uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3; | ||
67 | + uint32_t registers_used = | ||
68 | + ((nf * max_elems) * esz + (vlenb - 1)) / vlenb; | ||
69 | + vext_set_elems_1s(vd, vta, (nf * max_elems) * esz, | ||
70 | + registers_used * vlenb); | ||
71 | + } | ||
72 | } | ||
73 | |||
74 | #define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN) \ | ||
75 | @@ -XXX,XX +XXX,XX @@ vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, | ||
76 | uint32_t i, k; | ||
77 | uint32_t nf = vext_nf(desc); | ||
78 | uint32_t max_elems = vext_max_elems(desc, log2_esz); | ||
79 | + uint32_t esz = 1 << log2_esz; | ||
80 | + uint32_t total_elems = vext_get_total_elems(env, desc, esz); | ||
81 | + uint32_t vta = vext_vta(desc); | ||
82 | |||
83 | /* load bytes from guest memory */ | ||
84 | for (i = env->vstart; i < evl; i++, env->vstart++) { | ||
85 | @@ -XXX,XX +XXX,XX @@ vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, | ||
86 | } | ||
87 | } | ||
88 | env->vstart = 0; | ||
89 | + /* set tail elements to 1s */ | ||
90 | + for (k = 0; k < nf; ++k) { | ||
91 | + vext_set_elems_1s(vd, vta, (k * max_elems + evl) * esz, | ||
92 | + (k * max_elems + max_elems) * esz); | ||
93 | + } | ||
94 | + if (nf * max_elems % total_elems != 0) { | ||
95 | + uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3; | ||
96 | + uint32_t registers_used = | ||
97 | + ((nf * max_elems) * esz + (vlenb - 1)) / vlenb; | ||
98 | + vext_set_elems_1s(vd, vta, (nf * max_elems) * esz, | ||
99 | + registers_used * vlenb); | ||
100 | + } | ||
101 | } | ||
102 | |||
103 | /* | ||
104 | @@ -XXX,XX +XXX,XX @@ vext_ldst_index(void *vd, void *v0, target_ulong base, | ||
105 | uint32_t nf = vext_nf(desc); | ||
106 | uint32_t vm = vext_vm(desc); | ||
107 | uint32_t max_elems = vext_max_elems(desc, log2_esz); | ||
108 | + uint32_t esz = 1 << log2_esz; | ||
109 | + uint32_t total_elems = vext_get_total_elems(env, desc, esz); | ||
110 | + uint32_t vta = vext_vta(desc); | ||
111 | |||
112 | /* load bytes from guest memory */ | ||
113 | for (i = env->vstart; i < env->vl; i++, env->vstart++) { | ||
114 | @@ -XXX,XX +XXX,XX @@ vext_ldst_index(void *vd, void *v0, target_ulong base, | ||
115 | } | ||
116 | } | ||
117 | env->vstart = 0; | ||
118 | + /* set tail elements to 1s */ | ||
119 | + for (k = 0; k < nf; ++k) { | ||
120 | + vext_set_elems_1s(vd, vta, (k * max_elems + env->vl) * esz, | ||
121 | + (k * max_elems + max_elems) * esz); | ||
122 | + } | ||
123 | + if (nf * max_elems % total_elems != 0) { | ||
124 | + uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3; | ||
125 | + uint32_t registers_used = | ||
126 | + ((nf * max_elems) * esz + (vlenb - 1)) / vlenb; | ||
127 | + vext_set_elems_1s(vd, vta, (nf * max_elems) * esz, | ||
128 | + registers_used * vlenb); | ||
129 | + } | ||
130 | } | ||
131 | |||
132 | #define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN) \ | ||
133 | @@ -XXX,XX +XXX,XX @@ vext_ldff(void *vd, void *v0, target_ulong base, | ||
134 | uint32_t nf = vext_nf(desc); | ||
135 | uint32_t vm = vext_vm(desc); | ||
136 | uint32_t max_elems = vext_max_elems(desc, log2_esz); | ||
137 | + uint32_t esz = 1 << log2_esz; | ||
138 | + uint32_t total_elems = vext_get_total_elems(env, desc, esz); | ||
139 | + uint32_t vta = vext_vta(desc); | ||
140 | target_ulong addr, offset, remain; | ||
141 | |||
142 | /* probe every access*/ | ||
143 | @@ -XXX,XX +XXX,XX @@ ProbeSuccess: | ||
144 | } | ||
145 | } | ||
146 | env->vstart = 0; | ||
147 | + /* set tail elements to 1s */ | ||
148 | + for (k = 0; k < nf; ++k) { | ||
149 | + vext_set_elems_1s(vd, vta, (k * max_elems + env->vl) * esz, | ||
150 | + (k * max_elems + max_elems) * esz); | ||
151 | + } | ||
152 | + if (nf * max_elems % total_elems != 0) { | ||
153 | + uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3; | ||
154 | + uint32_t registers_used = | ||
155 | + ((nf * max_elems) * esz + (vlenb - 1)) / vlenb; | ||
156 | + vext_set_elems_1s(vd, vta, (nf * max_elems) * esz, | ||
157 | + registers_used * vlenb); | ||
158 | + } | ||
159 | } | ||
160 | |||
161 | #define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN) \ | ||
162 | diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc | ||
163 | index XXXXXXX..XXXXXXX 100644 | ||
164 | --- a/target/riscv/insn_trans/trans_rvv.c.inc | ||
165 | +++ b/target/riscv/insn_trans/trans_rvv.c.inc | ||
166 | @@ -XXX,XX +XXX,XX @@ static bool ld_us_op(DisasContext *s, arg_r2nfvm *a, uint8_t eew) | ||
167 | data = FIELD_DP32(data, VDATA, VM, a->vm); | ||
168 | data = FIELD_DP32(data, VDATA, LMUL, emul); | ||
169 | data = FIELD_DP32(data, VDATA, NF, a->nf); | ||
170 | + data = FIELD_DP32(data, VDATA, VTA, s->vta); | ||
171 | return ldst_us_trans(a->rd, a->rs1, data, fn, s, false); | ||
172 | } | ||
173 | |||
174 | @@ -XXX,XX +XXX,XX @@ static bool ld_us_mask_op(DisasContext *s, arg_vlm_v *a, uint8_t eew) | ||
175 | /* EMUL = 1, NFIELDS = 1 */ | ||
176 | data = FIELD_DP32(data, VDATA, LMUL, 0); | ||
177 | data = FIELD_DP32(data, VDATA, NF, 1); | ||
178 | + /* Mask destination registers are always tail-agnostic */ | ||
179 | + data = FIELD_DP32(data, VDATA, VTA, s->cfg_vta_all_1s); | ||
180 | return ldst_us_trans(a->rd, a->rs1, data, fn, s, false); | ||
181 | } | ||
182 | |||
183 | @@ -XXX,XX +XXX,XX @@ static bool ld_stride_op(DisasContext *s, arg_rnfvm *a, uint8_t eew) | ||
184 | data = FIELD_DP32(data, VDATA, VM, a->vm); | ||
185 | data = FIELD_DP32(data, VDATA, LMUL, emul); | ||
186 | data = FIELD_DP32(data, VDATA, NF, a->nf); | ||
187 | + data = FIELD_DP32(data, VDATA, VTA, s->vta); | ||
188 | return ldst_stride_trans(a->rd, a->rs1, a->rs2, data, fn, s, false); | ||
189 | } | ||
190 | |||
191 | @@ -XXX,XX +XXX,XX @@ static bool ld_index_op(DisasContext *s, arg_rnfvm *a, uint8_t eew) | ||
192 | data = FIELD_DP32(data, VDATA, VM, a->vm); | ||
193 | data = FIELD_DP32(data, VDATA, LMUL, emul); | ||
194 | data = FIELD_DP32(data, VDATA, NF, a->nf); | ||
195 | + data = FIELD_DP32(data, VDATA, VTA, s->vta); | ||
196 | return ldst_index_trans(a->rd, a->rs1, a->rs2, data, fn, s, false); | ||
197 | } | ||
198 | |||
199 | @@ -XXX,XX +XXX,XX @@ static bool ldff_op(DisasContext *s, arg_r2nfvm *a, uint8_t eew) | ||
200 | data = FIELD_DP32(data, VDATA, VM, a->vm); | ||
201 | data = FIELD_DP32(data, VDATA, LMUL, emul); | ||
202 | data = FIELD_DP32(data, VDATA, NF, a->nf); | ||
203 | + data = FIELD_DP32(data, VDATA, VTA, s->vta); | ||
204 | return ldff_trans(a->rd, a->rs1, data, fn, s); | ||
205 | } | ||
206 | |||
207 | -- | ||
208 | 2.36.1 |
1 | From: Philippe Mathieu-Daudé <f4bug@amsat.org> | 1 | From: eopXD <yueh.ting.chen@gmail.com> |
---|---|---|---|
2 | 2 | ||
3 | Once a "One Time Programmable" is programmed, it shouldn't be reset. | 3 | `vmadc` and `vmsbc` produce a mask value, so they always operate with |
4 | a tail-agnostic policy. |
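Since a mask register holds one bit per element, the tail here is filled
bit by bit rather than with a byte-wide memset; roughly what the macros
below expand to, using the names from the patch:

    if (vta_all_1s) {
        /* i == vl after the body loop; set each remaining mask bit */
        for (; i < total_elems; i++) {
            vext_set_elem_mask(vd, i, 1);
        }
    }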
4 | 5 | ||
5 | Do not re-initialize the OTP content in the DeviceReset handler, | 6 | Signed-off-by: eop Chen <eop.chen@sifive.com> |
6 | initialize it once in the DeviceRealize one. | 7 | Reviewed-by: Frank Chang <frank.chang@sifive.com> |
7 | 8 | Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn> | |
8 | Fixes: 9fb45c62ae8 ("riscv: sifive: Implement a model for SiFive FU540 OTP") | 9 | Acked-by: Alistair Francis <alistair.francis@wdc.com> |
9 | Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | 10 | Message-Id: <165449614532.19704.7000832880482980398-7@git.sr.ht> |
10 | Reviewed-by: Alistair Francis <alistair.francis@wdc.com> | ||
11 | Message-Id: <20211119104757.331579-1-f4bug@amsat.org> | ||
12 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | 11 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> |
13 | --- | 12 | --- |
14 | hw/misc/sifive_u_otp.c | 13 +++++-------- | 13 | target/riscv/internals.h | 5 +- |
15 | 1 file changed, 5 insertions(+), 8 deletions(-) | 14 | target/riscv/vector_helper.c | 314 +++++++++++++----------- |
15 | target/riscv/insn_trans/trans_rvv.c.inc | 13 +- | ||
16 | 3 files changed, 190 insertions(+), 142 deletions(-) | ||
16 | 17 | ||
17 | diff --git a/hw/misc/sifive_u_otp.c b/hw/misc/sifive_u_otp.c | 18 | diff --git a/target/riscv/internals.h b/target/riscv/internals.h |
18 | index XXXXXXX..XXXXXXX 100644 | 19 | index XXXXXXX..XXXXXXX 100644 |
19 | --- a/hw/misc/sifive_u_otp.c | 20 | --- a/target/riscv/internals.h |
20 | +++ b/hw/misc/sifive_u_otp.c | 21 | +++ b/target/riscv/internals.h |
21 | @@ -XXX,XX +XXX,XX @@ static void sifive_u_otp_realize(DeviceState *dev, Error **errp) | 22 | @@ -XXX,XX +XXX,XX @@ |
22 | 23 | FIELD(VDATA, VM, 0, 1) | |
23 | if (blk_pread(s->blk, 0, s->fuse, filesize) != filesize) { | 24 | FIELD(VDATA, LMUL, 1, 3) |
24 | error_setg(errp, "failed to read the initial flash content"); | 25 | FIELD(VDATA, VTA, 4, 1) |
25 | + return; | 26 | -FIELD(VDATA, NF, 5, 4) |
26 | } | 27 | -FIELD(VDATA, WD, 5, 1) |
27 | } | 28 | +FIELD(VDATA, VTA_ALL_1S, 5, 1) |
29 | +FIELD(VDATA, NF, 6, 4) | ||
30 | +FIELD(VDATA, WD, 6, 1) | ||
31 | |||
32 | /* float point classify helpers */ | ||
33 | target_ulong fclass_h(uint64_t frs1); | ||
34 | diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c | ||
35 | index XXXXXXX..XXXXXXX 100644 | ||
36 | --- a/target/riscv/vector_helper.c | ||
37 | +++ b/target/riscv/vector_helper.c | ||
38 | @@ -XXX,XX +XXX,XX @@ static inline uint32_t vext_vta(uint32_t desc) | ||
39 | return FIELD_EX32(simd_data(desc), VDATA, VTA); | ||
40 | } | ||
41 | |||
42 | +static inline uint32_t vext_vta_all_1s(uint32_t desc) | ||
43 | +{ | ||
44 | + return FIELD_EX32(simd_data(desc), VDATA, VTA_ALL_1S); | ||
45 | +} | ||
46 | + | ||
47 | /* | ||
48 | * Get the maximum number of elements can be operated. | ||
49 | * | ||
50 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB) | ||
51 | |||
52 | static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2, | ||
53 | CPURISCVState *env, uint32_t desc, | ||
54 | - opivx2_fn fn) | ||
55 | + opivx2_fn fn, uint32_t esz) | ||
56 | { | ||
57 | uint32_t vm = vext_vm(desc); | ||
58 | uint32_t vl = env->vl; | ||
59 | + uint32_t total_elems = vext_get_total_elems(env, desc, esz); | ||
60 | + uint32_t vta = vext_vta(desc); | ||
61 | uint32_t i; | ||
62 | |||
63 | for (i = env->vstart; i < vl; i++) { | ||
64 | @@ -XXX,XX +XXX,XX @@ static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2, | ||
65 | fn(vd, s1, vs2, i); | ||
28 | } | 66 | } |
67 | env->vstart = 0; | ||
68 | + /* set tail elements to 1s */ | ||
69 | + vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); | ||
70 | } | ||
71 | |||
72 | /* generate the helpers for OPIVX */ | ||
73 | -#define GEN_VEXT_VX(NAME) \ | ||
74 | +#define GEN_VEXT_VX(NAME, ESZ) \ | ||
75 | void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ | ||
76 | void *vs2, CPURISCVState *env, \ | ||
77 | uint32_t desc) \ | ||
78 | { \ | ||
79 | do_vext_vx(vd, v0, s1, vs2, env, desc, \ | ||
80 | - do_##NAME); \ | ||
29 | -} | 81 | -} |
30 | - | 82 | - |
31 | -static void sifive_u_otp_reset(DeviceState *dev) | 83 | -GEN_VEXT_VX(vadd_vx_b) |
32 | -{ | 84 | -GEN_VEXT_VX(vadd_vx_h) |
33 | - SiFiveUOTPState *s = SIFIVE_U_OTP(dev); | 85 | -GEN_VEXT_VX(vadd_vx_w) |
34 | 86 | -GEN_VEXT_VX(vadd_vx_d) | |
35 | /* Initialize all fuses' initial value to 0xFFs */ | 87 | -GEN_VEXT_VX(vsub_vx_b) |
36 | memset(s->fuse, 0xff, sizeof(s->fuse)); | 88 | -GEN_VEXT_VX(vsub_vx_h) |
37 | @@ -XXX,XX +XXX,XX @@ static void sifive_u_otp_reset(DeviceState *dev) | 89 | -GEN_VEXT_VX(vsub_vx_w) |
38 | serial_data = s->serial; | 90 | -GEN_VEXT_VX(vsub_vx_d) |
39 | if (blk_pwrite(s->blk, index * SIFIVE_U_OTP_FUSE_WORD, | 91 | -GEN_VEXT_VX(vrsub_vx_b) |
40 | &serial_data, SIFIVE_U_OTP_FUSE_WORD, 0) < 0) { | 92 | -GEN_VEXT_VX(vrsub_vx_h) |
41 | - error_report("write error index<%d>", index); | 93 | -GEN_VEXT_VX(vrsub_vx_w) |
42 | + error_setg(errp, "failed to write index<%d>", index); | 94 | -GEN_VEXT_VX(vrsub_vx_d) |
43 | + return; | 95 | + do_##NAME, ESZ); \ |
44 | } | 96 | +} |
45 | 97 | + | |
46 | serial_data = ~(s->serial); | 98 | +GEN_VEXT_VX(vadd_vx_b, 1) |
47 | if (blk_pwrite(s->blk, (index + 1) * SIFIVE_U_OTP_FUSE_WORD, | 99 | +GEN_VEXT_VX(vadd_vx_h, 2) |
48 | &serial_data, SIFIVE_U_OTP_FUSE_WORD, 0) < 0) { | 100 | +GEN_VEXT_VX(vadd_vx_w, 4) |
49 | - error_report("write error index<%d>", index + 1); | 101 | +GEN_VEXT_VX(vadd_vx_d, 8) |
50 | + error_setg(errp, "failed to write index<%d>", index + 1); | 102 | +GEN_VEXT_VX(vsub_vx_b, 1) |
51 | + return; | 103 | +GEN_VEXT_VX(vsub_vx_h, 2) |
52 | } | 104 | +GEN_VEXT_VX(vsub_vx_w, 4) |
105 | +GEN_VEXT_VX(vsub_vx_d, 8) | ||
106 | +GEN_VEXT_VX(vrsub_vx_b, 1) | ||
107 | +GEN_VEXT_VX(vrsub_vx_h, 2) | ||
108 | +GEN_VEXT_VX(vrsub_vx_w, 4) | ||
109 | +GEN_VEXT_VX(vrsub_vx_d, 8) | ||
110 | |||
111 | void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc) | ||
112 | { | ||
113 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD) | ||
114 | RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB) | ||
115 | RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB) | ||
116 | RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB) | ||
117 | -GEN_VEXT_VX(vwaddu_vx_b) | ||
118 | -GEN_VEXT_VX(vwaddu_vx_h) | ||
119 | -GEN_VEXT_VX(vwaddu_vx_w) | ||
120 | -GEN_VEXT_VX(vwsubu_vx_b) | ||
121 | -GEN_VEXT_VX(vwsubu_vx_h) | ||
122 | -GEN_VEXT_VX(vwsubu_vx_w) | ||
123 | -GEN_VEXT_VX(vwadd_vx_b) | ||
124 | -GEN_VEXT_VX(vwadd_vx_h) | ||
125 | -GEN_VEXT_VX(vwadd_vx_w) | ||
126 | -GEN_VEXT_VX(vwsub_vx_b) | ||
127 | -GEN_VEXT_VX(vwsub_vx_h) | ||
128 | -GEN_VEXT_VX(vwsub_vx_w) | ||
129 | -GEN_VEXT_VX(vwaddu_wx_b) | ||
130 | -GEN_VEXT_VX(vwaddu_wx_h) | ||
131 | -GEN_VEXT_VX(vwaddu_wx_w) | ||
132 | -GEN_VEXT_VX(vwsubu_wx_b) | ||
133 | -GEN_VEXT_VX(vwsubu_wx_h) | ||
134 | -GEN_VEXT_VX(vwsubu_wx_w) | ||
135 | -GEN_VEXT_VX(vwadd_wx_b) | ||
136 | -GEN_VEXT_VX(vwadd_wx_h) | ||
137 | -GEN_VEXT_VX(vwadd_wx_w) | ||
138 | -GEN_VEXT_VX(vwsub_wx_b) | ||
139 | -GEN_VEXT_VX(vwsub_wx_h) | ||
140 | -GEN_VEXT_VX(vwsub_wx_w) | ||
141 | +GEN_VEXT_VX(vwaddu_vx_b, 2) | ||
142 | +GEN_VEXT_VX(vwaddu_vx_h, 4) | ||
143 | +GEN_VEXT_VX(vwaddu_vx_w, 8) | ||
144 | +GEN_VEXT_VX(vwsubu_vx_b, 2) | ||
145 | +GEN_VEXT_VX(vwsubu_vx_h, 4) | ||
146 | +GEN_VEXT_VX(vwsubu_vx_w, 8) | ||
147 | +GEN_VEXT_VX(vwadd_vx_b, 2) | ||
148 | +GEN_VEXT_VX(vwadd_vx_h, 4) | ||
149 | +GEN_VEXT_VX(vwadd_vx_w, 8) | ||
150 | +GEN_VEXT_VX(vwsub_vx_b, 2) | ||
151 | +GEN_VEXT_VX(vwsub_vx_h, 4) | ||
152 | +GEN_VEXT_VX(vwsub_vx_w, 8) | ||
153 | +GEN_VEXT_VX(vwaddu_wx_b, 2) | ||
154 | +GEN_VEXT_VX(vwaddu_wx_h, 4) | ||
155 | +GEN_VEXT_VX(vwaddu_wx_w, 8) | ||
156 | +GEN_VEXT_VX(vwsubu_wx_b, 2) | ||
157 | +GEN_VEXT_VX(vwsubu_wx_h, 4) | ||
158 | +GEN_VEXT_VX(vwsubu_wx_w, 8) | ||
159 | +GEN_VEXT_VX(vwadd_wx_b, 2) | ||
160 | +GEN_VEXT_VX(vwadd_wx_h, 4) | ||
161 | +GEN_VEXT_VX(vwadd_wx_w, 8) | ||
162 | +GEN_VEXT_VX(vwsub_wx_b, 2) | ||
163 | +GEN_VEXT_VX(vwsub_wx_h, 4) | ||
164 | +GEN_VEXT_VX(vwsub_wx_w, 8) | ||
165 | |||
166 | /* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */ | ||
167 | #define DO_VADC(N, M, C) (N + M + C) | ||
168 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ | ||
169 | CPURISCVState *env, uint32_t desc) \ | ||
170 | { \ | ||
171 | uint32_t vl = env->vl; \ | ||
172 | + uint32_t esz = sizeof(ETYPE); \ | ||
173 | + uint32_t total_elems = \ | ||
174 | + vext_get_total_elems(env, desc, esz); \ | ||
175 | + uint32_t vta = vext_vta(desc); \ | ||
176 | uint32_t i; \ | ||
177 | \ | ||
178 | for (i = env->vstart; i < vl; i++) { \ | ||
179 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ | ||
180 | *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry); \ | ||
181 | } \ | ||
182 | env->vstart = 0; \ | ||
183 | + /* set tail elements to 1s */ \ | ||
184 | + vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ | ||
185 | } | ||
186 | |||
187 | GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t, H1, DO_VADC) | ||
188 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ | ||
189 | CPURISCVState *env, uint32_t desc) \ | ||
190 | { \ | ||
191 | uint32_t vl = env->vl; \ | ||
192 | + uint32_t esz = sizeof(ETYPE); \ | ||
193 | + uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ | ||
194 | + uint32_t vta = vext_vta(desc); \ | ||
195 | uint32_t i; \ | ||
196 | \ | ||
197 | for (i = env->vstart; i < vl; i++) { \ | ||
198 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ | ||
199 | *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry);\ | ||
200 | } \ | ||
201 | env->vstart = 0; \ | ||
202 | + /* set tail elements to 1s */ \ | ||
203 | + vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ | ||
204 | } | ||
205 | |||
206 | GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t, H1, DO_VADC) | ||
207 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ | ||
208 | { \ | ||
209 | uint32_t vl = env->vl; \ | ||
210 | uint32_t vm = vext_vm(desc); \ | ||
211 | + uint32_t total_elems = env_archcpu(env)->cfg.vlen; \ | ||
212 | + uint32_t vta_all_1s = vext_vta_all_1s(desc); \ | ||
213 | uint32_t i; \ | ||
214 | \ | ||
215 | for (i = env->vstart; i < vl; i++) { \ | ||
216 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ | ||
217 | vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry)); \ | ||
218 | } \ | ||
219 | env->vstart = 0; \ | ||
220 | + /* mask destination registers are always tail-agnostic */ \ | ||
221 | + /* set tail elements to 1s */ \ | ||
222 | + if (vta_all_1s) { \ | ||
223 | + for (; i < total_elems; i++) { \ | ||
224 | + vext_set_elem_mask(vd, i, 1); \ | ||
225 | + } \ | ||
226 | + } \ | ||
227 | } | ||
228 | |||
229 | GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t, H1, DO_MADC) | ||
230 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ | ||
231 | { \ | ||
232 | uint32_t vl = env->vl; \ | ||
233 | uint32_t vm = vext_vm(desc); \ | ||
234 | + uint32_t total_elems = env_archcpu(env)->cfg.vlen; \ | ||
235 | + uint32_t vta_all_1s = vext_vta_all_1s(desc); \ | ||
236 | uint32_t i; \ | ||
237 | \ | ||
238 | for (i = env->vstart; i < vl; i++) { \ | ||
239 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ | ||
240 | DO_OP(s2, (ETYPE)(target_long)s1, carry)); \ | ||
241 | } \ | ||
242 | env->vstart = 0; \ | ||
243 | + /* mask destination registers are always tail-agnostic */ \ | ||
244 | + /* set tail elements to 1s */ \ | ||
245 | + if (vta_all_1s) { \ | ||
246 | + for (; i < total_elems; i++) { \ | ||
247 | + vext_set_elem_mask(vd, i, 1); \ | ||
248 | + } \ | ||
249 | + } \ | ||
250 | } | ||
251 | |||
252 | GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t, H1, DO_MADC) | ||
253 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR) | ||
254 | RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR) | ||
255 | RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR) | ||
256 | RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR) | ||
257 | -GEN_VEXT_VX(vand_vx_b) | ||
258 | -GEN_VEXT_VX(vand_vx_h) | ||
259 | -GEN_VEXT_VX(vand_vx_w) | ||
260 | -GEN_VEXT_VX(vand_vx_d) | ||
261 | -GEN_VEXT_VX(vor_vx_b) | ||
262 | -GEN_VEXT_VX(vor_vx_h) | ||
263 | -GEN_VEXT_VX(vor_vx_w) | ||
264 | -GEN_VEXT_VX(vor_vx_d) | ||
265 | -GEN_VEXT_VX(vxor_vx_b) | ||
266 | -GEN_VEXT_VX(vxor_vx_h) | ||
267 | -GEN_VEXT_VX(vxor_vx_w) | ||
268 | -GEN_VEXT_VX(vxor_vx_d) | ||
269 | +GEN_VEXT_VX(vand_vx_b, 1) | ||
270 | +GEN_VEXT_VX(vand_vx_h, 2) | ||
271 | +GEN_VEXT_VX(vand_vx_w, 4) | ||
272 | +GEN_VEXT_VX(vand_vx_d, 8) | ||
273 | +GEN_VEXT_VX(vor_vx_b, 1) | ||
274 | +GEN_VEXT_VX(vor_vx_h, 2) | ||
275 | +GEN_VEXT_VX(vor_vx_w, 4) | ||
276 | +GEN_VEXT_VX(vor_vx_d, 8) | ||
277 | +GEN_VEXT_VX(vxor_vx_b, 1) | ||
278 | +GEN_VEXT_VX(vxor_vx_h, 2) | ||
279 | +GEN_VEXT_VX(vxor_vx_w, 4) | ||
280 | +GEN_VEXT_VX(vxor_vx_d, 8) | ||
281 | |||
282 | /* Vector Single-Width Bit Shift Instructions */ | ||
283 | #define DO_SLL(N, M) (N << (M)) | ||
284 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX) | ||
285 | RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX) | ||
286 | RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX) | ||
287 | RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX) | ||
288 | -GEN_VEXT_VX(vminu_vx_b) | ||
289 | -GEN_VEXT_VX(vminu_vx_h) | ||
290 | -GEN_VEXT_VX(vminu_vx_w) | ||
291 | -GEN_VEXT_VX(vminu_vx_d) | ||
292 | -GEN_VEXT_VX(vmin_vx_b) | ||
293 | -GEN_VEXT_VX(vmin_vx_h) | ||
294 | -GEN_VEXT_VX(vmin_vx_w) | ||
295 | -GEN_VEXT_VX(vmin_vx_d) | ||
296 | -GEN_VEXT_VX(vmaxu_vx_b) | ||
297 | -GEN_VEXT_VX(vmaxu_vx_h) | ||
298 | -GEN_VEXT_VX(vmaxu_vx_w) | ||
299 | -GEN_VEXT_VX(vmaxu_vx_d) | ||
300 | -GEN_VEXT_VX(vmax_vx_b) | ||
301 | -GEN_VEXT_VX(vmax_vx_h) | ||
302 | -GEN_VEXT_VX(vmax_vx_w) | ||
303 | -GEN_VEXT_VX(vmax_vx_d) | ||
304 | +GEN_VEXT_VX(vminu_vx_b, 1) | ||
305 | +GEN_VEXT_VX(vminu_vx_h, 2) | ||
306 | +GEN_VEXT_VX(vminu_vx_w, 4) | ||
307 | +GEN_VEXT_VX(vminu_vx_d, 8) | ||
308 | +GEN_VEXT_VX(vmin_vx_b, 1) | ||
309 | +GEN_VEXT_VX(vmin_vx_h, 2) | ||
310 | +GEN_VEXT_VX(vmin_vx_w, 4) | ||
311 | +GEN_VEXT_VX(vmin_vx_d, 8) | ||
312 | +GEN_VEXT_VX(vmaxu_vx_b, 1) | ||
313 | +GEN_VEXT_VX(vmaxu_vx_h, 2) | ||
314 | +GEN_VEXT_VX(vmaxu_vx_w, 4) | ||
315 | +GEN_VEXT_VX(vmaxu_vx_d, 8) | ||
316 | +GEN_VEXT_VX(vmax_vx_b, 1) | ||
317 | +GEN_VEXT_VX(vmax_vx_h, 2) | ||
318 | +GEN_VEXT_VX(vmax_vx_w, 4) | ||
319 | +GEN_VEXT_VX(vmax_vx_d, 8) | ||
320 | |||
321 | /* Vector Single-Width Integer Multiply Instructions */ | ||
322 | #define DO_MUL(N, M) (N * M) | ||
323 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b) | ||
324 | RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h) | ||
325 | RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w) | ||
326 | RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d) | ||
327 | -GEN_VEXT_VX(vmul_vx_b) | ||
328 | -GEN_VEXT_VX(vmul_vx_h) | ||
329 | -GEN_VEXT_VX(vmul_vx_w) | ||
330 | -GEN_VEXT_VX(vmul_vx_d) | ||
331 | -GEN_VEXT_VX(vmulh_vx_b) | ||
332 | -GEN_VEXT_VX(vmulh_vx_h) | ||
333 | -GEN_VEXT_VX(vmulh_vx_w) | ||
334 | -GEN_VEXT_VX(vmulh_vx_d) | ||
335 | -GEN_VEXT_VX(vmulhu_vx_b) | ||
336 | -GEN_VEXT_VX(vmulhu_vx_h) | ||
337 | -GEN_VEXT_VX(vmulhu_vx_w) | ||
338 | -GEN_VEXT_VX(vmulhu_vx_d) | ||
339 | -GEN_VEXT_VX(vmulhsu_vx_b) | ||
340 | -GEN_VEXT_VX(vmulhsu_vx_h) | ||
341 | -GEN_VEXT_VX(vmulhsu_vx_w) | ||
342 | -GEN_VEXT_VX(vmulhsu_vx_d) | ||
343 | +GEN_VEXT_VX(vmul_vx_b, 1) | ||
344 | +GEN_VEXT_VX(vmul_vx_h, 2) | ||
345 | +GEN_VEXT_VX(vmul_vx_w, 4) | ||
346 | +GEN_VEXT_VX(vmul_vx_d, 8) | ||
347 | +GEN_VEXT_VX(vmulh_vx_b, 1) | ||
348 | +GEN_VEXT_VX(vmulh_vx_h, 2) | ||
349 | +GEN_VEXT_VX(vmulh_vx_w, 4) | ||
350 | +GEN_VEXT_VX(vmulh_vx_d, 8) | ||
351 | +GEN_VEXT_VX(vmulhu_vx_b, 1) | ||
352 | +GEN_VEXT_VX(vmulhu_vx_h, 2) | ||
353 | +GEN_VEXT_VX(vmulhu_vx_w, 4) | ||
354 | +GEN_VEXT_VX(vmulhu_vx_d, 8) | ||
355 | +GEN_VEXT_VX(vmulhsu_vx_b, 1) | ||
356 | +GEN_VEXT_VX(vmulhsu_vx_h, 2) | ||
357 | +GEN_VEXT_VX(vmulhsu_vx_w, 4) | ||
358 | +GEN_VEXT_VX(vmulhsu_vx_d, 8) | ||
359 | |||
360 | /* Vector Integer Divide Instructions */ | ||
361 | #define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M) | ||
362 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM) | ||
363 | RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM) | ||
364 | RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM) | ||
365 | RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM) | ||
366 | -GEN_VEXT_VX(vdivu_vx_b) | ||
367 | -GEN_VEXT_VX(vdivu_vx_h) | ||
368 | -GEN_VEXT_VX(vdivu_vx_w) | ||
369 | -GEN_VEXT_VX(vdivu_vx_d) | ||
370 | -GEN_VEXT_VX(vdiv_vx_b) | ||
371 | -GEN_VEXT_VX(vdiv_vx_h) | ||
372 | -GEN_VEXT_VX(vdiv_vx_w) | ||
373 | -GEN_VEXT_VX(vdiv_vx_d) | ||
374 | -GEN_VEXT_VX(vremu_vx_b) | ||
375 | -GEN_VEXT_VX(vremu_vx_h) | ||
376 | -GEN_VEXT_VX(vremu_vx_w) | ||
377 | -GEN_VEXT_VX(vremu_vx_d) | ||
378 | -GEN_VEXT_VX(vrem_vx_b) | ||
379 | -GEN_VEXT_VX(vrem_vx_h) | ||
380 | -GEN_VEXT_VX(vrem_vx_w) | ||
381 | -GEN_VEXT_VX(vrem_vx_d) | ||
382 | +GEN_VEXT_VX(vdivu_vx_b, 1) | ||
383 | +GEN_VEXT_VX(vdivu_vx_h, 2) | ||
384 | +GEN_VEXT_VX(vdivu_vx_w, 4) | ||
385 | +GEN_VEXT_VX(vdivu_vx_d, 8) | ||
386 | +GEN_VEXT_VX(vdiv_vx_b, 1) | ||
387 | +GEN_VEXT_VX(vdiv_vx_h, 2) | ||
388 | +GEN_VEXT_VX(vdiv_vx_w, 4) | ||
389 | +GEN_VEXT_VX(vdiv_vx_d, 8) | ||
390 | +GEN_VEXT_VX(vremu_vx_b, 1) | ||
391 | +GEN_VEXT_VX(vremu_vx_h, 2) | ||
392 | +GEN_VEXT_VX(vremu_vx_w, 4) | ||
393 | +GEN_VEXT_VX(vremu_vx_d, 8) | ||
394 | +GEN_VEXT_VX(vrem_vx_b, 1) | ||
395 | +GEN_VEXT_VX(vrem_vx_h, 2) | ||
396 | +GEN_VEXT_VX(vrem_vx_w, 4) | ||
397 | +GEN_VEXT_VX(vrem_vx_d, 8) | ||
398 | |||
399 | /* Vector Widening Integer Multiply Instructions */ | ||
400 | RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL) | ||
401 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL) | ||
402 | RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL) | ||
403 | RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL) | ||
404 | RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL) | ||
405 | -GEN_VEXT_VX(vwmul_vx_b) | ||
406 | -GEN_VEXT_VX(vwmul_vx_h) | ||
407 | -GEN_VEXT_VX(vwmul_vx_w) | ||
408 | -GEN_VEXT_VX(vwmulu_vx_b) | ||
409 | -GEN_VEXT_VX(vwmulu_vx_h) | ||
410 | -GEN_VEXT_VX(vwmulu_vx_w) | ||
411 | -GEN_VEXT_VX(vwmulsu_vx_b) | ||
412 | -GEN_VEXT_VX(vwmulsu_vx_h) | ||
413 | -GEN_VEXT_VX(vwmulsu_vx_w) | ||
414 | +GEN_VEXT_VX(vwmul_vx_b, 2) | ||
415 | +GEN_VEXT_VX(vwmul_vx_h, 4) | ||
416 | +GEN_VEXT_VX(vwmul_vx_w, 8) | ||
417 | +GEN_VEXT_VX(vwmulu_vx_b, 2) | ||
418 | +GEN_VEXT_VX(vwmulu_vx_h, 4) | ||
419 | +GEN_VEXT_VX(vwmulu_vx_w, 8) | ||
420 | +GEN_VEXT_VX(vwmulsu_vx_b, 2) | ||
421 | +GEN_VEXT_VX(vwmulsu_vx_h, 4) | ||
422 | +GEN_VEXT_VX(vwmulsu_vx_w, 8) | ||
423 | |||
424 | /* Vector Single-Width Integer Multiply-Add Instructions */ | ||
425 | #define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ | ||
426 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB) | ||
427 | RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB) | ||
428 | RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB) | ||
429 | RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB) | ||
430 | -GEN_VEXT_VX(vmacc_vx_b) | ||
431 | -GEN_VEXT_VX(vmacc_vx_h) | ||
432 | -GEN_VEXT_VX(vmacc_vx_w) | ||
433 | -GEN_VEXT_VX(vmacc_vx_d) | ||
434 | -GEN_VEXT_VX(vnmsac_vx_b) | ||
435 | -GEN_VEXT_VX(vnmsac_vx_h) | ||
436 | -GEN_VEXT_VX(vnmsac_vx_w) | ||
437 | -GEN_VEXT_VX(vnmsac_vx_d) | ||
438 | -GEN_VEXT_VX(vmadd_vx_b) | ||
439 | -GEN_VEXT_VX(vmadd_vx_h) | ||
440 | -GEN_VEXT_VX(vmadd_vx_w) | ||
441 | -GEN_VEXT_VX(vmadd_vx_d) | ||
442 | -GEN_VEXT_VX(vnmsub_vx_b) | ||
443 | -GEN_VEXT_VX(vnmsub_vx_h) | ||
444 | -GEN_VEXT_VX(vnmsub_vx_w) | ||
445 | -GEN_VEXT_VX(vnmsub_vx_d) | ||
446 | +GEN_VEXT_VX(vmacc_vx_b, 1) | ||
447 | +GEN_VEXT_VX(vmacc_vx_h, 2) | ||
448 | +GEN_VEXT_VX(vmacc_vx_w, 4) | ||
449 | +GEN_VEXT_VX(vmacc_vx_d, 8) | ||
450 | +GEN_VEXT_VX(vnmsac_vx_b, 1) | ||
451 | +GEN_VEXT_VX(vnmsac_vx_h, 2) | ||
452 | +GEN_VEXT_VX(vnmsac_vx_w, 4) | ||
453 | +GEN_VEXT_VX(vnmsac_vx_d, 8) | ||
454 | +GEN_VEXT_VX(vmadd_vx_b, 1) | ||
455 | +GEN_VEXT_VX(vmadd_vx_h, 2) | ||
456 | +GEN_VEXT_VX(vmadd_vx_w, 4) | ||
457 | +GEN_VEXT_VX(vmadd_vx_d, 8) | ||
458 | +GEN_VEXT_VX(vnmsub_vx_b, 1) | ||
459 | +GEN_VEXT_VX(vnmsub_vx_h, 2) | ||
460 | +GEN_VEXT_VX(vnmsub_vx_w, 4) | ||
461 | +GEN_VEXT_VX(vnmsub_vx_d, 8) | ||
462 | |||
463 | /* Vector Widening Integer Multiply-Add Instructions */ | ||
464 | RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC) | ||
465 | @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC) | ||
466 | RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC) | ||
467 | RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC) | ||
468 | RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC) | ||
469 | -GEN_VEXT_VX(vwmaccu_vx_b) | ||
470 | -GEN_VEXT_VX(vwmaccu_vx_h) | ||
471 | -GEN_VEXT_VX(vwmaccu_vx_w) | ||
472 | -GEN_VEXT_VX(vwmacc_vx_b) | ||
473 | -GEN_VEXT_VX(vwmacc_vx_h) | ||
474 | -GEN_VEXT_VX(vwmacc_vx_w) | ||
475 | -GEN_VEXT_VX(vwmaccsu_vx_b) | ||
476 | -GEN_VEXT_VX(vwmaccsu_vx_h) | ||
477 | -GEN_VEXT_VX(vwmaccsu_vx_w) | ||
478 | -GEN_VEXT_VX(vwmaccus_vx_b) | ||
479 | -GEN_VEXT_VX(vwmaccus_vx_h) | ||
480 | -GEN_VEXT_VX(vwmaccus_vx_w) | ||
481 | +GEN_VEXT_VX(vwmaccu_vx_b, 2) | ||
482 | +GEN_VEXT_VX(vwmaccu_vx_h, 4) | ||
483 | +GEN_VEXT_VX(vwmaccu_vx_w, 8) | ||
484 | +GEN_VEXT_VX(vwmacc_vx_b, 2) | ||
485 | +GEN_VEXT_VX(vwmacc_vx_h, 4) | ||
486 | +GEN_VEXT_VX(vwmacc_vx_w, 8) | ||
487 | +GEN_VEXT_VX(vwmaccsu_vx_b, 2) | ||
488 | +GEN_VEXT_VX(vwmaccsu_vx_h, 4) | ||
489 | +GEN_VEXT_VX(vwmaccsu_vx_w, 8) | ||
490 | +GEN_VEXT_VX(vwmaccus_vx_b, 2) | ||
491 | +GEN_VEXT_VX(vwmaccus_vx_h, 4) | ||
492 | +GEN_VEXT_VX(vwmaccus_vx_w, 8) | ||
493 | |||
494 | /* Vector Integer Merge and Move Instructions */ | ||
495 | #define GEN_VEXT_VMV_VV(NAME, ETYPE, H) \ | ||
496 | diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc | ||
497 | index XXXXXXX..XXXXXXX 100644 | ||
498 | --- a/target/riscv/insn_trans/trans_rvv.c.inc | ||
499 | +++ b/target/riscv/insn_trans/trans_rvv.c.inc | ||
500 | @@ -XXX,XX +XXX,XX @@ static bool opivx_trans(uint32_t vd, uint32_t rs1, uint32_t vs2, uint32_t vm, | ||
501 | |||
502 | data = FIELD_DP32(data, VDATA, VM, vm); | ||
503 | data = FIELD_DP32(data, VDATA, LMUL, s->lmul); | ||
504 | + data = FIELD_DP32(data, VDATA, VTA, s->vta); | ||
505 | + data = FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s); | ||
506 | desc = tcg_constant_i32(simd_desc(s->cfg_ptr->vlen / 8, | ||
507 | s->cfg_ptr->vlen / 8, data)); | ||
508 | |||
509 | @@ -XXX,XX +XXX,XX @@ do_opivx_gvec(DisasContext *s, arg_rmrr *a, GVecGen2sFn *gvec_fn, | ||
510 | return false; | ||
53 | } | 511 | } |
54 | 512 | ||
55 | @@ -XXX,XX +XXX,XX @@ static void sifive_u_otp_class_init(ObjectClass *klass, void *data) | 513 | - if (a->vm && s->vl_eq_vlmax) { |
56 | 514 | + if (a->vm && s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) { | |
57 | device_class_set_props(dc, sifive_u_otp_properties); | 515 | TCGv_i64 src1 = tcg_temp_new_i64(); |
58 | dc->realize = sifive_u_otp_realize; | 516 | |
59 | - dc->reset = sifive_u_otp_reset; | 517 | tcg_gen_ext_tl_i64(src1, get_gpr(s, a->rs1, EXT_SIGN)); |
60 | } | 518 | @@ -XXX,XX +XXX,XX @@ static bool opivi_trans(uint32_t vd, uint32_t imm, uint32_t vs2, uint32_t vm, |
61 | 519 | ||
62 | static const TypeInfo sifive_u_otp_info = { | 520 | data = FIELD_DP32(data, VDATA, VM, vm); |
521 | data = FIELD_DP32(data, VDATA, LMUL, s->lmul); | ||
522 | + data = FIELD_DP32(data, VDATA, VTA, s->vta); | ||
523 | + data = FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s); | ||
524 | desc = tcg_constant_i32(simd_desc(s->cfg_ptr->vlen / 8, | ||
525 | s->cfg_ptr->vlen / 8, data)); | ||
526 | |||
527 | @@ -XXX,XX +XXX,XX @@ do_opivi_gvec(DisasContext *s, arg_rmrr *a, GVecGen2iFn *gvec_fn, | ||
528 | return false; | ||
529 | } | ||
530 | |||
531 | - if (a->vm && s->vl_eq_vlmax) { | ||
532 | + if (a->vm && s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) { | ||
533 | gvec_fn(s->sew, vreg_ofs(s, a->rd), vreg_ofs(s, a->rs2), | ||
534 | extract_imm(s, a->rs1, imm_mode), MAXSZ(s), MAXSZ(s)); | ||
535 | mark_vs_dirty(s); | ||
536 | @@ -XXX,XX +XXX,XX @@ static bool do_opivv_widen(DisasContext *s, arg_rmrr *a, | ||
537 | |||
538 | data = FIELD_DP32(data, VDATA, VM, a->vm); | ||
539 | data = FIELD_DP32(data, VDATA, LMUL, s->lmul); | ||
540 | + data = FIELD_DP32(data, VDATA, VTA, s->vta); | ||
541 | tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), | ||
542 | vreg_ofs(s, a->rs1), | ||
543 | vreg_ofs(s, a->rs2), | ||
544 | @@ -XXX,XX +XXX,XX @@ static bool do_opiwv_widen(DisasContext *s, arg_rmrr *a, | ||
545 | |||
546 | data = FIELD_DP32(data, VDATA, VM, a->vm); | ||
547 | data = FIELD_DP32(data, VDATA, LMUL, s->lmul); | ||
548 | + data = FIELD_DP32(data, VDATA, VTA, s->vta); | ||
549 | tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), | ||
550 | vreg_ofs(s, a->rs1), | ||
551 | vreg_ofs(s, a->rs2), | ||
552 | @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ | ||
553 | \ | ||
554 | data = FIELD_DP32(data, VDATA, VM, a->vm); \ | ||
555 | data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ | ||
556 | + data = FIELD_DP32(data, VDATA, VTA, s->vta); \ | ||
557 | + data = \ | ||
558 | + FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s);\ | ||
559 | tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ | ||
560 | vreg_ofs(s, a->rs1), \ | ||
561 | vreg_ofs(s, a->rs2), cpu_env, \ | ||
63 | -- | 562 | -- |
64 | 2.31.1 | 563 | 2.36.1 |
65 | |||
New patch | |||
---|---|---|---|
From: eopXD <yueh.ting.chen@gmail.com>

Signed-off-by: eop Chen <eop.chen@sifive.com>
Reviewed-by: Frank Chang <frank.chang@sifive.com>
Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn>
Acked-by: Alistair Francis <alistair.francis@wdc.com>
Message-Id: <165449614532.19704.7000832880482980398-8@git.sr.ht>
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
---
 target/riscv/vector_helper.c            | 11 +++++++++++
 target/riscv/insn_trans/trans_rvv.c.inc |  3 ++-
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, \
 { \
     uint32_t vm = vext_vm(desc); \
     uint32_t vl = env->vl; \
+    uint32_t esz = sizeof(TS1); \
+    uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
+    uint32_t vta = vext_vta(desc); \
     uint32_t i; \
 \
     for (i = env->vstart; i < vl; i++) { \
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, \
         *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK); \
     } \
     env->vstart = 0; \
+    /* set tail elements to 1s */ \
+    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
 }

 GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t, uint8_t, H1, H1, DO_SLL, 0x7)
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
 { \
     uint32_t vm = vext_vm(desc); \
     uint32_t vl = env->vl; \
+    uint32_t esz = sizeof(TD); \
+    uint32_t total_elems = \
+        vext_get_total_elems(env, desc, esz); \
+    uint32_t vta = vext_vta(desc); \
     uint32_t i; \
 \
     for (i = env->vstart; i < vl; i++) { \
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
         *((TD *)vd + HD(i)) = OP(s2, s1 & MASK); \
     } \
     env->vstart = 0; \
+    /* set tail elements to 1s */ \
+    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);\
 }

 GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7)
diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -XXX,XX +XXX,XX @@ do_opivx_gvec_shift(DisasContext *s, arg_rmrr *a, GVecGen2sFn32 *gvec_fn,
     return false;
 }

-    if (a->vm && s->vl_eq_vlmax) {
+    if (a->vm && s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) {
         TCGv_i32 src1 = tcg_temp_new_i32();

         tcg_gen_trunc_tl_i32(src1, get_gpr(s, a->rs1, EXT_NONE));
@@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
 \
     data = FIELD_DP32(data, VDATA, VM, a->vm); \
     data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \
+    data = FIELD_DP32(data, VDATA, VTA, s->vta); \
     tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \
                        vreg_ofs(s, a->rs1), \
                        vreg_ofs(s, a->rs2), cpu_env, \
--
2.36.1

From: eopXD <yueh.ting.chen@gmail.com>

Compare instructions write to mask registers, and so they always
operate under a tail-agnostic policy.
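
As a rough illustration (a sketch only, not the hunk below: the wrapper
name set_mask_tail_1s is invented for this example, while
vext_set_elem_mask is the existing helper the series uses):

    /* Once the body elements up to vl are written, force the tail
     * mask bits to 1 when the all-1s tail policy is enabled. A mask
     * register holds one bit per element, so the tail runs to vlen. */
    static void set_mask_tail_1s(void *vd, uint32_t vl, uint32_t vlen,
                                 uint32_t vta_all_1s)
    {
        uint32_t i;

        if (!vta_all_1s) {
            return; /* the tail may also be left undisturbed */
        }
        for (i = vl; i < vlen; i++) {
            vext_set_elem_mask(vd, i, 1);
        }
    }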

Signed-off-by: eop Chen <eop.chen@sifive.com>
Reviewed-by: Frank Chang <frank.chang@sifive.com>
Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn>
Acked-by: Alistair Francis <alistair.francis@wdc.com>
Message-Id: <165449614532.19704.7000832880482980398-9@git.sr.ht>
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
---
 target/riscv/vector_helper.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
 { \
     uint32_t vm = vext_vm(desc); \
     uint32_t vl = env->vl; \
+    uint32_t total_elems = env_archcpu(env)->cfg.vlen; \
+    uint32_t vta_all_1s = vext_vta_all_1s(desc); \
     uint32_t i; \
 \
     for (i = env->vstart; i < vl; i++) { \
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
         vext_set_elem_mask(vd, i, DO_OP(s2, s1)); \
     } \
     env->vstart = 0; \
+    /* mask destination register are always tail-agnostic */ \
+    /* set tail elements to 1s */ \
+    if (vta_all_1s) { \
+        for (; i < total_elems; i++) { \
+            vext_set_elem_mask(vd, i, 1); \
+        } \
+    } \
 }

 GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t, H1, DO_MSEQ)
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
 { \
     uint32_t vm = vext_vm(desc); \
     uint32_t vl = env->vl; \
+    uint32_t total_elems = env_archcpu(env)->cfg.vlen; \
+    uint32_t vta_all_1s = vext_vta_all_1s(desc); \
     uint32_t i; \
 \
     for (i = env->vstart; i < vl; i++) { \
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
                            DO_OP(s2, (ETYPE)(target_long)s1)); \
     } \
     env->vstart = 0; \
+    /* mask destination register are always tail-agnostic */ \
+    /* set tail elements to 1s */ \
+    if (vta_all_1s) { \
+        for (; i < total_elems; i++) { \
+            vext_set_elem_mask(vd, i, 1); \
+        } \
+    } \
 }

 GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t, H1, DO_MSEQ)
--
2.36.1

From: eopXD <yueh.ting.chen@gmail.com>

Signed-off-by: eop Chen <eop.chen@sifive.com>
Reviewed-by: Frank Chang <frank.chang@sifive.com>
Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn>
Acked-by: Alistair Francis <alistair.francis@wdc.com>
Message-Id: <165449614532.19704.7000832880482980398-10@git.sr.ht>
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
---
 target/riscv/vector_helper.c            | 20 ++++++++++++++++++++
 target/riscv/insn_trans/trans_rvv.c.inc | 12 ++++++++----
 2 files changed, 28 insertions(+), 4 deletions(-)

diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env, \
                   uint32_t desc) \
 { \
     uint32_t vl = env->vl; \
+    uint32_t esz = sizeof(ETYPE); \
+    uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
+    uint32_t vta = vext_vta(desc); \
     uint32_t i; \
 \
     for (i = env->vstart; i < vl; i++) { \
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env, \
         *((ETYPE *)vd + H(i)) = s1; \
     } \
     env->vstart = 0; \
+    /* set tail elements to 1s */ \
+    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
 }

 GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t, H1)
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env, \
                   uint32_t desc) \
 { \
     uint32_t vl = env->vl; \
+    uint32_t esz = sizeof(ETYPE); \
+    uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
+    uint32_t vta = vext_vta(desc); \
     uint32_t i; \
 \
     for (i = env->vstart; i < vl; i++) { \
         *((ETYPE *)vd + H(i)) = (ETYPE)s1; \
     } \
     env->vstart = 0; \
+    /* set tail elements to 1s */ \
+    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
 }

 GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t, H1)
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
                   CPURISCVState *env, uint32_t desc) \
 { \
     uint32_t vl = env->vl; \
+    uint32_t esz = sizeof(ETYPE); \
+    uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
+    uint32_t vta = vext_vta(desc); \
     uint32_t i; \
 \
     for (i = env->vstart; i < vl; i++) { \
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
         *((ETYPE *)vd + H(i)) = *(vt + H(i)); \
     } \
     env->vstart = 0; \
+    /* set tail elements to 1s */ \
+    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
 }

 GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t, H1)
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
                   void *vs2, CPURISCVState *env, uint32_t desc) \
 { \
     uint32_t vl = env->vl; \
+    uint32_t esz = sizeof(ETYPE); \
+    uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
+    uint32_t vta = vext_vta(desc); \
     uint32_t i; \
 \
     for (i = env->vstart; i < vl; i++) { \
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
         *((ETYPE *)vd + H(i)) = d; \
     } \
     env->vstart = 0; \
+    /* set tail elements to 1s */ \
+    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
 }

 GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t, H1)
diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -XXX,XX +XXX,XX @@ static bool trans_vmv_v_v(DisasContext *s, arg_vmv_v_v *a)
         vext_check_isa_ill(s) &&
         /* vmv.v.v has rs2 = 0 and vm = 1 */
         vext_check_sss(s, a->rd, a->rs1, 0, 1)) {
-        if (s->vl_eq_vlmax) {
+        if (s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) {
             tcg_gen_gvec_mov(s->sew, vreg_ofs(s, a->rd),
                              vreg_ofs(s, a->rs1),
                              MAXSZ(s), MAXSZ(s));
         } else {
             uint32_t data = FIELD_DP32(0, VDATA, LMUL, s->lmul);
+            data = FIELD_DP32(data, VDATA, VTA, s->vta);
             static gen_helper_gvec_2_ptr * const fns[4] = {
                 gen_helper_vmv_v_v_b, gen_helper_vmv_v_v_h,
                 gen_helper_vmv_v_v_w, gen_helper_vmv_v_v_d,
@@ -XXX,XX +XXX,XX @@ static bool trans_vmv_v_x(DisasContext *s, arg_vmv_v_x *a)

         s1 = get_gpr(s, a->rs1, EXT_SIGN);

-        if (s->vl_eq_vlmax) {
+        if (s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) {
             tcg_gen_gvec_dup_tl(s->sew, vreg_ofs(s, a->rd),
                                 MAXSZ(s), MAXSZ(s), s1);
         } else {
@@ -XXX,XX +XXX,XX @@ static bool trans_vmv_v_x(DisasContext *s, arg_vmv_v_x *a)
             TCGv_i64 s1_i64 = tcg_temp_new_i64();
             TCGv_ptr dest = tcg_temp_new_ptr();
             uint32_t data = FIELD_DP32(0, VDATA, LMUL, s->lmul);
+            data = FIELD_DP32(data, VDATA, VTA, s->vta);
             static gen_helper_vmv_vx * const fns[4] = {
                 gen_helper_vmv_v_x_b, gen_helper_vmv_v_x_h,
                 gen_helper_vmv_v_x_w, gen_helper_vmv_v_x_d,
@@ -XXX,XX +XXX,XX @@ static bool trans_vmv_v_i(DisasContext *s, arg_vmv_v_i *a)
         /* vmv.v.i has rs2 = 0 and vm = 1 */
         vext_check_ss(s, a->rd, 0, 1)) {
         int64_t simm = sextract64(a->rs1, 0, 5);
-        if (s->vl_eq_vlmax) {
+        if (s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) {
             tcg_gen_gvec_dup_imm(s->sew, vreg_ofs(s, a->rd),
                                  MAXSZ(s), MAXSZ(s), simm);
             mark_vs_dirty(s);
@@ -XXX,XX +XXX,XX @@ static bool trans_vmv_v_i(DisasContext *s, arg_vmv_v_i *a)
             TCGv_i64 s1;
             TCGv_ptr dest;
             uint32_t data = FIELD_DP32(0, VDATA, LMUL, s->lmul);
+            data = FIELD_DP32(data, VDATA, VTA, s->vta);
             static gen_helper_vmv_vx * const fns[4] = {
                 gen_helper_vmv_v_x_b, gen_helper_vmv_v_x_h,
                 gen_helper_vmv_v_x_w, gen_helper_vmv_v_x_d,
@@ -XXX,XX +XXX,XX @@ static bool trans_vfmv_v_f(DisasContext *s, arg_vfmv_v_f *a)

     TCGv_i64 t1;

-    if (s->vl_eq_vlmax) {
+    if (s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) {
         t1 = tcg_temp_new_i64();
         /* NaN-box f[rs1] */
         do_nanbox(s, t1, cpu_fpr[a->rs1]);
@@ -XXX,XX +XXX,XX @@ static bool trans_vfmv_v_f(DisasContext *s, arg_vfmv_v_f *a)
         TCGv_ptr dest;
         TCGv_i32 desc;
         uint32_t data = FIELD_DP32(0, VDATA, LMUL, s->lmul);
+        data = FIELD_DP32(data, VDATA, VTA, s->vta);
         static gen_helper_vmv_vx * const fns[3] = {
             gen_helper_vmv_v_x_h,
             gen_helper_vmv_v_x_w,
--
2.36.1

From: eopXD <yueh.ting.chen@gmail.com>

Signed-off-by: eop Chen <eop.chen@sifive.com>
Reviewed-by: Frank Chang <frank.chang@sifive.com>
Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn>
Acked-by: Alistair Francis <alistair.francis@wdc.com>
Message-Id: <165449614532.19704.7000832880482980398-11@git.sr.ht>
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
---
 target/riscv/vector_helper.c | 220 ++++++++++++++++++-----------------
 1 file changed, 114 insertions(+), 106 deletions(-)

diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -XXX,XX +XXX,XX @@ static inline void
 vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2,
              CPURISCVState *env,
              uint32_t desc,
-             opivv2_rm_fn *fn)
+             opivv2_rm_fn *fn, uint32_t esz)
 {
     uint32_t vm = vext_vm(desc);
     uint32_t vl = env->vl;
+    uint32_t total_elems = vext_get_total_elems(env, desc, esz);
+    uint32_t vta = vext_vta(desc);

     switch (env->vxrm) {
     case 0: /* rnu */
@@ -XXX,XX +XXX,XX @@ vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2,
                  env, vl, vm, 3, fn);
         break;
     }
+    /* set tail elements to 1s */
+    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
 }

 /* generate helpers for fixed point instructions with OPIVV format */
-#define GEN_VEXT_VV_RM(NAME) \
+#define GEN_VEXT_VV_RM(NAME, ESZ) \
 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
                   CPURISCVState *env, uint32_t desc) \
 { \
     vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, \
-                 do_##NAME); \
+                 do_##NAME, ESZ); \
 }

 static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
@@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8)
 RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16)
 RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32)
 RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64)
-GEN_VEXT_VV_RM(vsaddu_vv_b)
-GEN_VEXT_VV_RM(vsaddu_vv_h)
-GEN_VEXT_VV_RM(vsaddu_vv_w)
-GEN_VEXT_VV_RM(vsaddu_vv_d)
+GEN_VEXT_VV_RM(vsaddu_vv_b, 1)
+GEN_VEXT_VV_RM(vsaddu_vv_h, 2)
+GEN_VEXT_VV_RM(vsaddu_vv_w, 4)
+GEN_VEXT_VV_RM(vsaddu_vv_d, 8)

 typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i,
                           CPURISCVState *env, int vxrm);
@@ -XXX,XX +XXX,XX @@ static inline void
 vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2,
              CPURISCVState *env,
              uint32_t desc,
-             opivx2_rm_fn *fn)
+             opivx2_rm_fn *fn, uint32_t esz)
 {
     uint32_t vm = vext_vm(desc);
     uint32_t vl = env->vl;
+    uint32_t total_elems = vext_get_total_elems(env, desc, esz);
+    uint32_t vta = vext_vta(desc);

     switch (env->vxrm) {
     case 0: /* rnu */
@@ -XXX,XX +XXX,XX @@ vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2,
                  env, vl, vm, 3, fn);
         break;
     }
+    /* set tail elements to 1s */
+    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
 }

 /* generate helpers for fixed point instructions with OPIVX format */
-#define GEN_VEXT_VX_RM(NAME) \
+#define GEN_VEXT_VX_RM(NAME, ESZ) \
 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
                   void *vs2, CPURISCVState *env, uint32_t desc) \
 { \
     vext_vx_rm_2(vd, v0, s1, vs2, env, desc, \
-                 do_##NAME); \
+                 do_##NAME, ESZ); \
 }

 RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8)
 RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16)
 RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32)
 RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64)
-GEN_VEXT_VX_RM(vsaddu_vx_b)
-GEN_VEXT_VX_RM(vsaddu_vx_h)
-GEN_VEXT_VX_RM(vsaddu_vx_w)
-GEN_VEXT_VX_RM(vsaddu_vx_d)
+GEN_VEXT_VX_RM(vsaddu_vx_b, 1)
+GEN_VEXT_VX_RM(vsaddu_vx_h, 2)
+GEN_VEXT_VX_RM(vsaddu_vx_w, 4)
+GEN_VEXT_VX_RM(vsaddu_vx_d, 8)

 static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
 {
@@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8)
 RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16)
 RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32)
 RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64)
-GEN_VEXT_VV_RM(vsadd_vv_b)
-GEN_VEXT_VV_RM(vsadd_vv_h)
-GEN_VEXT_VV_RM(vsadd_vv_w)
-GEN_VEXT_VV_RM(vsadd_vv_d)
+GEN_VEXT_VV_RM(vsadd_vv_b, 1)
+GEN_VEXT_VV_RM(vsadd_vv_h, 2)
+GEN_VEXT_VV_RM(vsadd_vv_w, 4)
+GEN_VEXT_VV_RM(vsadd_vv_d, 8)

 RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8)
 RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16)
 RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32)
 RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64)
-GEN_VEXT_VX_RM(vsadd_vx_b)
-GEN_VEXT_VX_RM(vsadd_vx_h)
-GEN_VEXT_VX_RM(vsadd_vx_w)
-GEN_VEXT_VX_RM(vsadd_vx_d)
+GEN_VEXT_VX_RM(vsadd_vx_b, 1)
+GEN_VEXT_VX_RM(vsadd_vx_h, 2)
+GEN_VEXT_VX_RM(vsadd_vx_w, 4)
+GEN_VEXT_VX_RM(vsadd_vx_d, 8)

 static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
 {
@@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8)
 RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16)
 RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32)
 RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64)
-GEN_VEXT_VV_RM(vssubu_vv_b)
-GEN_VEXT_VV_RM(vssubu_vv_h)
-GEN_VEXT_VV_RM(vssubu_vv_w)
-GEN_VEXT_VV_RM(vssubu_vv_d)
+GEN_VEXT_VV_RM(vssubu_vv_b, 1)
+GEN_VEXT_VV_RM(vssubu_vv_h, 2)
+GEN_VEXT_VV_RM(vssubu_vv_w, 4)
+GEN_VEXT_VV_RM(vssubu_vv_d, 8)

 RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8)
 RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16)
 RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32)
 RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64)
-GEN_VEXT_VX_RM(vssubu_vx_b)
-GEN_VEXT_VX_RM(vssubu_vx_h)
-GEN_VEXT_VX_RM(vssubu_vx_w)
-GEN_VEXT_VX_RM(vssubu_vx_d)
+GEN_VEXT_VX_RM(vssubu_vx_b, 1)
+GEN_VEXT_VX_RM(vssubu_vx_h, 2)
+GEN_VEXT_VX_RM(vssubu_vx_w, 4)
+GEN_VEXT_VX_RM(vssubu_vx_d, 8)

 static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
 {
@@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8)
 RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16)
 RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32)
 RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64)
-GEN_VEXT_VV_RM(vssub_vv_b)
-GEN_VEXT_VV_RM(vssub_vv_h)
-GEN_VEXT_VV_RM(vssub_vv_w)
-GEN_VEXT_VV_RM(vssub_vv_d)
+GEN_VEXT_VV_RM(vssub_vv_b, 1)
+GEN_VEXT_VV_RM(vssub_vv_h, 2)
+GEN_VEXT_VV_RM(vssub_vv_w, 4)
+GEN_VEXT_VV_RM(vssub_vv_d, 8)

 RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8)
 RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16)
 RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32)
 RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64)
-GEN_VEXT_VX_RM(vssub_vx_b)
-GEN_VEXT_VX_RM(vssub_vx_h)
-GEN_VEXT_VX_RM(vssub_vx_w)
-GEN_VEXT_VX_RM(vssub_vx_d)
+GEN_VEXT_VX_RM(vssub_vx_b, 1)
+GEN_VEXT_VX_RM(vssub_vx_h, 2)
+GEN_VEXT_VX_RM(vssub_vx_w, 4)
+GEN_VEXT_VX_RM(vssub_vx_d, 8)

 /* Vector Single-Width Averaging Add and Subtract */
 static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift)
@@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32)
 RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32)
 RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32)
 RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64)
-GEN_VEXT_VV_RM(vaadd_vv_b)
-GEN_VEXT_VV_RM(vaadd_vv_h)
-GEN_VEXT_VV_RM(vaadd_vv_w)
-GEN_VEXT_VV_RM(vaadd_vv_d)
+GEN_VEXT_VV_RM(vaadd_vv_b, 1)
+GEN_VEXT_VV_RM(vaadd_vv_h, 2)
+GEN_VEXT_VV_RM(vaadd_vv_w, 4)
+GEN_VEXT_VV_RM(vaadd_vv_d, 8)

 RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32)
 RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32)
 RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32)
 RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64)
-GEN_VEXT_VX_RM(vaadd_vx_b)
-GEN_VEXT_VX_RM(vaadd_vx_h)
-GEN_VEXT_VX_RM(vaadd_vx_w)
-GEN_VEXT_VX_RM(vaadd_vx_d)
+GEN_VEXT_VX_RM(vaadd_vx_b, 1)
+GEN_VEXT_VX_RM(vaadd_vx_h, 2)
+GEN_VEXT_VX_RM(vaadd_vx_w, 4)
+GEN_VEXT_VX_RM(vaadd_vx_d, 8)

 static inline uint32_t aaddu32(CPURISCVState *env, int vxrm,
                                uint32_t a, uint32_t b)
@@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2_RM, vaaddu_vv_b, OP_UUU_B, H1, H1, H1, aaddu32)
 RVVCALL(OPIVV2_RM, vaaddu_vv_h, OP_UUU_H, H2, H2, H2, aaddu32)
 RVVCALL(OPIVV2_RM, vaaddu_vv_w, OP_UUU_W, H4, H4, H4, aaddu32)
 RVVCALL(OPIVV2_RM, vaaddu_vv_d, OP_UUU_D, H8, H8, H8, aaddu64)
-GEN_VEXT_VV_RM(vaaddu_vv_b)
-GEN_VEXT_VV_RM(vaaddu_vv_h)
-GEN_VEXT_VV_RM(vaaddu_vv_w)
-GEN_VEXT_VV_RM(vaaddu_vv_d)
+GEN_VEXT_VV_RM(vaaddu_vv_b, 1)
+GEN_VEXT_VV_RM(vaaddu_vv_h, 2)
+GEN_VEXT_VV_RM(vaaddu_vv_w, 4)
+GEN_VEXT_VV_RM(vaaddu_vv_d, 8)

 RVVCALL(OPIVX2_RM, vaaddu_vx_b, OP_UUU_B, H1, H1, aaddu32)
 RVVCALL(OPIVX2_RM, vaaddu_vx_h, OP_UUU_H, H2, H2, aaddu32)
 RVVCALL(OPIVX2_RM, vaaddu_vx_w, OP_UUU_W, H4, H4, aaddu32)
 RVVCALL(OPIVX2_RM, vaaddu_vx_d, OP_UUU_D, H8, H8, aaddu64)
-GEN_VEXT_VX_RM(vaaddu_vx_b)
-GEN_VEXT_VX_RM(vaaddu_vx_h)
-GEN_VEXT_VX_RM(vaaddu_vx_w)
-GEN_VEXT_VX_RM(vaaddu_vx_d)
+GEN_VEXT_VX_RM(vaaddu_vx_b, 1)
+GEN_VEXT_VX_RM(vaaddu_vx_h, 2)
+GEN_VEXT_VX_RM(vaaddu_vx_w, 4)
+GEN_VEXT_VX_RM(vaaddu_vx_d, 8)

 static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
 {
@@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32)
 RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32)
 RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32)
 RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64)
-GEN_VEXT_VV_RM(vasub_vv_b)
-GEN_VEXT_VV_RM(vasub_vv_h)
-GEN_VEXT_VV_RM(vasub_vv_w)
-GEN_VEXT_VV_RM(vasub_vv_d)
+GEN_VEXT_VV_RM(vasub_vv_b, 1)
+GEN_VEXT_VV_RM(vasub_vv_h, 2)
+GEN_VEXT_VV_RM(vasub_vv_w, 4)
+GEN_VEXT_VV_RM(vasub_vv_d, 8)

 RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32)
 RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32)
 RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32)
 RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64)
-GEN_VEXT_VX_RM(vasub_vx_b)
-GEN_VEXT_VX_RM(vasub_vx_h)
-GEN_VEXT_VX_RM(vasub_vx_w)
-GEN_VEXT_VX_RM(vasub_vx_d)
+GEN_VEXT_VX_RM(vasub_vx_b, 1)
+GEN_VEXT_VX_RM(vasub_vx_h, 2)
+GEN_VEXT_VX_RM(vasub_vx_w, 4)
+GEN_VEXT_VX_RM(vasub_vx_d, 8)

 static inline uint32_t asubu32(CPURISCVState *env, int vxrm,
                                uint32_t a, uint32_t b)
@@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2_RM, vasubu_vv_b, OP_UUU_B, H1, H1, H1, asubu32)
 RVVCALL(OPIVV2_RM, vasubu_vv_h, OP_UUU_H, H2, H2, H2, asubu32)
 RVVCALL(OPIVV2_RM, vasubu_vv_w, OP_UUU_W, H4, H4, H4, asubu32)
 RVVCALL(OPIVV2_RM, vasubu_vv_d, OP_UUU_D, H8, H8, H8, asubu64)
-GEN_VEXT_VV_RM(vasubu_vv_b)
-GEN_VEXT_VV_RM(vasubu_vv_h)
-GEN_VEXT_VV_RM(vasubu_vv_w)
-GEN_VEXT_VV_RM(vasubu_vv_d)
+GEN_VEXT_VV_RM(vasubu_vv_b, 1)
+GEN_VEXT_VV_RM(vasubu_vv_h, 2)
+GEN_VEXT_VV_RM(vasubu_vv_w, 4)
+GEN_VEXT_VV_RM(vasubu_vv_d, 8)

 RVVCALL(OPIVX2_RM, vasubu_vx_b, OP_UUU_B, H1, H1, asubu32)
 RVVCALL(OPIVX2_RM, vasubu_vx_h, OP_UUU_H, H2, H2, asubu32)
 RVVCALL(OPIVX2_RM, vasubu_vx_w, OP_UUU_W, H4, H4, asubu32)
 RVVCALL(OPIVX2_RM, vasubu_vx_d, OP_UUU_D, H8, H8, asubu64)
-GEN_VEXT_VX_RM(vasubu_vx_b)
-GEN_VEXT_VX_RM(vasubu_vx_h)
-GEN_VEXT_VX_RM(vasubu_vx_w)
-GEN_VEXT_VX_RM(vasubu_vx_d)
+GEN_VEXT_VX_RM(vasubu_vx_b, 1)
+GEN_VEXT_VX_RM(vasubu_vx_h, 2)
+GEN_VEXT_VX_RM(vasubu_vx_w, 4)
+GEN_VEXT_VX_RM(vasubu_vx_d, 8)

 /* Vector Single-Width Fractional Multiply with Rounding and Saturation */
 static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
@@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8)
 RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16)
 RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32)
 RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64)
-GEN_VEXT_VV_RM(vsmul_vv_b)
-GEN_VEXT_VV_RM(vsmul_vv_h)
-GEN_VEXT_VV_RM(vsmul_vv_w)
-GEN_VEXT_VV_RM(vsmul_vv_d)
+GEN_VEXT_VV_RM(vsmul_vv_b, 1)
+GEN_VEXT_VV_RM(vsmul_vv_h, 2)
+GEN_VEXT_VV_RM(vsmul_vv_w, 4)
+GEN_VEXT_VV_RM(vsmul_vv_d, 8)

 RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8)
 RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16)
 RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32)
 RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64)
-GEN_VEXT_VX_RM(vsmul_vx_b)
-GEN_VEXT_VX_RM(vsmul_vx_h)
-GEN_VEXT_VX_RM(vsmul_vx_w)
-GEN_VEXT_VX_RM(vsmul_vx_d)
+GEN_VEXT_VX_RM(vsmul_vx_b, 1)
+GEN_VEXT_VX_RM(vsmul_vx_h, 2)
+GEN_VEXT_VX_RM(vsmul_vx_w, 4)
+GEN_VEXT_VX_RM(vsmul_vx_d, 8)

 /* Vector Single-Width Scaling Shift Instructions */
 static inline uint8_t
@@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8)
 RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16)
 RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32)
 RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64)
-GEN_VEXT_VV_RM(vssrl_vv_b)
-GEN_VEXT_VV_RM(vssrl_vv_h)
-GEN_VEXT_VV_RM(vssrl_vv_w)
-GEN_VEXT_VV_RM(vssrl_vv_d)
+GEN_VEXT_VV_RM(vssrl_vv_b, 1)
+GEN_VEXT_VV_RM(vssrl_vv_h, 2)
+GEN_VEXT_VV_RM(vssrl_vv_w, 4)
+GEN_VEXT_VV_RM(vssrl_vv_d, 8)

 RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8)
 RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16)
 RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32)
 RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64)
-GEN_VEXT_VX_RM(vssrl_vx_b)
-GEN_VEXT_VX_RM(vssrl_vx_h)
-GEN_VEXT_VX_RM(vssrl_vx_w)
-GEN_VEXT_VX_RM(vssrl_vx_d)
+GEN_VEXT_VX_RM(vssrl_vx_b, 1)
+GEN_VEXT_VX_RM(vssrl_vx_h, 2)
+GEN_VEXT_VX_RM(vssrl_vx_w, 4)
+GEN_VEXT_VX_RM(vssrl_vx_d, 8)

 static inline int8_t
 vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
@@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8)
 RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16)
 RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32)
 RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64)
-GEN_VEXT_VV_RM(vssra_vv_b)
-GEN_VEXT_VV_RM(vssra_vv_h)
-GEN_VEXT_VV_RM(vssra_vv_w)
-GEN_VEXT_VV_RM(vssra_vv_d)
+GEN_VEXT_VV_RM(vssra_vv_b, 1)
+GEN_VEXT_VV_RM(vssra_vv_h, 2)
+GEN_VEXT_VV_RM(vssra_vv_w, 4)
+GEN_VEXT_VV_RM(vssra_vv_d, 8)

 RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8)
 RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16)
 RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32)
 RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64)
-GEN_VEXT_VX_RM(vssra_vx_b)
-GEN_VEXT_VX_RM(vssra_vx_h)
-GEN_VEXT_VX_RM(vssra_vx_w)
-GEN_VEXT_VX_RM(vssra_vx_d)
+GEN_VEXT_VX_RM(vssra_vx_b, 1)
+GEN_VEXT_VX_RM(vssra_vx_h, 2)
+GEN_VEXT_VX_RM(vssra_vx_w, 4)
+GEN_VEXT_VX_RM(vssra_vx_d, 8)

 /* Vector Narrowing Fixed-Point Clip Instructions */
 static inline int8_t
@@ -XXX,XX +XXX,XX @@ vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b)
 RVVCALL(OPIVV2_RM, vnclip_wv_b, NOP_SSS_B, H1, H2, H1, vnclip8)
 RVVCALL(OPIVV2_RM, vnclip_wv_h, NOP_SSS_H, H2, H4, H2, vnclip16)
 RVVCALL(OPIVV2_RM, vnclip_wv_w, NOP_SSS_W, H4, H8, H4, vnclip32)
-GEN_VEXT_VV_RM(vnclip_wv_b)
-GEN_VEXT_VV_RM(vnclip_wv_h)
-GEN_VEXT_VV_RM(vnclip_wv_w)
+GEN_VEXT_VV_RM(vnclip_wv_b, 1)
+GEN_VEXT_VV_RM(vnclip_wv_h, 2)
+GEN_VEXT_VV_RM(vnclip_wv_w, 4)

 RVVCALL(OPIVX2_RM, vnclip_wx_b, NOP_SSS_B, H1, H2, vnclip8)
 RVVCALL(OPIVX2_RM, vnclip_wx_h, NOP_SSS_H, H2, H4, vnclip16)
 RVVCALL(OPIVX2_RM, vnclip_wx_w, NOP_SSS_W, H4, H8, vnclip32)
-GEN_VEXT_VX_RM(vnclip_wx_b)
-GEN_VEXT_VX_RM(vnclip_wx_h)
-GEN_VEXT_VX_RM(vnclip_wx_w)
+GEN_VEXT_VX_RM(vnclip_wx_b, 1)
+GEN_VEXT_VX_RM(vnclip_wx_h, 2)
+GEN_VEXT_VX_RM(vnclip_wx_w, 4)

 static inline uint8_t
 vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b)
@@ -XXX,XX +XXX,XX @@ vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b)
 RVVCALL(OPIVV2_RM, vnclipu_wv_b, NOP_UUU_B, H1, H2, H1, vnclipu8)
 RVVCALL(OPIVV2_RM, vnclipu_wv_h, NOP_UUU_H, H2, H4, H2, vnclipu16)
 RVVCALL(OPIVV2_RM, vnclipu_wv_w, NOP_UUU_W, H4, H8, H4, vnclipu32)
-GEN_VEXT_VV_RM(vnclipu_wv_b)
-GEN_VEXT_VV_RM(vnclipu_wv_h)
-GEN_VEXT_VV_RM(vnclipu_wv_w)
+GEN_VEXT_VV_RM(vnclipu_wv_b, 1)
+GEN_VEXT_VV_RM(vnclipu_wv_h, 2)
+GEN_VEXT_VV_RM(vnclipu_wv_w, 4)

 RVVCALL(OPIVX2_RM, vnclipu_wx_b, NOP_UUU_B, H1, H2, vnclipu8)
 RVVCALL(OPIVX2_RM, vnclipu_wx_h, NOP_UUU_H, H2, H4, vnclipu16)
 RVVCALL(OPIVX2_RM, vnclipu_wx_w, NOP_UUU_W, H4, H8, vnclipu32)
-GEN_VEXT_VX_RM(vnclipu_wx_b)
-GEN_VEXT_VX_RM(vnclipu_wx_h)
-GEN_VEXT_VX_RM(vnclipu_wx_w)
+GEN_VEXT_VX_RM(vnclipu_wx_b, 1)
+GEN_VEXT_VX_RM(vnclipu_wx_h, 2)
+GEN_VEXT_VX_RM(vnclipu_wx_w, 4)

 /*
  *** Vector Float Point Arithmetic Instructions
--
2.36.1

From: eopXD <yueh.ting.chen@gmail.com>

Compare instructions write to mask registers, and so they always
operate under a tail-agnostic policy.
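
For the value-producing helpers, the regenerated code now takes the
element size so it can locate the tail. Hand-expanding GEN_VEXT_VV_ENV
for vfadd_vv_h gives roughly the shape below (a sketch only, assuming
the vext_get_total_elems()/vext_set_elems_1s() helpers introduced
earlier in this series):

    void helper_vfadd_vv_h(void *vd, void *v0, void *vs1, void *vs2,
                           CPURISCVState *env, uint32_t desc)
    {
        uint32_t vm = vext_vm(desc);
        uint32_t vl = env->vl;
        uint32_t total_elems = vext_get_total_elems(env, desc, 2);
        uint32_t vta = vext_vta(desc);
        uint32_t i;

        for (i = env->vstart; i < vl; i++) {
            if (!vm && !vext_elem_mask(v0, i)) {
                continue;                  /* masked-off element */
            }
            do_vfadd_vv_h(vd, vs1, vs2, i, env);
        }
        env->vstart = 0;
        /* set tail elements to 1s; ESZ is 2 bytes for the _h variant */
        vext_set_elems_1s(vd, vta, vl * 2, total_elems * 2);
    }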

Signed-off-by: eop Chen <eop.chen@sifive.com>
Reviewed-by: Frank Chang <frank.chang@sifive.com>
Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn>
Acked-by: Alistair Francis <alistair.francis@wdc.com>
Message-Id: <165449614532.19704.7000832880482980398-12@git.sr.ht>
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
---
 target/riscv/vector_helper.c            | 440 +++++++++++++-----------
 target/riscv/insn_trans/trans_rvv.c.inc |  17 +
 2 files changed, 261 insertions(+), 196 deletions(-)

diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -XXX,XX +XXX,XX @@ static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \
     *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status); \
 }

-#define GEN_VEXT_VV_ENV(NAME) \
+#define GEN_VEXT_VV_ENV(NAME, ESZ) \
 void HELPER(NAME)(void *vd, void *v0, void *vs1, \
                   void *vs2, CPURISCVState *env, \
                   uint32_t desc) \
 { \
     uint32_t vm = vext_vm(desc); \
     uint32_t vl = env->vl; \
+    uint32_t total_elems = \
+        vext_get_total_elems(env, desc, ESZ); \
+    uint32_t vta = vext_vta(desc); \
     uint32_t i; \
 \
     for (i = env->vstart; i < vl; i++) { \
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, \
         do_##NAME(vd, vs1, vs2, i, env); \
     } \
     env->vstart = 0; \
+    /* set tail elements to 1s */ \
+    vext_set_elems_1s(vd, vta, vl * ESZ, \
+                      total_elems * ESZ); \
 }

 RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add)
 RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add)
 RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add)
-GEN_VEXT_VV_ENV(vfadd_vv_h)
-GEN_VEXT_VV_ENV(vfadd_vv_w)
-GEN_VEXT_VV_ENV(vfadd_vv_d)
+GEN_VEXT_VV_ENV(vfadd_vv_h, 2)
+GEN_VEXT_VV_ENV(vfadd_vv_w, 4)
+GEN_VEXT_VV_ENV(vfadd_vv_d, 8)

 #define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
@@ -XXX,XX +XXX,XX @@ static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
     *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, &env->fp_status);\
 }

-#define GEN_VEXT_VF(NAME) \
+#define GEN_VEXT_VF(NAME, ESZ) \
 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \
                   void *vs2, CPURISCVState *env, \
                   uint32_t desc) \
 { \
     uint32_t vm = vext_vm(desc); \
     uint32_t vl = env->vl; \
+    uint32_t total_elems = \
+        vext_get_total_elems(env, desc, ESZ); \
+    uint32_t vta = vext_vta(desc); \
     uint32_t i; \
 \
     for (i = env->vstart; i < vl; i++) { \
@@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \
         do_##NAME(vd, s1, vs2, i, env); \
     } \
     env->vstart = 0; \
+    /* set tail elements to 1s */ \
+    vext_set_elems_1s(vd, vta, vl * ESZ, \
+                      total_elems * ESZ); \
 }

 RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add)
 RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add)
 RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add)
-GEN_VEXT_VF(vfadd_vf_h)
-GEN_VEXT_VF(vfadd_vf_w)
-GEN_VEXT_VF(vfadd_vf_d)
+GEN_VEXT_VF(vfadd_vf_h, 2)
+GEN_VEXT_VF(vfadd_vf_w, 4)
+GEN_VEXT_VF(vfadd_vf_d, 8)

 RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub)
 RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub)
 RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub)
-GEN_VEXT_VV_ENV(vfsub_vv_h)
-GEN_VEXT_VV_ENV(vfsub_vv_w)
-GEN_VEXT_VV_ENV(vfsub_vv_d)
+GEN_VEXT_VV_ENV(vfsub_vv_h, 2)
+GEN_VEXT_VV_ENV(vfsub_vv_w, 4)
+GEN_VEXT_VV_ENV(vfsub_vv_d, 8)
 RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub)
 RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub)
 RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub)
-GEN_VEXT_VF(vfsub_vf_h)
-GEN_VEXT_VF(vfsub_vf_w)
-GEN_VEXT_VF(vfsub_vf_d)
+GEN_VEXT_VF(vfsub_vf_h, 2)
+GEN_VEXT_VF(vfsub_vf_w, 4)
+GEN_VEXT_VF(vfsub_vf_d, 8)

 static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s)
 {
@@ -XXX,XX +XXX,XX @@ static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s)
 RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub)
 RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub)
 RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub)
-GEN_VEXT_VF(vfrsub_vf_h)
-GEN_VEXT_VF(vfrsub_vf_w)
-GEN_VEXT_VF(vfrsub_vf_d)
+GEN_VEXT_VF(vfrsub_vf_h, 2)
+GEN_VEXT_VF(vfrsub_vf_w, 4)
+GEN_VEXT_VF(vfrsub_vf_d, 8)

 /* Vector Widening Floating-Point Add/Subtract Instructions */
 static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s)
@@ -XXX,XX +XXX,XX @@ static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s)

 RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16)
 RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32)
-GEN_VEXT_VV_ENV(vfwadd_vv_h)
-GEN_VEXT_VV_ENV(vfwadd_vv_w)
+GEN_VEXT_VV_ENV(vfwadd_vv_h, 4)
+GEN_VEXT_VV_ENV(vfwadd_vv_w, 8)
 RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16)
 RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32)
-GEN_VEXT_VF(vfwadd_vf_h)
-GEN_VEXT_VF(vfwadd_vf_w)
+GEN_VEXT_VF(vfwadd_vf_h, 4)
+GEN_VEXT_VF(vfwadd_vf_w, 8)

 static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s)
 {
@@ -XXX,XX +XXX,XX @@ static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s)

 RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16)
 RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32)
-GEN_VEXT_VV_ENV(vfwsub_vv_h)
-GEN_VEXT_VV_ENV(vfwsub_vv_w)
+GEN_VEXT_VV_ENV(vfwsub_vv_h, 4)
+GEN_VEXT_VV_ENV(vfwsub_vv_w, 8)
 RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16)
 RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32)
-GEN_VEXT_VF(vfwsub_vf_h)
-GEN_VEXT_VF(vfwsub_vf_w)
+GEN_VEXT_VF(vfwsub_vf_h, 4)
+GEN_VEXT_VF(vfwsub_vf_w, 8)

 static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s)
 {
@@ -XXX,XX +XXX,XX @@ static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s)

 RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16)
 RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32)
-GEN_VEXT_VV_ENV(vfwadd_wv_h)
-GEN_VEXT_VV_ENV(vfwadd_wv_w)
+GEN_VEXT_VV_ENV(vfwadd_wv_h, 4)
+GEN_VEXT_VV_ENV(vfwadd_wv_w, 8)
 RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16)
 RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32)
-GEN_VEXT_VF(vfwadd_wf_h)
-GEN_VEXT_VF(vfwadd_wf_w)
+GEN_VEXT_VF(vfwadd_wf_h, 4)
+GEN_VEXT_VF(vfwadd_wf_w, 8)

 static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s)
 {
@@ -XXX,XX +XXX,XX @@ static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s)

 RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16)
 RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32)
-GEN_VEXT_VV_ENV(vfwsub_wv_h)
-GEN_VEXT_VV_ENV(vfwsub_wv_w)
+GEN_VEXT_VV_ENV(vfwsub_wv_h, 4)
+GEN_VEXT_VV_ENV(vfwsub_wv_w, 8)
 RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16)
 RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32)
-GEN_VEXT_VF(vfwsub_wf_h)
-GEN_VEXT_VF(vfwsub_wf_w)
+GEN_VEXT_VF(vfwsub_wf_h, 4)
+GEN_VEXT_VF(vfwsub_wf_w, 8)

 /* Vector Single-Width Floating-Point Multiply/Divide Instructions */
 RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul)
 RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul)
 RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul)
-GEN_VEXT_VV_ENV(vfmul_vv_h)
-GEN_VEXT_VV_ENV(vfmul_vv_w)
-GEN_VEXT_VV_ENV(vfmul_vv_d)
+GEN_VEXT_VV_ENV(vfmul_vv_h, 2)
+GEN_VEXT_VV_ENV(vfmul_vv_w, 4)
+GEN_VEXT_VV_ENV(vfmul_vv_d, 8)
 RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul)
 RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul)
 RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul)
-GEN_VEXT_VF(vfmul_vf_h)
-GEN_VEXT_VF(vfmul_vf_w)
-GEN_VEXT_VF(vfmul_vf_d)
+GEN_VEXT_VF(vfmul_vf_h, 2)
+GEN_VEXT_VF(vfmul_vf_w, 4)
+GEN_VEXT_VF(vfmul_vf_d, 8)

 RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div)
 RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div)
 RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div)
-GEN_VEXT_VV_ENV(vfdiv_vv_h)
-GEN_VEXT_VV_ENV(vfdiv_vv_w)
-GEN_VEXT_VV_ENV(vfdiv_vv_d)
+GEN_VEXT_VV_ENV(vfdiv_vv_h, 2)
+GEN_VEXT_VV_ENV(vfdiv_vv_w, 4)
+GEN_VEXT_VV_ENV(vfdiv_vv_d, 8)
 RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div)
 RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div)
 RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div)
-GEN_VEXT_VF(vfdiv_vf_h)
-GEN_VEXT_VF(vfdiv_vf_w)
-GEN_VEXT_VF(vfdiv_vf_d)
+GEN_VEXT_VF(vfdiv_vf_h, 2)
+GEN_VEXT_VF(vfdiv_vf_w, 4)
+GEN_VEXT_VF(vfdiv_vf_d, 8)

 static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s)
 {
@@ -XXX,XX +XXX,XX @@ static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s)
 RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv)
 RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv)
 RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv)
-GEN_VEXT_VF(vfrdiv_vf_h)
-GEN_VEXT_VF(vfrdiv_vf_w)
-GEN_VEXT_VF(vfrdiv_vf_d)
+GEN_VEXT_VF(vfrdiv_vf_h, 2)
+GEN_VEXT_VF(vfrdiv_vf_w, 4)
+GEN_VEXT_VF(vfrdiv_vf_d, 8)

 /* Vector Widening Floating-Point Multiply */
 static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s)
@@ -XXX,XX +XXX,XX @@ static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s)
 }
 RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16)
 RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32)
-GEN_VEXT_VV_ENV(vfwmul_vv_h)
-GEN_VEXT_VV_ENV(vfwmul_vv_w)
+GEN_VEXT_VV_ENV(vfwmul_vv_h, 4)
+GEN_VEXT_VV_ENV(vfwmul_vv_w, 8)
 RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16)
 RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32)
-GEN_VEXT_VF(vfwmul_vf_h)
-GEN_VEXT_VF(vfwmul_vf_w)
+GEN_VEXT_VF(vfwmul_vf_h, 4)
+GEN_VEXT_VF(vfwmul_vf_w, 8)

 /* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */
 #define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
@@ -XXX,XX +XXX,XX @@ static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
 RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16)
 RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32)
 RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64)
-GEN_VEXT_VV_ENV(vfmacc_vv_h)
-GEN_VEXT_VV_ENV(vfmacc_vv_w)
-GEN_VEXT_VV_ENV(vfmacc_vv_d)
+GEN_VEXT_VV_ENV(vfmacc_vv_h, 2)
+GEN_VEXT_VV_ENV(vfmacc_vv_w, 4)
+GEN_VEXT_VV_ENV(vfmacc_vv_d, 8)

 #define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
@@ -XXX,XX +XXX,XX @@ static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
 RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16)
 RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32)
 RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64)
-GEN_VEXT_VF(vfmacc_vf_h)
-GEN_VEXT_VF(vfmacc_vf_w)
-GEN_VEXT_VF(vfmacc_vf_d)
+GEN_VEXT_VF(vfmacc_vf_h, 2)
+GEN_VEXT_VF(vfmacc_vf_w, 4)
+GEN_VEXT_VF(vfmacc_vf_d, 8)

 static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
 {
@@ -XXX,XX +XXX,XX @@ static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
 RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16)
 RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32)
 RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64)
-GEN_VEXT_VV_ENV(vfnmacc_vv_h)
-GEN_VEXT_VV_ENV(vfnmacc_vv_w)
-GEN_VEXT_VV_ENV(vfnmacc_vv_d)
+GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2)
+GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4)
+GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8)
 RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16)
 RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32)
 RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64)
-GEN_VEXT_VF(vfnmacc_vf_h)
-GEN_VEXT_VF(vfnmacc_vf_w)
-GEN_VEXT_VF(vfnmacc_vf_d)
+GEN_VEXT_VF(vfnmacc_vf_h, 2)
+GEN_VEXT_VF(vfnmacc_vf_w, 4)
+GEN_VEXT_VF(vfnmacc_vf_d, 8)

 static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
 {
@@ -XXX,XX +XXX,XX @@ static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
 RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16)
 RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32)
 RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64)
-GEN_VEXT_VV_ENV(vfmsac_vv_h)
-GEN_VEXT_VV_ENV(vfmsac_vv_w)
-GEN_VEXT_VV_ENV(vfmsac_vv_d)
+GEN_VEXT_VV_ENV(vfmsac_vv_h, 2)
+GEN_VEXT_VV_ENV(vfmsac_vv_w, 4)
+GEN_VEXT_VV_ENV(vfmsac_vv_d, 8)
 RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16)
 RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32)
 RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64)
-GEN_VEXT_VF(vfmsac_vf_h)
-GEN_VEXT_VF(vfmsac_vf_w)
-GEN_VEXT_VF(vfmsac_vf_d)
+GEN_VEXT_VF(vfmsac_vf_h, 2)
+GEN_VEXT_VF(vfmsac_vf_w, 4)
+GEN_VEXT_VF(vfmsac_vf_d, 8)

 static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
 {
@@ -XXX,XX +XXX,XX @@ static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
 RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16)
 RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32)
 RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64)
-GEN_VEXT_VV_ENV(vfnmsac_vv_h)
-GEN_VEXT_VV_ENV(vfnmsac_vv_w)
-GEN_VEXT_VV_ENV(vfnmsac_vv_d)
+GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2)
+GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4)
+GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8)
 RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16)
 RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32)
 RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64)
-GEN_VEXT_VF(vfnmsac_vf_h)
-GEN_VEXT_VF(vfnmsac_vf_w)
-GEN_VEXT_VF(vfnmsac_vf_d)
+GEN_VEXT_VF(vfnmsac_vf_h, 2)
+GEN_VEXT_VF(vfnmsac_vf_w, 4)
+GEN_VEXT_VF(vfnmsac_vf_d, 8)

 static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
 {
@@ -XXX,XX +XXX,XX @@ static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
 RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16)
 RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32)
 RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64)
-GEN_VEXT_VV_ENV(vfmadd_vv_h)
-GEN_VEXT_VV_ENV(vfmadd_vv_w)
-GEN_VEXT_VV_ENV(vfmadd_vv_d)
+GEN_VEXT_VV_ENV(vfmadd_vv_h, 2)
+GEN_VEXT_VV_ENV(vfmadd_vv_w, 4)
+GEN_VEXT_VV_ENV(vfmadd_vv_d, 8)
 RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16)
 RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32)
 RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64)
-GEN_VEXT_VF(vfmadd_vf_h)
-GEN_VEXT_VF(vfmadd_vf_w)
-GEN_VEXT_VF(vfmadd_vf_d)
+GEN_VEXT_VF(vfmadd_vf_h, 2)
+GEN_VEXT_VF(vfmadd_vf_w, 4)
+GEN_VEXT_VF(vfmadd_vf_d, 8)

 static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
 {
@@ -XXX,XX +XXX,XX @@ static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
 RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16)
 RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32)
 RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64)
-GEN_VEXT_VV_ENV(vfnmadd_vv_h)
-GEN_VEXT_VV_ENV(vfnmadd_vv_w)
-GEN_VEXT_VV_ENV(vfnmadd_vv_d)
+GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2)
+GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4)
+GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8)
 RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16)
 RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32)
 RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64)
-GEN_VEXT_VF(vfnmadd_vf_h)
-GEN_VEXT_VF(vfnmadd_vf_w)
-GEN_VEXT_VF(vfnmadd_vf_d)
+GEN_VEXT_VF(vfnmadd_vf_h, 2)
+GEN_VEXT_VF(vfnmadd_vf_w, 4)
+GEN_VEXT_VF(vfnmadd_vf_d, 8)

 static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
 {
@@ -XXX,XX +XXX,XX @@ static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
 RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16)
 RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32)
 RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64)
-GEN_VEXT_VV_ENV(vfmsub_vv_h)
-GEN_VEXT_VV_ENV(vfmsub_vv_w)
-GEN_VEXT_VV_ENV(vfmsub_vv_d)
+GEN_VEXT_VV_ENV(vfmsub_vv_h, 2)
+GEN_VEXT_VV_ENV(vfmsub_vv_w, 4)
+GEN_VEXT_VV_ENV(vfmsub_vv_d, 8)
 RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16)
 RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32)
 RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64)
-GEN_VEXT_VF(vfmsub_vf_h)
-GEN_VEXT_VF(vfmsub_vf_w)
-GEN_VEXT_VF(vfmsub_vf_d)
+GEN_VEXT_VF(vfmsub_vf_h, 2)
+GEN_VEXT_VF(vfmsub_vf_w, 4)
+GEN_VEXT_VF(vfmsub_vf_d, 8)

 static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
 {
@@ -XXX,XX +XXX,XX @@ static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
 RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16)
 RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32)
 RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64)
-GEN_VEXT_VV_ENV(vfnmsub_vv_h)
-GEN_VEXT_VV_ENV(vfnmsub_vv_w)
-GEN_VEXT_VV_ENV(vfnmsub_vv_d)
+GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2)
+GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4)
+GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8)
 RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16)
 RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32)
 RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64)
-GEN_VEXT_VF(vfnmsub_vf_h)
-GEN_VEXT_VF(vfnmsub_vf_w)
-GEN_VEXT_VF(vfnmsub_vf_d)
+GEN_VEXT_VF(vfnmsub_vf_h, 2)
+GEN_VEXT_VF(vfnmsub_vf_w, 4)
+GEN_VEXT_VF(vfnmsub_vf_d, 8)

 /* Vector Widening Floating-Point Fused Multiply-Add Instructions */
 static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
@@ -XXX,XX +XXX,XX @@ static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)

 RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16)
 RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32)
-GEN_VEXT_VV_ENV(vfwmacc_vv_h)
-GEN_VEXT_VV_ENV(vfwmacc_vv_w)
+GEN_VEXT_VV_ENV(vfwmacc_vv_h, 4)
+GEN_VEXT_VV_ENV(vfwmacc_vv_w, 8)
 RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16)
 RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32)
-GEN_VEXT_VF(vfwmacc_vf_h)
-GEN_VEXT_VF(vfwmacc_vf_w)
+GEN_VEXT_VF(vfwmacc_vf_h, 4)
+GEN_VEXT_VF(vfwmacc_vf_w, 8)

 static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
 {
@@ -XXX,XX +XXX,XX @@ static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)

 RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16)
 RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32)
-GEN_VEXT_VV_ENV(vfwnmacc_vv_h)
-GEN_VEXT_VV_ENV(vfwnmacc_vv_w)
+GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 4)
+GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 8)
 RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16)
 RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32)
-GEN_VEXT_VF(vfwnmacc_vf_h)
-GEN_VEXT_VF(vfwnmacc_vf_w)
+GEN_VEXT_VF(vfwnmacc_vf_h, 4)
+GEN_VEXT_VF(vfwnmacc_vf_w, 8)

 static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
 {
@@ -XXX,XX +XXX,XX @@ static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)

 RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16)
 RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32)
-GEN_VEXT_VV_ENV(vfwmsac_vv_h)
-GEN_VEXT_VV_ENV(vfwmsac_vv_w)
+GEN_VEXT_VV_ENV(vfwmsac_vv_h, 4)
+GEN_VEXT_VV_ENV(vfwmsac_vv_w, 8)
 RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16)
 RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32)
-GEN_VEXT_VF(vfwmsac_vf_h)
-GEN_VEXT_VF(vfwmsac_vf_w)
+GEN_VEXT_VF(vfwmsac_vf_h, 4)
+GEN_VEXT_VF(vfwmsac_vf_w, 8)

 static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
 {
@@ -XXX,XX +XXX,XX @@ static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)

 RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16)
 RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32)
-GEN_VEXT_VV_ENV(vfwnmsac_vv_h)
-GEN_VEXT_VV_ENV(vfwnmsac_vv_w)
+GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 4)
+GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 8)
 RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16)
 RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32)
-GEN_VEXT_VF(vfwnmsac_vf_h)
-GEN_VEXT_VF(vfwnmsac_vf_w)
+GEN_VEXT_VF(vfwnmsac_vf_h, 4)
+GEN_VEXT_VF(vfwnmsac_vf_w, 8)

 /* Vector Floating-Point Square-Root Instruction */
 /* (TD, T2, TX2) */
@@ -XXX,XX +XXX,XX @@ static void do_##NAME(void *vd, void *vs2, int i, \
     *((TD *)vd + HD(i)) = OP(s2, &env->fp_status); \
 }

-#define GEN_VEXT_V_ENV(NAME) \
+#define GEN_VEXT_V_ENV(NAME, ESZ) \
 void HELPER(NAME)(void *vd, void *v0, void *vs2, \
                   CPURISCVState *env, uint32_t desc) \
 { \
     uint32_t vm = vext_vm(desc); \
     uint32_t vl = env->vl; \
+    uint32_t total_elems = \
+        vext_get_total_elems(env, desc, ESZ); \
+    uint32_t vta = vext_vta(desc); \
     uint32_t i; \
 \
532 | if (vl == 0) { \ | ||
533 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs2, \ | ||
534 | do_##NAME(vd, vs2, i, env); \ | ||
535 | } \ | ||
536 | env->vstart = 0; \ | ||
537 | + vext_set_elems_1s(vd, vta, vl * ESZ, \ | ||
538 | + total_elems * ESZ); \ | ||
539 | } | ||
540 | |||
541 | RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt) | ||
542 | RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt) | ||
543 | RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt) | ||
544 | -GEN_VEXT_V_ENV(vfsqrt_v_h) | ||
545 | -GEN_VEXT_V_ENV(vfsqrt_v_w) | ||
546 | -GEN_VEXT_V_ENV(vfsqrt_v_d) | ||
547 | +GEN_VEXT_V_ENV(vfsqrt_v_h, 2) | ||
548 | +GEN_VEXT_V_ENV(vfsqrt_v_w, 4) | ||
549 | +GEN_VEXT_V_ENV(vfsqrt_v_d, 8) | ||
550 | |||
551 | /* | ||
552 | * Vector Floating-Point Reciprocal Square-Root Estimate Instruction | ||
553 | @@ -XXX,XX +XXX,XX @@ static float64 frsqrt7_d(float64 f, float_status *s) | ||
554 | RVVCALL(OPFVV1, vfrsqrt7_v_h, OP_UU_H, H2, H2, frsqrt7_h) | ||
555 | RVVCALL(OPFVV1, vfrsqrt7_v_w, OP_UU_W, H4, H4, frsqrt7_s) | ||
556 | RVVCALL(OPFVV1, vfrsqrt7_v_d, OP_UU_D, H8, H8, frsqrt7_d) | ||
557 | -GEN_VEXT_V_ENV(vfrsqrt7_v_h) | ||
558 | -GEN_VEXT_V_ENV(vfrsqrt7_v_w) | ||
559 | -GEN_VEXT_V_ENV(vfrsqrt7_v_d) | ||
560 | +GEN_VEXT_V_ENV(vfrsqrt7_v_h, 2) | ||
561 | +GEN_VEXT_V_ENV(vfrsqrt7_v_w, 4) | ||
562 | +GEN_VEXT_V_ENV(vfrsqrt7_v_d, 8) | ||
563 | |||
564 | /* | ||
565 | * Vector Floating-Point Reciprocal Estimate Instruction | ||
566 | @@ -XXX,XX +XXX,XX @@ static float64 frec7_d(float64 f, float_status *s) | ||
567 | RVVCALL(OPFVV1, vfrec7_v_h, OP_UU_H, H2, H2, frec7_h) | ||
568 | RVVCALL(OPFVV1, vfrec7_v_w, OP_UU_W, H4, H4, frec7_s) | ||
569 | RVVCALL(OPFVV1, vfrec7_v_d, OP_UU_D, H8, H8, frec7_d) | ||
570 | -GEN_VEXT_V_ENV(vfrec7_v_h) | ||
571 | -GEN_VEXT_V_ENV(vfrec7_v_w) | ||
572 | -GEN_VEXT_V_ENV(vfrec7_v_d) | ||
573 | +GEN_VEXT_V_ENV(vfrec7_v_h, 2) | ||
574 | +GEN_VEXT_V_ENV(vfrec7_v_w, 4) | ||
575 | +GEN_VEXT_V_ENV(vfrec7_v_d, 8) | ||
576 | |||
577 | /* Vector Floating-Point MIN/MAX Instructions */ | ||
578 | RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minimum_number) | ||
579 | RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, float32_minimum_number) | ||
580 | RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minimum_number) | ||
581 | -GEN_VEXT_VV_ENV(vfmin_vv_h) | ||
582 | -GEN_VEXT_VV_ENV(vfmin_vv_w) | ||
583 | -GEN_VEXT_VV_ENV(vfmin_vv_d) | ||
584 | +GEN_VEXT_VV_ENV(vfmin_vv_h, 2) | ||
585 | +GEN_VEXT_VV_ENV(vfmin_vv_w, 4) | ||
586 | +GEN_VEXT_VV_ENV(vfmin_vv_d, 8) | ||
587 | RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minimum_number) | ||
588 | RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minimum_number) | ||
589 | RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minimum_number) | ||
590 | -GEN_VEXT_VF(vfmin_vf_h) | ||
591 | -GEN_VEXT_VF(vfmin_vf_w) | ||
592 | -GEN_VEXT_VF(vfmin_vf_d) | ||
593 | +GEN_VEXT_VF(vfmin_vf_h, 2) | ||
594 | +GEN_VEXT_VF(vfmin_vf_w, 4) | ||
595 | +GEN_VEXT_VF(vfmin_vf_d, 8) | ||
596 | |||
597 | RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maximum_number) | ||
598 | RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maximum_number) | ||
599 | RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maximum_number) | ||
600 | -GEN_VEXT_VV_ENV(vfmax_vv_h) | ||
601 | -GEN_VEXT_VV_ENV(vfmax_vv_w) | ||
602 | -GEN_VEXT_VV_ENV(vfmax_vv_d) | ||
603 | +GEN_VEXT_VV_ENV(vfmax_vv_h, 2) | ||
604 | +GEN_VEXT_VV_ENV(vfmax_vv_w, 4) | ||
605 | +GEN_VEXT_VV_ENV(vfmax_vv_d, 8) | ||
606 | RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maximum_number) | ||
607 | RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maximum_number) | ||
608 | RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maximum_number) | ||
609 | -GEN_VEXT_VF(vfmax_vf_h) | ||
610 | -GEN_VEXT_VF(vfmax_vf_w) | ||
611 | -GEN_VEXT_VF(vfmax_vf_d) | ||
612 | +GEN_VEXT_VF(vfmax_vf_h, 2) | ||
613 | +GEN_VEXT_VF(vfmax_vf_w, 4) | ||
614 | +GEN_VEXT_VF(vfmax_vf_d, 8) | ||
615 | |||
616 | /* Vector Floating-Point Sign-Injection Instructions */ | ||
617 | static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s) | ||
618 | @@ -XXX,XX +XXX,XX @@ static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s) | ||
619 | RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16) | ||
620 | RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32) | ||
621 | RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64) | ||
622 | -GEN_VEXT_VV_ENV(vfsgnj_vv_h) | ||
623 | -GEN_VEXT_VV_ENV(vfsgnj_vv_w) | ||
624 | -GEN_VEXT_VV_ENV(vfsgnj_vv_d) | ||
625 | +GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2) | ||
626 | +GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4) | ||
627 | +GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8) | ||
628 | RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16) | ||
629 | RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32) | ||
630 | RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64) | ||
631 | -GEN_VEXT_VF(vfsgnj_vf_h) | ||
632 | -GEN_VEXT_VF(vfsgnj_vf_w) | ||
633 | -GEN_VEXT_VF(vfsgnj_vf_d) | ||
634 | +GEN_VEXT_VF(vfsgnj_vf_h, 2) | ||
635 | +GEN_VEXT_VF(vfsgnj_vf_w, 4) | ||
636 | +GEN_VEXT_VF(vfsgnj_vf_d, 8) | ||
637 | |||
638 | static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s) | ||
639 | { | ||
640 | @@ -XXX,XX +XXX,XX @@ static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s) | ||
641 | RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16) | ||
642 | RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32) | ||
643 | RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64) | ||
644 | -GEN_VEXT_VV_ENV(vfsgnjn_vv_h) | ||
645 | -GEN_VEXT_VV_ENV(vfsgnjn_vv_w) | ||
646 | -GEN_VEXT_VV_ENV(vfsgnjn_vv_d) | ||
647 | +GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2) | ||
648 | +GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4) | ||
649 | +GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8) | ||
650 | RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16) | ||
651 | RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32) | ||
652 | RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64) | ||
653 | -GEN_VEXT_VF(vfsgnjn_vf_h) | ||
654 | -GEN_VEXT_VF(vfsgnjn_vf_w) | ||
655 | -GEN_VEXT_VF(vfsgnjn_vf_d) | ||
656 | +GEN_VEXT_VF(vfsgnjn_vf_h, 2) | ||
657 | +GEN_VEXT_VF(vfsgnjn_vf_w, 4) | ||
658 | +GEN_VEXT_VF(vfsgnjn_vf_d, 8) | ||
659 | |||
660 | static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s) | ||
661 | { | ||
662 | @@ -XXX,XX +XXX,XX @@ static uint64_t fsgnjx64(uint64_t a, uint64_t b, float_status *s) | ||
663 | RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16) | ||
664 | RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32) | ||
665 | RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64) | ||
666 | -GEN_VEXT_VV_ENV(vfsgnjx_vv_h) | ||
667 | -GEN_VEXT_VV_ENV(vfsgnjx_vv_w) | ||
668 | -GEN_VEXT_VV_ENV(vfsgnjx_vv_d) | ||
669 | +GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2) | ||
670 | +GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4) | ||
671 | +GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8) | ||
672 | RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16) | ||
673 | RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32) | ||
674 | RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64) | ||
675 | -GEN_VEXT_VF(vfsgnjx_vf_h) | ||
676 | -GEN_VEXT_VF(vfsgnjx_vf_w) | ||
677 | -GEN_VEXT_VF(vfsgnjx_vf_d) | ||
678 | +GEN_VEXT_VF(vfsgnjx_vf_h, 2) | ||
679 | +GEN_VEXT_VF(vfsgnjx_vf_w, 4) | ||
680 | +GEN_VEXT_VF(vfsgnjx_vf_d, 8) | ||
681 | |||
682 | /* Vector Floating-Point Compare Instructions */ | ||
683 | #define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP) \ | ||
684 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ | ||
685 | { \ | ||
686 | uint32_t vm = vext_vm(desc); \ | ||
687 | uint32_t vl = env->vl; \ | ||
688 | + uint32_t total_elems = env_archcpu(env)->cfg.vlen; \ | ||
689 | + uint32_t vta_all_1s = vext_vta_all_1s(desc); \ | ||
690 | uint32_t i; \ | ||
691 | \ | ||
692 | for (i = env->vstart; i < vl; i++) { \ | ||
693 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ | ||
694 | DO_OP(s2, s1, &env->fp_status)); \ | ||
695 | } \ | ||
696 | env->vstart = 0; \ | ||
697 | + /* mask destination register is always tail-agnostic */ \ | ||
698 | + /* set tail elements to 1s */ \ | ||
699 | + if (vta_all_1s) { \ | ||
700 | + for (; i < total_elems; i++) { \ | ||
701 | + vext_set_elem_mask(vd, i, 1); \ | ||
702 | + } \ | ||
703 | + } \ | ||
704 | } | ||
705 | |||
706 | GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet) | ||
707 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ | ||
708 | { \ | ||
709 | uint32_t vm = vext_vm(desc); \ | ||
710 | uint32_t vl = env->vl; \ | ||
711 | + uint32_t total_elems = env_archcpu(env)->cfg.vlen; \ | ||
712 | + uint32_t vta_all_1s = vext_vta_all_1s(desc); \ | ||
713 | uint32_t i; \ | ||
714 | \ | ||
715 | for (i = env->vstart; i < vl; i++) { \ | ||
716 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ | ||
717 | DO_OP(s2, (ETYPE)s1, &env->fp_status)); \ | ||
718 | } \ | ||
719 | env->vstart = 0; \ | ||
720 | + /* mask destination register is always tail-agnostic */ \ | ||
721 | + /* set tail elements to 1s */ \ | ||
722 | + if (vta_all_1s) { \ | ||
723 | + for (; i < total_elems; i++) { \ | ||
724 | + vext_set_elem_mask(vd, i, 1); \ | ||
725 | + } \ | ||
726 | + } \ | ||
727 | } | ||
728 | |||
729 | GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet) | ||
730 | @@ -XXX,XX +XXX,XX @@ static void do_##NAME(void *vd, void *vs2, int i) \ | ||
731 | *((TD *)vd + HD(i)) = OP(s2); \ | ||
732 | } | ||
733 | |||
734 | -#define GEN_VEXT_V(NAME) \ | ||
735 | +#define GEN_VEXT_V(NAME, ESZ) \ | ||
736 | void HELPER(NAME)(void *vd, void *v0, void *vs2, \ | ||
737 | CPURISCVState *env, uint32_t desc) \ | ||
738 | { \ | ||
739 | uint32_t vm = vext_vm(desc); \ | ||
740 | uint32_t vl = env->vl; \ | ||
741 | + uint32_t total_elems = \ | ||
742 | + vext_get_total_elems(env, desc, ESZ); \ | ||
743 | + uint32_t vta = vext_vta(desc); \ | ||
744 | uint32_t i; \ | ||
745 | \ | ||
746 | for (i = env->vstart; i < vl; i++) { \ | ||
747 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs2, \ | ||
748 | do_##NAME(vd, vs2, i); \ | ||
749 | } \ | ||
750 | env->vstart = 0; \ | ||
751 | + /* set tail elements to 1s */ \ | ||
752 | + vext_set_elems_1s(vd, vta, vl * ESZ, \ | ||
753 | + total_elems * ESZ); \ | ||
754 | } | ||
755 | |||
756 | target_ulong fclass_h(uint64_t frs1) | ||
757 | @@ -XXX,XX +XXX,XX @@ target_ulong fclass_d(uint64_t frs1) | ||
758 | RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h) | ||
759 | RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s) | ||
760 | RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d) | ||
761 | -GEN_VEXT_V(vfclass_v_h) | ||
762 | -GEN_VEXT_V(vfclass_v_w) | ||
763 | -GEN_VEXT_V(vfclass_v_d) | ||
764 | +GEN_VEXT_V(vfclass_v_h, 2) | ||
765 | +GEN_VEXT_V(vfclass_v_w, 4) | ||
766 | +GEN_VEXT_V(vfclass_v_d, 8) | ||
767 | |||
768 | /* Vector Floating-Point Merge Instruction */ | ||
769 | + | ||
770 | #define GEN_VFMERGE_VF(NAME, ETYPE, H) \ | ||
771 | void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ | ||
772 | CPURISCVState *env, uint32_t desc) \ | ||
773 | { \ | ||
774 | uint32_t vm = vext_vm(desc); \ | ||
775 | uint32_t vl = env->vl; \ | ||
776 | + uint32_t esz = sizeof(ETYPE); \ | ||
777 | + uint32_t total_elems = \ | ||
778 | + vext_get_total_elems(env, desc, esz); \ | ||
779 | + uint32_t vta = vext_vta(desc); \ | ||
780 | uint32_t i; \ | ||
781 | \ | ||
782 | for (i = env->vstart; i < vl; i++) { \ | ||
783 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ | ||
784 | = (!vm && !vext_elem_mask(v0, i) ? s2 : s1); \ | ||
785 | } \ | ||
786 | env->vstart = 0; \ | ||
787 | + /* set tail elements to 1s */ \ | ||
788 | + vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ | ||
789 | } | ||
790 | |||
791 | GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2) | ||
792 | @@ -XXX,XX +XXX,XX @@ GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8) | ||
793 | RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16) | ||
794 | RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32) | ||
795 | RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64) | ||
796 | -GEN_VEXT_V_ENV(vfcvt_xu_f_v_h) | ||
797 | -GEN_VEXT_V_ENV(vfcvt_xu_f_v_w) | ||
798 | -GEN_VEXT_V_ENV(vfcvt_xu_f_v_d) | ||
799 | +GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2) | ||
800 | +GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4) | ||
801 | +GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8) | ||
802 | |||
803 | /* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */ | ||
804 | RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16) | ||
805 | RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32) | ||
806 | RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64) | ||
807 | -GEN_VEXT_V_ENV(vfcvt_x_f_v_h) | ||
808 | -GEN_VEXT_V_ENV(vfcvt_x_f_v_w) | ||
809 | -GEN_VEXT_V_ENV(vfcvt_x_f_v_d) | ||
810 | +GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2) | ||
811 | +GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4) | ||
812 | +GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8) | ||
813 | |||
814 | /* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */ | ||
815 | RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16) | ||
816 | RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32) | ||
817 | RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64) | ||
818 | -GEN_VEXT_V_ENV(vfcvt_f_xu_v_h) | ||
819 | -GEN_VEXT_V_ENV(vfcvt_f_xu_v_w) | ||
820 | -GEN_VEXT_V_ENV(vfcvt_f_xu_v_d) | ||
821 | +GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2) | ||
822 | +GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4) | ||
823 | +GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8) | ||
824 | |||
825 | /* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */ | ||
826 | RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16) | ||
827 | RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32) | ||
828 | RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64) | ||
829 | -GEN_VEXT_V_ENV(vfcvt_f_x_v_h) | ||
830 | -GEN_VEXT_V_ENV(vfcvt_f_x_v_w) | ||
831 | -GEN_VEXT_V_ENV(vfcvt_f_x_v_d) | ||
832 | +GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2) | ||
833 | +GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4) | ||
834 | +GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8) | ||
835 | |||
836 | /* Widening Floating-Point/Integer Type-Convert Instructions */ | ||
837 | /* (TD, T2, TX2) */ | ||
838 | @@ -XXX,XX +XXX,XX @@ GEN_VEXT_V_ENV(vfcvt_f_x_v_d) | ||
839 | /* vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer.*/ | ||
840 | RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32) | ||
841 | RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64) | ||
842 | -GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h) | ||
843 | -GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w) | ||
844 | +GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 4) | ||
845 | +GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 8) | ||
846 | |||
847 | /* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */ | ||
848 | RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32) | ||
849 | RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64) | ||
850 | -GEN_VEXT_V_ENV(vfwcvt_x_f_v_h) | ||
851 | -GEN_VEXT_V_ENV(vfwcvt_x_f_v_w) | ||
852 | +GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 4) | ||
853 | +GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 8) | ||
854 | |||
855 | /* vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float */ | ||
856 | RVVCALL(OPFVV1, vfwcvt_f_xu_v_b, WOP_UU_B, H2, H1, uint8_to_float16) | ||
857 | RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32) | ||
858 | RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64) | ||
859 | -GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b) | ||
860 | -GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h) | ||
861 | -GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w) | ||
862 | +GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b, 2) | ||
863 | +GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 4) | ||
864 | +GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 8) | ||
865 | |||
866 | /* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */ | ||
867 | RVVCALL(OPFVV1, vfwcvt_f_x_v_b, WOP_UU_B, H2, H1, int8_to_float16) | ||
868 | RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32) | ||
869 | RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64) | ||
870 | -GEN_VEXT_V_ENV(vfwcvt_f_x_v_b) | ||
871 | -GEN_VEXT_V_ENV(vfwcvt_f_x_v_h) | ||
872 | -GEN_VEXT_V_ENV(vfwcvt_f_x_v_w) | ||
873 | +GEN_VEXT_V_ENV(vfwcvt_f_x_v_b, 2) | ||
874 | +GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 4) | ||
875 | +GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 8) | ||
876 | |||
877 | /* | ||
878 | * vfwcvt.f.f.v vd, vs2, vm | ||
879 | @@ -XXX,XX +XXX,XX @@ static uint32_t vfwcvtffv16(uint16_t a, float_status *s) | ||
880 | |||
881 | RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16) | ||
882 | RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64) | ||
883 | -GEN_VEXT_V_ENV(vfwcvt_f_f_v_h) | ||
884 | -GEN_VEXT_V_ENV(vfwcvt_f_f_v_w) | ||
885 | +GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 4) | ||
886 | +GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 8) | ||
887 | |||
888 | /* Narrowing Floating-Point/Integer Type-Convert Instructions */ | ||
889 | /* (TD, T2, TX2) */ | ||
890 | @@ -XXX,XX +XXX,XX @@ GEN_VEXT_V_ENV(vfwcvt_f_f_v_w) | ||
891 | RVVCALL(OPFVV1, vfncvt_xu_f_w_b, NOP_UU_B, H1, H2, float16_to_uint8) | ||
892 | RVVCALL(OPFVV1, vfncvt_xu_f_w_h, NOP_UU_H, H2, H4, float32_to_uint16) | ||
893 | RVVCALL(OPFVV1, vfncvt_xu_f_w_w, NOP_UU_W, H4, H8, float64_to_uint32) | ||
894 | -GEN_VEXT_V_ENV(vfncvt_xu_f_w_b) | ||
895 | -GEN_VEXT_V_ENV(vfncvt_xu_f_w_h) | ||
896 | -GEN_VEXT_V_ENV(vfncvt_xu_f_w_w) | ||
897 | +GEN_VEXT_V_ENV(vfncvt_xu_f_w_b, 1) | ||
898 | +GEN_VEXT_V_ENV(vfncvt_xu_f_w_h, 2) | ||
899 | +GEN_VEXT_V_ENV(vfncvt_xu_f_w_w, 4) | ||
900 | |||
901 | /* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */ | ||
902 | RVVCALL(OPFVV1, vfncvt_x_f_w_b, NOP_UU_B, H1, H2, float16_to_int8) | ||
903 | RVVCALL(OPFVV1, vfncvt_x_f_w_h, NOP_UU_H, H2, H4, float32_to_int16) | ||
904 | RVVCALL(OPFVV1, vfncvt_x_f_w_w, NOP_UU_W, H4, H8, float64_to_int32) | ||
905 | -GEN_VEXT_V_ENV(vfncvt_x_f_w_b) | ||
906 | -GEN_VEXT_V_ENV(vfncvt_x_f_w_h) | ||
907 | -GEN_VEXT_V_ENV(vfncvt_x_f_w_w) | ||
908 | +GEN_VEXT_V_ENV(vfncvt_x_f_w_b, 1) | ||
909 | +GEN_VEXT_V_ENV(vfncvt_x_f_w_h, 2) | ||
910 | +GEN_VEXT_V_ENV(vfncvt_x_f_w_w, 4) | ||
911 | |||
912 | /* vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float */ | ||
913 | RVVCALL(OPFVV1, vfncvt_f_xu_w_h, NOP_UU_H, H2, H4, uint32_to_float16) | ||
914 | RVVCALL(OPFVV1, vfncvt_f_xu_w_w, NOP_UU_W, H4, H8, uint64_to_float32) | ||
915 | -GEN_VEXT_V_ENV(vfncvt_f_xu_w_h) | ||
916 | -GEN_VEXT_V_ENV(vfncvt_f_xu_w_w) | ||
917 | +GEN_VEXT_V_ENV(vfncvt_f_xu_w_h, 2) | ||
918 | +GEN_VEXT_V_ENV(vfncvt_f_xu_w_w, 4) | ||
919 | |||
920 | /* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */ | ||
921 | RVVCALL(OPFVV1, vfncvt_f_x_w_h, NOP_UU_H, H2, H4, int32_to_float16) | ||
922 | RVVCALL(OPFVV1, vfncvt_f_x_w_w, NOP_UU_W, H4, H8, int64_to_float32) | ||
923 | -GEN_VEXT_V_ENV(vfncvt_f_x_w_h) | ||
924 | -GEN_VEXT_V_ENV(vfncvt_f_x_w_w) | ||
925 | +GEN_VEXT_V_ENV(vfncvt_f_x_w_h, 2) | ||
926 | +GEN_VEXT_V_ENV(vfncvt_f_x_w_w, 4) | ||
927 | |||
928 | /* vfncvt.f.f.v vd, vs2, vm # Convert double float to single-width float. */ | ||
929 | static uint16_t vfncvtffv16(uint32_t a, float_status *s) | ||
930 | @@ -XXX,XX +XXX,XX @@ static uint16_t vfncvtffv16(uint32_t a, float_status *s) | ||
931 | |||
932 | RVVCALL(OPFVV1, vfncvt_f_f_w_h, NOP_UU_H, H2, H4, vfncvtffv16) | ||
933 | RVVCALL(OPFVV1, vfncvt_f_f_w_w, NOP_UU_W, H4, H8, float64_to_float32) | ||
934 | -GEN_VEXT_V_ENV(vfncvt_f_f_w_h) | ||
935 | -GEN_VEXT_V_ENV(vfncvt_f_f_w_w) | ||
936 | +GEN_VEXT_V_ENV(vfncvt_f_f_w_h, 2) | ||
937 | +GEN_VEXT_V_ENV(vfncvt_f_f_w_w, 4) | ||
938 | |||
939 | /* | ||
940 | *** Vector Reduction Operations | ||
941 | diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc | ||
942 | index XXXXXXX..XXXXXXX 100644 | ||
943 | --- a/target/riscv/insn_trans/trans_rvv.c.inc | ||
944 | +++ b/target/riscv/insn_trans/trans_rvv.c.inc | ||
945 | @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ | ||
946 | \ | ||
947 | data = FIELD_DP32(data, VDATA, VM, a->vm); \ | ||
948 | data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ | ||
949 | + data = FIELD_DP32(data, VDATA, VTA, s->vta); \ | ||
950 | + data = \ | ||
951 | + FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s);\ | ||
952 | tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ | ||
953 | vreg_ofs(s, a->rs1), \ | ||
954 | vreg_ofs(s, a->rs2), cpu_env, \ | ||
955 | @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ | ||
956 | gen_set_rm(s, RISCV_FRM_DYN); \ | ||
957 | data = FIELD_DP32(data, VDATA, VM, a->vm); \ | ||
958 | data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ | ||
959 | + data = FIELD_DP32(data, VDATA, VTA, s->vta); \ | ||
960 | + data = FIELD_DP32(data, VDATA, VTA_ALL_1S, \ | ||
961 | + s->cfg_vta_all_1s); \ | ||
962 | return opfvf_trans(a->rd, a->rs1, a->rs2, data, \ | ||
963 | fns[s->sew - 1], s); \ | ||
964 | } \ | ||
965 | @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ | ||
966 | \ | ||
967 | data = FIELD_DP32(data, VDATA, VM, a->vm); \ | ||
968 | data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ | ||
969 | + data = FIELD_DP32(data, VDATA, VTA, s->vta); \ | ||
970 | tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ | ||
971 | vreg_ofs(s, a->rs1), \ | ||
972 | vreg_ofs(s, a->rs2), cpu_env, \ | ||
973 | @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ | ||
974 | gen_set_rm(s, RISCV_FRM_DYN); \ | ||
975 | data = FIELD_DP32(data, VDATA, VM, a->vm); \ | ||
976 | data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ | ||
977 | + data = FIELD_DP32(data, VDATA, VTA, s->vta); \ | ||
978 | return opfvf_trans(a->rd, a->rs1, a->rs2, data, \ | ||
979 | fns[s->sew - 1], s); \ | ||
980 | } \ | ||
981 | @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ | ||
982 | \ | ||
983 | data = FIELD_DP32(data, VDATA, VM, a->vm); \ | ||
984 | data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ | ||
985 | + data = FIELD_DP32(data, VDATA, VTA, s->vta); \ | ||
986 | tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ | ||
987 | vreg_ofs(s, a->rs1), \ | ||
988 | vreg_ofs(s, a->rs2), cpu_env, \ | ||
989 | @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ | ||
990 | gen_set_rm(s, RISCV_FRM_DYN); \ | ||
991 | data = FIELD_DP32(data, VDATA, VM, a->vm); \ | ||
992 | data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ | ||
993 | + data = FIELD_DP32(data, VDATA, VTA, s->vta); \ | ||
994 | return opfvf_trans(a->rd, a->rs1, a->rs2, data, \ | ||
995 | fns[s->sew - 1], s); \ | ||
996 | } \ | ||
997 | @@ -XXX,XX +XXX,XX @@ static bool do_opfv(DisasContext *s, arg_rmr *a, | ||
998 | |||
999 | data = FIELD_DP32(data, VDATA, VM, a->vm); | ||
1000 | data = FIELD_DP32(data, VDATA, LMUL, s->lmul); | ||
1001 | + data = FIELD_DP32(data, VDATA, VTA, s->vta); | ||
1002 | tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), | ||
1003 | vreg_ofs(s, a->rs2), cpu_env, | ||
1004 | s->cfg_ptr->vlen / 8, | ||
1005 | @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ | ||
1006 | \ | ||
1007 | data = FIELD_DP32(data, VDATA, VM, a->vm); \ | ||
1008 | data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ | ||
1009 | + data = FIELD_DP32(data, VDATA, VTA, s->vta); \ | ||
1010 | tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ | ||
1011 | vreg_ofs(s, a->rs2), cpu_env, \ | ||
1012 | s->cfg_ptr->vlen / 8, \ | ||
1013 | @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ | ||
1014 | tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ | ||
1015 | \ | ||
1016 | data = FIELD_DP32(data, VDATA, VM, a->vm); \ | ||
1017 | + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ | ||
1018 | + data = FIELD_DP32(data, VDATA, VTA, s->vta); \ | ||
1019 | tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ | ||
1020 | vreg_ofs(s, a->rs2), cpu_env, \ | ||
1021 | s->cfg_ptr->vlen / 8, \ | ||
1022 | @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ | ||
1023 | \ | ||
1024 | data = FIELD_DP32(data, VDATA, VM, a->vm); \ | ||
1025 | data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ | ||
1026 | + data = FIELD_DP32(data, VDATA, VTA, s->vta); \ | ||
1027 | tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ | ||
1028 | vreg_ofs(s, a->rs2), cpu_env, \ | ||
1029 | s->cfg_ptr->vlen / 8, \ | ||
1030 | @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ | ||
1031 | tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ | ||
1032 | \ | ||
1033 | data = FIELD_DP32(data, VDATA, VM, a->vm); \ | ||
1034 | + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ | ||
1035 | + data = FIELD_DP32(data, VDATA, VTA, s->vta); \ | ||
1036 | tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ | ||
1037 | vreg_ofs(s, a->rs2), cpu_env, \ | ||
1038 | s->cfg_ptr->vlen / 8, \ | ||
1039 | -- | ||
1040 | 2.36.1 | ||
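To make the mechanical substitutions above easier to follow: the new second argument to the GEN_VEXT_VV_ENV/GEN_VEXT_VF/GEN_VEXT_V_ENV macros is the destination element size ESZ in bytes (2/4/8 for the _h/_w/_d variants; the widening forms pass the double-width size and the narrowing forms the single-width size). The generated helper multiplies element counts by ESZ to obtain the byte range handed to vext_set_elems_1s. A rough sketch of one expanded helper, simplified from the macro body shown above (the per-element loop and the vl == 0 early-out are elided in the hunk, so they are paraphrased here; treat this as illustrative, not literal preprocessor output):

    /* Illustrative expansion of GEN_VEXT_V_ENV(vfsqrt_v_w, 4): SEW=32
     * means 4-byte elements, so the agnostic tail starts at byte vl * 4. */
    void helper_vfsqrt_v_w(void *vd, void *v0, void *vs2,
                           CPURISCVState *env, uint32_t desc)
    {
        uint32_t vm = vext_vm(desc);
        uint32_t vl = env->vl;
        uint32_t total_elems = vext_get_total_elems(env, desc, 4);
        uint32_t vta = vext_vta(desc);
        uint32_t i;

        for (i = env->vstart; i < vl; i++) {
            if (!vm && !vext_elem_mask(v0, i)) {
                continue; /* inactive element, handled per mask policy */
            }
            do_vfsqrt_v_w(vd, vs2, i, env);
        }
        env->vstart = 0;
        /* tail-agnostic: bytes [vl * 4, total_elems * 4) may be set to 1s */
        vext_set_elems_1s(vd, vta, vl * 4, total_elems * 4);
    }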
New patch | |||
---|---|---|---|
1 | From: eopXD <yueh.ting.chen@gmail.com> | ||
1 | 2 | ||
3 | Signed-off-by: eop Chen <eop.chen@sifive.com> | ||
4 | Reviewed-by: Frank Chang <frank.chang@sifive.com> | ||
5 | Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn> | ||
6 | Acked-by: Alistair Francis <alistair.francis@wdc.com> | ||
7 | Message-Id: <165449614532.19704.7000832880482980398-13@git.sr.ht> | ||
8 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | ||
9 | --- | ||
10 | target/riscv/vector_helper.c | 20 ++++++++++++++++++++ | ||
11 | 1 file changed, 20 insertions(+) | ||
12 | |||
13 | diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c | ||
14 | index XXXXXXX..XXXXXXX 100644 | ||
15 | --- a/target/riscv/vector_helper.c | ||
16 | +++ b/target/riscv/vector_helper.c | ||
17 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, \ | ||
18 | { \ | ||
19 | uint32_t vm = vext_vm(desc); \ | ||
20 | uint32_t vl = env->vl; \ | ||
21 | + uint32_t esz = sizeof(TD); \ | ||
22 | + uint32_t vlenb = simd_maxsz(desc); \ | ||
23 | + uint32_t vta = vext_vta(desc); \ | ||
24 | uint32_t i; \ | ||
25 | TD s1 = *((TD *)vs1 + HD(0)); \ | ||
26 | \ | ||
27 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, \ | ||
28 | } \ | ||
29 | *((TD *)vd + HD(0)) = s1; \ | ||
30 | env->vstart = 0; \ | ||
31 | + /* set tail elements to 1s */ \ | ||
32 | + vext_set_elems_1s(vd, vta, esz, vlenb); \ | ||
33 | } | ||
34 | |||
35 | /* vd[0] = sum(vs1[0], vs2[*]) */ | ||
36 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, \ | ||
37 | { \ | ||
38 | uint32_t vm = vext_vm(desc); \ | ||
39 | uint32_t vl = env->vl; \ | ||
40 | + uint32_t esz = sizeof(TD); \ | ||
41 | + uint32_t vlenb = simd_maxsz(desc); \ | ||
42 | + uint32_t vta = vext_vta(desc); \ | ||
43 | uint32_t i; \ | ||
44 | TD s1 = *((TD *)vs1 + HD(0)); \ | ||
45 | \ | ||
46 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, \ | ||
47 | } \ | ||
48 | *((TD *)vd + HD(0)) = s1; \ | ||
49 | env->vstart = 0; \ | ||
50 | + /* set tail elements to 1s */ \ | ||
51 | + vext_set_elems_1s(vd, vta, esz, vlenb); \ | ||
52 | } | ||
53 | |||
54 | /* Unordered sum */ | ||
55 | @@ -XXX,XX +XXX,XX @@ void HELPER(vfwredsum_vs_h)(void *vd, void *v0, void *vs1, | ||
56 | { | ||
57 | uint32_t vm = vext_vm(desc); | ||
58 | uint32_t vl = env->vl; | ||
59 | + uint32_t esz = sizeof(uint32_t); | ||
60 | + uint32_t vlenb = simd_maxsz(desc); | ||
61 | + uint32_t vta = vext_vta(desc); | ||
62 | uint32_t i; | ||
63 | uint32_t s1 = *((uint32_t *)vs1 + H4(0)); | ||
64 | |||
65 | @@ -XXX,XX +XXX,XX @@ void HELPER(vfwredsum_vs_h)(void *vd, void *v0, void *vs1, | ||
66 | } | ||
67 | *((uint32_t *)vd + H4(0)) = s1; | ||
68 | env->vstart = 0; | ||
69 | + /* set tail elements to 1s */ | ||
70 | + vext_set_elems_1s(vd, vta, esz, vlenb); | ||
71 | } | ||
72 | |||
73 | void HELPER(vfwredsum_vs_w)(void *vd, void *v0, void *vs1, | ||
74 | @@ -XXX,XX +XXX,XX @@ void HELPER(vfwredsum_vs_w)(void *vd, void *v0, void *vs1, | ||
75 | { | ||
76 | uint32_t vm = vext_vm(desc); | ||
77 | uint32_t vl = env->vl; | ||
78 | + uint32_t esz = sizeof(uint64_t); | ||
79 | + uint32_t vlenb = simd_maxsz(desc); | ||
80 | + uint32_t vta = vext_vta(desc); | ||
81 | uint32_t i; | ||
82 | uint64_t s1 = *((uint64_t *)vs1); | ||
83 | |||
84 | @@ -XXX,XX +XXX,XX @@ void HELPER(vfwredsum_vs_w)(void *vd, void *v0, void *vs1, | ||
85 | } | ||
86 | *((uint64_t *)vd) = s1; | ||
87 | env->vstart = 0; | ||
88 | + /* set tail elements to 1s */ | ||
89 | + vext_set_elems_1s(vd, vta, esz, vlenb); | ||
90 | } | ||
91 | |||
92 | /* | ||
93 | -- | ||
94 | 2.36.1 | ||
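The reduction helpers above write only element 0 of vd, so their agnostic tail starts at byte esz and runs to vlenb = simd_maxsz(desc), i.e. the remainder of the whole destination register. vext_set_elems_1s itself is not shown in these hunks; a minimal little-endian sketch of the contract implied by its call sites (both counts are byte offsets into the destination) could look like the following. This is a hypothetical version: the real QEMU helper must additionally cope with big-endian hosts, where element bytes are not laid out at increasing addresses.

    #include <stdint.h>
    #include <string.h>

    /* Hypothetical minimal version: under the tail-agnostic "all 1s"
     * mode, fill bytes [cnt, tot) of the destination with 0xff; under
     * the tail-undisturbed policy, leave them untouched. */
    static void vext_set_elems_1s(void *base, uint32_t is_agnostic,
                                  uint32_t cnt, uint32_t tot)
    {
        if (is_agnostic == 0 || tot <= cnt) {
            return;
        }
        memset((uint8_t *)base + cnt, 0xff, tot - cnt);
    }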
1 | From: Thomas Huth <thuth@redhat.com> | 1 | From: eopXD <yueh.ting.chen@gmail.com> |
---|---|---|---|
2 | 2 | ||
3 | Configuring a drive with "if=none" is meant for creation of a backend | 3 | The tail elements in the destination mask register are updated under |
4 | only; it should not get automatically assigned to a device frontend. | 4 | a tail-agnostic policy. |
5 | Use "if=pflash" for the One-Time-Programmable device instead (as is | 5 | |
6 | done, e.g., for the efuse device in hw/arm/xlnx-zcu102.c). | 6 | Signed-off-by: eop Chen <eop.chen@sifive.com> |
7 | 5 | ||
8 | Since the old way of configuring the device has already been published | 6 | Signed-off-by: eop Chen <eop.chen@sifive.com> |
9 | with the previous QEMU versions, we cannot remove this immediately, but | 7 | Reviewed-by: Frank Chang <frank.chang@sifive.com> |
10 | have to deprecate it and support it for at least two more releases. | 8 | Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn> |
11 | 9 | Acked-by: Alistair Francis <alistair.francis@wdc.com> | |
12 | Signed-off-by: Thomas Huth <thuth@redhat.com> | 10 | Message-Id: <165449614532.19704.7000832880482980398-14@git.sr.ht> |
13 | Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com> | ||
14 | Reviewed-by: Markus Armbruster <armbru@redhat.com> | ||
15 | Reviewed-by: Alistair Francis <alistair.francis@wdc.com> | ||
16 | Message-id: 20211119102549.217755-1-thuth@redhat.com | ||
17 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | 11 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> |
18 | --- | 12 | --- |
19 | docs/about/deprecated.rst | 6 ++++++ | 13 | target/riscv/vector_helper.c | 30 +++++++++++++++++++++++++ |
20 | hw/misc/sifive_u_otp.c | 9 ++++++++- | 14 | target/riscv/insn_trans/trans_rvv.c.inc | 6 +++++ |
21 | 2 files changed, 14 insertions(+), 1 deletion(-) | 15 | 2 files changed, 36 insertions(+) |
22 | 16 | ||
23 | diff --git a/docs/about/deprecated.rst b/docs/about/deprecated.rst | 17 | diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c |
24 | index XXXXXXX..XXXXXXX 100644 | 18 | index XXXXXXX..XXXXXXX 100644 |
25 | --- a/docs/about/deprecated.rst | 19 | --- a/target/riscv/vector_helper.c |
26 | +++ b/docs/about/deprecated.rst | 20 | +++ b/target/riscv/vector_helper.c |
27 | @@ -XXX,XX +XXX,XX @@ as short-form boolean values, and passed to plugins as ``arg_name=on``. | 21 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, \ |
28 | However, short-form booleans are deprecated and full explicit ``arg_name=on`` | 22 | uint32_t desc) \ |
29 | form is preferred. | 23 | { \ |
30 | 24 | uint32_t vl = env->vl; \ | |
31 | +``-drive if=none`` for the sifive_u OTP device (since 6.2) | 25 | + uint32_t total_elems = env_archcpu(env)->cfg.vlen; \ |
32 | +'''''''''''''''''''''''''''''''''''''''''''''''''''''''''' | 26 | + uint32_t vta_all_1s = vext_vta_all_1s(desc); \ |
33 | + | 27 | uint32_t i; \ |
34 | +Using ``-drive if=none`` to configure the OTP device of the sifive_u | 28 | int a, b; \ |
35 | +RISC-V machine is deprecated. Use ``-drive if=pflash`` instead. | 29 | \ |
36 | + | 30 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, \ |
37 | 31 | vext_set_elem_mask(vd, i, OP(b, a)); \ | |
38 | QEMU Machine Protocol (QMP) commands | 32 | } \ |
39 | ------------------------------------ | 33 | env->vstart = 0; \ |
40 | diff --git a/hw/misc/sifive_u_otp.c b/hw/misc/sifive_u_otp.c | 34 | + /* mask destination register is always tail- \ |
41 | index XXXXXXX..XXXXXXX 100644 | 35 | + * agnostic \ |
42 | --- a/hw/misc/sifive_u_otp.c | 36 | + */ \ |
43 | +++ b/hw/misc/sifive_u_otp.c | 37 | + /* set tail elements to 1s */ \ |
44 | @@ -XXX,XX +XXX,XX @@ static void sifive_u_otp_realize(DeviceState *dev, Error **errp) | 38 | + if (vta_all_1s) { \ |
45 | TYPE_SIFIVE_U_OTP, SIFIVE_U_OTP_REG_SIZE); | 39 | + for (; i < total_elems; i++) { \ |
46 | sysbus_init_mmio(SYS_BUS_DEVICE(dev), &s->mmio); | 40 | + vext_set_elem_mask(vd, i, 1); \ |
47 | 41 | + } \ | |
48 | - dinfo = drive_get_next(IF_NONE); | 42 | + } \ |
49 | + dinfo = drive_get_next(IF_PFLASH); | 43 | } |
50 | + if (!dinfo) { | 44 | |
51 | + dinfo = drive_get_next(IF_NONE); | 45 | #define DO_NAND(N, M) (!(N & M)) |
52 | + if (dinfo) { | 46 | @@ -XXX,XX +XXX,XX @@ static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env, |
53 | + warn_report("using \"-drive if=none\" for the OTP is deprecated, " | 47 | { |
54 | + "use \"-drive if=pflash\" instead."); | 48 | uint32_t vm = vext_vm(desc); |
49 | uint32_t vl = env->vl; | ||
50 | + uint32_t total_elems = env_archcpu(env)->cfg.vlen; | ||
51 | + uint32_t vta_all_1s = vext_vta_all_1s(desc); | ||
52 | int i; | ||
53 | bool first_mask_bit = false; | ||
54 | |||
55 | @@ -XXX,XX +XXX,XX @@ static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env, | ||
56 | } | ||
57 | } | ||
58 | env->vstart = 0; | ||
59 | + /* mask destination register are always tail-agnostic */ | ||
60 | + /* set tail elements to 1s */ | ||
61 | + if (vta_all_1s) { | ||
62 | + for (; i < total_elems; i++) { | ||
63 | + vext_set_elem_mask(vd, i, 1); | ||
55 | + } | 64 | + } |
56 | + } | 65 | + } |
57 | if (dinfo) { | 66 | } |
58 | int ret; | 67 | |
59 | uint64_t perm; | 68 | void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, |
69 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env, \ | ||
70 | { \ | ||
71 | uint32_t vm = vext_vm(desc); \ | ||
72 | uint32_t vl = env->vl; \ | ||
73 | + uint32_t esz = sizeof(ETYPE); \ | ||
74 | + uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ | ||
75 | + uint32_t vta = vext_vta(desc); \ | ||
76 | uint32_t sum = 0; \ | ||
77 | int i; \ | ||
78 | \ | ||
79 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env, \ | ||
80 | } \ | ||
81 | } \ | ||
82 | env->vstart = 0; \ | ||
83 | + /* set tail elements to 1s */ \ | ||
84 | + vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ | ||
85 | } | ||
86 | |||
87 | GEN_VEXT_VIOTA_M(viota_m_b, uint8_t, H1) | ||
88 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc) \ | ||
89 | { \ | ||
90 | uint32_t vm = vext_vm(desc); \ | ||
91 | uint32_t vl = env->vl; \ | ||
92 | + uint32_t esz = sizeof(ETYPE); \ | ||
93 | + uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ | ||
94 | + uint32_t vta = vext_vta(desc); \ | ||
95 | int i; \ | ||
96 | \ | ||
97 | for (i = env->vstart; i < vl; i++) { \ | ||
98 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc) \ | ||
99 | *((ETYPE *)vd + H(i)) = i; \ | ||
100 | } \ | ||
101 | env->vstart = 0; \ | ||
102 | + /* set tail elements to 1s */ \ | ||
103 | + vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ | ||
104 | } | ||
105 | |||
106 | GEN_VEXT_VID_V(vid_v_b, uint8_t, H1) | ||
107 | diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc | ||
108 | index XXXXXXX..XXXXXXX 100644 | ||
109 | --- a/target/riscv/insn_trans/trans_rvv.c.inc | ||
110 | +++ b/target/riscv/insn_trans/trans_rvv.c.inc | ||
111 | @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_r *a) \ | ||
112 | tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ | ||
113 | \ | ||
114 | data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ | ||
115 | + data = \ | ||
116 | + FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s);\ | ||
117 | tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ | ||
118 | vreg_ofs(s, a->rs1), \ | ||
119 | vreg_ofs(s, a->rs2), cpu_env, \ | ||
120 | @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ | ||
121 | \ | ||
122 | data = FIELD_DP32(data, VDATA, VM, a->vm); \ | ||
123 | data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ | ||
124 | + data = \ | ||
125 | + FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s);\ | ||
126 | tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), \ | ||
127 | vreg_ofs(s, 0), vreg_ofs(s, a->rs2), \ | ||
128 | cpu_env, s->cfg_ptr->vlen / 8, \ | ||
129 | @@ -XXX,XX +XXX,XX @@ static bool trans_viota_m(DisasContext *s, arg_viota_m *a) | ||
130 | |||
131 | data = FIELD_DP32(data, VDATA, VM, a->vm); | ||
132 | data = FIELD_DP32(data, VDATA, LMUL, s->lmul); | ||
133 | + data = FIELD_DP32(data, VDATA, VTA, s->vta); | ||
134 | static gen_helper_gvec_3_ptr * const fns[4] = { | ||
135 | gen_helper_viota_m_b, gen_helper_viota_m_h, | ||
136 | gen_helper_viota_m_w, gen_helper_viota_m_d, | ||
137 | @@ -XXX,XX +XXX,XX @@ static bool trans_vid_v(DisasContext *s, arg_vid_v *a) | ||
138 | |||
139 | data = FIELD_DP32(data, VDATA, VM, a->vm); | ||
140 | data = FIELD_DP32(data, VDATA, LMUL, s->lmul); | ||
141 | + data = FIELD_DP32(data, VDATA, VTA, s->vta); | ||
142 | static gen_helper_gvec_2_ptr * const fns[4] = { | ||
143 | gen_helper_vid_v_b, gen_helper_vid_v_h, | ||
144 | gen_helper_vid_v_w, gen_helper_vid_v_d, | ||
60 | -- | 145 | -- |
61 | 2.31.1 | 146 | 2.36.1 |
62 | |||
63 | |||
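A note on the mask helpers in this patch: they use env_archcpu(env)->cfg.vlen directly as total_elems rather than vext_get_total_elems(), because a mask destination always occupies a single vector register with one bit per element, so its tail extends to VLEN bits regardless of LMUL. As a made-up numeric illustration of the added loop (assume vlen = 128, vl = 5, and rvv_ta_all_1s enabled): the main loop writes mask bits 0..4, after which i == vl, and the tail loop then marks bits 5..127 agnostic:

    /* i == vl == 5 after the main loop; total_elems == 128 */
    for (; i < total_elems; i++) {
        vext_set_elem_mask(vd, i, 1);   /* bits 5..127 become 1 */
    }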
New patch | |||
---|---|---|---|
1 | From: eopXD <yueh.ting.chen@gmail.com> | ||
1 | 2 | ||
3 | Signed-off-by: eop Chen <eop.chen@sifive.com> | ||
4 | Reviewed-by: Frank Chang <frank.chang@sifive.com> | ||
5 | Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn> | ||
6 | Acked-by: Alistair Francis <alistair.francis@wdc.com> | ||
7 | Message-Id: <165449614532.19704.7000832880482980398-15@git.sr.ht> | ||
8 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | ||
9 | --- | ||
10 | target/riscv/vector_helper.c | 40 +++++++++++++++++++++++++ | ||
11 | target/riscv/insn_trans/trans_rvv.c.inc | 7 +++-- | ||
12 | 2 files changed, 45 insertions(+), 2 deletions(-) | ||
13 | |||
14 | diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c | ||
15 | index XXXXXXX..XXXXXXX 100644 | ||
16 | --- a/target/riscv/vector_helper.c | ||
17 | +++ b/target/riscv/vector_helper.c | ||
18 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ | ||
19 | { \ | ||
20 | uint32_t vm = vext_vm(desc); \ | ||
21 | uint32_t vl = env->vl; \ | ||
22 | + uint32_t esz = sizeof(ETYPE); \ | ||
23 | + uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ | ||
24 | + uint32_t vta = vext_vta(desc); \ | ||
25 | target_ulong offset = s1, i_min, i; \ | ||
26 | \ | ||
27 | i_min = MAX(env->vstart, offset); \ | ||
28 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ | ||
29 | } \ | ||
30 | *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset)); \ | ||
31 | } \ | ||
32 | + /* set tail elements to 1s */ \ | ||
33 | + vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ | ||
34 | } | ||
35 | |||
36 | /* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */ | ||
37 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ | ||
38 | uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \ | ||
39 | uint32_t vm = vext_vm(desc); \ | ||
40 | uint32_t vl = env->vl; \ | ||
41 | + uint32_t esz = sizeof(ETYPE); \ | ||
42 | + uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ | ||
43 | + uint32_t vta = vext_vta(desc); \ | ||
44 | target_ulong i_max, i; \ | ||
45 | \ | ||
46 | i_max = MAX(MIN(s1 < vlmax ? vlmax - s1 : 0, vl), env->vstart); \ | ||
47 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ | ||
48 | } \ | ||
49 | \ | ||
50 | env->vstart = 0; \ | ||
51 | + /* set tail elements to 1s */ \ | ||
52 | + vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ | ||
53 | } | ||
54 | |||
55 | /* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */ | ||
56 | @@ -XXX,XX +XXX,XX @@ static void vslide1up_##BITWIDTH(void *vd, void *v0, target_ulong s1, \ | ||
57 | typedef uint##BITWIDTH##_t ETYPE; \ | ||
58 | uint32_t vm = vext_vm(desc); \ | ||
59 | uint32_t vl = env->vl; \ | ||
60 | + uint32_t esz = sizeof(ETYPE); \ | ||
61 | + uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ | ||
62 | + uint32_t vta = vext_vta(desc); \ | ||
63 | uint32_t i; \ | ||
64 | \ | ||
65 | for (i = env->vstart; i < vl; i++) { \ | ||
66 | @@ -XXX,XX +XXX,XX @@ static void vslide1up_##BITWIDTH(void *vd, void *v0, target_ulong s1, \ | ||
67 | } \ | ||
68 | } \ | ||
69 | env->vstart = 0; \ | ||
70 | + /* set tail elements to 1s */ \ | ||
71 | + vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ | ||
72 | } | ||
73 | |||
74 | GEN_VEXT_VSLIE1UP(8, H1) | ||
75 | @@ -XXX,XX +XXX,XX @@ static void vslide1down_##BITWIDTH(void *vd, void *v0, target_ulong s1, \ | ||
76 | typedef uint##BITWIDTH##_t ETYPE; \ | ||
77 | uint32_t vm = vext_vm(desc); \ | ||
78 | uint32_t vl = env->vl; \ | ||
79 | + uint32_t esz = sizeof(ETYPE); \ | ||
80 | + uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ | ||
81 | + uint32_t vta = vext_vta(desc); \ | ||
82 | uint32_t i; \ | ||
83 | \ | ||
84 | for (i = env->vstart; i < vl; i++) { \ | ||
85 | @@ -XXX,XX +XXX,XX @@ static void vslide1down_##BITWIDTH(void *vd, void *v0, target_ulong s1, \ | ||
86 | } \ | ||
87 | } \ | ||
88 | env->vstart = 0; \ | ||
89 | + /* set tail elements to 1s */ \ | ||
90 | + vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ | ||
91 | } | ||
92 | |||
93 | GEN_VEXT_VSLIDE1DOWN(8, H1) | ||
94 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ | ||
95 | uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(TS2))); \ | ||
96 | uint32_t vm = vext_vm(desc); \ | ||
97 | uint32_t vl = env->vl; \ | ||
98 | + uint32_t esz = sizeof(TS2); \ | ||
99 | + uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ | ||
100 | + uint32_t vta = vext_vta(desc); \ | ||
101 | uint64_t index; \ | ||
102 | uint32_t i; \ | ||
103 | \ | ||
104 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ | ||
105 | } \ | ||
106 | } \ | ||
107 | env->vstart = 0; \ | ||
108 | + /* set tail elements to 1s */ \ | ||
109 | + vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ | ||
110 | } | ||
111 | |||
112 | /* vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; */ | ||
113 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ | ||
114 | uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \ | ||
115 | uint32_t vm = vext_vm(desc); \ | ||
116 | uint32_t vl = env->vl; \ | ||
117 | + uint32_t esz = sizeof(ETYPE); \ | ||
118 | + uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ | ||
119 | + uint32_t vta = vext_vta(desc); \ | ||
120 | uint64_t index = s1; \ | ||
121 | uint32_t i; \ | ||
122 | \ | ||
123 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ | ||
124 | } \ | ||
125 | } \ | ||
126 | env->vstart = 0; \ | ||
127 | + /* set tail elements to 1s */ \ | ||
128 | + vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ | ||
129 | } | ||
130 | |||
131 | /* vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[rs1] */ | ||
132 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ | ||
133 | CPURISCVState *env, uint32_t desc) \ | ||
134 | { \ | ||
135 | uint32_t vl = env->vl; \ | ||
136 | + uint32_t esz = sizeof(ETYPE); \ | ||
137 | + uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ | ||
138 | + uint32_t vta = vext_vta(desc); \ | ||
139 | uint32_t num = 0, i; \ | ||
140 | \ | ||
141 | for (i = env->vstart; i < vl; i++) { \ | ||
142 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ | ||
143 | num++; \ | ||
144 | } \ | ||
145 | env->vstart = 0; \ | ||
146 | + /* set tail elements to 1s */ \ | ||
147 | + vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ | ||
148 | } | ||
149 | |||
150 | /* Compress into vd elements of vs2 where vs1 is enabled */ | ||
151 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs2, \ | ||
152 | { \ | ||
153 | uint32_t vl = env->vl; \ | ||
154 | uint32_t vm = vext_vm(desc); \ | ||
155 | + uint32_t esz = sizeof(ETYPE); \ | ||
156 | + uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ | ||
157 | + uint32_t vta = vext_vta(desc); \ | ||
158 | uint32_t i; \ | ||
159 | \ | ||
160 | for (i = env->vstart; i < vl; i++) { \ | ||
161 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs2, \ | ||
162 | *((ETYPE *)vd + HD(i)) = *((DTYPE *)vs2 + HS1(i)); \ | ||
163 | } \ | ||
164 | env->vstart = 0; \ | ||
165 | + /* set tail elements to 1s */ \ | ||
166 | + vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ | ||
167 | } | ||
168 | |||
169 | GEN_VEXT_INT_EXT(vzext_vf2_h, uint16_t, uint8_t, H2, H1) | ||
170 | diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc | ||
171 | index XXXXXXX..XXXXXXX 100644 | ||
172 | --- a/target/riscv/insn_trans/trans_rvv.c.inc | ||
173 | +++ b/target/riscv/insn_trans/trans_rvv.c.inc | ||
174 | @@ -XXX,XX +XXX,XX @@ static bool trans_vrgather_vx(DisasContext *s, arg_rmrr *a) | ||
175 | return false; | ||
176 | } | ||
177 | |||
178 | - if (a->vm && s->vl_eq_vlmax) { | ||
179 | + if (a->vm && s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) { | ||
180 | int scale = s->lmul - (s->sew + 3); | ||
181 | int vlmax = s->cfg_ptr->vlen >> -scale; | ||
182 | TCGv_i64 dest = tcg_temp_new_i64(); | ||
183 | @@ -XXX,XX +XXX,XX @@ static bool trans_vrgather_vi(DisasContext *s, arg_rmrr *a) | ||
184 | return false; | ||
185 | } | ||
186 | |||
187 | - if (a->vm && s->vl_eq_vlmax) { | ||
188 | + if (a->vm && s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) { | ||
189 | int scale = s->lmul - (s->sew + 3); | ||
190 | int vlmax = s->cfg_ptr->vlen >> -scale; | ||
191 | if (a->rs1 >= vlmax) { | ||
192 | @@ -XXX,XX +XXX,XX @@ static bool trans_vcompress_vm(DisasContext *s, arg_r *a) | ||
193 | tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); | ||
194 | |||
195 | data = FIELD_DP32(data, VDATA, LMUL, s->lmul); | ||
196 | + data = FIELD_DP32(data, VDATA, VTA, s->vta); | ||
197 | tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), | ||
198 | vreg_ofs(s, a->rs1), vreg_ofs(s, a->rs2), | ||
199 | cpu_env, s->cfg_ptr->vlen / 8, | ||
200 | @@ -XXX,XX +XXX,XX @@ static bool int_ext_op(DisasContext *s, arg_rmr *a, uint8_t seq) | ||
201 | } | ||
202 | |||
203 | data = FIELD_DP32(data, VDATA, VM, a->vm); | ||
204 | + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); | ||
205 | + data = FIELD_DP32(data, VDATA, VTA, s->vta); | ||
206 | |||
207 | tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), | ||
208 | vreg_ofs(s, a->rs2), cpu_env, | ||
209 | -- | ||
210 | 2.36.1 | ||
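One hunk above deserves a comment: the vrgather fast paths now read "a->vm && s->vl_eq_vlmax && !(s->vta && s->lmul < 0)". The gvec-based shortcut writes the destination as a whole register, which is only safe when no agnostic tail would need to be set to 1s. With a fractional LMUL (s->lmul < 0), even vl == vlmax covers only part of the register, so a tail remains and the translator has to fall back to the out-of-line helper whenever tail-agnostic setting is in effect. Restated as a sketch, with names as in the hunk:

    /* The whole-register gvec shortcut is permitted only if it cannot
     * clobber a tail that the tail-agnostic policy may set to all 1s. */
    bool use_gvec_fast_path = a->vm && s->vl_eq_vlmax &&
                              !(s->vta && s->lmul < 0);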
New patch | |||
---|---|---|---|
1 | From: eopXD <eop.chen@sifive.com> | ||
1 | 2 | ||
3 | According to the v-spec, tail agnostic behavior can either leave the | ||
4 | tail elements undisturbed or set their bits to all 1s. To distinguish | ||
5 | between these tail policies, QEMU should be able to simulate the tail | ||
6 | agnostic behavior as "set tail elements' bits to all 1s". | ||
7 | |||
8 | There are multiple possibilities for agnostic elements according to | ||
9 | the v-spec. The main intent of this patch set is to add an option | ||
10 | that can distinguish between tail policies. Setting agnostic elements | ||
11 | to all 1s allows QEMU to express this. | ||
12 | |||
13 | This commit adds the option 'rvv_ta_all_1s' to enable the behavior; | ||
14 | it is disabled by default. | ||
15 | |||
16 | Signed-off-by: eop Chen <eop.chen@sifive.com> | ||
17 | Reviewed-by: Frank Chang <frank.chang@sifive.com> | ||
18 | Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn> | ||
19 | Reviewed-by: Alistair Francis <alistair.francis@wdc.com> | ||
20 | Message-Id: <165449614532.19704.7000832880482980398-16@git.sr.ht> | ||
21 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | ||
22 | --- | ||
23 | target/riscv/cpu.c | 2 ++ | ||
24 | 1 file changed, 2 insertions(+) | ||
25 | |||
26 | diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c | ||
27 | index XXXXXXX..XXXXXXX 100644 | ||
28 | --- a/target/riscv/cpu.c | ||
29 | +++ b/target/riscv/cpu.c | ||
30 | @@ -XXX,XX +XXX,XX @@ static Property riscv_cpu_properties[] = { | ||
31 | DEFINE_PROP_UINT64("resetvec", RISCVCPU, cfg.resetvec, DEFAULT_RSTVEC), | ||
32 | |||
33 | DEFINE_PROP_BOOL("short-isa-string", RISCVCPU, cfg.short_isa_string, false), | ||
34 | + | ||
35 | + DEFINE_PROP_BOOL("rvv_ta_all_1s", RISCVCPU, cfg.rvv_ta_all_1s, false), | ||
36 | DEFINE_PROP_END_OF_LIST(), | ||
37 | }; | ||
38 | |||
39 | -- | ||
40 | 2.36.1 | ||
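Since the property is registered with DEFINE_PROP_BOOL, it can be toggled like any other CPU option; an illustrative invocation (not taken from the series) would be:

    qemu-system-riscv64 -M virt -cpu rv64,v=true,vlen=128,rvv_ta_all_1s=true ...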
New patch | |||
---|---|---|---|
1 | From: Alistair Francis <alistair.francis@wdc.com> | ||
1 | 2 | ||
3 | There are currently two types of RISC-V CPUs: | ||
4 | - Generic CPUs (base or any) that allow complete customisation | ||
5 | - "Named" CPUs that match existing hardware | ||
6 | |||
7 | Users can use the base CPUs to customise the extensions that they want, for | ||
8 | example -cpu rv64,v=true. | ||
9 | |||
10 | We originally exposed these as part of the named CPUs as well, but that was | ||
11 | by accident. | ||
12 | |||
13 | Exposing the CPU properties to named CPUs means that we accidentally | ||
14 | enable extensions that don't exist on the CPUs by default. For example, | ||
15 | the SiFive E CPU currently supports the zba extension, which is a bug. | ||
16 | |||
17 | This patch instead only exposes the CPU extensions to the generic CPUs. | ||
18 | |||
19 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | ||
20 | Reviewed-by: Bin Meng <bmeng.cn@gmail.com> | ||
21 | Message-Id: <20220608061437.314434-1-alistair.francis@opensource.wdc.com> | ||
22 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | ||
23 | --- | ||
24 | target/riscv/cpu.c | 57 +++++++++++++++++++++++++++++++++++++--------- | ||
25 | 1 file changed, 46 insertions(+), 11 deletions(-) | ||
26 | |||
diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -XXX,XX +XXX,XX @@ static const char * const riscv_intr_names[] = {
     "reserved"
 };

+static void register_cpu_props(DeviceState *dev);
+
 const char *riscv_cpu_get_trap_name(target_ulong cause, bool async)
 {
     if (async) {
@@ -XXX,XX +XXX,XX @@ static void riscv_any_cpu_init(Object *obj)
     set_misa(env, MXL_RV64, RVI | RVM | RVA | RVF | RVD | RVC | RVU);
 #endif
     set_priv_version(env, PRIV_VERSION_1_12_0);
+    register_cpu_props(DEVICE(obj));
 }

 #if defined(TARGET_RISCV64)
@@ -XXX,XX +XXX,XX @@ static void rv64_base_cpu_init(Object *obj)
     CPURISCVState *env = &RISCV_CPU(obj)->env;
     /* We set this in the realise function */
     set_misa(env, MXL_RV64, 0);
+    register_cpu_props(DEVICE(obj));
 }

 static void rv64_sifive_u_cpu_init(Object *obj)
@@ -XXX,XX +XXX,XX @@ static void rv64_sifive_u_cpu_init(Object *obj)
 static void rv64_sifive_e_cpu_init(Object *obj)
 {
     CPURISCVState *env = &RISCV_CPU(obj)->env;
+    RISCVCPU *cpu = RISCV_CPU(obj);
+
     set_misa(env, MXL_RV64, RVI | RVM | RVA | RVC | RVU);
     set_priv_version(env, PRIV_VERSION_1_10_0);
-    qdev_prop_set_bit(DEVICE(obj), "mmu", false);
+    cpu->cfg.mmu = false;
 }

 static void rv128_base_cpu_init(Object *obj)
@@ -XXX,XX +XXX,XX @@ static void rv128_base_cpu_init(Object *obj)
     CPURISCVState *env = &RISCV_CPU(obj)->env;
     /* We set this in the realise function */
     set_misa(env, MXL_RV128, 0);
+    register_cpu_props(DEVICE(obj));
 }
 #else
 static void rv32_base_cpu_init(Object *obj)
@@ -XXX,XX +XXX,XX @@ static void rv32_base_cpu_init(Object *obj)
     CPURISCVState *env = &RISCV_CPU(obj)->env;
     /* We set this in the realise function */
     set_misa(env, MXL_RV32, 0);
+    register_cpu_props(DEVICE(obj));
 }

 static void rv32_sifive_u_cpu_init(Object *obj)
@@ -XXX,XX +XXX,XX @@ static void rv32_sifive_u_cpu_init(Object *obj)
 static void rv32_sifive_e_cpu_init(Object *obj)
 {
     CPURISCVState *env = &RISCV_CPU(obj)->env;
+    RISCVCPU *cpu = RISCV_CPU(obj);
+
     set_misa(env, MXL_RV32, RVI | RVM | RVA | RVC | RVU);
     set_priv_version(env, PRIV_VERSION_1_10_0);
-    qdev_prop_set_bit(DEVICE(obj), "mmu", false);
+    cpu->cfg.mmu = false;
 }

 static void rv32_ibex_cpu_init(Object *obj)
 {
     CPURISCVState *env = &RISCV_CPU(obj)->env;
+    RISCVCPU *cpu = RISCV_CPU(obj);
+
     set_misa(env, MXL_RV32, RVI | RVM | RVC | RVU);
     set_priv_version(env, PRIV_VERSION_1_10_0);
-    qdev_prop_set_bit(DEVICE(obj), "mmu", false);
-    qdev_prop_set_bit(DEVICE(obj), "x-epmp", true);
+    cpu->cfg.mmu = false;
+    cpu->cfg.epmp = true;
 }

 static void rv32_imafcu_nommu_cpu_init(Object *obj)
 {
     CPURISCVState *env = &RISCV_CPU(obj)->env;
+    RISCVCPU *cpu = RISCV_CPU(obj);
+
     set_misa(env, MXL_RV32, RVI | RVM | RVA | RVF | RVC | RVU);
     set_priv_version(env, PRIV_VERSION_1_10_0);
     set_resetvec(env, DEFAULT_RSTVEC);
-    qdev_prop_set_bit(DEVICE(obj), "mmu", false);
+    cpu->cfg.mmu = false;
 }
 #endif

@@ -XXX,XX +XXX,XX @@ static void riscv_host_cpu_init(Object *obj)
 #elif defined(TARGET_RISCV64)
     set_misa(env, MXL_RV64, 0);
 #endif
+    register_cpu_props(DEVICE(obj));
 }
 #endif

@@ -XXX,XX +XXX,XX @@ static void riscv_cpu_init(Object *obj)
 {
     RISCVCPU *cpu = RISCV_CPU(obj);

+    cpu->cfg.ext_counters = true;
+    cpu->cfg.ext_ifencei = true;
+    cpu->cfg.ext_icsr = true;
+    cpu->cfg.mmu = true;
+    cpu->cfg.pmp = true;
+
     cpu_set_cpustate_pointers(cpu);

 #ifndef CONFIG_USER_ONLY
@@ -XXX,XX +XXX,XX @@ static void riscv_cpu_init(Object *obj)
 #endif /* CONFIG_USER_ONLY */
 }

-static Property riscv_cpu_properties[] = {
+static Property riscv_cpu_extensions[] = {
     /* Defaults for standard extensions */
     DEFINE_PROP_BOOL("i", RISCVCPU, cfg.ext_i, true),
     DEFINE_PROP_BOOL("e", RISCVCPU, cfg.ext_e, false),
@@ -XXX,XX +XXX,XX @@ static Property riscv_cpu_properties[] = {
     DEFINE_PROP_BOOL("Zve64f", RISCVCPU, cfg.ext_zve64f, false),
     DEFINE_PROP_BOOL("mmu", RISCVCPU, cfg.mmu, true),
     DEFINE_PROP_BOOL("pmp", RISCVCPU, cfg.pmp, true),
-    DEFINE_PROP_BOOL("debug", RISCVCPU, cfg.debug, true),

     DEFINE_PROP_STRING("priv_spec", RISCVCPU, cfg.priv_spec),
     DEFINE_PROP_STRING("vext_spec", RISCVCPU, cfg.vext_spec),
     DEFINE_PROP_UINT16("vlen", RISCVCPU, cfg.vlen, 128),
     DEFINE_PROP_UINT16("elen", RISCVCPU, cfg.elen, 64),

-    DEFINE_PROP_UINT32("mvendorid", RISCVCPU, cfg.mvendorid, 0),
-    DEFINE_PROP_UINT64("marchid", RISCVCPU, cfg.marchid, RISCV_CPU_MARCHID),
-    DEFINE_PROP_UINT64("mimpid", RISCVCPU, cfg.mimpid, RISCV_CPU_MIMPID),
-
     DEFINE_PROP_BOOL("svinval", RISCVCPU, cfg.ext_svinval, false),
     DEFINE_PROP_BOOL("svnapot", RISCVCPU, cfg.ext_svnapot, false),
     DEFINE_PROP_BOOL("svpbmt", RISCVCPU, cfg.ext_svpbmt, false),
@@ -XXX,XX +XXX,XX @@ static Property riscv_cpu_properties[] = {
     DEFINE_PROP_BOOL("x-epmp", RISCVCPU, cfg.epmp, false),
     DEFINE_PROP_BOOL("x-aia", RISCVCPU, cfg.aia, false),

+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static void register_cpu_props(DeviceState *dev)
+{
+    Property *prop;
+
+    for (prop = riscv_cpu_extensions; prop && prop->name; prop++) {
+        qdev_property_add_static(dev, prop);
+    }
+}
+
+static Property riscv_cpu_properties[] = {
+    DEFINE_PROP_BOOL("debug", RISCVCPU, cfg.debug, true),
+
+    DEFINE_PROP_UINT32("mvendorid", RISCVCPU, cfg.mvendorid, 0),
+    DEFINE_PROP_UINT64("marchid", RISCVCPU, cfg.marchid, RISCV_CPU_MARCHID),
+    DEFINE_PROP_UINT64("mimpid", RISCVCPU, cfg.mimpid, RISCV_CPU_MIMPID),
+
     DEFINE_PROP_UINT64("resetvec", RISCVCPU, cfg.resetvec, DEFAULT_RSTVEC),

     DEFINE_PROP_BOOL("short-isa-string", RISCVCPU, cfg.short_isa_string, false),
--
2.36.1
From: Alistair Francis <alistair.francis@wdc.com>

When running a 32-bit guest with an e64 vmv.v.x and vl_eq_vlmax set to
true, the `tcg_debug_assert(vece <= MO_32)` inside tcg_gen_gvec_dup_i32()
will be triggered.

This patch checks for that condition and instead uses
tcg_gen_gvec_dup_i64() where required.
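
For context, a minimal reproducer sketch (the guest binary name is
hypothetical): a 32-bit guest that executes an e64 vmv.v.x while vl equals
VLMAX, e.g.

  # guest test program executes a vsetvli selecting e64, then vmv.v.x
  qemu-riscv32 -cpu rv32,v=true,vlen=128 ./vmv-v-x-e64-test

previously aborted QEMU via the assert above, since on RV32
tcg_gen_gvec_dup_tl() expands to tcg_gen_gvec_dup_i32(), which cannot
duplicate a 64-bit element.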

Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1028
Suggested-by: Robert Bu <robert.bu@gmail.com>
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20220608234701.369536-1-alistair.francis@opensource.wdc.com>
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
---
 target/riscv/insn_trans/trans_rvv.c.inc | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -XXX,XX +XXX,XX @@ static bool trans_vmv_v_x(DisasContext *s, arg_vmv_v_x *a)
         s1 = get_gpr(s, a->rs1, EXT_SIGN);

         if (s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) {
-            tcg_gen_gvec_dup_tl(s->sew, vreg_ofs(s, a->rd),
-                                MAXSZ(s), MAXSZ(s), s1);
+            if (get_xl(s) == MXL_RV32 && s->sew == MO_64) {
+                TCGv_i64 s1_i64 = tcg_temp_new_i64();
+                tcg_gen_ext_tl_i64(s1_i64, s1);
+                tcg_gen_gvec_dup_i64(s->sew, vreg_ofs(s, a->rd),
+                                     MAXSZ(s), MAXSZ(s), s1_i64);
+                tcg_temp_free_i64(s1_i64);
+            } else {
+                tcg_gen_gvec_dup_tl(s->sew, vreg_ofs(s, a->rd),
+                                    MAXSZ(s), MAXSZ(s), s1);
+            }
         } else {
             TCGv_i32 desc;
             TCGv_i64 s1_i64 = tcg_temp_new_i64();
--
2.36.1