:p
atchew
Login
From: Alistair Francis <alistair.francis@wdc.com> The following changes since commit 9cc1bf1ebca550f8d90f967ccd2b6d2e00e81387: Merge tag 'pull-xen-20220609' of https://xenbits.xen.org/git-http/people/aperard/qemu-dm into staging (2022-06-09 08:25:17 -0700) are available in the Git repository at: git@github.com:alistair23/qemu.git tags/pull-riscv-to-apply-20220610 for you to fetch changes up to 07314158f6aa4d2589520c194a7531b9364a8d54: target/riscv: trans_rvv: Avoid assert for RV32 and e64 (2022-06-10 09:42:12 +1000) ---------------------------------------------------------------- Fourth RISC-V PR for QEMU 7.1 * Update MAINTAINERS * Add support for Zmmul extension * Fixup FDT errors when supplying device tree from the command line for virt machine * Avoid overflowing the addr_config buffer in the SiFive PLIC * Support -device loader addresses above 2GB * Correctly wake from WFI on VS-level external interrupts * Fixes for RV128 support * Support Vector extension tail agnostic setting elements' bits to all 1s * Don't expose the CPU properties on named CPUs * Fix vector extension assert for RV32 ---------------------------------------------------------------- Alistair Francis (4): MAINTAINERS: Cover hw/core/uboot_image.h within Generic Loader section hw/intc: sifive_plic: Avoid overflowing the addr_config buffer target/riscv: Don't expose the CPU properties on names CPUs target/riscv: trans_rvv: Avoid assert for RV32 and e64 Andrew Bresticker (1): target/riscv: Wake on VS-level external interrupts Atish Patra (1): hw/riscv: virt: Generate fw_cfg DT node correctly Frédéric Pétrot (1): target/riscv/debug.c: keep experimental rv128 support working Jamie Iles (1): hw/core/loader: return image sizes as ssize_t Weiwei Li (1): target/riscv: add support for zmmul extension v0.1 eopXD (16): target/riscv: rvv: Prune redundant ESZ, DSZ parameter passed target/riscv: rvv: Prune redundant access_type parameter passed target/riscv: rvv: Rename ambiguous esz target/riscv: rvv: Early exit when vstart >= vl target/riscv: rvv: Add tail agnostic for vv instructions target/riscv: rvv: Add tail agnostic for vector load / store instructions target/riscv: rvv: Add tail agnostic for vx, vvm, vxm instructions target/riscv: rvv: Add tail agnostic for vector integer shift instructions target/riscv: rvv: Add tail agnostic for vector integer comparison instructions target/riscv: rvv: Add tail agnostic for vector integer merge and move instructions target/riscv: rvv: Add tail agnostic for vector fix-point arithmetic instructions target/riscv: rvv: Add tail agnostic for vector floating-point instructions target/riscv: rvv: Add tail agnostic for vector reduction instructions target/riscv: rvv: Add tail agnostic for vector mask instructions target/riscv: rvv: Add tail agnostic for vector permutation instructions target/riscv: rvv: Add option 'rvv_ta_all_1s' to enable optional tail agnostic behavior include/hw/loader.h | 55 +- target/riscv/cpu.h | 4 + target/riscv/internals.h | 6 +- hw/arm/armv7m.c | 2 +- hw/arm/boot.c | 8 +- hw/core/generic-loader.c | 2 +- hw/core/loader.c | 81 +- hw/i386/x86.c | 2 +- hw/intc/sifive_plic.c | 19 +- hw/riscv/boot.c | 5 +- hw/riscv/virt.c | 28 +- target/riscv/cpu.c | 68 +- target/riscv/cpu_helper.c | 4 +- target/riscv/debug.c | 2 + target/riscv/translate.c | 4 + target/riscv/vector_helper.c | 1588 +++++++++++++++++++------------ target/riscv/insn_trans/trans_rvm.c.inc | 18 +- target/riscv/insn_trans/trans_rvv.c.inc | 106 ++- MAINTAINERS | 1 + 19 files changed, 1244 insertions(+), 759 deletions(-)
From: Alistair Francis <alistair.francis@wdc.com> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org> Message-Id: <20220509091339.26016-1-alistair.francis@wdc.com> --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index XXXXXXX..XXXXXXX 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -XXX,XX +XXX,XX @@ Generic Loader M: Alistair Francis <alistair@alistair23.me> S: Maintained F: hw/core/generic-loader.c +F: hw/core/uboot_image.h F: include/hw/core/generic-loader.h F: docs/system/generic-loader.rst -- 2.36.1
From: Weiwei Li <liweiwei@iscas.ac.cn> Add support for the zmmul extension v0.1. This extension includes all multiplication operations from the M extension but not the divide ops. Signed-off-by: Weiwei Li <liweiwei@iscas.ac.cn> Signed-off-by: Junqiang Wang <wangjunqiang@iscas.ac.cn> Reviewed-by: Víctor Colombo <victor.colombo@eldorado.org.br> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Message-Id: <20220531030732.3850-1-liweiwei@iscas.ac.cn> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/cpu.h | 1 + target/riscv/cpu.c | 7 +++++++ target/riscv/insn_trans/trans_rvm.c.inc | 18 ++++++++++++------ 3 files changed, 20 insertions(+), 6 deletions(-) diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/cpu.h +++ b/target/riscv/cpu.h @@ -XXX,XX +XXX,XX @@ struct RISCVCPUConfig { bool ext_zhinxmin; bool ext_zve32f; bool ext_zve64f; + bool ext_zmmul; uint32_t mvendorid; uint64_t marchid; diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/cpu.c +++ b/target/riscv/cpu.c @@ -XXX,XX +XXX,XX @@ static void riscv_cpu_realize(DeviceState *dev, Error **errp) cpu->cfg.ext_ifencei = true; } + if (cpu->cfg.ext_m && cpu->cfg.ext_zmmul) { + warn_report("Zmmul will override M"); + cpu->cfg.ext_m = false; + } + if (cpu->cfg.ext_i && cpu->cfg.ext_e) { error_setg(errp, "I and E extensions are incompatible"); @@ -XXX,XX +XXX,XX @@ static Property riscv_cpu_properties[] = { /* These are experimental so mark with 'x-' */ DEFINE_PROP_BOOL("x-j", RISCVCPU, cfg.ext_j, false), + DEFINE_PROP_BOOL("x-zmmul", RISCVCPU, cfg.ext_zmmul, false), /* ePMP 0.9.3 */ DEFINE_PROP_BOOL("x-epmp", RISCVCPU, cfg.epmp, false), DEFINE_PROP_BOOL("x-aia", RISCVCPU, cfg.aia, false), @@ -XXX,XX +XXX,XX @@ static void riscv_isa_string_ext(RISCVCPU *cpu, char **isa_str, int max_str_len) struct isa_ext_data isa_edata_arr[] = { ISA_EDATA_ENTRY(zicsr, ext_icsr), ISA_EDATA_ENTRY(zifencei, ext_ifencei), + ISA_EDATA_ENTRY(zmmul, ext_zmmul), ISA_EDATA_ENTRY(zfh, ext_zfh), ISA_EDATA_ENTRY(zfhmin, ext_zfhmin), ISA_EDATA_ENTRY(zfinx, ext_zfinx), diff --git a/target/riscv/insn_trans/trans_rvm.c.inc b/target/riscv/insn_trans/trans_rvm.c.inc index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn_trans/trans_rvm.c.inc +++ b/target/riscv/insn_trans/trans_rvm.c.inc @@ -XXX,XX +XXX,XX @@ * this program. If not, see <http://www.gnu.org/licenses/>. */ +#define REQUIRE_M_OR_ZMMUL(ctx) do { \ + if (!ctx->cfg_ptr->ext_zmmul && !has_ext(ctx, RVM)) { \ + return false; \ + } \ +} while (0) + static void gen_mulhu_i128(TCGv r2, TCGv r3, TCGv al, TCGv ah, TCGv bl, TCGv bh) { TCGv tmpl = tcg_temp_new(); @@ -XXX,XX +XXX,XX @@ static void gen_mul_i128(TCGv rl, TCGv rh, static bool trans_mul(DisasContext *ctx, arg_mul *a) { - REQUIRE_EXT(ctx, RVM); + REQUIRE_M_OR_ZMMUL(ctx); return gen_arith(ctx, a, EXT_NONE, tcg_gen_mul_tl, gen_mul_i128); } @@ -XXX,XX +XXX,XX @@ static void gen_mulh_w(TCGv ret, TCGv s1, TCGv s2) static bool trans_mulh(DisasContext *ctx, arg_mulh *a) { - REQUIRE_EXT(ctx, RVM); + REQUIRE_M_OR_ZMMUL(ctx); return gen_arith_per_ol(ctx, a, EXT_SIGN, gen_mulh, gen_mulh_w, gen_mulh_i128); } @@ -XXX,XX +XXX,XX @@ static void gen_mulhsu_w(TCGv ret, TCGv arg1, TCGv arg2) static bool trans_mulhsu(DisasContext *ctx, arg_mulhsu *a) { - REQUIRE_EXT(ctx, RVM); + REQUIRE_M_OR_ZMMUL(ctx); return gen_arith_per_ol(ctx, a, EXT_NONE, gen_mulhsu, gen_mulhsu_w, gen_mulhsu_i128); } @@ -XXX,XX +XXX,XX @@ static void gen_mulhu(TCGv ret, TCGv s1, TCGv s2) static bool trans_mulhu(DisasContext *ctx, arg_mulhu *a) { - REQUIRE_EXT(ctx, RVM); + REQUIRE_M_OR_ZMMUL(ctx); /* gen_mulh_w works for either sign as input. */ return gen_arith_per_ol(ctx, a, EXT_ZERO, gen_mulhu, gen_mulh_w, gen_mulhu_i128); @@ -XXX,XX +XXX,XX @@ static bool trans_remu(DisasContext *ctx, arg_remu *a) static bool trans_mulw(DisasContext *ctx, arg_mulw *a) { REQUIRE_64_OR_128BIT(ctx); - REQUIRE_EXT(ctx, RVM); + REQUIRE_M_OR_ZMMUL(ctx); ctx->ol = MXL_RV32; return gen_arith(ctx, a, EXT_NONE, tcg_gen_mul_tl, NULL); } @@ -XXX,XX +XXX,XX @@ static bool trans_remuw(DisasContext *ctx, arg_remuw *a) static bool trans_muld(DisasContext *ctx, arg_muld *a) { REQUIRE_128BIT(ctx); - REQUIRE_EXT(ctx, RVM); + REQUIRE_M_OR_ZMMUL(ctx); ctx->ol = MXL_RV64; return gen_arith(ctx, a, EXT_SIGN, tcg_gen_mul_tl, NULL); } -- 2.36.1
From: Atish Patra <atishp@rivosinc.com> fw_cfg DT node is generated after the create_fdt without any check if the DT is being loaded from the commandline. This results in FDT_ERR_EXISTS error if dtb is loaded from the commandline. Generate fw_cfg node only if the DT is not loaded from the commandline. Signed-off-by: Atish Patra <atishp@rivosinc.com> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Message-Id: <20220526203500.847165-1-atishp@rivosinc.com> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- hw/riscv/virt.c | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c index XXXXXXX..XXXXXXX 100644 --- a/hw/riscv/virt.c +++ b/hw/riscv/virt.c @@ -XXX,XX +XXX,XX @@ static void create_fdt_flash(RISCVVirtState *s, const MemMapEntry *memmap) g_free(name); } +static void create_fdt_fw_cfg(RISCVVirtState *s, const MemMapEntry *memmap) +{ + char *nodename; + MachineState *mc = MACHINE(s); + hwaddr base = memmap[VIRT_FW_CFG].base; + hwaddr size = memmap[VIRT_FW_CFG].size; + + nodename = g_strdup_printf("/fw-cfg@%" PRIx64, base); + qemu_fdt_add_subnode(mc->fdt, nodename); + qemu_fdt_setprop_string(mc->fdt, nodename, + "compatible", "qemu,fw-cfg-mmio"); + qemu_fdt_setprop_sized_cells(mc->fdt, nodename, "reg", + 2, base, 2, size); + qemu_fdt_setprop(mc->fdt, nodename, "dma-coherent", NULL, 0); + g_free(nodename); +} + static void create_fdt(RISCVVirtState *s, const MemMapEntry *memmap, uint64_t mem_size, const char *cmdline, bool is_32_bit) { @@ -XXX,XX +XXX,XX @@ static void create_fdt(RISCVVirtState *s, const MemMapEntry *memmap, create_fdt_rtc(s, memmap, irq_mmio_phandle); create_fdt_flash(s, memmap); + create_fdt_fw_cfg(s, memmap); update_bootargs: if (cmdline && *cmdline) { @@ -XXX,XX +XXX,XX @@ static inline DeviceState *gpex_pcie_init(MemoryRegion *sys_mem, static FWCfgState *create_fw_cfg(const MachineState *mc) { hwaddr base = virt_memmap[VIRT_FW_CFG].base; - hwaddr size = virt_memmap[VIRT_FW_CFG].size; FWCfgState *fw_cfg; - char *nodename; fw_cfg = fw_cfg_init_mem_wide(base + 8, base, 8, base + 16, &address_space_memory); fw_cfg_add_i16(fw_cfg, FW_CFG_NB_CPUS, (uint16_t)mc->smp.cpus); - nodename = g_strdup_printf("/fw-cfg@%" PRIx64, base); - qemu_fdt_add_subnode(mc->fdt, nodename); - qemu_fdt_setprop_string(mc->fdt, nodename, - "compatible", "qemu,fw-cfg-mmio"); - qemu_fdt_setprop_sized_cells(mc->fdt, nodename, "reg", - 2, base, 2, size); - qemu_fdt_setprop(mc->fdt, nodename, "dma-coherent", NULL, 0); - g_free(nodename); return fw_cfg; } -- 2.36.1
From: Alistair Francis <alistair.francis@wdc.com> Since commit ad40be27 "target/riscv: Support start kernel directly by KVM" we have been overflowing the addr_config on "M,MS..." configurations, as reported https://gitlab.com/qemu-project/qemu/-/issues/1050. This commit changes the loop in sifive_plic_create() from iterating over the number of harts to just iterating over the addr_config. The addr_config is based on the hart_config, and will contain interrup details for all harts. This way we can't iterate past the end of addr_config. Fixes: ad40be27084536 ("target/riscv: Support start kernel directly by KVM") Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1050 Signed-off-by: Alistair Francis <alistair.francis@wdc.com> Reviewed-by: Mingwang Li <limingwang@huawei.com> Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org> Message-Id: <20220601013631.196854-1-alistair.francis@opensource.wdc.com> --- hw/intc/sifive_plic.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/hw/intc/sifive_plic.c b/hw/intc/sifive_plic.c index XXXXXXX..XXXXXXX 100644 --- a/hw/intc/sifive_plic.c +++ b/hw/intc/sifive_plic.c @@ -XXX,XX +XXX,XX @@ DeviceState *sifive_plic_create(hwaddr addr, char *hart_config, uint32_t context_stride, uint32_t aperture_size) { DeviceState *dev = qdev_new(TYPE_SIFIVE_PLIC); - int i, j = 0; + int i; SiFivePLICState *plic; assert(enable_stride == (enable_stride & -enable_stride)); @@ -XXX,XX +XXX,XX @@ DeviceState *sifive_plic_create(hwaddr addr, char *hart_config, sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, addr); plic = SIFIVE_PLIC(dev); - for (i = 0; i < num_harts; i++) { - CPUState *cpu = qemu_get_cpu(hartid_base + i); - if (plic->addr_config[j].mode == PLICMode_M) { - j++; - qdev_connect_gpio_out(dev, num_harts + i, + for (i = 0; i < plic->num_addrs; i++) { + int cpu_num = plic->addr_config[i].hartid; + CPUState *cpu = qemu_get_cpu(hartid_base + cpu_num); + + if (plic->addr_config[i].mode == PLICMode_M) { + qdev_connect_gpio_out(dev, num_harts + cpu_num, qdev_get_gpio_in(DEVICE(cpu), IRQ_M_EXT)); } - - if (plic->addr_config[j].mode == PLICMode_S) { - j++; - qdev_connect_gpio_out(dev, i, + if (plic->addr_config[i].mode == PLICMode_S) { + qdev_connect_gpio_out(dev, cpu_num, qdev_get_gpio_in(DEVICE(cpu), IRQ_S_EXT)); } } -- 2.36.1
From: Jamie Iles <jamie@nuviainc.com> Various loader functions return an int which limits images to 2GB which is fine for things like a BIOS/kernel image, but if we want to be able to load memory images or large ramdisks then any file over 2GB would silently fail to load. Cc: Luc Michel <lmichel@kalray.eu> Signed-off-by: Jamie Iles <jamie@nuviainc.com> Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com> Reviewed-by: Luc Michel <lmichel@kalray.eu> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Message-Id: <20211111141141.3295094-2-jamie@nuviainc.com> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- include/hw/loader.h | 55 +++++++++++++-------------- hw/arm/armv7m.c | 2 +- hw/arm/boot.c | 8 ++-- hw/core/generic-loader.c | 2 +- hw/core/loader.c | 81 +++++++++++++++++++++------------------- hw/i386/x86.c | 2 +- hw/riscv/boot.c | 5 ++- 7 files changed, 80 insertions(+), 75 deletions(-) diff --git a/include/hw/loader.h b/include/hw/loader.h index XXXXXXX..XXXXXXX 100644 --- a/include/hw/loader.h +++ b/include/hw/loader.h @@ -XXX,XX +XXX,XX @@ ssize_t load_image_size(const char *filename, void *addr, size_t size); * * Returns the size of the loaded image on success, -1 otherwise. */ -int load_image_targphys_as(const char *filename, - hwaddr addr, uint64_t max_sz, AddressSpace *as); +ssize_t load_image_targphys_as(const char *filename, + hwaddr addr, uint64_t max_sz, AddressSpace *as); /**load_targphys_hex_as: * @filename: Path to the .hex file @@ -XXX,XX +XXX,XX @@ int load_image_targphys_as(const char *filename, * * Returns the size of the loaded .hex file on success, -1 otherwise. */ -int load_targphys_hex_as(const char *filename, hwaddr *entry, AddressSpace *as); +ssize_t load_targphys_hex_as(const char *filename, hwaddr *entry, + AddressSpace *as); /** load_image_targphys: * Same as load_image_targphys_as(), but doesn't allow the caller to specify * an AddressSpace. */ -int load_image_targphys(const char *filename, hwaddr, - uint64_t max_sz); +ssize_t load_image_targphys(const char *filename, hwaddr, + uint64_t max_sz); /** * load_image_mr: load an image into a memory region @@ -XXX,XX +XXX,XX @@ int load_image_targphys(const char *filename, hwaddr, * If the file is larger than the memory region's size the call will fail. * Returns -1 on failure, or the size of the file. */ -int load_image_mr(const char *filename, MemoryRegion *mr); +ssize_t load_image_mr(const char *filename, MemoryRegion *mr); /* This is the limit on the maximum uncompressed image size that * load_image_gzipped_buffer() and load_image_gzipped() will read. It prevents @@ -XXX,XX +XXX,XX @@ int load_image_mr(const char *filename, MemoryRegion *mr); */ #define LOAD_IMAGE_MAX_GUNZIP_BYTES (256 << 20) -int load_image_gzipped_buffer(const char *filename, uint64_t max_sz, - uint8_t **buffer); -int load_image_gzipped(const char *filename, hwaddr addr, uint64_t max_sz); +ssize_t load_image_gzipped_buffer(const char *filename, uint64_t max_sz, + uint8_t **buffer); +ssize_t load_image_gzipped(const char *filename, hwaddr addr, uint64_t max_sz); #define ELF_LOAD_FAILED -1 #define ELF_LOAD_NOT_ELF -2 @@ -XXX,XX +XXX,XX @@ ssize_t load_elf(const char *filename, */ void load_elf_hdr(const char *filename, void *hdr, bool *is64, Error **errp); -int load_aout(const char *filename, hwaddr addr, int max_sz, - int bswap_needed, hwaddr target_page_size); +ssize_t load_aout(const char *filename, hwaddr addr, int max_sz, + int bswap_needed, hwaddr target_page_size); #define LOAD_UIMAGE_LOADADDR_INVALID (-1) @@ -XXX,XX +XXX,XX @@ int load_aout(const char *filename, hwaddr addr, int max_sz, * * Returns the size of the loaded image on success, -1 otherwise. */ -int load_uimage_as(const char *filename, hwaddr *ep, - hwaddr *loadaddr, int *is_linux, - uint64_t (*translate_fn)(void *, uint64_t), - void *translate_opaque, AddressSpace *as); +ssize_t load_uimage_as(const char *filename, hwaddr *ep, + hwaddr *loadaddr, int *is_linux, + uint64_t (*translate_fn)(void *, uint64_t), + void *translate_opaque, AddressSpace *as); /** load_uimage: * Same as load_uimage_as(), but doesn't allow the caller to specify an * AddressSpace. */ -int load_uimage(const char *filename, hwaddr *ep, - hwaddr *loadaddr, int *is_linux, - uint64_t (*translate_fn)(void *, uint64_t), - void *translate_opaque); +ssize_t load_uimage(const char *filename, hwaddr *ep, + hwaddr *loadaddr, int *is_linux, + uint64_t (*translate_fn)(void *, uint64_t), + void *translate_opaque); /** * load_ramdisk_as: @@ -XXX,XX +XXX,XX @@ int load_uimage(const char *filename, hwaddr *ep, * * Returns the size of the loaded image on success, -1 otherwise. */ -int load_ramdisk_as(const char *filename, hwaddr addr, uint64_t max_sz, - AddressSpace *as); +ssize_t load_ramdisk_as(const char *filename, hwaddr addr, uint64_t max_sz, + AddressSpace *as); /** * load_ramdisk: * Same as load_ramdisk_as(), but doesn't allow the caller to specify * an AddressSpace. */ -int load_ramdisk(const char *filename, hwaddr addr, uint64_t max_sz); +ssize_t load_ramdisk(const char *filename, hwaddr addr, uint64_t max_sz); ssize_t gunzip(void *dst, size_t dstlen, uint8_t *src, size_t srclen); @@ -XXX,XX +XXX,XX @@ void pstrcpy_targphys(const char *name, extern bool option_rom_has_mr; extern bool rom_file_has_mr; -int rom_add_file(const char *file, const char *fw_dir, - hwaddr addr, int32_t bootindex, - bool option_rom, MemoryRegion *mr, AddressSpace *as); +ssize_t rom_add_file(const char *file, const char *fw_dir, + hwaddr addr, int32_t bootindex, + bool option_rom, MemoryRegion *mr, AddressSpace *as); MemoryRegion *rom_add_blob(const char *name, const void *blob, size_t len, size_t max_len, hwaddr addr, const char *fw_file_name, @@ -XXX,XX +XXX,XX @@ void hmp_info_roms(Monitor *mon, const QDict *qdict); #define rom_add_blob_fixed_as(_f, _b, _l, _a, _as) \ rom_add_blob(_f, _b, _l, _l, _a, NULL, NULL, NULL, _as, true) -int rom_add_vga(const char *file); -int rom_add_option(const char *file, int32_t bootindex); +ssize_t rom_add_vga(const char *file); +ssize_t rom_add_option(const char *file, int32_t bootindex); /* This is the usual maximum in uboot, so if a uImage overflows this, it would * overflow on real hardware too. */ diff --git a/hw/arm/armv7m.c b/hw/arm/armv7m.c index XXXXXXX..XXXXXXX 100644 --- a/hw/arm/armv7m.c +++ b/hw/arm/armv7m.c @@ -XXX,XX +XXX,XX @@ static void armv7m_reset(void *opaque) void armv7m_load_kernel(ARMCPU *cpu, const char *kernel_filename, int mem_size) { - int image_size; + ssize_t image_size; uint64_t entry; int big_endian; AddressSpace *as; diff --git a/hw/arm/boot.c b/hw/arm/boot.c index XXXXXXX..XXXXXXX 100644 --- a/hw/arm/boot.c +++ b/hw/arm/boot.c @@ -XXX,XX +XXX,XX @@ static int do_arm_linux_init(Object *obj, void *opaque) return 0; } -static int64_t arm_load_elf(struct arm_boot_info *info, uint64_t *pentry, +static ssize_t arm_load_elf(struct arm_boot_info *info, uint64_t *pentry, uint64_t *lowaddr, uint64_t *highaddr, int elf_machine, AddressSpace *as) { @@ -XXX,XX +XXX,XX @@ static int64_t arm_load_elf(struct arm_boot_info *info, uint64_t *pentry, } elf_header; int data_swab = 0; bool big_endian; - int64_t ret = -1; + ssize_t ret = -1; Error *err = NULL; @@ -XXX,XX +XXX,XX @@ static void arm_setup_direct_kernel_boot(ARMCPU *cpu, /* Set up for a direct boot of a kernel image file. */ CPUState *cs; AddressSpace *as = arm_boot_address_space(cpu, info); - int kernel_size; + ssize_t kernel_size; int initrd_size; int is_linux = 0; uint64_t elf_entry; @@ -XXX,XX +XXX,XX @@ static void arm_setup_direct_kernel_boot(ARMCPU *cpu, if (kernel_size > info->ram_size) { error_report("kernel '%s' is too large to fit in RAM " - "(kernel size %d, RAM size %" PRId64 ")", + "(kernel size %zd, RAM size %" PRId64 ")", info->kernel_filename, kernel_size, info->ram_size); exit(1); } diff --git a/hw/core/generic-loader.c b/hw/core/generic-loader.c index XXXXXXX..XXXXXXX 100644 --- a/hw/core/generic-loader.c +++ b/hw/core/generic-loader.c @@ -XXX,XX +XXX,XX @@ static void generic_loader_realize(DeviceState *dev, Error **errp) GenericLoaderState *s = GENERIC_LOADER(dev); hwaddr entry; int big_endian; - int size = 0; + ssize_t size = 0; s->set_pc = false; diff --git a/hw/core/loader.c b/hw/core/loader.c index XXXXXXX..XXXXXXX 100644 --- a/hw/core/loader.c +++ b/hw/core/loader.c @@ -XXX,XX +XXX,XX @@ ssize_t read_targphys(const char *name, return did; } -int load_image_targphys(const char *filename, - hwaddr addr, uint64_t max_sz) +ssize_t load_image_targphys(const char *filename, + hwaddr addr, uint64_t max_sz) { return load_image_targphys_as(filename, addr, max_sz, NULL); } /* return the size or -1 if error */ -int load_image_targphys_as(const char *filename, - hwaddr addr, uint64_t max_sz, AddressSpace *as) +ssize_t load_image_targphys_as(const char *filename, + hwaddr addr, uint64_t max_sz, AddressSpace *as) { - int size; + ssize_t size; size = get_image_size(filename); if (size < 0 || size > max_sz) { @@ -XXX,XX +XXX,XX @@ int load_image_targphys_as(const char *filename, return size; } -int load_image_mr(const char *filename, MemoryRegion *mr) +ssize_t load_image_mr(const char *filename, MemoryRegion *mr) { - int size; + ssize_t size; if (!memory_access_is_direct(mr, false)) { /* Can only load an image into RAM or ROM */ @@ -XXX,XX +XXX,XX @@ static void bswap_ahdr(struct exec *e) : (_N_SEGMENT_ROUND (_N_TXTENDADDR(x, target_page_size), target_page_size))) -int load_aout(const char *filename, hwaddr addr, int max_sz, - int bswap_needed, hwaddr target_page_size) +ssize_t load_aout(const char *filename, hwaddr addr, int max_sz, + int bswap_needed, hwaddr target_page_size) { int fd; ssize_t size, ret; @@ -XXX,XX +XXX,XX @@ toosmall: } /* Load a U-Boot image. */ -static int load_uboot_image(const char *filename, hwaddr *ep, hwaddr *loadaddr, - int *is_linux, uint8_t image_type, - uint64_t (*translate_fn)(void *, uint64_t), - void *translate_opaque, AddressSpace *as) +static ssize_t load_uboot_image(const char *filename, hwaddr *ep, + hwaddr *loadaddr, int *is_linux, + uint8_t image_type, + uint64_t (*translate_fn)(void *, uint64_t), + void *translate_opaque, AddressSpace *as) { int fd; - int size; + ssize_t size; hwaddr address; uboot_image_header_t h; uboot_image_header_t *hdr = &h; @@ -XXX,XX +XXX,XX @@ out: return ret; } -int load_uimage(const char *filename, hwaddr *ep, hwaddr *loadaddr, - int *is_linux, - uint64_t (*translate_fn)(void *, uint64_t), - void *translate_opaque) +ssize_t load_uimage(const char *filename, hwaddr *ep, hwaddr *loadaddr, + int *is_linux, + uint64_t (*translate_fn)(void *, uint64_t), + void *translate_opaque) { return load_uboot_image(filename, ep, loadaddr, is_linux, IH_TYPE_KERNEL, translate_fn, translate_opaque, NULL); } -int load_uimage_as(const char *filename, hwaddr *ep, hwaddr *loadaddr, - int *is_linux, - uint64_t (*translate_fn)(void *, uint64_t), - void *translate_opaque, AddressSpace *as) +ssize_t load_uimage_as(const char *filename, hwaddr *ep, hwaddr *loadaddr, + int *is_linux, + uint64_t (*translate_fn)(void *, uint64_t), + void *translate_opaque, AddressSpace *as) { return load_uboot_image(filename, ep, loadaddr, is_linux, IH_TYPE_KERNEL, translate_fn, translate_opaque, as); } /* Load a ramdisk. */ -int load_ramdisk(const char *filename, hwaddr addr, uint64_t max_sz) +ssize_t load_ramdisk(const char *filename, hwaddr addr, uint64_t max_sz) { return load_ramdisk_as(filename, addr, max_sz, NULL); } -int load_ramdisk_as(const char *filename, hwaddr addr, uint64_t max_sz, - AddressSpace *as) +ssize_t load_ramdisk_as(const char *filename, hwaddr addr, uint64_t max_sz, + AddressSpace *as) { return load_uboot_image(filename, NULL, &addr, NULL, IH_TYPE_RAMDISK, NULL, NULL, as); } /* Load a gzip-compressed kernel to a dynamically allocated buffer. */ -int load_image_gzipped_buffer(const char *filename, uint64_t max_sz, - uint8_t **buffer) +ssize_t load_image_gzipped_buffer(const char *filename, uint64_t max_sz, + uint8_t **buffer) { uint8_t *compressed_data = NULL; uint8_t *data = NULL; @@ -XXX,XX +XXX,XX @@ int load_image_gzipped_buffer(const char *filename, uint64_t max_sz, } /* Load a gzip-compressed kernel. */ -int load_image_gzipped(const char *filename, hwaddr addr, uint64_t max_sz) +ssize_t load_image_gzipped(const char *filename, hwaddr addr, uint64_t max_sz) { - int bytes; + ssize_t bytes; uint8_t *data; bytes = load_image_gzipped_buffer(filename, max_sz, &data); @@ -XXX,XX +XXX,XX @@ static void *rom_set_mr(Rom *rom, Object *owner, const char *name, bool ro) return data; } -int rom_add_file(const char *file, const char *fw_dir, - hwaddr addr, int32_t bootindex, - bool option_rom, MemoryRegion *mr, - AddressSpace *as) +ssize_t rom_add_file(const char *file, const char *fw_dir, + hwaddr addr, int32_t bootindex, + bool option_rom, MemoryRegion *mr, + AddressSpace *as) { MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); Rom *rom; - int rc, fd = -1; + ssize_t rc; + int fd = -1; char devpath[100]; if (as && mr) { @@ -XXX,XX +XXX,XX @@ int rom_add_file(const char *file, const char *fw_dir, lseek(fd, 0, SEEK_SET); rc = read(fd, rom->data, rom->datasize); if (rc != rom->datasize) { - fprintf(stderr, "rom: file %-20s: read error: rc=%d (expected %zd)\n", + fprintf(stderr, "rom: file %-20s: read error: rc=%zd (expected %zd)\n", rom->name, rc, rom->datasize); goto err; } @@ -XXX,XX +XXX,XX @@ int rom_add_elf_program(const char *name, GMappedFile *mapped_file, void *data, return 0; } -int rom_add_vga(const char *file) +ssize_t rom_add_vga(const char *file) { return rom_add_file(file, "vgaroms", 0, -1, true, NULL, NULL); } -int rom_add_option(const char *file, int32_t bootindex) +ssize_t rom_add_option(const char *file, int32_t bootindex) { return rom_add_file(file, "genroms", 0, bootindex, true, NULL, NULL); } @@ -XXX,XX +XXX,XX @@ out: } /* return size or -1 if error */ -int load_targphys_hex_as(const char *filename, hwaddr *entry, AddressSpace *as) +ssize_t load_targphys_hex_as(const char *filename, hwaddr *entry, + AddressSpace *as) { gsize hex_blob_size; gchar *hex_blob; - int total_size = 0; + ssize_t total_size = 0; if (!g_file_get_contents(filename, &hex_blob, &hex_blob_size, NULL)) { return -1; diff --git a/hw/i386/x86.c b/hw/i386/x86.c index XXXXXXX..XXXXXXX 100644 --- a/hw/i386/x86.c +++ b/hw/i386/x86.c @@ -XXX,XX +XXX,XX @@ void x86_bios_rom_init(MachineState *ms, const char *default_firmware, char *filename; MemoryRegion *bios, *isa_bios; int bios_size, isa_bios_size; - int ret; + ssize_t ret; /* BIOS load */ bios_name = ms->firmware ?: default_firmware; diff --git a/hw/riscv/boot.c b/hw/riscv/boot.c index XXXXXXX..XXXXXXX 100644 --- a/hw/riscv/boot.c +++ b/hw/riscv/boot.c @@ -XXX,XX +XXX,XX @@ target_ulong riscv_load_firmware(const char *firmware_filename, hwaddr firmware_load_addr, symbol_fn_t sym_cb) { - uint64_t firmware_entry, firmware_size, firmware_end; + uint64_t firmware_entry, firmware_end; + ssize_t firmware_size; if (load_elf_ram_sym(firmware_filename, NULL, NULL, NULL, &firmware_entry, NULL, &firmware_end, NULL, @@ -XXX,XX +XXX,XX @@ target_ulong riscv_load_kernel(const char *kernel_filename, hwaddr riscv_load_initrd(const char *filename, uint64_t mem_size, uint64_t kernel_entry, hwaddr *start) { - int size; + ssize_t size; /* * We want to put the initrd far enough into RAM that when the -- 2.36.1
From: Andrew Bresticker <abrestic@rivosinc.com> Whether or not VSEIP is pending isn't reflected in env->mip and must instead be determined from hstatus.vgein and hgeip. As a result a CPU in WFI won't wake on a VSEIP, which violates the WFI behavior as specified in the privileged ISA. Just use riscv_cpu_all_pending() instead, which already accounts for VSEIP. Signed-off-by: Andrew Bresticker <abrestic@rivosinc.com> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Message-Id: <20220531210544.181322-1-abrestic@rivosinc.com> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/cpu.h | 1 + target/riscv/cpu.c | 2 +- target/riscv/cpu_helper.c | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/cpu.h +++ b/target/riscv/cpu.h @@ -XXX,XX +XXX,XX @@ int riscv_cpu_gdb_read_register(CPUState *cpu, GByteArray *buf, int reg); int riscv_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg); int riscv_cpu_hviprio_index2irq(int index, int *out_irq, int *out_rdzero); uint8_t riscv_cpu_default_priority(int irq); +uint64_t riscv_cpu_all_pending(CPURISCVState *env); int riscv_cpu_mirq_pending(CPURISCVState *env); int riscv_cpu_sirq_pending(CPURISCVState *env); int riscv_cpu_vsirq_pending(CPURISCVState *env); diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/cpu.c +++ b/target/riscv/cpu.c @@ -XXX,XX +XXX,XX @@ static bool riscv_cpu_has_work(CPUState *cs) * Definition of the WFI instruction requires it to ignore the privilege * mode and delegation registers, but respect individual enables */ - return (env->mip & env->mie) != 0; + return riscv_cpu_all_pending(env) != 0; #else return true; #endif diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/cpu_helper.c +++ b/target/riscv/cpu_helper.c @@ -XXX,XX +XXX,XX @@ static int riscv_cpu_pending_to_irq(CPURISCVState *env, return best_irq; } -static uint64_t riscv_cpu_all_pending(CPURISCVState *env) +uint64_t riscv_cpu_all_pending(CPURISCVState *env) { uint32_t gein = get_field(env->hstatus, HSTATUS_VGEIN); uint64_t vsgein = (env->hgeip & (1ULL << gein)) ? MIP_VSEIP : 0; -- 2.36.1
From: Frédéric Pétrot <frederic.petrot@univ-grenoble-alpes.fr> Add an MXL_RV128 case in two switches so that no error is triggered when using the -cpu x-rv128 option. Signed-off-by: Frédéric Pétrot <frederic.petrot@univ-grenoble-alpes.fr> Acked-by: Alistair Francis <alistair.francis@wdc.com> Reviewed-by: Bin Meng <bmeng.cn@gmail.com> Message-Id: <20220602155246.38837-1-frederic.petrot@univ-grenoble-alpes.fr> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/debug.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/target/riscv/debug.c b/target/riscv/debug.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/debug.c +++ b/target/riscv/debug.c @@ -XXX,XX +XXX,XX @@ static inline target_ulong trigger_type(CPURISCVState *env, tdata1 = RV32_TYPE(type); break; case MXL_RV64: + case MXL_RV128: tdata1 = RV64_TYPE(type); break; default: @@ -XXX,XX +XXX,XX @@ static target_ulong tdata1_validate(CPURISCVState *env, target_ulong val, tdata1 = RV32_TYPE(t); break; case MXL_RV64: + case MXL_RV128: type = extract64(val, 60, 4); dmode = extract64(val, 59, 1); tdata1 = RV64_TYPE(t); -- 2.36.1
From: eopXD <yueh.ting.chen@gmail.com> No functional change intended in this commit. Signed-off-by: eop Chen <eop.chen@sifive.com> Reviewed-by: Frank Chang <frank.chang@sifive.com> Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Message-Id: <165449614532.19704.7000832880482980398-1@git.sr.ht> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/vector_helper.c | 1132 +++++++++++++++++----------------- 1 file changed, 565 insertions(+), 567 deletions(-) diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB) static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2, CPURISCVState *env, uint32_t desc, - uint32_t esz, uint32_t dsz, opivv2_fn *fn) { uint32_t vm = vext_vm(desc); @@ -XXX,XX +XXX,XX @@ static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2, } /* generate the helpers for OPIVV */ -#define GEN_VEXT_VV(NAME, ESZ, DSZ) \ +#define GEN_VEXT_VV(NAME) \ void HELPER(NAME)(void *vd, void *v0, void *vs1, \ void *vs2, CPURISCVState *env, \ uint32_t desc) \ { \ - do_vext_vv(vd, v0, vs1, vs2, env, desc, ESZ, DSZ, \ + do_vext_vv(vd, v0, vs1, vs2, env, desc, \ do_##NAME); \ } -GEN_VEXT_VV(vadd_vv_b, 1, 1) -GEN_VEXT_VV(vadd_vv_h, 2, 2) -GEN_VEXT_VV(vadd_vv_w, 4, 4) -GEN_VEXT_VV(vadd_vv_d, 8, 8) -GEN_VEXT_VV(vsub_vv_b, 1, 1) -GEN_VEXT_VV(vsub_vv_h, 2, 2) -GEN_VEXT_VV(vsub_vv_w, 4, 4) -GEN_VEXT_VV(vsub_vv_d, 8, 8) +GEN_VEXT_VV(vadd_vv_b) +GEN_VEXT_VV(vadd_vv_h) +GEN_VEXT_VV(vadd_vv_w) +GEN_VEXT_VV(vadd_vv_d) +GEN_VEXT_VV(vsub_vv_b) +GEN_VEXT_VV(vsub_vv_h) +GEN_VEXT_VV(vsub_vv_w) +GEN_VEXT_VV(vsub_vv_d) typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i); @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB) static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2, CPURISCVState *env, uint32_t desc, - uint32_t esz, uint32_t dsz, opivx2_fn fn) { uint32_t vm = vext_vm(desc); @@ -XXX,XX +XXX,XX @@ static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2, } /* generate the helpers for OPIVX */ -#define GEN_VEXT_VX(NAME, ESZ, DSZ) \ +#define GEN_VEXT_VX(NAME) \ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ void *vs2, CPURISCVState *env, \ uint32_t desc) \ { \ - do_vext_vx(vd, v0, s1, vs2, env, desc, ESZ, DSZ, \ + do_vext_vx(vd, v0, s1, vs2, env, desc, \ do_##NAME); \ } -GEN_VEXT_VX(vadd_vx_b, 1, 1) -GEN_VEXT_VX(vadd_vx_h, 2, 2) -GEN_VEXT_VX(vadd_vx_w, 4, 4) -GEN_VEXT_VX(vadd_vx_d, 8, 8) -GEN_VEXT_VX(vsub_vx_b, 1, 1) -GEN_VEXT_VX(vsub_vx_h, 2, 2) -GEN_VEXT_VX(vsub_vx_w, 4, 4) -GEN_VEXT_VX(vsub_vx_d, 8, 8) -GEN_VEXT_VX(vrsub_vx_b, 1, 1) -GEN_VEXT_VX(vrsub_vx_h, 2, 2) -GEN_VEXT_VX(vrsub_vx_w, 4, 4) -GEN_VEXT_VX(vrsub_vx_d, 8, 8) +GEN_VEXT_VX(vadd_vx_b) +GEN_VEXT_VX(vadd_vx_h) +GEN_VEXT_VX(vadd_vx_w) +GEN_VEXT_VX(vadd_vx_d) +GEN_VEXT_VX(vsub_vx_b) +GEN_VEXT_VX(vsub_vx_h) +GEN_VEXT_VX(vsub_vx_w) +GEN_VEXT_VX(vsub_vx_d) +GEN_VEXT_VX(vrsub_vx_b) +GEN_VEXT_VX(vrsub_vx_h) +GEN_VEXT_VX(vrsub_vx_w) +GEN_VEXT_VX(vrsub_vx_d) void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc) { @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD) RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB) RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB) RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB) -GEN_VEXT_VV(vwaddu_vv_b, 1, 2) -GEN_VEXT_VV(vwaddu_vv_h, 2, 4) -GEN_VEXT_VV(vwaddu_vv_w, 4, 8) -GEN_VEXT_VV(vwsubu_vv_b, 1, 2) -GEN_VEXT_VV(vwsubu_vv_h, 2, 4) -GEN_VEXT_VV(vwsubu_vv_w, 4, 8) -GEN_VEXT_VV(vwadd_vv_b, 1, 2) -GEN_VEXT_VV(vwadd_vv_h, 2, 4) -GEN_VEXT_VV(vwadd_vv_w, 4, 8) -GEN_VEXT_VV(vwsub_vv_b, 1, 2) -GEN_VEXT_VV(vwsub_vv_h, 2, 4) -GEN_VEXT_VV(vwsub_vv_w, 4, 8) -GEN_VEXT_VV(vwaddu_wv_b, 1, 2) -GEN_VEXT_VV(vwaddu_wv_h, 2, 4) -GEN_VEXT_VV(vwaddu_wv_w, 4, 8) -GEN_VEXT_VV(vwsubu_wv_b, 1, 2) -GEN_VEXT_VV(vwsubu_wv_h, 2, 4) -GEN_VEXT_VV(vwsubu_wv_w, 4, 8) -GEN_VEXT_VV(vwadd_wv_b, 1, 2) -GEN_VEXT_VV(vwadd_wv_h, 2, 4) -GEN_VEXT_VV(vwadd_wv_w, 4, 8) -GEN_VEXT_VV(vwsub_wv_b, 1, 2) -GEN_VEXT_VV(vwsub_wv_h, 2, 4) -GEN_VEXT_VV(vwsub_wv_w, 4, 8) +GEN_VEXT_VV(vwaddu_vv_b) +GEN_VEXT_VV(vwaddu_vv_h) +GEN_VEXT_VV(vwaddu_vv_w) +GEN_VEXT_VV(vwsubu_vv_b) +GEN_VEXT_VV(vwsubu_vv_h) +GEN_VEXT_VV(vwsubu_vv_w) +GEN_VEXT_VV(vwadd_vv_b) +GEN_VEXT_VV(vwadd_vv_h) +GEN_VEXT_VV(vwadd_vv_w) +GEN_VEXT_VV(vwsub_vv_b) +GEN_VEXT_VV(vwsub_vv_h) +GEN_VEXT_VV(vwsub_vv_w) +GEN_VEXT_VV(vwaddu_wv_b) +GEN_VEXT_VV(vwaddu_wv_h) +GEN_VEXT_VV(vwaddu_wv_w) +GEN_VEXT_VV(vwsubu_wv_b) +GEN_VEXT_VV(vwsubu_wv_h) +GEN_VEXT_VV(vwsubu_wv_w) +GEN_VEXT_VV(vwadd_wv_b) +GEN_VEXT_VV(vwadd_wv_h) +GEN_VEXT_VV(vwadd_wv_w) +GEN_VEXT_VV(vwsub_wv_b) +GEN_VEXT_VV(vwsub_wv_h) +GEN_VEXT_VV(vwsub_wv_w) RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD) RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD) @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD) RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB) RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB) RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB) -GEN_VEXT_VX(vwaddu_vx_b, 1, 2) -GEN_VEXT_VX(vwaddu_vx_h, 2, 4) -GEN_VEXT_VX(vwaddu_vx_w, 4, 8) -GEN_VEXT_VX(vwsubu_vx_b, 1, 2) -GEN_VEXT_VX(vwsubu_vx_h, 2, 4) -GEN_VEXT_VX(vwsubu_vx_w, 4, 8) -GEN_VEXT_VX(vwadd_vx_b, 1, 2) -GEN_VEXT_VX(vwadd_vx_h, 2, 4) -GEN_VEXT_VX(vwadd_vx_w, 4, 8) -GEN_VEXT_VX(vwsub_vx_b, 1, 2) -GEN_VEXT_VX(vwsub_vx_h, 2, 4) -GEN_VEXT_VX(vwsub_vx_w, 4, 8) -GEN_VEXT_VX(vwaddu_wx_b, 1, 2) -GEN_VEXT_VX(vwaddu_wx_h, 2, 4) -GEN_VEXT_VX(vwaddu_wx_w, 4, 8) -GEN_VEXT_VX(vwsubu_wx_b, 1, 2) -GEN_VEXT_VX(vwsubu_wx_h, 2, 4) -GEN_VEXT_VX(vwsubu_wx_w, 4, 8) -GEN_VEXT_VX(vwadd_wx_b, 1, 2) -GEN_VEXT_VX(vwadd_wx_h, 2, 4) -GEN_VEXT_VX(vwadd_wx_w, 4, 8) -GEN_VEXT_VX(vwsub_wx_b, 1, 2) -GEN_VEXT_VX(vwsub_wx_h, 2, 4) -GEN_VEXT_VX(vwsub_wx_w, 4, 8) +GEN_VEXT_VX(vwaddu_vx_b) +GEN_VEXT_VX(vwaddu_vx_h) +GEN_VEXT_VX(vwaddu_vx_w) +GEN_VEXT_VX(vwsubu_vx_b) +GEN_VEXT_VX(vwsubu_vx_h) +GEN_VEXT_VX(vwsubu_vx_w) +GEN_VEXT_VX(vwadd_vx_b) +GEN_VEXT_VX(vwadd_vx_h) +GEN_VEXT_VX(vwadd_vx_w) +GEN_VEXT_VX(vwsub_vx_b) +GEN_VEXT_VX(vwsub_vx_h) +GEN_VEXT_VX(vwsub_vx_w) +GEN_VEXT_VX(vwaddu_wx_b) +GEN_VEXT_VX(vwaddu_wx_h) +GEN_VEXT_VX(vwaddu_wx_w) +GEN_VEXT_VX(vwsubu_wx_b) +GEN_VEXT_VX(vwsubu_wx_h) +GEN_VEXT_VX(vwsubu_wx_w) +GEN_VEXT_VX(vwadd_wx_b) +GEN_VEXT_VX(vwadd_wx_h) +GEN_VEXT_VX(vwadd_wx_w) +GEN_VEXT_VX(vwsub_wx_b) +GEN_VEXT_VX(vwsub_wx_h) +GEN_VEXT_VX(vwsub_wx_w) /* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */ #define DO_VADC(N, M, C) (N + M + C) @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR) RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR) RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR) RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR) -GEN_VEXT_VV(vand_vv_b, 1, 1) -GEN_VEXT_VV(vand_vv_h, 2, 2) -GEN_VEXT_VV(vand_vv_w, 4, 4) -GEN_VEXT_VV(vand_vv_d, 8, 8) -GEN_VEXT_VV(vor_vv_b, 1, 1) -GEN_VEXT_VV(vor_vv_h, 2, 2) -GEN_VEXT_VV(vor_vv_w, 4, 4) -GEN_VEXT_VV(vor_vv_d, 8, 8) -GEN_VEXT_VV(vxor_vv_b, 1, 1) -GEN_VEXT_VV(vxor_vv_h, 2, 2) -GEN_VEXT_VV(vxor_vv_w, 4, 4) -GEN_VEXT_VV(vxor_vv_d, 8, 8) +GEN_VEXT_VV(vand_vv_b) +GEN_VEXT_VV(vand_vv_h) +GEN_VEXT_VV(vand_vv_w) +GEN_VEXT_VV(vand_vv_d) +GEN_VEXT_VV(vor_vv_b) +GEN_VEXT_VV(vor_vv_h) +GEN_VEXT_VV(vor_vv_w) +GEN_VEXT_VV(vor_vv_d) +GEN_VEXT_VV(vxor_vv_b) +GEN_VEXT_VV(vxor_vv_h) +GEN_VEXT_VV(vxor_vv_w) +GEN_VEXT_VV(vxor_vv_d) RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND) RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND) @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR) RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR) RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR) RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR) -GEN_VEXT_VX(vand_vx_b, 1, 1) -GEN_VEXT_VX(vand_vx_h, 2, 2) -GEN_VEXT_VX(vand_vx_w, 4, 4) -GEN_VEXT_VX(vand_vx_d, 8, 8) -GEN_VEXT_VX(vor_vx_b, 1, 1) -GEN_VEXT_VX(vor_vx_h, 2, 2) -GEN_VEXT_VX(vor_vx_w, 4, 4) -GEN_VEXT_VX(vor_vx_d, 8, 8) -GEN_VEXT_VX(vxor_vx_b, 1, 1) -GEN_VEXT_VX(vxor_vx_h, 2, 2) -GEN_VEXT_VX(vxor_vx_w, 4, 4) -GEN_VEXT_VX(vxor_vx_d, 8, 8) +GEN_VEXT_VX(vand_vx_b) +GEN_VEXT_VX(vand_vx_h) +GEN_VEXT_VX(vand_vx_w) +GEN_VEXT_VX(vand_vx_d) +GEN_VEXT_VX(vor_vx_b) +GEN_VEXT_VX(vor_vx_h) +GEN_VEXT_VX(vor_vx_w) +GEN_VEXT_VX(vor_vx_d) +GEN_VEXT_VX(vxor_vx_b) +GEN_VEXT_VX(vxor_vx_h) +GEN_VEXT_VX(vxor_vx_w) +GEN_VEXT_VX(vxor_vx_d) /* Vector Single-Width Bit Shift Instructions */ #define DO_SLL(N, M) (N << (M)) @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX) RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX) RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX) RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX) -GEN_VEXT_VV(vminu_vv_b, 1, 1) -GEN_VEXT_VV(vminu_vv_h, 2, 2) -GEN_VEXT_VV(vminu_vv_w, 4, 4) -GEN_VEXT_VV(vminu_vv_d, 8, 8) -GEN_VEXT_VV(vmin_vv_b, 1, 1) -GEN_VEXT_VV(vmin_vv_h, 2, 2) -GEN_VEXT_VV(vmin_vv_w, 4, 4) -GEN_VEXT_VV(vmin_vv_d, 8, 8) -GEN_VEXT_VV(vmaxu_vv_b, 1, 1) -GEN_VEXT_VV(vmaxu_vv_h, 2, 2) -GEN_VEXT_VV(vmaxu_vv_w, 4, 4) -GEN_VEXT_VV(vmaxu_vv_d, 8, 8) -GEN_VEXT_VV(vmax_vv_b, 1, 1) -GEN_VEXT_VV(vmax_vv_h, 2, 2) -GEN_VEXT_VV(vmax_vv_w, 4, 4) -GEN_VEXT_VV(vmax_vv_d, 8, 8) +GEN_VEXT_VV(vminu_vv_b) +GEN_VEXT_VV(vminu_vv_h) +GEN_VEXT_VV(vminu_vv_w) +GEN_VEXT_VV(vminu_vv_d) +GEN_VEXT_VV(vmin_vv_b) +GEN_VEXT_VV(vmin_vv_h) +GEN_VEXT_VV(vmin_vv_w) +GEN_VEXT_VV(vmin_vv_d) +GEN_VEXT_VV(vmaxu_vv_b) +GEN_VEXT_VV(vmaxu_vv_h) +GEN_VEXT_VV(vmaxu_vv_w) +GEN_VEXT_VV(vmaxu_vv_d) +GEN_VEXT_VV(vmax_vv_b) +GEN_VEXT_VV(vmax_vv_h) +GEN_VEXT_VV(vmax_vv_w) +GEN_VEXT_VV(vmax_vv_d) RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN) RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN) @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX) RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX) RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX) RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX) -GEN_VEXT_VX(vminu_vx_b, 1, 1) -GEN_VEXT_VX(vminu_vx_h, 2, 2) -GEN_VEXT_VX(vminu_vx_w, 4, 4) -GEN_VEXT_VX(vminu_vx_d, 8, 8) -GEN_VEXT_VX(vmin_vx_b, 1, 1) -GEN_VEXT_VX(vmin_vx_h, 2, 2) -GEN_VEXT_VX(vmin_vx_w, 4, 4) -GEN_VEXT_VX(vmin_vx_d, 8, 8) -GEN_VEXT_VX(vmaxu_vx_b, 1, 1) -GEN_VEXT_VX(vmaxu_vx_h, 2, 2) -GEN_VEXT_VX(vmaxu_vx_w, 4, 4) -GEN_VEXT_VX(vmaxu_vx_d, 8, 8) -GEN_VEXT_VX(vmax_vx_b, 1, 1) -GEN_VEXT_VX(vmax_vx_h, 2, 2) -GEN_VEXT_VX(vmax_vx_w, 4, 4) -GEN_VEXT_VX(vmax_vx_d, 8, 8) +GEN_VEXT_VX(vminu_vx_b) +GEN_VEXT_VX(vminu_vx_h) +GEN_VEXT_VX(vminu_vx_w) +GEN_VEXT_VX(vminu_vx_d) +GEN_VEXT_VX(vmin_vx_b) +GEN_VEXT_VX(vmin_vx_h) +GEN_VEXT_VX(vmin_vx_w) +GEN_VEXT_VX(vmin_vx_d) +GEN_VEXT_VX(vmaxu_vx_b) +GEN_VEXT_VX(vmaxu_vx_h) +GEN_VEXT_VX(vmaxu_vx_w) +GEN_VEXT_VX(vmaxu_vx_d) +GEN_VEXT_VX(vmax_vx_b) +GEN_VEXT_VX(vmax_vx_h) +GEN_VEXT_VX(vmax_vx_w) +GEN_VEXT_VX(vmax_vx_d) /* Vector Single-Width Integer Multiply Instructions */ #define DO_MUL(N, M) (N * M) @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL) RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL) RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL) RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL) -GEN_VEXT_VV(vmul_vv_b, 1, 1) -GEN_VEXT_VV(vmul_vv_h, 2, 2) -GEN_VEXT_VV(vmul_vv_w, 4, 4) -GEN_VEXT_VV(vmul_vv_d, 8, 8) +GEN_VEXT_VV(vmul_vv_b) +GEN_VEXT_VV(vmul_vv_h) +GEN_VEXT_VV(vmul_vv_w) +GEN_VEXT_VV(vmul_vv_d) static int8_t do_mulh_b(int8_t s2, int8_t s1) { @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b) RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h) RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w) RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d) -GEN_VEXT_VV(vmulh_vv_b, 1, 1) -GEN_VEXT_VV(vmulh_vv_h, 2, 2) -GEN_VEXT_VV(vmulh_vv_w, 4, 4) -GEN_VEXT_VV(vmulh_vv_d, 8, 8) -GEN_VEXT_VV(vmulhu_vv_b, 1, 1) -GEN_VEXT_VV(vmulhu_vv_h, 2, 2) -GEN_VEXT_VV(vmulhu_vv_w, 4, 4) -GEN_VEXT_VV(vmulhu_vv_d, 8, 8) -GEN_VEXT_VV(vmulhsu_vv_b, 1, 1) -GEN_VEXT_VV(vmulhsu_vv_h, 2, 2) -GEN_VEXT_VV(vmulhsu_vv_w, 4, 4) -GEN_VEXT_VV(vmulhsu_vv_d, 8, 8) +GEN_VEXT_VV(vmulh_vv_b) +GEN_VEXT_VV(vmulh_vv_h) +GEN_VEXT_VV(vmulh_vv_w) +GEN_VEXT_VV(vmulh_vv_d) +GEN_VEXT_VV(vmulhu_vv_b) +GEN_VEXT_VV(vmulhu_vv_h) +GEN_VEXT_VV(vmulhu_vv_w) +GEN_VEXT_VV(vmulhu_vv_d) +GEN_VEXT_VV(vmulhsu_vv_b) +GEN_VEXT_VV(vmulhsu_vv_h) +GEN_VEXT_VV(vmulhsu_vv_w) +GEN_VEXT_VV(vmulhsu_vv_d) RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL) RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL) @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b) RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h) RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w) RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d) -GEN_VEXT_VX(vmul_vx_b, 1, 1) -GEN_VEXT_VX(vmul_vx_h, 2, 2) -GEN_VEXT_VX(vmul_vx_w, 4, 4) -GEN_VEXT_VX(vmul_vx_d, 8, 8) -GEN_VEXT_VX(vmulh_vx_b, 1, 1) -GEN_VEXT_VX(vmulh_vx_h, 2, 2) -GEN_VEXT_VX(vmulh_vx_w, 4, 4) -GEN_VEXT_VX(vmulh_vx_d, 8, 8) -GEN_VEXT_VX(vmulhu_vx_b, 1, 1) -GEN_VEXT_VX(vmulhu_vx_h, 2, 2) -GEN_VEXT_VX(vmulhu_vx_w, 4, 4) -GEN_VEXT_VX(vmulhu_vx_d, 8, 8) -GEN_VEXT_VX(vmulhsu_vx_b, 1, 1) -GEN_VEXT_VX(vmulhsu_vx_h, 2, 2) -GEN_VEXT_VX(vmulhsu_vx_w, 4, 4) -GEN_VEXT_VX(vmulhsu_vx_d, 8, 8) +GEN_VEXT_VX(vmul_vx_b) +GEN_VEXT_VX(vmul_vx_h) +GEN_VEXT_VX(vmul_vx_w) +GEN_VEXT_VX(vmul_vx_d) +GEN_VEXT_VX(vmulh_vx_b) +GEN_VEXT_VX(vmulh_vx_h) +GEN_VEXT_VX(vmulh_vx_w) +GEN_VEXT_VX(vmulh_vx_d) +GEN_VEXT_VX(vmulhu_vx_b) +GEN_VEXT_VX(vmulhu_vx_h) +GEN_VEXT_VX(vmulhu_vx_w) +GEN_VEXT_VX(vmulhu_vx_d) +GEN_VEXT_VX(vmulhsu_vx_b) +GEN_VEXT_VX(vmulhsu_vx_h) +GEN_VEXT_VX(vmulhsu_vx_w) +GEN_VEXT_VX(vmulhsu_vx_d) /* Vector Integer Divide Instructions */ #define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M) @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM) RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM) RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM) RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM) -GEN_VEXT_VV(vdivu_vv_b, 1, 1) -GEN_VEXT_VV(vdivu_vv_h, 2, 2) -GEN_VEXT_VV(vdivu_vv_w, 4, 4) -GEN_VEXT_VV(vdivu_vv_d, 8, 8) -GEN_VEXT_VV(vdiv_vv_b, 1, 1) -GEN_VEXT_VV(vdiv_vv_h, 2, 2) -GEN_VEXT_VV(vdiv_vv_w, 4, 4) -GEN_VEXT_VV(vdiv_vv_d, 8, 8) -GEN_VEXT_VV(vremu_vv_b, 1, 1) -GEN_VEXT_VV(vremu_vv_h, 2, 2) -GEN_VEXT_VV(vremu_vv_w, 4, 4) -GEN_VEXT_VV(vremu_vv_d, 8, 8) -GEN_VEXT_VV(vrem_vv_b, 1, 1) -GEN_VEXT_VV(vrem_vv_h, 2, 2) -GEN_VEXT_VV(vrem_vv_w, 4, 4) -GEN_VEXT_VV(vrem_vv_d, 8, 8) +GEN_VEXT_VV(vdivu_vv_b) +GEN_VEXT_VV(vdivu_vv_h) +GEN_VEXT_VV(vdivu_vv_w) +GEN_VEXT_VV(vdivu_vv_d) +GEN_VEXT_VV(vdiv_vv_b) +GEN_VEXT_VV(vdiv_vv_h) +GEN_VEXT_VV(vdiv_vv_w) +GEN_VEXT_VV(vdiv_vv_d) +GEN_VEXT_VV(vremu_vv_b) +GEN_VEXT_VV(vremu_vv_h) +GEN_VEXT_VV(vremu_vv_w) +GEN_VEXT_VV(vremu_vv_d) +GEN_VEXT_VV(vrem_vv_b) +GEN_VEXT_VV(vrem_vv_h) +GEN_VEXT_VV(vrem_vv_w) +GEN_VEXT_VV(vrem_vv_d) RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU) RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU) @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM) RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM) RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM) RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM) -GEN_VEXT_VX(vdivu_vx_b, 1, 1) -GEN_VEXT_VX(vdivu_vx_h, 2, 2) -GEN_VEXT_VX(vdivu_vx_w, 4, 4) -GEN_VEXT_VX(vdivu_vx_d, 8, 8) -GEN_VEXT_VX(vdiv_vx_b, 1, 1) -GEN_VEXT_VX(vdiv_vx_h, 2, 2) -GEN_VEXT_VX(vdiv_vx_w, 4, 4) -GEN_VEXT_VX(vdiv_vx_d, 8, 8) -GEN_VEXT_VX(vremu_vx_b, 1, 1) -GEN_VEXT_VX(vremu_vx_h, 2, 2) -GEN_VEXT_VX(vremu_vx_w, 4, 4) -GEN_VEXT_VX(vremu_vx_d, 8, 8) -GEN_VEXT_VX(vrem_vx_b, 1, 1) -GEN_VEXT_VX(vrem_vx_h, 2, 2) -GEN_VEXT_VX(vrem_vx_w, 4, 4) -GEN_VEXT_VX(vrem_vx_d, 8, 8) +GEN_VEXT_VX(vdivu_vx_b) +GEN_VEXT_VX(vdivu_vx_h) +GEN_VEXT_VX(vdivu_vx_w) +GEN_VEXT_VX(vdivu_vx_d) +GEN_VEXT_VX(vdiv_vx_b) +GEN_VEXT_VX(vdiv_vx_h) +GEN_VEXT_VX(vdiv_vx_w) +GEN_VEXT_VX(vdiv_vx_d) +GEN_VEXT_VX(vremu_vx_b) +GEN_VEXT_VX(vremu_vx_h) +GEN_VEXT_VX(vremu_vx_w) +GEN_VEXT_VX(vremu_vx_d) +GEN_VEXT_VX(vrem_vx_b) +GEN_VEXT_VX(vrem_vx_h) +GEN_VEXT_VX(vrem_vx_w) +GEN_VEXT_VX(vrem_vx_d) /* Vector Widening Integer Multiply Instructions */ RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL) @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL) RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL) RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL) RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL) -GEN_VEXT_VV(vwmul_vv_b, 1, 2) -GEN_VEXT_VV(vwmul_vv_h, 2, 4) -GEN_VEXT_VV(vwmul_vv_w, 4, 8) -GEN_VEXT_VV(vwmulu_vv_b, 1, 2) -GEN_VEXT_VV(vwmulu_vv_h, 2, 4) -GEN_VEXT_VV(vwmulu_vv_w, 4, 8) -GEN_VEXT_VV(vwmulsu_vv_b, 1, 2) -GEN_VEXT_VV(vwmulsu_vv_h, 2, 4) -GEN_VEXT_VV(vwmulsu_vv_w, 4, 8) +GEN_VEXT_VV(vwmul_vv_b) +GEN_VEXT_VV(vwmul_vv_h) +GEN_VEXT_VV(vwmul_vv_w) +GEN_VEXT_VV(vwmulu_vv_b) +GEN_VEXT_VV(vwmulu_vv_h) +GEN_VEXT_VV(vwmulu_vv_w) +GEN_VEXT_VV(vwmulsu_vv_b) +GEN_VEXT_VV(vwmulsu_vv_h) +GEN_VEXT_VV(vwmulsu_vv_w) RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL) RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL) @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL) RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL) RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL) RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL) -GEN_VEXT_VX(vwmul_vx_b, 1, 2) -GEN_VEXT_VX(vwmul_vx_h, 2, 4) -GEN_VEXT_VX(vwmul_vx_w, 4, 8) -GEN_VEXT_VX(vwmulu_vx_b, 1, 2) -GEN_VEXT_VX(vwmulu_vx_h, 2, 4) -GEN_VEXT_VX(vwmulu_vx_w, 4, 8) -GEN_VEXT_VX(vwmulsu_vx_b, 1, 2) -GEN_VEXT_VX(vwmulsu_vx_h, 2, 4) -GEN_VEXT_VX(vwmulsu_vx_w, 4, 8) +GEN_VEXT_VX(vwmul_vx_b) +GEN_VEXT_VX(vwmul_vx_h) +GEN_VEXT_VX(vwmul_vx_w) +GEN_VEXT_VX(vwmulu_vx_b) +GEN_VEXT_VX(vwmulu_vx_h) +GEN_VEXT_VX(vwmulu_vx_w) +GEN_VEXT_VX(vwmulsu_vx_b) +GEN_VEXT_VX(vwmulsu_vx_h) +GEN_VEXT_VX(vwmulsu_vx_w) /* Vector Single-Width Integer Multiply-Add Instructions */ #define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB) RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB) RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB) RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB) -GEN_VEXT_VV(vmacc_vv_b, 1, 1) -GEN_VEXT_VV(vmacc_vv_h, 2, 2) -GEN_VEXT_VV(vmacc_vv_w, 4, 4) -GEN_VEXT_VV(vmacc_vv_d, 8, 8) -GEN_VEXT_VV(vnmsac_vv_b, 1, 1) -GEN_VEXT_VV(vnmsac_vv_h, 2, 2) -GEN_VEXT_VV(vnmsac_vv_w, 4, 4) -GEN_VEXT_VV(vnmsac_vv_d, 8, 8) -GEN_VEXT_VV(vmadd_vv_b, 1, 1) -GEN_VEXT_VV(vmadd_vv_h, 2, 2) -GEN_VEXT_VV(vmadd_vv_w, 4, 4) -GEN_VEXT_VV(vmadd_vv_d, 8, 8) -GEN_VEXT_VV(vnmsub_vv_b, 1, 1) -GEN_VEXT_VV(vnmsub_vv_h, 2, 2) -GEN_VEXT_VV(vnmsub_vv_w, 4, 4) -GEN_VEXT_VV(vnmsub_vv_d, 8, 8) +GEN_VEXT_VV(vmacc_vv_b) +GEN_VEXT_VV(vmacc_vv_h) +GEN_VEXT_VV(vmacc_vv_w) +GEN_VEXT_VV(vmacc_vv_d) +GEN_VEXT_VV(vnmsac_vv_b) +GEN_VEXT_VV(vnmsac_vv_h) +GEN_VEXT_VV(vnmsac_vv_w) +GEN_VEXT_VV(vnmsac_vv_d) +GEN_VEXT_VV(vmadd_vv_b) +GEN_VEXT_VV(vmadd_vv_h) +GEN_VEXT_VV(vmadd_vv_w) +GEN_VEXT_VV(vmadd_vv_d) +GEN_VEXT_VV(vnmsub_vv_b) +GEN_VEXT_VV(vnmsub_vv_h) +GEN_VEXT_VV(vnmsub_vv_w) +GEN_VEXT_VV(vnmsub_vv_d) #define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \ @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB) RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB) RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB) RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB) -GEN_VEXT_VX(vmacc_vx_b, 1, 1) -GEN_VEXT_VX(vmacc_vx_h, 2, 2) -GEN_VEXT_VX(vmacc_vx_w, 4, 4) -GEN_VEXT_VX(vmacc_vx_d, 8, 8) -GEN_VEXT_VX(vnmsac_vx_b, 1, 1) -GEN_VEXT_VX(vnmsac_vx_h, 2, 2) -GEN_VEXT_VX(vnmsac_vx_w, 4, 4) -GEN_VEXT_VX(vnmsac_vx_d, 8, 8) -GEN_VEXT_VX(vmadd_vx_b, 1, 1) -GEN_VEXT_VX(vmadd_vx_h, 2, 2) -GEN_VEXT_VX(vmadd_vx_w, 4, 4) -GEN_VEXT_VX(vmadd_vx_d, 8, 8) -GEN_VEXT_VX(vnmsub_vx_b, 1, 1) -GEN_VEXT_VX(vnmsub_vx_h, 2, 2) -GEN_VEXT_VX(vnmsub_vx_w, 4, 4) -GEN_VEXT_VX(vnmsub_vx_d, 8, 8) +GEN_VEXT_VX(vmacc_vx_b) +GEN_VEXT_VX(vmacc_vx_h) +GEN_VEXT_VX(vmacc_vx_w) +GEN_VEXT_VX(vmacc_vx_d) +GEN_VEXT_VX(vnmsac_vx_b) +GEN_VEXT_VX(vnmsac_vx_h) +GEN_VEXT_VX(vnmsac_vx_w) +GEN_VEXT_VX(vnmsac_vx_d) +GEN_VEXT_VX(vmadd_vx_b) +GEN_VEXT_VX(vmadd_vx_h) +GEN_VEXT_VX(vmadd_vx_w) +GEN_VEXT_VX(vmadd_vx_d) +GEN_VEXT_VX(vnmsub_vx_b) +GEN_VEXT_VX(vnmsub_vx_h) +GEN_VEXT_VX(vnmsub_vx_w) +GEN_VEXT_VX(vnmsub_vx_d) /* Vector Widening Integer Multiply-Add Instructions */ RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC) @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC) RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC) RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC) RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC) -GEN_VEXT_VV(vwmaccu_vv_b, 1, 2) -GEN_VEXT_VV(vwmaccu_vv_h, 2, 4) -GEN_VEXT_VV(vwmaccu_vv_w, 4, 8) -GEN_VEXT_VV(vwmacc_vv_b, 1, 2) -GEN_VEXT_VV(vwmacc_vv_h, 2, 4) -GEN_VEXT_VV(vwmacc_vv_w, 4, 8) -GEN_VEXT_VV(vwmaccsu_vv_b, 1, 2) -GEN_VEXT_VV(vwmaccsu_vv_h, 2, 4) -GEN_VEXT_VV(vwmaccsu_vv_w, 4, 8) +GEN_VEXT_VV(vwmaccu_vv_b) +GEN_VEXT_VV(vwmaccu_vv_h) +GEN_VEXT_VV(vwmaccu_vv_w) +GEN_VEXT_VV(vwmacc_vv_b) +GEN_VEXT_VV(vwmacc_vv_h) +GEN_VEXT_VV(vwmacc_vv_w) +GEN_VEXT_VV(vwmaccsu_vv_b) +GEN_VEXT_VV(vwmaccsu_vv_h) +GEN_VEXT_VV(vwmaccsu_vv_w) RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC) RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC) @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC) RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC) RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC) RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC) -GEN_VEXT_VX(vwmaccu_vx_b, 1, 2) -GEN_VEXT_VX(vwmaccu_vx_h, 2, 4) -GEN_VEXT_VX(vwmaccu_vx_w, 4, 8) -GEN_VEXT_VX(vwmacc_vx_b, 1, 2) -GEN_VEXT_VX(vwmacc_vx_h, 2, 4) -GEN_VEXT_VX(vwmacc_vx_w, 4, 8) -GEN_VEXT_VX(vwmaccsu_vx_b, 1, 2) -GEN_VEXT_VX(vwmaccsu_vx_h, 2, 4) -GEN_VEXT_VX(vwmaccsu_vx_w, 4, 8) -GEN_VEXT_VX(vwmaccus_vx_b, 1, 2) -GEN_VEXT_VX(vwmaccus_vx_h, 2, 4) -GEN_VEXT_VX(vwmaccus_vx_w, 4, 8) +GEN_VEXT_VX(vwmaccu_vx_b) +GEN_VEXT_VX(vwmaccu_vx_h) +GEN_VEXT_VX(vwmaccu_vx_w) +GEN_VEXT_VX(vwmacc_vx_b) +GEN_VEXT_VX(vwmacc_vx_h) +GEN_VEXT_VX(vwmacc_vx_w) +GEN_VEXT_VX(vwmaccsu_vx_b) +GEN_VEXT_VX(vwmaccsu_vx_h) +GEN_VEXT_VX(vwmaccsu_vx_w) +GEN_VEXT_VX(vwmaccus_vx_b) +GEN_VEXT_VX(vwmaccus_vx_h) +GEN_VEXT_VX(vwmaccus_vx_w) /* Vector Integer Merge and Move Instructions */ #define GEN_VEXT_VMV_VV(NAME, ETYPE, H) \ @@ -XXX,XX +XXX,XX @@ vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2, static inline void vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2, CPURISCVState *env, - uint32_t desc, uint32_t esz, uint32_t dsz, + uint32_t desc, opivv2_rm_fn *fn) { uint32_t vm = vext_vm(desc); @@ -XXX,XX +XXX,XX @@ vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2, } /* generate helpers for fixed point instructions with OPIVV format */ -#define GEN_VEXT_VV_RM(NAME, ESZ, DSZ) \ +#define GEN_VEXT_VV_RM(NAME) \ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ CPURISCVState *env, uint32_t desc) \ { \ - vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, ESZ, DSZ, \ + vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, \ do_##NAME); \ } @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8) RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16) RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32) RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64) -GEN_VEXT_VV_RM(vsaddu_vv_b, 1, 1) -GEN_VEXT_VV_RM(vsaddu_vv_h, 2, 2) -GEN_VEXT_VV_RM(vsaddu_vv_w, 4, 4) -GEN_VEXT_VV_RM(vsaddu_vv_d, 8, 8) +GEN_VEXT_VV_RM(vsaddu_vv_b) +GEN_VEXT_VV_RM(vsaddu_vv_h) +GEN_VEXT_VV_RM(vsaddu_vv_w) +GEN_VEXT_VV_RM(vsaddu_vv_d) typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i, CPURISCVState *env, int vxrm); @@ -XXX,XX +XXX,XX @@ vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2, static inline void vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2, CPURISCVState *env, - uint32_t desc, uint32_t esz, uint32_t dsz, + uint32_t desc, opivx2_rm_fn *fn) { uint32_t vm = vext_vm(desc); @@ -XXX,XX +XXX,XX @@ vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2, } /* generate helpers for fixed point instructions with OPIVX format */ -#define GEN_VEXT_VX_RM(NAME, ESZ, DSZ) \ +#define GEN_VEXT_VX_RM(NAME) \ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ void *vs2, CPURISCVState *env, uint32_t desc) \ { \ - vext_vx_rm_2(vd, v0, s1, vs2, env, desc, ESZ, DSZ, \ + vext_vx_rm_2(vd, v0, s1, vs2, env, desc, \ do_##NAME); \ } @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8) RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16) RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32) RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64) -GEN_VEXT_VX_RM(vsaddu_vx_b, 1, 1) -GEN_VEXT_VX_RM(vsaddu_vx_h, 2, 2) -GEN_VEXT_VX_RM(vsaddu_vx_w, 4, 4) -GEN_VEXT_VX_RM(vsaddu_vx_d, 8, 8) +GEN_VEXT_VX_RM(vsaddu_vx_b) +GEN_VEXT_VX_RM(vsaddu_vx_h) +GEN_VEXT_VX_RM(vsaddu_vx_w) +GEN_VEXT_VX_RM(vsaddu_vx_d) static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) { @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8) RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16) RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32) RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64) -GEN_VEXT_VV_RM(vsadd_vv_b, 1, 1) -GEN_VEXT_VV_RM(vsadd_vv_h, 2, 2) -GEN_VEXT_VV_RM(vsadd_vv_w, 4, 4) -GEN_VEXT_VV_RM(vsadd_vv_d, 8, 8) +GEN_VEXT_VV_RM(vsadd_vv_b) +GEN_VEXT_VV_RM(vsadd_vv_h) +GEN_VEXT_VV_RM(vsadd_vv_w) +GEN_VEXT_VV_RM(vsadd_vv_d) RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8) RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16) RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32) RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64) -GEN_VEXT_VX_RM(vsadd_vx_b, 1, 1) -GEN_VEXT_VX_RM(vsadd_vx_h, 2, 2) -GEN_VEXT_VX_RM(vsadd_vx_w, 4, 4) -GEN_VEXT_VX_RM(vsadd_vx_d, 8, 8) +GEN_VEXT_VX_RM(vsadd_vx_b) +GEN_VEXT_VX_RM(vsadd_vx_h) +GEN_VEXT_VX_RM(vsadd_vx_w) +GEN_VEXT_VX_RM(vsadd_vx_d) static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) { @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8) RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16) RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32) RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64) -GEN_VEXT_VV_RM(vssubu_vv_b, 1, 1) -GEN_VEXT_VV_RM(vssubu_vv_h, 2, 2) -GEN_VEXT_VV_RM(vssubu_vv_w, 4, 4) -GEN_VEXT_VV_RM(vssubu_vv_d, 8, 8) +GEN_VEXT_VV_RM(vssubu_vv_b) +GEN_VEXT_VV_RM(vssubu_vv_h) +GEN_VEXT_VV_RM(vssubu_vv_w) +GEN_VEXT_VV_RM(vssubu_vv_d) RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8) RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16) RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32) RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64) -GEN_VEXT_VX_RM(vssubu_vx_b, 1, 1) -GEN_VEXT_VX_RM(vssubu_vx_h, 2, 2) -GEN_VEXT_VX_RM(vssubu_vx_w, 4, 4) -GEN_VEXT_VX_RM(vssubu_vx_d, 8, 8) +GEN_VEXT_VX_RM(vssubu_vx_b) +GEN_VEXT_VX_RM(vssubu_vx_h) +GEN_VEXT_VX_RM(vssubu_vx_w) +GEN_VEXT_VX_RM(vssubu_vx_d) static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) { @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8) RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16) RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32) RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64) -GEN_VEXT_VV_RM(vssub_vv_b, 1, 1) -GEN_VEXT_VV_RM(vssub_vv_h, 2, 2) -GEN_VEXT_VV_RM(vssub_vv_w, 4, 4) -GEN_VEXT_VV_RM(vssub_vv_d, 8, 8) +GEN_VEXT_VV_RM(vssub_vv_b) +GEN_VEXT_VV_RM(vssub_vv_h) +GEN_VEXT_VV_RM(vssub_vv_w) +GEN_VEXT_VV_RM(vssub_vv_d) RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8) RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16) RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32) RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64) -GEN_VEXT_VX_RM(vssub_vx_b, 1, 1) -GEN_VEXT_VX_RM(vssub_vx_h, 2, 2) -GEN_VEXT_VX_RM(vssub_vx_w, 4, 4) -GEN_VEXT_VX_RM(vssub_vx_d, 8, 8) +GEN_VEXT_VX_RM(vssub_vx_b) +GEN_VEXT_VX_RM(vssub_vx_h) +GEN_VEXT_VX_RM(vssub_vx_w) +GEN_VEXT_VX_RM(vssub_vx_d) /* Vector Single-Width Averaging Add and Subtract */ static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift) @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32) RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32) RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32) RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64) -GEN_VEXT_VV_RM(vaadd_vv_b, 1, 1) -GEN_VEXT_VV_RM(vaadd_vv_h, 2, 2) -GEN_VEXT_VV_RM(vaadd_vv_w, 4, 4) -GEN_VEXT_VV_RM(vaadd_vv_d, 8, 8) +GEN_VEXT_VV_RM(vaadd_vv_b) +GEN_VEXT_VV_RM(vaadd_vv_h) +GEN_VEXT_VV_RM(vaadd_vv_w) +GEN_VEXT_VV_RM(vaadd_vv_d) RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32) RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32) RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32) RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64) -GEN_VEXT_VX_RM(vaadd_vx_b, 1, 1) -GEN_VEXT_VX_RM(vaadd_vx_h, 2, 2) -GEN_VEXT_VX_RM(vaadd_vx_w, 4, 4) -GEN_VEXT_VX_RM(vaadd_vx_d, 8, 8) +GEN_VEXT_VX_RM(vaadd_vx_b) +GEN_VEXT_VX_RM(vaadd_vx_h) +GEN_VEXT_VX_RM(vaadd_vx_w) +GEN_VEXT_VX_RM(vaadd_vx_d) static inline uint32_t aaddu32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b) @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2_RM, vaaddu_vv_b, OP_UUU_B, H1, H1, H1, aaddu32) RVVCALL(OPIVV2_RM, vaaddu_vv_h, OP_UUU_H, H2, H2, H2, aaddu32) RVVCALL(OPIVV2_RM, vaaddu_vv_w, OP_UUU_W, H4, H4, H4, aaddu32) RVVCALL(OPIVV2_RM, vaaddu_vv_d, OP_UUU_D, H8, H8, H8, aaddu64) -GEN_VEXT_VV_RM(vaaddu_vv_b, 1, 1) -GEN_VEXT_VV_RM(vaaddu_vv_h, 2, 2) -GEN_VEXT_VV_RM(vaaddu_vv_w, 4, 4) -GEN_VEXT_VV_RM(vaaddu_vv_d, 8, 8) +GEN_VEXT_VV_RM(vaaddu_vv_b) +GEN_VEXT_VV_RM(vaaddu_vv_h) +GEN_VEXT_VV_RM(vaaddu_vv_w) +GEN_VEXT_VV_RM(vaaddu_vv_d) RVVCALL(OPIVX2_RM, vaaddu_vx_b, OP_UUU_B, H1, H1, aaddu32) RVVCALL(OPIVX2_RM, vaaddu_vx_h, OP_UUU_H, H2, H2, aaddu32) RVVCALL(OPIVX2_RM, vaaddu_vx_w, OP_UUU_W, H4, H4, aaddu32) RVVCALL(OPIVX2_RM, vaaddu_vx_d, OP_UUU_D, H8, H8, aaddu64) -GEN_VEXT_VX_RM(vaaddu_vx_b, 1, 1) -GEN_VEXT_VX_RM(vaaddu_vx_h, 2, 2) -GEN_VEXT_VX_RM(vaaddu_vx_w, 4, 4) -GEN_VEXT_VX_RM(vaaddu_vx_d, 8, 8) +GEN_VEXT_VX_RM(vaaddu_vx_b) +GEN_VEXT_VX_RM(vaaddu_vx_h) +GEN_VEXT_VX_RM(vaaddu_vx_w) +GEN_VEXT_VX_RM(vaaddu_vx_d) static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) { @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32) RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32) RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32) RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64) -GEN_VEXT_VV_RM(vasub_vv_b, 1, 1) -GEN_VEXT_VV_RM(vasub_vv_h, 2, 2) -GEN_VEXT_VV_RM(vasub_vv_w, 4, 4) -GEN_VEXT_VV_RM(vasub_vv_d, 8, 8) +GEN_VEXT_VV_RM(vasub_vv_b) +GEN_VEXT_VV_RM(vasub_vv_h) +GEN_VEXT_VV_RM(vasub_vv_w) +GEN_VEXT_VV_RM(vasub_vv_d) RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32) RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32) RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32) RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64) -GEN_VEXT_VX_RM(vasub_vx_b, 1, 1) -GEN_VEXT_VX_RM(vasub_vx_h, 2, 2) -GEN_VEXT_VX_RM(vasub_vx_w, 4, 4) -GEN_VEXT_VX_RM(vasub_vx_d, 8, 8) +GEN_VEXT_VX_RM(vasub_vx_b) +GEN_VEXT_VX_RM(vasub_vx_h) +GEN_VEXT_VX_RM(vasub_vx_w) +GEN_VEXT_VX_RM(vasub_vx_d) static inline uint32_t asubu32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b) @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2_RM, vasubu_vv_b, OP_UUU_B, H1, H1, H1, asubu32) RVVCALL(OPIVV2_RM, vasubu_vv_h, OP_UUU_H, H2, H2, H2, asubu32) RVVCALL(OPIVV2_RM, vasubu_vv_w, OP_UUU_W, H4, H4, H4, asubu32) RVVCALL(OPIVV2_RM, vasubu_vv_d, OP_UUU_D, H8, H8, H8, asubu64) -GEN_VEXT_VV_RM(vasubu_vv_b, 1, 1) -GEN_VEXT_VV_RM(vasubu_vv_h, 2, 2) -GEN_VEXT_VV_RM(vasubu_vv_w, 4, 4) -GEN_VEXT_VV_RM(vasubu_vv_d, 8, 8) +GEN_VEXT_VV_RM(vasubu_vv_b) +GEN_VEXT_VV_RM(vasubu_vv_h) +GEN_VEXT_VV_RM(vasubu_vv_w) +GEN_VEXT_VV_RM(vasubu_vv_d) RVVCALL(OPIVX2_RM, vasubu_vx_b, OP_UUU_B, H1, H1, asubu32) RVVCALL(OPIVX2_RM, vasubu_vx_h, OP_UUU_H, H2, H2, asubu32) RVVCALL(OPIVX2_RM, vasubu_vx_w, OP_UUU_W, H4, H4, asubu32) RVVCALL(OPIVX2_RM, vasubu_vx_d, OP_UUU_D, H8, H8, asubu64) -GEN_VEXT_VX_RM(vasubu_vx_b, 1, 1) -GEN_VEXT_VX_RM(vasubu_vx_h, 2, 2) -GEN_VEXT_VX_RM(vasubu_vx_w, 4, 4) -GEN_VEXT_VX_RM(vasubu_vx_d, 8, 8) +GEN_VEXT_VX_RM(vasubu_vx_b) +GEN_VEXT_VX_RM(vasubu_vx_h) +GEN_VEXT_VX_RM(vasubu_vx_w) +GEN_VEXT_VX_RM(vasubu_vx_d) /* Vector Single-Width Fractional Multiply with Rounding and Saturation */ static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8) RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16) RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32) RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64) -GEN_VEXT_VV_RM(vsmul_vv_b, 1, 1) -GEN_VEXT_VV_RM(vsmul_vv_h, 2, 2) -GEN_VEXT_VV_RM(vsmul_vv_w, 4, 4) -GEN_VEXT_VV_RM(vsmul_vv_d, 8, 8) +GEN_VEXT_VV_RM(vsmul_vv_b) +GEN_VEXT_VV_RM(vsmul_vv_h) +GEN_VEXT_VV_RM(vsmul_vv_w) +GEN_VEXT_VV_RM(vsmul_vv_d) RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8) RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16) RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32) RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64) -GEN_VEXT_VX_RM(vsmul_vx_b, 1, 1) -GEN_VEXT_VX_RM(vsmul_vx_h, 2, 2) -GEN_VEXT_VX_RM(vsmul_vx_w, 4, 4) -GEN_VEXT_VX_RM(vsmul_vx_d, 8, 8) +GEN_VEXT_VX_RM(vsmul_vx_b) +GEN_VEXT_VX_RM(vsmul_vx_h) +GEN_VEXT_VX_RM(vsmul_vx_w) +GEN_VEXT_VX_RM(vsmul_vx_d) /* Vector Single-Width Scaling Shift Instructions */ static inline uint8_t @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8) RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16) RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32) RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64) -GEN_VEXT_VV_RM(vssrl_vv_b, 1, 1) -GEN_VEXT_VV_RM(vssrl_vv_h, 2, 2) -GEN_VEXT_VV_RM(vssrl_vv_w, 4, 4) -GEN_VEXT_VV_RM(vssrl_vv_d, 8, 8) +GEN_VEXT_VV_RM(vssrl_vv_b) +GEN_VEXT_VV_RM(vssrl_vv_h) +GEN_VEXT_VV_RM(vssrl_vv_w) +GEN_VEXT_VV_RM(vssrl_vv_d) RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8) RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16) RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32) RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64) -GEN_VEXT_VX_RM(vssrl_vx_b, 1, 1) -GEN_VEXT_VX_RM(vssrl_vx_h, 2, 2) -GEN_VEXT_VX_RM(vssrl_vx_w, 4, 4) -GEN_VEXT_VX_RM(vssrl_vx_d, 8, 8) +GEN_VEXT_VX_RM(vssrl_vx_b) +GEN_VEXT_VX_RM(vssrl_vx_h) +GEN_VEXT_VX_RM(vssrl_vx_w) +GEN_VEXT_VX_RM(vssrl_vx_d) static inline int8_t vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8) RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16) RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32) RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64) -GEN_VEXT_VV_RM(vssra_vv_b, 1, 1) -GEN_VEXT_VV_RM(vssra_vv_h, 2, 2) -GEN_VEXT_VV_RM(vssra_vv_w, 4, 4) -GEN_VEXT_VV_RM(vssra_vv_d, 8, 8) +GEN_VEXT_VV_RM(vssra_vv_b) +GEN_VEXT_VV_RM(vssra_vv_h) +GEN_VEXT_VV_RM(vssra_vv_w) +GEN_VEXT_VV_RM(vssra_vv_d) RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8) RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16) RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32) RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64) -GEN_VEXT_VX_RM(vssra_vx_b, 1, 1) -GEN_VEXT_VX_RM(vssra_vx_h, 2, 2) -GEN_VEXT_VX_RM(vssra_vx_w, 4, 4) -GEN_VEXT_VX_RM(vssra_vx_d, 8, 8) +GEN_VEXT_VX_RM(vssra_vx_b) +GEN_VEXT_VX_RM(vssra_vx_h) +GEN_VEXT_VX_RM(vssra_vx_w) +GEN_VEXT_VX_RM(vssra_vx_d) /* Vector Narrowing Fixed-Point Clip Instructions */ static inline int8_t @@ -XXX,XX +XXX,XX @@ vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b) RVVCALL(OPIVV2_RM, vnclip_wv_b, NOP_SSS_B, H1, H2, H1, vnclip8) RVVCALL(OPIVV2_RM, vnclip_wv_h, NOP_SSS_H, H2, H4, H2, vnclip16) RVVCALL(OPIVV2_RM, vnclip_wv_w, NOP_SSS_W, H4, H8, H4, vnclip32) -GEN_VEXT_VV_RM(vnclip_wv_b, 1, 1) -GEN_VEXT_VV_RM(vnclip_wv_h, 2, 2) -GEN_VEXT_VV_RM(vnclip_wv_w, 4, 4) +GEN_VEXT_VV_RM(vnclip_wv_b) +GEN_VEXT_VV_RM(vnclip_wv_h) +GEN_VEXT_VV_RM(vnclip_wv_w) RVVCALL(OPIVX2_RM, vnclip_wx_b, NOP_SSS_B, H1, H2, vnclip8) RVVCALL(OPIVX2_RM, vnclip_wx_h, NOP_SSS_H, H2, H4, vnclip16) RVVCALL(OPIVX2_RM, vnclip_wx_w, NOP_SSS_W, H4, H8, vnclip32) -GEN_VEXT_VX_RM(vnclip_wx_b, 1, 1) -GEN_VEXT_VX_RM(vnclip_wx_h, 2, 2) -GEN_VEXT_VX_RM(vnclip_wx_w, 4, 4) +GEN_VEXT_VX_RM(vnclip_wx_b) +GEN_VEXT_VX_RM(vnclip_wx_h) +GEN_VEXT_VX_RM(vnclip_wx_w) static inline uint8_t vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b) @@ -XXX,XX +XXX,XX @@ vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b) RVVCALL(OPIVV2_RM, vnclipu_wv_b, NOP_UUU_B, H1, H2, H1, vnclipu8) RVVCALL(OPIVV2_RM, vnclipu_wv_h, NOP_UUU_H, H2, H4, H2, vnclipu16) RVVCALL(OPIVV2_RM, vnclipu_wv_w, NOP_UUU_W, H4, H8, H4, vnclipu32) -GEN_VEXT_VV_RM(vnclipu_wv_b, 1, 1) -GEN_VEXT_VV_RM(vnclipu_wv_h, 2, 2) -GEN_VEXT_VV_RM(vnclipu_wv_w, 4, 4) +GEN_VEXT_VV_RM(vnclipu_wv_b) +GEN_VEXT_VV_RM(vnclipu_wv_h) +GEN_VEXT_VV_RM(vnclipu_wv_w) RVVCALL(OPIVX2_RM, vnclipu_wx_b, NOP_UUU_B, H1, H2, vnclipu8) RVVCALL(OPIVX2_RM, vnclipu_wx_h, NOP_UUU_H, H2, H4, vnclipu16) RVVCALL(OPIVX2_RM, vnclipu_wx_w, NOP_UUU_W, H4, H8, vnclipu32) -GEN_VEXT_VX_RM(vnclipu_wx_b, 1, 1) -GEN_VEXT_VX_RM(vnclipu_wx_h, 2, 2) -GEN_VEXT_VX_RM(vnclipu_wx_w, 4, 4) +GEN_VEXT_VX_RM(vnclipu_wx_b) +GEN_VEXT_VX_RM(vnclipu_wx_h) +GEN_VEXT_VX_RM(vnclipu_wx_w) /* *** Vector Float Point Arithmetic Instructions @@ -XXX,XX +XXX,XX @@ static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status); \ } -#define GEN_VEXT_VV_ENV(NAME, ESZ, DSZ) \ +#define GEN_VEXT_VV_ENV(NAME) \ void HELPER(NAME)(void *vd, void *v0, void *vs1, \ void *vs2, CPURISCVState *env, \ uint32_t desc) \ @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, \ RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add) RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add) RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add) -GEN_VEXT_VV_ENV(vfadd_vv_h, 2, 2) -GEN_VEXT_VV_ENV(vfadd_vv_w, 4, 4) -GEN_VEXT_VV_ENV(vfadd_vv_d, 8, 8) +GEN_VEXT_VV_ENV(vfadd_vv_h) +GEN_VEXT_VV_ENV(vfadd_vv_w) +GEN_VEXT_VV_ENV(vfadd_vv_d) #define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ @@ -XXX,XX +XXX,XX @@ static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, &env->fp_status);\ } -#define GEN_VEXT_VF(NAME, ESZ, DSZ) \ +#define GEN_VEXT_VF(NAME) \ void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \ void *vs2, CPURISCVState *env, \ uint32_t desc) \ @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \ RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add) RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add) RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add) -GEN_VEXT_VF(vfadd_vf_h, 2, 2) -GEN_VEXT_VF(vfadd_vf_w, 4, 4) -GEN_VEXT_VF(vfadd_vf_d, 8, 8) +GEN_VEXT_VF(vfadd_vf_h) +GEN_VEXT_VF(vfadd_vf_w) +GEN_VEXT_VF(vfadd_vf_d) RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub) RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub) RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub) -GEN_VEXT_VV_ENV(vfsub_vv_h, 2, 2) -GEN_VEXT_VV_ENV(vfsub_vv_w, 4, 4) -GEN_VEXT_VV_ENV(vfsub_vv_d, 8, 8) +GEN_VEXT_VV_ENV(vfsub_vv_h) +GEN_VEXT_VV_ENV(vfsub_vv_w) +GEN_VEXT_VV_ENV(vfsub_vv_d) RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub) RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub) RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub) -GEN_VEXT_VF(vfsub_vf_h, 2, 2) -GEN_VEXT_VF(vfsub_vf_w, 4, 4) -GEN_VEXT_VF(vfsub_vf_d, 8, 8) +GEN_VEXT_VF(vfsub_vf_h) +GEN_VEXT_VF(vfsub_vf_w) +GEN_VEXT_VF(vfsub_vf_d) static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s) { @@ -XXX,XX +XXX,XX @@ static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s) RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub) RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub) RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub) -GEN_VEXT_VF(vfrsub_vf_h, 2, 2) -GEN_VEXT_VF(vfrsub_vf_w, 4, 4) -GEN_VEXT_VF(vfrsub_vf_d, 8, 8) +GEN_VEXT_VF(vfrsub_vf_h) +GEN_VEXT_VF(vfrsub_vf_w) +GEN_VEXT_VF(vfrsub_vf_d) /* Vector Widening Floating-Point Add/Subtract Instructions */ static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s) @@ -XXX,XX +XXX,XX @@ static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s) RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16) RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32) -GEN_VEXT_VV_ENV(vfwadd_vv_h, 2, 4) -GEN_VEXT_VV_ENV(vfwadd_vv_w, 4, 8) +GEN_VEXT_VV_ENV(vfwadd_vv_h) +GEN_VEXT_VV_ENV(vfwadd_vv_w) RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16) RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32) -GEN_VEXT_VF(vfwadd_vf_h, 2, 4) -GEN_VEXT_VF(vfwadd_vf_w, 4, 8) +GEN_VEXT_VF(vfwadd_vf_h) +GEN_VEXT_VF(vfwadd_vf_w) static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s) { @@ -XXX,XX +XXX,XX @@ static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s) RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16) RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32) -GEN_VEXT_VV_ENV(vfwsub_vv_h, 2, 4) -GEN_VEXT_VV_ENV(vfwsub_vv_w, 4, 8) +GEN_VEXT_VV_ENV(vfwsub_vv_h) +GEN_VEXT_VV_ENV(vfwsub_vv_w) RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16) RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32) -GEN_VEXT_VF(vfwsub_vf_h, 2, 4) -GEN_VEXT_VF(vfwsub_vf_w, 4, 8) +GEN_VEXT_VF(vfwsub_vf_h) +GEN_VEXT_VF(vfwsub_vf_w) static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s) { @@ -XXX,XX +XXX,XX @@ static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s) RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16) RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32) -GEN_VEXT_VV_ENV(vfwadd_wv_h, 2, 4) -GEN_VEXT_VV_ENV(vfwadd_wv_w, 4, 8) +GEN_VEXT_VV_ENV(vfwadd_wv_h) +GEN_VEXT_VV_ENV(vfwadd_wv_w) RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16) RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32) -GEN_VEXT_VF(vfwadd_wf_h, 2, 4) -GEN_VEXT_VF(vfwadd_wf_w, 4, 8) +GEN_VEXT_VF(vfwadd_wf_h) +GEN_VEXT_VF(vfwadd_wf_w) static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s) { @@ -XXX,XX +XXX,XX @@ static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s) RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16) RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32) -GEN_VEXT_VV_ENV(vfwsub_wv_h, 2, 4) -GEN_VEXT_VV_ENV(vfwsub_wv_w, 4, 8) +GEN_VEXT_VV_ENV(vfwsub_wv_h) +GEN_VEXT_VV_ENV(vfwsub_wv_w) RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16) RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32) -GEN_VEXT_VF(vfwsub_wf_h, 2, 4) -GEN_VEXT_VF(vfwsub_wf_w, 4, 8) +GEN_VEXT_VF(vfwsub_wf_h) +GEN_VEXT_VF(vfwsub_wf_w) /* Vector Single-Width Floating-Point Multiply/Divide Instructions */ RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul) RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul) RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul) -GEN_VEXT_VV_ENV(vfmul_vv_h, 2, 2) -GEN_VEXT_VV_ENV(vfmul_vv_w, 4, 4) -GEN_VEXT_VV_ENV(vfmul_vv_d, 8, 8) +GEN_VEXT_VV_ENV(vfmul_vv_h) +GEN_VEXT_VV_ENV(vfmul_vv_w) +GEN_VEXT_VV_ENV(vfmul_vv_d) RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul) RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul) RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul) -GEN_VEXT_VF(vfmul_vf_h, 2, 2) -GEN_VEXT_VF(vfmul_vf_w, 4, 4) -GEN_VEXT_VF(vfmul_vf_d, 8, 8) +GEN_VEXT_VF(vfmul_vf_h) +GEN_VEXT_VF(vfmul_vf_w) +GEN_VEXT_VF(vfmul_vf_d) RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div) RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div) RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div) -GEN_VEXT_VV_ENV(vfdiv_vv_h, 2, 2) -GEN_VEXT_VV_ENV(vfdiv_vv_w, 4, 4) -GEN_VEXT_VV_ENV(vfdiv_vv_d, 8, 8) +GEN_VEXT_VV_ENV(vfdiv_vv_h) +GEN_VEXT_VV_ENV(vfdiv_vv_w) +GEN_VEXT_VV_ENV(vfdiv_vv_d) RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div) RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div) RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div) -GEN_VEXT_VF(vfdiv_vf_h, 2, 2) -GEN_VEXT_VF(vfdiv_vf_w, 4, 4) -GEN_VEXT_VF(vfdiv_vf_d, 8, 8) +GEN_VEXT_VF(vfdiv_vf_h) +GEN_VEXT_VF(vfdiv_vf_w) +GEN_VEXT_VF(vfdiv_vf_d) static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s) { @@ -XXX,XX +XXX,XX @@ static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s) RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv) RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv) RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv) -GEN_VEXT_VF(vfrdiv_vf_h, 2, 2) -GEN_VEXT_VF(vfrdiv_vf_w, 4, 4) -GEN_VEXT_VF(vfrdiv_vf_d, 8, 8) +GEN_VEXT_VF(vfrdiv_vf_h) +GEN_VEXT_VF(vfrdiv_vf_w) +GEN_VEXT_VF(vfrdiv_vf_d) /* Vector Widening Floating-Point Multiply */ static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s) @@ -XXX,XX +XXX,XX @@ static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s) } RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16) RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32) -GEN_VEXT_VV_ENV(vfwmul_vv_h, 2, 4) -GEN_VEXT_VV_ENV(vfwmul_vv_w, 4, 8) +GEN_VEXT_VV_ENV(vfwmul_vv_h) +GEN_VEXT_VV_ENV(vfwmul_vv_w) RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16) RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32) -GEN_VEXT_VF(vfwmul_vf_h, 2, 4) -GEN_VEXT_VF(vfwmul_vf_w, 4, 8) +GEN_VEXT_VF(vfwmul_vf_h) +GEN_VEXT_VF(vfwmul_vf_w) /* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */ #define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ @@ -XXX,XX +XXX,XX @@ static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16) RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32) RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64) -GEN_VEXT_VV_ENV(vfmacc_vv_h, 2, 2) -GEN_VEXT_VV_ENV(vfmacc_vv_w, 4, 4) -GEN_VEXT_VV_ENV(vfmacc_vv_d, 8, 8) +GEN_VEXT_VV_ENV(vfmacc_vv_h) +GEN_VEXT_VV_ENV(vfmacc_vv_w) +GEN_VEXT_VV_ENV(vfmacc_vv_d) #define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ @@ -XXX,XX +XXX,XX @@ static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16) RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32) RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64) -GEN_VEXT_VF(vfmacc_vf_h, 2, 2) -GEN_VEXT_VF(vfmacc_vf_w, 4, 4) -GEN_VEXT_VF(vfmacc_vf_d, 8, 8) +GEN_VEXT_VF(vfmacc_vf_h) +GEN_VEXT_VF(vfmacc_vf_w) +GEN_VEXT_VF(vfmacc_vf_d) static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) { @@ -XXX,XX +XXX,XX @@ static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16) RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32) RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64) -GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2, 2) -GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4, 4) -GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8, 8) +GEN_VEXT_VV_ENV(vfnmacc_vv_h) +GEN_VEXT_VV_ENV(vfnmacc_vv_w) +GEN_VEXT_VV_ENV(vfnmacc_vv_d) RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16) RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32) RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64) -GEN_VEXT_VF(vfnmacc_vf_h, 2, 2) -GEN_VEXT_VF(vfnmacc_vf_w, 4, 4) -GEN_VEXT_VF(vfnmacc_vf_d, 8, 8) +GEN_VEXT_VF(vfnmacc_vf_h) +GEN_VEXT_VF(vfnmacc_vf_w) +GEN_VEXT_VF(vfnmacc_vf_d) static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) { @@ -XXX,XX +XXX,XX @@ static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16) RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32) RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64) -GEN_VEXT_VV_ENV(vfmsac_vv_h, 2, 2) -GEN_VEXT_VV_ENV(vfmsac_vv_w, 4, 4) -GEN_VEXT_VV_ENV(vfmsac_vv_d, 8, 8) +GEN_VEXT_VV_ENV(vfmsac_vv_h) +GEN_VEXT_VV_ENV(vfmsac_vv_w) +GEN_VEXT_VV_ENV(vfmsac_vv_d) RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16) RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32) RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64) -GEN_VEXT_VF(vfmsac_vf_h, 2, 2) -GEN_VEXT_VF(vfmsac_vf_w, 4, 4) -GEN_VEXT_VF(vfmsac_vf_d, 8, 8) +GEN_VEXT_VF(vfmsac_vf_h) +GEN_VEXT_VF(vfmsac_vf_w) +GEN_VEXT_VF(vfmsac_vf_d) static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) { @@ -XXX,XX +XXX,XX @@ static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16) RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32) RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64) -GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2, 2) -GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4, 4) -GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8, 8) +GEN_VEXT_VV_ENV(vfnmsac_vv_h) +GEN_VEXT_VV_ENV(vfnmsac_vv_w) +GEN_VEXT_VV_ENV(vfnmsac_vv_d) RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16) RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32) RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64) -GEN_VEXT_VF(vfnmsac_vf_h, 2, 2) -GEN_VEXT_VF(vfnmsac_vf_w, 4, 4) -GEN_VEXT_VF(vfnmsac_vf_d, 8, 8) +GEN_VEXT_VF(vfnmsac_vf_h) +GEN_VEXT_VF(vfnmsac_vf_w) +GEN_VEXT_VF(vfnmsac_vf_d) static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) { @@ -XXX,XX +XXX,XX @@ static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16) RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32) RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64) -GEN_VEXT_VV_ENV(vfmadd_vv_h, 2, 2) -GEN_VEXT_VV_ENV(vfmadd_vv_w, 4, 4) -GEN_VEXT_VV_ENV(vfmadd_vv_d, 8, 8) +GEN_VEXT_VV_ENV(vfmadd_vv_h) +GEN_VEXT_VV_ENV(vfmadd_vv_w) +GEN_VEXT_VV_ENV(vfmadd_vv_d) RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16) RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32) RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64) -GEN_VEXT_VF(vfmadd_vf_h, 2, 2) -GEN_VEXT_VF(vfmadd_vf_w, 4, 4) -GEN_VEXT_VF(vfmadd_vf_d, 8, 8) +GEN_VEXT_VF(vfmadd_vf_h) +GEN_VEXT_VF(vfmadd_vf_w) +GEN_VEXT_VF(vfmadd_vf_d) static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) { @@ -XXX,XX +XXX,XX @@ static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16) RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32) RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64) -GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2, 2) -GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4, 4) -GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8, 8) +GEN_VEXT_VV_ENV(vfnmadd_vv_h) +GEN_VEXT_VV_ENV(vfnmadd_vv_w) +GEN_VEXT_VV_ENV(vfnmadd_vv_d) RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16) RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32) RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64) -GEN_VEXT_VF(vfnmadd_vf_h, 2, 2) -GEN_VEXT_VF(vfnmadd_vf_w, 4, 4) -GEN_VEXT_VF(vfnmadd_vf_d, 8, 8) +GEN_VEXT_VF(vfnmadd_vf_h) +GEN_VEXT_VF(vfnmadd_vf_w) +GEN_VEXT_VF(vfnmadd_vf_d) static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) { @@ -XXX,XX +XXX,XX @@ static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16) RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32) RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64) -GEN_VEXT_VV_ENV(vfmsub_vv_h, 2, 2) -GEN_VEXT_VV_ENV(vfmsub_vv_w, 4, 4) -GEN_VEXT_VV_ENV(vfmsub_vv_d, 8, 8) +GEN_VEXT_VV_ENV(vfmsub_vv_h) +GEN_VEXT_VV_ENV(vfmsub_vv_w) +GEN_VEXT_VV_ENV(vfmsub_vv_d) RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16) RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32) RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64) -GEN_VEXT_VF(vfmsub_vf_h, 2, 2) -GEN_VEXT_VF(vfmsub_vf_w, 4, 4) -GEN_VEXT_VF(vfmsub_vf_d, 8, 8) +GEN_VEXT_VF(vfmsub_vf_h) +GEN_VEXT_VF(vfmsub_vf_w) +GEN_VEXT_VF(vfmsub_vf_d) static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) { @@ -XXX,XX +XXX,XX @@ static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16) RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32) RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64) -GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2, 2) -GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4, 4) -GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8, 8) +GEN_VEXT_VV_ENV(vfnmsub_vv_h) +GEN_VEXT_VV_ENV(vfnmsub_vv_w) +GEN_VEXT_VV_ENV(vfnmsub_vv_d) RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16) RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32) RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64) -GEN_VEXT_VF(vfnmsub_vf_h, 2, 2) -GEN_VEXT_VF(vfnmsub_vf_w, 4, 4) -GEN_VEXT_VF(vfnmsub_vf_d, 8, 8) +GEN_VEXT_VF(vfnmsub_vf_h) +GEN_VEXT_VF(vfnmsub_vf_w) +GEN_VEXT_VF(vfnmsub_vf_d) /* Vector Widening Floating-Point Fused Multiply-Add Instructions */ static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) @@ -XXX,XX +XXX,XX @@ static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16) RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32) -GEN_VEXT_VV_ENV(vfwmacc_vv_h, 2, 4) -GEN_VEXT_VV_ENV(vfwmacc_vv_w, 4, 8) +GEN_VEXT_VV_ENV(vfwmacc_vv_h) +GEN_VEXT_VV_ENV(vfwmacc_vv_w) RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16) RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32) -GEN_VEXT_VF(vfwmacc_vf_h, 2, 4) -GEN_VEXT_VF(vfwmacc_vf_w, 4, 8) +GEN_VEXT_VF(vfwmacc_vf_h) +GEN_VEXT_VF(vfwmacc_vf_w) static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) { @@ -XXX,XX +XXX,XX @@ static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16) RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32) -GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 2, 4) -GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 4, 8) +GEN_VEXT_VV_ENV(vfwnmacc_vv_h) +GEN_VEXT_VV_ENV(vfwnmacc_vv_w) RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16) RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32) -GEN_VEXT_VF(vfwnmacc_vf_h, 2, 4) -GEN_VEXT_VF(vfwnmacc_vf_w, 4, 8) +GEN_VEXT_VF(vfwnmacc_vf_h) +GEN_VEXT_VF(vfwnmacc_vf_w) static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) { @@ -XXX,XX +XXX,XX @@ static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16) RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32) -GEN_VEXT_VV_ENV(vfwmsac_vv_h, 2, 4) -GEN_VEXT_VV_ENV(vfwmsac_vv_w, 4, 8) +GEN_VEXT_VV_ENV(vfwmsac_vv_h) +GEN_VEXT_VV_ENV(vfwmsac_vv_w) RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16) RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32) -GEN_VEXT_VF(vfwmsac_vf_h, 2, 4) -GEN_VEXT_VF(vfwmsac_vf_w, 4, 8) +GEN_VEXT_VF(vfwmsac_vf_h) +GEN_VEXT_VF(vfwmsac_vf_w) static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) { @@ -XXX,XX +XXX,XX @@ static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16) RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32) -GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 2, 4) -GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 4, 8) +GEN_VEXT_VV_ENV(vfwnmsac_vv_h) +GEN_VEXT_VV_ENV(vfwnmsac_vv_w) RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16) RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32) -GEN_VEXT_VF(vfwnmsac_vf_h, 2, 4) -GEN_VEXT_VF(vfwnmsac_vf_w, 4, 8) +GEN_VEXT_VF(vfwnmsac_vf_h) +GEN_VEXT_VF(vfwnmsac_vf_w) /* Vector Floating-Point Square-Root Instruction */ /* (TD, T2, TX2) */ @@ -XXX,XX +XXX,XX @@ static void do_##NAME(void *vd, void *vs2, int i, \ *((TD *)vd + HD(i)) = OP(s2, &env->fp_status); \ } -#define GEN_VEXT_V_ENV(NAME, ESZ, DSZ) \ +#define GEN_VEXT_V_ENV(NAME) \ void HELPER(NAME)(void *vd, void *v0, void *vs2, \ CPURISCVState *env, uint32_t desc) \ { \ @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs2, \ RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt) RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt) RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt) -GEN_VEXT_V_ENV(vfsqrt_v_h, 2, 2) -GEN_VEXT_V_ENV(vfsqrt_v_w, 4, 4) -GEN_VEXT_V_ENV(vfsqrt_v_d, 8, 8) +GEN_VEXT_V_ENV(vfsqrt_v_h) +GEN_VEXT_V_ENV(vfsqrt_v_w) +GEN_VEXT_V_ENV(vfsqrt_v_d) /* * Vector Floating-Point Reciprocal Square-Root Estimate Instruction @@ -XXX,XX +XXX,XX @@ static float64 frsqrt7_d(float64 f, float_status *s) RVVCALL(OPFVV1, vfrsqrt7_v_h, OP_UU_H, H2, H2, frsqrt7_h) RVVCALL(OPFVV1, vfrsqrt7_v_w, OP_UU_W, H4, H4, frsqrt7_s) RVVCALL(OPFVV1, vfrsqrt7_v_d, OP_UU_D, H8, H8, frsqrt7_d) -GEN_VEXT_V_ENV(vfrsqrt7_v_h, 2, 2) -GEN_VEXT_V_ENV(vfrsqrt7_v_w, 4, 4) -GEN_VEXT_V_ENV(vfrsqrt7_v_d, 8, 8) +GEN_VEXT_V_ENV(vfrsqrt7_v_h) +GEN_VEXT_V_ENV(vfrsqrt7_v_w) +GEN_VEXT_V_ENV(vfrsqrt7_v_d) /* * Vector Floating-Point Reciprocal Estimate Instruction @@ -XXX,XX +XXX,XX @@ static float64 frec7_d(float64 f, float_status *s) RVVCALL(OPFVV1, vfrec7_v_h, OP_UU_H, H2, H2, frec7_h) RVVCALL(OPFVV1, vfrec7_v_w, OP_UU_W, H4, H4, frec7_s) RVVCALL(OPFVV1, vfrec7_v_d, OP_UU_D, H8, H8, frec7_d) -GEN_VEXT_V_ENV(vfrec7_v_h, 2, 2) -GEN_VEXT_V_ENV(vfrec7_v_w, 4, 4) -GEN_VEXT_V_ENV(vfrec7_v_d, 8, 8) +GEN_VEXT_V_ENV(vfrec7_v_h) +GEN_VEXT_V_ENV(vfrec7_v_w) +GEN_VEXT_V_ENV(vfrec7_v_d) /* Vector Floating-Point MIN/MAX Instructions */ RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minimum_number) RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, float32_minimum_number) RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minimum_number) -GEN_VEXT_VV_ENV(vfmin_vv_h, 2, 2) -GEN_VEXT_VV_ENV(vfmin_vv_w, 4, 4) -GEN_VEXT_VV_ENV(vfmin_vv_d, 8, 8) +GEN_VEXT_VV_ENV(vfmin_vv_h) +GEN_VEXT_VV_ENV(vfmin_vv_w) +GEN_VEXT_VV_ENV(vfmin_vv_d) RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minimum_number) RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minimum_number) RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minimum_number) -GEN_VEXT_VF(vfmin_vf_h, 2, 2) -GEN_VEXT_VF(vfmin_vf_w, 4, 4) -GEN_VEXT_VF(vfmin_vf_d, 8, 8) +GEN_VEXT_VF(vfmin_vf_h) +GEN_VEXT_VF(vfmin_vf_w) +GEN_VEXT_VF(vfmin_vf_d) RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maximum_number) RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maximum_number) RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maximum_number) -GEN_VEXT_VV_ENV(vfmax_vv_h, 2, 2) -GEN_VEXT_VV_ENV(vfmax_vv_w, 4, 4) -GEN_VEXT_VV_ENV(vfmax_vv_d, 8, 8) +GEN_VEXT_VV_ENV(vfmax_vv_h) +GEN_VEXT_VV_ENV(vfmax_vv_w) +GEN_VEXT_VV_ENV(vfmax_vv_d) RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maximum_number) RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maximum_number) RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maximum_number) -GEN_VEXT_VF(vfmax_vf_h, 2, 2) -GEN_VEXT_VF(vfmax_vf_w, 4, 4) -GEN_VEXT_VF(vfmax_vf_d, 8, 8) +GEN_VEXT_VF(vfmax_vf_h) +GEN_VEXT_VF(vfmax_vf_w) +GEN_VEXT_VF(vfmax_vf_d) /* Vector Floating-Point Sign-Injection Instructions */ static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s) @@ -XXX,XX +XXX,XX @@ static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s) RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16) RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32) RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64) -GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2, 2) -GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4, 4) -GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8, 8) +GEN_VEXT_VV_ENV(vfsgnj_vv_h) +GEN_VEXT_VV_ENV(vfsgnj_vv_w) +GEN_VEXT_VV_ENV(vfsgnj_vv_d) RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16) RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32) RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64) -GEN_VEXT_VF(vfsgnj_vf_h, 2, 2) -GEN_VEXT_VF(vfsgnj_vf_w, 4, 4) -GEN_VEXT_VF(vfsgnj_vf_d, 8, 8) +GEN_VEXT_VF(vfsgnj_vf_h) +GEN_VEXT_VF(vfsgnj_vf_w) +GEN_VEXT_VF(vfsgnj_vf_d) static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s) { @@ -XXX,XX +XXX,XX @@ static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s) RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16) RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32) RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64) -GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2, 2) -GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4, 4) -GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8, 8) +GEN_VEXT_VV_ENV(vfsgnjn_vv_h) +GEN_VEXT_VV_ENV(vfsgnjn_vv_w) +GEN_VEXT_VV_ENV(vfsgnjn_vv_d) RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16) RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32) RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64) -GEN_VEXT_VF(vfsgnjn_vf_h, 2, 2) -GEN_VEXT_VF(vfsgnjn_vf_w, 4, 4) -GEN_VEXT_VF(vfsgnjn_vf_d, 8, 8) +GEN_VEXT_VF(vfsgnjn_vf_h) +GEN_VEXT_VF(vfsgnjn_vf_w) +GEN_VEXT_VF(vfsgnjn_vf_d) static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s) { @@ -XXX,XX +XXX,XX @@ static uint64_t fsgnjx64(uint64_t a, uint64_t b, float_status *s) RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16) RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32) RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64) -GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2, 2) -GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4, 4) -GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8, 8) +GEN_VEXT_VV_ENV(vfsgnjx_vv_h) +GEN_VEXT_VV_ENV(vfsgnjx_vv_w) +GEN_VEXT_VV_ENV(vfsgnjx_vv_d) RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16) RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32) RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64) -GEN_VEXT_VF(vfsgnjx_vf_h, 2, 2) -GEN_VEXT_VF(vfsgnjx_vf_w, 4, 4) -GEN_VEXT_VF(vfsgnjx_vf_d, 8, 8) +GEN_VEXT_VF(vfsgnjx_vf_h) +GEN_VEXT_VF(vfsgnjx_vf_w) +GEN_VEXT_VF(vfsgnjx_vf_d) /* Vector Floating-Point Compare Instructions */ #define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP) \ @@ -XXX,XX +XXX,XX @@ static void do_##NAME(void *vd, void *vs2, int i) \ *((TD *)vd + HD(i)) = OP(s2); \ } -#define GEN_VEXT_V(NAME, ESZ, DSZ) \ +#define GEN_VEXT_V(NAME) \ void HELPER(NAME)(void *vd, void *v0, void *vs2, \ CPURISCVState *env, uint32_t desc) \ { \ @@ -XXX,XX +XXX,XX @@ target_ulong fclass_d(uint64_t frs1) RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h) RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s) RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d) -GEN_VEXT_V(vfclass_v_h, 2, 2) -GEN_VEXT_V(vfclass_v_w, 4, 4) -GEN_VEXT_V(vfclass_v_d, 8, 8) +GEN_VEXT_V(vfclass_v_h) +GEN_VEXT_V(vfclass_v_w) +GEN_VEXT_V(vfclass_v_d) /* Vector Floating-Point Merge Instruction */ #define GEN_VFMERGE_VF(NAME, ETYPE, H) \ @@ -XXX,XX +XXX,XX @@ GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8) RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16) RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32) RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64) -GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2, 2) -GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4, 4) -GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8, 8) +GEN_VEXT_V_ENV(vfcvt_xu_f_v_h) +GEN_VEXT_V_ENV(vfcvt_xu_f_v_w) +GEN_VEXT_V_ENV(vfcvt_xu_f_v_d) /* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */ RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16) RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32) RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64) -GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2, 2) -GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4, 4) -GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8, 8) +GEN_VEXT_V_ENV(vfcvt_x_f_v_h) +GEN_VEXT_V_ENV(vfcvt_x_f_v_w) +GEN_VEXT_V_ENV(vfcvt_x_f_v_d) /* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */ RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16) RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32) RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64) -GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2, 2) -GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4, 4) -GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8, 8) +GEN_VEXT_V_ENV(vfcvt_f_xu_v_h) +GEN_VEXT_V_ENV(vfcvt_f_xu_v_w) +GEN_VEXT_V_ENV(vfcvt_f_xu_v_d) /* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */ RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16) RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32) RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64) -GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2, 2) -GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4, 4) -GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8, 8) +GEN_VEXT_V_ENV(vfcvt_f_x_v_h) +GEN_VEXT_V_ENV(vfcvt_f_x_v_w) +GEN_VEXT_V_ENV(vfcvt_f_x_v_d) /* Widening Floating-Point/Integer Type-Convert Instructions */ /* (TD, T2, TX2) */ @@ -XXX,XX +XXX,XX @@ GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8, 8) /* vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer.*/ RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32) RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64) -GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 2, 4) -GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 4, 8) +GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h) +GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w) /* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */ RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32) RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64) -GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 2, 4) -GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 4, 8) +GEN_VEXT_V_ENV(vfwcvt_x_f_v_h) +GEN_VEXT_V_ENV(vfwcvt_x_f_v_w) /* vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float */ RVVCALL(OPFVV1, vfwcvt_f_xu_v_b, WOP_UU_B, H2, H1, uint8_to_float16) RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32) RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64) -GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b, 1, 2) -GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 2, 4) -GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 4, 8) +GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b) +GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h) +GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w) /* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */ RVVCALL(OPFVV1, vfwcvt_f_x_v_b, WOP_UU_B, H2, H1, int8_to_float16) RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32) RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64) -GEN_VEXT_V_ENV(vfwcvt_f_x_v_b, 1, 2) -GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 2, 4) -GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 4, 8) +GEN_VEXT_V_ENV(vfwcvt_f_x_v_b) +GEN_VEXT_V_ENV(vfwcvt_f_x_v_h) +GEN_VEXT_V_ENV(vfwcvt_f_x_v_w) /* * vfwcvt.f.f.v vd, vs2, vm @@ -XXX,XX +XXX,XX @@ static uint32_t vfwcvtffv16(uint16_t a, float_status *s) RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16) RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64) -GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 2, 4) -GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 4, 8) +GEN_VEXT_V_ENV(vfwcvt_f_f_v_h) +GEN_VEXT_V_ENV(vfwcvt_f_f_v_w) /* Narrowing Floating-Point/Integer Type-Convert Instructions */ /* (TD, T2, TX2) */ @@ -XXX,XX +XXX,XX @@ GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 4, 8) RVVCALL(OPFVV1, vfncvt_xu_f_w_b, NOP_UU_B, H1, H2, float16_to_uint8) RVVCALL(OPFVV1, vfncvt_xu_f_w_h, NOP_UU_H, H2, H4, float32_to_uint16) RVVCALL(OPFVV1, vfncvt_xu_f_w_w, NOP_UU_W, H4, H8, float64_to_uint32) -GEN_VEXT_V_ENV(vfncvt_xu_f_w_b, 1, 1) -GEN_VEXT_V_ENV(vfncvt_xu_f_w_h, 2, 2) -GEN_VEXT_V_ENV(vfncvt_xu_f_w_w, 4, 4) +GEN_VEXT_V_ENV(vfncvt_xu_f_w_b) +GEN_VEXT_V_ENV(vfncvt_xu_f_w_h) +GEN_VEXT_V_ENV(vfncvt_xu_f_w_w) /* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */ RVVCALL(OPFVV1, vfncvt_x_f_w_b, NOP_UU_B, H1, H2, float16_to_int8) RVVCALL(OPFVV1, vfncvt_x_f_w_h, NOP_UU_H, H2, H4, float32_to_int16) RVVCALL(OPFVV1, vfncvt_x_f_w_w, NOP_UU_W, H4, H8, float64_to_int32) -GEN_VEXT_V_ENV(vfncvt_x_f_w_b, 1, 1) -GEN_VEXT_V_ENV(vfncvt_x_f_w_h, 2, 2) -GEN_VEXT_V_ENV(vfncvt_x_f_w_w, 4, 4) +GEN_VEXT_V_ENV(vfncvt_x_f_w_b) +GEN_VEXT_V_ENV(vfncvt_x_f_w_h) +GEN_VEXT_V_ENV(vfncvt_x_f_w_w) /* vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float */ RVVCALL(OPFVV1, vfncvt_f_xu_w_h, NOP_UU_H, H2, H4, uint32_to_float16) RVVCALL(OPFVV1, vfncvt_f_xu_w_w, NOP_UU_W, H4, H8, uint64_to_float32) -GEN_VEXT_V_ENV(vfncvt_f_xu_w_h, 2, 2) -GEN_VEXT_V_ENV(vfncvt_f_xu_w_w, 4, 4) +GEN_VEXT_V_ENV(vfncvt_f_xu_w_h) +GEN_VEXT_V_ENV(vfncvt_f_xu_w_w) /* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */ RVVCALL(OPFVV1, vfncvt_f_x_w_h, NOP_UU_H, H2, H4, int32_to_float16) RVVCALL(OPFVV1, vfncvt_f_x_w_w, NOP_UU_W, H4, H8, int64_to_float32) -GEN_VEXT_V_ENV(vfncvt_f_x_w_h, 2, 2) -GEN_VEXT_V_ENV(vfncvt_f_x_w_w, 4, 4) +GEN_VEXT_V_ENV(vfncvt_f_x_w_h) +GEN_VEXT_V_ENV(vfncvt_f_x_w_w) /* vfncvt.f.f.v vd, vs2, vm # Convert double float to single-width float. */ static uint16_t vfncvtffv16(uint32_t a, float_status *s) @@ -XXX,XX +XXX,XX @@ static uint16_t vfncvtffv16(uint32_t a, float_status *s) RVVCALL(OPFVV1, vfncvt_f_f_w_h, NOP_UU_H, H2, H4, vfncvtffv16) RVVCALL(OPFVV1, vfncvt_f_f_w_w, NOP_UU_W, H4, H8, float64_to_float32) -GEN_VEXT_V_ENV(vfncvt_f_f_w_h, 2, 2) -GEN_VEXT_V_ENV(vfncvt_f_f_w_w, 4, 4) +GEN_VEXT_V_ENV(vfncvt_f_f_w_h) +GEN_VEXT_V_ENV(vfncvt_f_f_w_w) /* *** Vector Reduction Operations -- 2.36.1
From: eopXD <yueh.ting.chen@gmail.com> No functional change intended in this commit. Signed-off-by: eop Chen <eop.chen@sifive.com> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Message-Id: <165449614532.19704.7000832880482980398-2@git.sr.ht> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/vector_helper.c | 35 ++++++++++++++++------------------- 1 file changed, 16 insertions(+), 19 deletions(-) diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -XXX,XX +XXX,XX @@ vext_ldst_stride(void *vd, void *v0, target_ulong base, target_ulong stride, CPURISCVState *env, uint32_t desc, uint32_t vm, vext_ldst_elem_fn *ldst_elem, - uint32_t esz, uintptr_t ra, MMUAccessType access_type) + uint32_t esz, uintptr_t ra) { uint32_t i, k; uint32_t nf = vext_nf(desc); @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void * v0, target_ulong base, \ { \ uint32_t vm = vext_vm(desc); \ vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN, \ - ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD); \ + ctzl(sizeof(ETYPE)), GETPC()); \ } GEN_VEXT_LD_STRIDE(vlse8_v, int8_t, lde_b) @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ { \ uint32_t vm = vext_vm(desc); \ vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN, \ - ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_STORE); \ + ctzl(sizeof(ETYPE)), GETPC()); \ } GEN_VEXT_ST_STRIDE(vsse8_v, int8_t, ste_b) @@ -XXX,XX +XXX,XX @@ GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d) static void vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, vext_ldst_elem_fn *ldst_elem, uint32_t esz, uint32_t evl, - uintptr_t ra, MMUAccessType access_type) + uintptr_t ra) { uint32_t i, k; uint32_t nf = vext_nf(desc); @@ -XXX,XX +XXX,XX @@ void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \ { \ uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \ vext_ldst_stride(vd, v0, base, stride, env, desc, false, LOAD_FN, \ - ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD); \ + ctzl(sizeof(ETYPE)), GETPC()); \ } \ \ void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ CPURISCVState *env, uint32_t desc) \ { \ vext_ldst_us(vd, base, env, desc, LOAD_FN, \ - ctzl(sizeof(ETYPE)), env->vl, GETPC(), MMU_DATA_LOAD); \ + ctzl(sizeof(ETYPE)), env->vl, GETPC()); \ } GEN_VEXT_LD_US(vle8_v, int8_t, lde_b) @@ -XXX,XX +XXX,XX @@ void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \ { \ uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \ vext_ldst_stride(vd, v0, base, stride, env, desc, false, STORE_FN, \ - ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_STORE); \ + ctzl(sizeof(ETYPE)), GETPC()); \ } \ \ void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ CPURISCVState *env, uint32_t desc) \ { \ vext_ldst_us(vd, base, env, desc, STORE_FN, \ - ctzl(sizeof(ETYPE)), env->vl, GETPC(), MMU_DATA_STORE); \ + ctzl(sizeof(ETYPE)), env->vl, GETPC()); \ } GEN_VEXT_ST_US(vse8_v, int8_t, ste_b) @@ -XXX,XX +XXX,XX @@ void HELPER(vlm_v)(void *vd, void *v0, target_ulong base, /* evl = ceil(vl/8) */ uint8_t evl = (env->vl + 7) >> 3; vext_ldst_us(vd, base, env, desc, lde_b, - 0, evl, GETPC(), MMU_DATA_LOAD); + 0, evl, GETPC()); } void HELPER(vsm_v)(void *vd, void *v0, target_ulong base, @@ -XXX,XX +XXX,XX @@ void HELPER(vsm_v)(void *vd, void *v0, target_ulong base, /* evl = ceil(vl/8) */ uint8_t evl = (env->vl + 7) >> 3; vext_ldst_us(vd, base, env, desc, ste_b, - 0, evl, GETPC(), MMU_DATA_STORE); + 0, evl, GETPC()); } /* @@ -XXX,XX +XXX,XX @@ vext_ldst_index(void *vd, void *v0, target_ulong base, void *vs2, CPURISCVState *env, uint32_t desc, vext_get_index_addr get_index_addr, vext_ldst_elem_fn *ldst_elem, - uint32_t esz, uintptr_t ra, MMUAccessType access_type) + uint32_t esz, uintptr_t ra) { uint32_t i, k; uint32_t nf = vext_nf(desc); @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ void *vs2, CPURISCVState *env, uint32_t desc) \ { \ vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \ - LOAD_FN, ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD); \ + LOAD_FN, ctzl(sizeof(ETYPE)), GETPC()); \ } GEN_VEXT_LD_INDEX(vlxei8_8_v, int8_t, idx_b, lde_b) @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ { \ vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \ STORE_FN, ctzl(sizeof(ETYPE)), \ - GETPC(), MMU_DATA_STORE); \ + GETPC()); \ } GEN_VEXT_ST_INDEX(vsxei8_8_v, int8_t, idx_b, ste_b) @@ -XXX,XX +XXX,XX @@ GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d) */ static void vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, - vext_ldst_elem_fn *ldst_elem, uint32_t esz, uintptr_t ra, - MMUAccessType access_type) + vext_ldst_elem_fn *ldst_elem, uint32_t esz, uintptr_t ra) { uint32_t i, k, off, pos; uint32_t nf = vext_nf(desc); @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, target_ulong base, \ CPURISCVState *env, uint32_t desc) \ { \ vext_ldst_whole(vd, base, env, desc, LOAD_FN, \ - ctzl(sizeof(ETYPE)), GETPC(), \ - MMU_DATA_LOAD); \ + ctzl(sizeof(ETYPE)), GETPC()); \ } GEN_VEXT_LD_WHOLE(vl1re8_v, int8_t, lde_b) @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, target_ulong base, \ CPURISCVState *env, uint32_t desc) \ { \ vext_ldst_whole(vd, base, env, desc, STORE_FN, \ - ctzl(sizeof(ETYPE)), GETPC(), \ - MMU_DATA_STORE); \ + ctzl(sizeof(ETYPE)), GETPC()); \ } GEN_VEXT_ST_WHOLE(vs1r_v, int8_t, ste_b) -- 2.36.1
From: eopXD <yueh.ting.chen@gmail.com> No functional change intended in this commit. Signed-off-by: eop Chen <eop.chen@sifive.com> Reviewed-by: Frank Chang <frank.chang@sifive.com> Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Message-Id: <165449614532.19704.7000832880482980398-3@git.sr.ht> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/vector_helper.c | 76 ++++++++++++++++++------------------ 1 file changed, 38 insertions(+), 38 deletions(-) diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -XXX,XX +XXX,XX @@ static inline int32_t vext_lmul(uint32_t desc) /* * Get the maximum number of elements can be operated. * - * esz: log2 of element size in bytes. + * log2_esz: log2 of element size in bytes. */ -static inline uint32_t vext_max_elems(uint32_t desc, uint32_t esz) +static inline uint32_t vext_max_elems(uint32_t desc, uint32_t log2_esz) { /* * As simd_desc support at most 2048 bytes, the max vlen is 1024 bits. @@ -XXX,XX +XXX,XX @@ static inline uint32_t vext_max_elems(uint32_t desc, uint32_t esz) uint32_t vlenb = simd_maxsz(desc); /* Return VLMAX */ - int scale = vext_lmul(desc) - esz; + int scale = vext_lmul(desc) - log2_esz; return scale < 0 ? vlenb >> -scale : vlenb << scale; } @@ -XXX,XX +XXX,XX @@ vext_ldst_stride(void *vd, void *v0, target_ulong base, target_ulong stride, CPURISCVState *env, uint32_t desc, uint32_t vm, vext_ldst_elem_fn *ldst_elem, - uint32_t esz, uintptr_t ra) + uint32_t log2_esz, uintptr_t ra) { uint32_t i, k; uint32_t nf = vext_nf(desc); - uint32_t max_elems = vext_max_elems(desc, esz); + uint32_t max_elems = vext_max_elems(desc, log2_esz); for (i = env->vstart; i < env->vl; i++, env->vstart++) { if (!vm && !vext_elem_mask(v0, i)) { @@ -XXX,XX +XXX,XX @@ vext_ldst_stride(void *vd, void *v0, target_ulong base, k = 0; while (k < nf) { - target_ulong addr = base + stride * i + (k << esz); + target_ulong addr = base + stride * i + (k << log2_esz); ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra); k++; } @@ -XXX,XX +XXX,XX @@ GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d) /* unmasked unit-stride load and store operation*/ static void vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, - vext_ldst_elem_fn *ldst_elem, uint32_t esz, uint32_t evl, + vext_ldst_elem_fn *ldst_elem, uint32_t log2_esz, uint32_t evl, uintptr_t ra) { uint32_t i, k; uint32_t nf = vext_nf(desc); - uint32_t max_elems = vext_max_elems(desc, esz); + uint32_t max_elems = vext_max_elems(desc, log2_esz); /* load bytes from guest memory */ for (i = env->vstart; i < evl; i++, env->vstart++) { k = 0; while (k < nf) { - target_ulong addr = base + ((i * nf + k) << esz); + target_ulong addr = base + ((i * nf + k) << log2_esz); ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra); k++; } @@ -XXX,XX +XXX,XX @@ vext_ldst_index(void *vd, void *v0, target_ulong base, void *vs2, CPURISCVState *env, uint32_t desc, vext_get_index_addr get_index_addr, vext_ldst_elem_fn *ldst_elem, - uint32_t esz, uintptr_t ra) + uint32_t log2_esz, uintptr_t ra) { uint32_t i, k; uint32_t nf = vext_nf(desc); uint32_t vm = vext_vm(desc); - uint32_t max_elems = vext_max_elems(desc, esz); + uint32_t max_elems = vext_max_elems(desc, log2_esz); /* load bytes from guest memory */ for (i = env->vstart; i < env->vl; i++, env->vstart++) { @@ -XXX,XX +XXX,XX @@ vext_ldst_index(void *vd, void *v0, target_ulong base, k = 0; while (k < nf) { - abi_ptr addr = get_index_addr(base, i, vs2) + (k << esz); + abi_ptr addr = get_index_addr(base, i, vs2) + (k << log2_esz); ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra); k++; } @@ -XXX,XX +XXX,XX @@ static inline void vext_ldff(void *vd, void *v0, target_ulong base, CPURISCVState *env, uint32_t desc, vext_ldst_elem_fn *ldst_elem, - uint32_t esz, uintptr_t ra) + uint32_t log2_esz, uintptr_t ra) { void *host; uint32_t i, k, vl = 0; uint32_t nf = vext_nf(desc); uint32_t vm = vext_vm(desc); - uint32_t max_elems = vext_max_elems(desc, esz); + uint32_t max_elems = vext_max_elems(desc, log2_esz); target_ulong addr, offset, remain; /* probe every access*/ @@ -XXX,XX +XXX,XX @@ vext_ldff(void *vd, void *v0, target_ulong base, if (!vm && !vext_elem_mask(v0, i)) { continue; } - addr = adjust_addr(env, base + i * (nf << esz)); + addr = adjust_addr(env, base + i * (nf << log2_esz)); if (i == 0) { - probe_pages(env, addr, nf << esz, ra, MMU_DATA_LOAD); + probe_pages(env, addr, nf << log2_esz, ra, MMU_DATA_LOAD); } else { /* if it triggers an exception, no need to check watchpoint */ - remain = nf << esz; + remain = nf << log2_esz; while (remain > 0) { offset = -(addr | TARGET_PAGE_MASK); host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD, @@ -XXX,XX +XXX,XX @@ ProbeSuccess: continue; } while (k < nf) { - target_ulong addr = base + ((i * nf + k) << esz); + target_ulong addr = base + ((i * nf + k) << log2_esz); ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra); k++; } @@ -XXX,XX +XXX,XX @@ GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d) */ static void vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, - vext_ldst_elem_fn *ldst_elem, uint32_t esz, uintptr_t ra) + vext_ldst_elem_fn *ldst_elem, uint32_t log2_esz, uintptr_t ra) { uint32_t i, k, off, pos; uint32_t nf = vext_nf(desc); uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3; - uint32_t max_elems = vlenb >> esz; + uint32_t max_elems = vlenb >> log2_esz; k = env->vstart / max_elems; off = env->vstart % max_elems; @@ -XXX,XX +XXX,XX @@ vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, if (off) { /* load/store rest of elements of current segment pointed by vstart */ for (pos = off; pos < max_elems; pos++, env->vstart++) { - target_ulong addr = base + ((pos + k * max_elems) << esz); + target_ulong addr = base + ((pos + k * max_elems) << log2_esz); ldst_elem(env, adjust_addr(env, addr), pos + k * max_elems, vd, ra); } k++; @@ -XXX,XX +XXX,XX @@ vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, /* load/store elements for rest of segments */ for (; k < nf; k++) { for (i = 0; i < max_elems; i++, env->vstart++) { - target_ulong addr = base + ((i + k * max_elems) << esz); + target_ulong addr = base + ((i + k * max_elems) << log2_esz); ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra); } } @@ -XXX,XX +XXX,XX @@ GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2) GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4) GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8) -#define GEN_VEXT_VSLIE1UP(ESZ, H) \ -static void vslide1up_##ESZ(void *vd, void *v0, target_ulong s1, void *vs2, \ - CPURISCVState *env, uint32_t desc) \ +#define GEN_VEXT_VSLIE1UP(BITWIDTH, H) \ +static void vslide1up_##BITWIDTH(void *vd, void *v0, target_ulong s1, \ + void *vs2, CPURISCVState *env, uint32_t desc) \ { \ - typedef uint##ESZ##_t ETYPE; \ + typedef uint##BITWIDTH##_t ETYPE; \ uint32_t vm = vext_vm(desc); \ uint32_t vl = env->vl; \ uint32_t i; \ @@ -XXX,XX +XXX,XX @@ GEN_VEXT_VSLIE1UP(16, H2) GEN_VEXT_VSLIE1UP(32, H4) GEN_VEXT_VSLIE1UP(64, H8) -#define GEN_VEXT_VSLIDE1UP_VX(NAME, ESZ) \ +#define GEN_VEXT_VSLIDE1UP_VX(NAME, BITWIDTH) \ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ CPURISCVState *env, uint32_t desc) \ { \ - vslide1up_##ESZ(vd, v0, s1, vs2, env, desc); \ + vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc); \ } /* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */ @@ -XXX,XX +XXX,XX @@ GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, 16) GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, 32) GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, 64) -#define GEN_VEXT_VSLIDE1DOWN(ESZ, H) \ -static void vslide1down_##ESZ(void *vd, void *v0, target_ulong s1, void *vs2, \ - CPURISCVState *env, uint32_t desc) \ +#define GEN_VEXT_VSLIDE1DOWN(BITWIDTH, H) \ +static void vslide1down_##BITWIDTH(void *vd, void *v0, target_ulong s1, \ + void *vs2, CPURISCVState *env, uint32_t desc) \ { \ - typedef uint##ESZ##_t ETYPE; \ + typedef uint##BITWIDTH##_t ETYPE; \ uint32_t vm = vext_vm(desc); \ uint32_t vl = env->vl; \ uint32_t i; \ @@ -XXX,XX +XXX,XX @@ GEN_VEXT_VSLIDE1DOWN(16, H2) GEN_VEXT_VSLIDE1DOWN(32, H4) GEN_VEXT_VSLIDE1DOWN(64, H8) -#define GEN_VEXT_VSLIDE1DOWN_VX(NAME, ESZ) \ +#define GEN_VEXT_VSLIDE1DOWN_VX(NAME, BITWIDTH) \ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ CPURISCVState *env, uint32_t desc) \ { \ - vslide1down_##ESZ(vd, v0, s1, vs2, env, desc); \ + vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc); \ } /* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */ @@ -XXX,XX +XXX,XX @@ GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, 32) GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, 64) /* Vector Floating-Point Slide Instructions */ -#define GEN_VEXT_VFSLIDE1UP_VF(NAME, ESZ) \ +#define GEN_VEXT_VFSLIDE1UP_VF(NAME, BITWIDTH) \ void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ CPURISCVState *env, uint32_t desc) \ { \ - vslide1up_##ESZ(vd, v0, s1, vs2, env, desc); \ + vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc); \ } /* vfslide1up.vf vd, vs2, rs1, vm # vd[0]=f[rs1], vd[i+1] = vs2[i] */ @@ -XXX,XX +XXX,XX @@ GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_h, 16) GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_w, 32) GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_d, 64) -#define GEN_VEXT_VFSLIDE1DOWN_VF(NAME, ESZ) \ +#define GEN_VEXT_VFSLIDE1DOWN_VF(NAME, BITWIDTH) \ void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ CPURISCVState *env, uint32_t desc) \ { \ - vslide1down_##ESZ(vd, v0, s1, vs2, env, desc); \ + vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc); \ } /* vfslide1down.vf vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=f[rs1] */ -- 2.36.1
From: eopXD <yueh.ting.chen@gmail.com> According to v-spec (section 5.4): When vstart ≥ vl, there are no body elements, and no elements are updated in any destination vector register group, including that no tail elements are updated with agnostic values. vmsbf.m, vmsif.m, vmsof.m, viota.m, vcompress instructions themselves require vstart to be zero. So they don't need the early exit. Signed-off-by: eop Chen <eop.chen@sifive.com> Reviewed-by: Frank Chang <frank.chang@sifive.com> Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn> Acked-by: Alistair Francis <alistair.francis@wdc.com> Message-Id: <165449614532.19704.7000832880482980398-4@git.sr.ht> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/insn_trans/trans_rvv.c.inc | 27 +++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn_trans/trans_rvv.c.inc +++ b/target/riscv/insn_trans/trans_rvv.c.inc @@ -XXX,XX +XXX,XX @@ static bool ldst_us_trans(uint32_t vd, uint32_t rs1, uint32_t data, TCGLabel *over = gen_new_label(); tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); dest = tcg_temp_new_ptr(); mask = tcg_temp_new_ptr(); @@ -XXX,XX +XXX,XX @@ static bool ldst_stride_trans(uint32_t vd, uint32_t rs1, uint32_t rs2, TCGLabel *over = gen_new_label(); tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); dest = tcg_temp_new_ptr(); mask = tcg_temp_new_ptr(); @@ -XXX,XX +XXX,XX @@ static bool ldst_index_trans(uint32_t vd, uint32_t rs1, uint32_t vs2, TCGLabel *over = gen_new_label(); tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); dest = tcg_temp_new_ptr(); mask = tcg_temp_new_ptr(); @@ -XXX,XX +XXX,XX @@ static bool ldff_trans(uint32_t vd, uint32_t rs1, uint32_t data, TCGLabel *over = gen_new_label(); tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); dest = tcg_temp_new_ptr(); mask = tcg_temp_new_ptr(); @@ -XXX,XX +XXX,XX @@ do_opivv_gvec(DisasContext *s, arg_rmrr *a, GVecGen3Fn *gvec_fn, } tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); if (a->vm && s->vl_eq_vlmax) { gvec_fn(s->sew, vreg_ofs(s, a->rd), @@ -XXX,XX +XXX,XX @@ static bool opivx_trans(uint32_t vd, uint32_t rs1, uint32_t vs2, uint32_t vm, TCGLabel *over = gen_new_label(); tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); dest = tcg_temp_new_ptr(); mask = tcg_temp_new_ptr(); @@ -XXX,XX +XXX,XX @@ static bool opivi_trans(uint32_t vd, uint32_t imm, uint32_t vs2, uint32_t vm, TCGLabel *over = gen_new_label(); tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); dest = tcg_temp_new_ptr(); mask = tcg_temp_new_ptr(); @@ -XXX,XX +XXX,XX @@ static bool do_opivv_widen(DisasContext *s, arg_rmrr *a, uint32_t data = 0; TCGLabel *over = gen_new_label(); tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); data = FIELD_DP32(data, VDATA, VM, a->vm); data = FIELD_DP32(data, VDATA, LMUL, s->lmul); @@ -XXX,XX +XXX,XX @@ static bool do_opiwv_widen(DisasContext *s, arg_rmrr *a, uint32_t data = 0; TCGLabel *over = gen_new_label(); tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); data = FIELD_DP32(data, VDATA, VM, a->vm); data = FIELD_DP32(data, VDATA, LMUL, s->lmul); @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ }; \ TCGLabel *over = gen_new_label(); \ tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ \ data = FIELD_DP32(data, VDATA, VM, a->vm); \ data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ }; \ TCGLabel *over = gen_new_label(); \ tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ \ data = FIELD_DP32(data, VDATA, VM, a->vm); \ data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ @@ -XXX,XX +XXX,XX @@ static bool trans_vmv_v_v(DisasContext *s, arg_vmv_v_v *a) }; TCGLabel *over = gen_new_label(); tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); tcg_gen_gvec_2_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, a->rs1), cpu_env, s->cfg_ptr->vlen / 8, @@ -XXX,XX +XXX,XX @@ static bool trans_vmv_v_x(DisasContext *s, arg_vmv_v_x *a) TCGv s1; TCGLabel *over = gen_new_label(); tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); s1 = get_gpr(s, a->rs1, EXT_SIGN); @@ -XXX,XX +XXX,XX @@ static bool trans_vmv_v_i(DisasContext *s, arg_vmv_v_i *a) }; TCGLabel *over = gen_new_label(); tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); s1 = tcg_constant_i64(simm); dest = tcg_temp_new_ptr(); @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ TCGLabel *over = gen_new_label(); \ gen_set_rm(s, RISCV_FRM_DYN); \ tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ \ data = FIELD_DP32(data, VDATA, VM, a->vm); \ data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ @@ -XXX,XX +XXX,XX @@ static bool opfvf_trans(uint32_t vd, uint32_t rs1, uint32_t vs2, TCGLabel *over = gen_new_label(); tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); dest = tcg_temp_new_ptr(); mask = tcg_temp_new_ptr(); @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ TCGLabel *over = gen_new_label(); \ gen_set_rm(s, RISCV_FRM_DYN); \ tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);\ \ data = FIELD_DP32(data, VDATA, VM, a->vm); \ data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ TCGLabel *over = gen_new_label(); \ gen_set_rm(s, RISCV_FRM_DYN); \ tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ \ data = FIELD_DP32(data, VDATA, VM, a->vm); \ data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ @@ -XXX,XX +XXX,XX @@ static bool do_opfv(DisasContext *s, arg_rmr *a, TCGLabel *over = gen_new_label(); gen_set_rm(s, rm); tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); data = FIELD_DP32(data, VDATA, VM, a->vm); data = FIELD_DP32(data, VDATA, LMUL, s->lmul); @@ -XXX,XX +XXX,XX @@ static bool trans_vfmv_v_f(DisasContext *s, arg_vfmv_v_f *a) }; TCGLabel *over = gen_new_label(); tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); t1 = tcg_temp_new_i64(); /* NaN-box f[rs1] */ @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ TCGLabel *over = gen_new_label(); \ gen_set_rm(s, FRM); \ tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ \ data = FIELD_DP32(data, VDATA, VM, a->vm); \ data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ TCGLabel *over = gen_new_label(); \ gen_set_rm(s, RISCV_FRM_DYN); \ tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ \ data = FIELD_DP32(data, VDATA, VM, a->vm); \ tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ TCGLabel *over = gen_new_label(); \ gen_set_rm(s, FRM); \ tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ \ data = FIELD_DP32(data, VDATA, VM, a->vm); \ data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ TCGLabel *over = gen_new_label(); \ gen_set_rm(s, FRM); \ tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ \ data = FIELD_DP32(data, VDATA, VM, a->vm); \ tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_r *a) \ gen_helper_gvec_4_ptr *fn = gen_helper_##NAME; \ TCGLabel *over = gen_new_label(); \ tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ \ data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ @@ -XXX,XX +XXX,XX @@ static bool trans_vid_v(DisasContext *s, arg_vid_v *a) uint32_t data = 0; TCGLabel *over = gen_new_label(); tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); data = FIELD_DP32(data, VDATA, VM, a->vm); data = FIELD_DP32(data, VDATA, LMUL, s->lmul); @@ -XXX,XX +XXX,XX @@ static bool int_ext_op(DisasContext *s, arg_rmr *a, uint8_t seq) gen_helper_gvec_3_ptr *fn; TCGLabel *over = gen_new_label(); tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); static gen_helper_gvec_3_ptr * const fns[6][4] = { { -- 2.36.1
From: eopXD <eop.chen@sifive.com> According to v-spec, tail agnostic behavior can be either kept as undisturbed or set elements' bits to all 1s. To distinguish the difference of tail policies, QEMU should be able to simulate the tail agnostic behavior as "set tail elements' bits to all 1s". There are multiple possibility for agnostic elements according to v-spec. The main intent of this patch-set tries to add option that can distinguish between tail policies. Setting agnostic elements to all 1s allows QEMU to express this. This is the first commit regarding the optional tail agnostic behavior. Follow-up commits will add this optional behavior for all rvv instructions. Signed-off-by: eop Chen <eop.chen@sifive.com> Reviewed-by: Frank Chang <frank.chang@sifive.com> Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn> Acked-by: Alistair Francis <alistair.francis@wdc.com> Message-Id: <165449614532.19704.7000832880482980398-5@git.sr.ht> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/cpu.h | 2 + target/riscv/internals.h | 5 +- target/riscv/cpu_helper.c | 2 + target/riscv/translate.c | 2 + target/riscv/vector_helper.c | 296 +++++++++++++----------- target/riscv/insn_trans/trans_rvv.c.inc | 3 +- 6 files changed, 178 insertions(+), 132 deletions(-) diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/cpu.h +++ b/target/riscv/cpu.h @@ -XXX,XX +XXX,XX @@ struct RISCVCPUConfig { bool ext_zve32f; bool ext_zve64f; bool ext_zmmul; + bool rvv_ta_all_1s; uint32_t mvendorid; uint64_t marchid; @@ -XXX,XX +XXX,XX @@ FIELD(TB_FLAGS, XL, 20, 2) /* If PointerMasking should be applied */ FIELD(TB_FLAGS, PM_MASK_ENABLED, 22, 1) FIELD(TB_FLAGS, PM_BASE_ENABLED, 23, 1) +FIELD(TB_FLAGS, VTA, 24, 1) #ifdef TARGET_RISCV32 #define riscv_cpu_mxl(env) ((void)(env), MXL_RV32) diff --git a/target/riscv/internals.h b/target/riscv/internals.h index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/internals.h +++ b/target/riscv/internals.h @@ -XXX,XX +XXX,XX @@ /* share data between vector helpers and decode code */ FIELD(VDATA, VM, 0, 1) FIELD(VDATA, LMUL, 1, 3) -FIELD(VDATA, NF, 4, 4) -FIELD(VDATA, WD, 4, 1) +FIELD(VDATA, VTA, 4, 1) +FIELD(VDATA, NF, 5, 4) +FIELD(VDATA, WD, 5, 1) /* float point classify helpers */ target_ulong fclass_h(uint64_t frs1); diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/cpu_helper.c +++ b/target/riscv/cpu_helper.c @@ -XXX,XX +XXX,XX @@ void cpu_get_tb_cpu_state(CPURISCVState *env, target_ulong *pc, flags = FIELD_DP32(flags, TB_FLAGS, LMUL, FIELD_EX64(env->vtype, VTYPE, VLMUL)); flags = FIELD_DP32(flags, TB_FLAGS, VL_EQ_VLMAX, vl_eq_vlmax); + flags = FIELD_DP32(flags, TB_FLAGS, VTA, + FIELD_EX64(env->vtype, VTYPE, VTA)); } else { flags = FIELD_DP32(flags, TB_FLAGS, VILL, 1); } diff --git a/target/riscv/translate.c b/target/riscv/translate.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/translate.c +++ b/target/riscv/translate.c @@ -XXX,XX +XXX,XX @@ typedef struct DisasContext { */ int8_t lmul; uint8_t sew; + uint8_t vta; target_ulong vstart; bool vl_eq_vlmax; uint8_t ntemp; @@ -XXX,XX +XXX,XX @@ static void riscv_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) ctx->vill = FIELD_EX32(tb_flags, TB_FLAGS, VILL); ctx->sew = FIELD_EX32(tb_flags, TB_FLAGS, SEW); ctx->lmul = sextract32(FIELD_EX32(tb_flags, TB_FLAGS, LMUL), 0, 3); + ctx->vta = FIELD_EX32(tb_flags, TB_FLAGS, VTA) && cpu->cfg.rvv_ta_all_1s; ctx->vstart = env->vstart; ctx->vl_eq_vlmax = FIELD_EX32(tb_flags, TB_FLAGS, VL_EQ_VLMAX); ctx->misa_mxl_max = env->misa_mxl_max; diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -XXX,XX +XXX,XX @@ static inline int32_t vext_lmul(uint32_t desc) return sextract32(FIELD_EX32(simd_data(desc), VDATA, LMUL), 0, 3); } +static inline uint32_t vext_vta(uint32_t desc) +{ + return FIELD_EX32(simd_data(desc), VDATA, VTA); +} + /* * Get the maximum number of elements can be operated. * @@ -XXX,XX +XXX,XX @@ static inline uint32_t vext_max_elems(uint32_t desc, uint32_t log2_esz) return scale < 0 ? vlenb >> -scale : vlenb << scale; } +/* + * Get number of total elements, including prestart, body and tail elements. + * Note that when LMUL < 1, the tail includes the elements past VLMAX that + * are held in the same vector register. + */ +static inline uint32_t vext_get_total_elems(CPURISCVState *env, uint32_t desc, + uint32_t esz) +{ + uint32_t vlenb = simd_maxsz(desc); + uint32_t sew = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW); + int8_t emul = ctzl(esz) - ctzl(sew) + vext_lmul(desc) < 0 ? 0 : + ctzl(esz) - ctzl(sew) + vext_lmul(desc); + return (vlenb << emul) / esz; +} + static inline target_ulong adjust_addr(CPURISCVState *env, target_ulong addr) { return (addr & env->cur_pmmask) | env->cur_pmbase; @@ -XXX,XX +XXX,XX @@ static void probe_pages(CPURISCVState *env, target_ulong addr, } } +/* set agnostic elements to 1s */ +static void vext_set_elems_1s(void *base, uint32_t is_agnostic, uint32_t cnt, + uint32_t tot) +{ + if (is_agnostic == 0) { + /* policy undisturbed */ + return; + } + if (tot - cnt == 0) { + return ; + } + memset(base + cnt, -1, tot - cnt); +} + static inline void vext_set_elem_mask(void *v0, int index, uint8_t value) { @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB) static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2, CPURISCVState *env, uint32_t desc, - opivv2_fn *fn) + opivv2_fn *fn, uint32_t esz) { uint32_t vm = vext_vm(desc); uint32_t vl = env->vl; + uint32_t total_elems = vext_get_total_elems(env, desc, esz); + uint32_t vta = vext_vta(desc); uint32_t i; for (i = env->vstart; i < vl; i++) { @@ -XXX,XX +XXX,XX @@ static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2, fn(vd, vs1, vs2, i); } env->vstart = 0; + /* set tail elements to 1s */ + vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); } /* generate the helpers for OPIVV */ -#define GEN_VEXT_VV(NAME) \ +#define GEN_VEXT_VV(NAME, ESZ) \ void HELPER(NAME)(void *vd, void *v0, void *vs1, \ void *vs2, CPURISCVState *env, \ uint32_t desc) \ { \ do_vext_vv(vd, v0, vs1, vs2, env, desc, \ - do_##NAME); \ + do_##NAME, ESZ); \ } -GEN_VEXT_VV(vadd_vv_b) -GEN_VEXT_VV(vadd_vv_h) -GEN_VEXT_VV(vadd_vv_w) -GEN_VEXT_VV(vadd_vv_d) -GEN_VEXT_VV(vsub_vv_b) -GEN_VEXT_VV(vsub_vv_h) -GEN_VEXT_VV(vsub_vv_w) -GEN_VEXT_VV(vsub_vv_d) +GEN_VEXT_VV(vadd_vv_b, 1) +GEN_VEXT_VV(vadd_vv_h, 2) +GEN_VEXT_VV(vadd_vv_w, 4) +GEN_VEXT_VV(vadd_vv_d, 8) +GEN_VEXT_VV(vsub_vv_b, 1) +GEN_VEXT_VV(vsub_vv_h, 2) +GEN_VEXT_VV(vsub_vv_w, 4) +GEN_VEXT_VV(vsub_vv_d, 8) typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i); @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD) RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB) RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB) RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB) -GEN_VEXT_VV(vwaddu_vv_b) -GEN_VEXT_VV(vwaddu_vv_h) -GEN_VEXT_VV(vwaddu_vv_w) -GEN_VEXT_VV(vwsubu_vv_b) -GEN_VEXT_VV(vwsubu_vv_h) -GEN_VEXT_VV(vwsubu_vv_w) -GEN_VEXT_VV(vwadd_vv_b) -GEN_VEXT_VV(vwadd_vv_h) -GEN_VEXT_VV(vwadd_vv_w) -GEN_VEXT_VV(vwsub_vv_b) -GEN_VEXT_VV(vwsub_vv_h) -GEN_VEXT_VV(vwsub_vv_w) -GEN_VEXT_VV(vwaddu_wv_b) -GEN_VEXT_VV(vwaddu_wv_h) -GEN_VEXT_VV(vwaddu_wv_w) -GEN_VEXT_VV(vwsubu_wv_b) -GEN_VEXT_VV(vwsubu_wv_h) -GEN_VEXT_VV(vwsubu_wv_w) -GEN_VEXT_VV(vwadd_wv_b) -GEN_VEXT_VV(vwadd_wv_h) -GEN_VEXT_VV(vwadd_wv_w) -GEN_VEXT_VV(vwsub_wv_b) -GEN_VEXT_VV(vwsub_wv_h) -GEN_VEXT_VV(vwsub_wv_w) +GEN_VEXT_VV(vwaddu_vv_b, 2) +GEN_VEXT_VV(vwaddu_vv_h, 4) +GEN_VEXT_VV(vwaddu_vv_w, 8) +GEN_VEXT_VV(vwsubu_vv_b, 2) +GEN_VEXT_VV(vwsubu_vv_h, 4) +GEN_VEXT_VV(vwsubu_vv_w, 8) +GEN_VEXT_VV(vwadd_vv_b, 2) +GEN_VEXT_VV(vwadd_vv_h, 4) +GEN_VEXT_VV(vwadd_vv_w, 8) +GEN_VEXT_VV(vwsub_vv_b, 2) +GEN_VEXT_VV(vwsub_vv_h, 4) +GEN_VEXT_VV(vwsub_vv_w, 8) +GEN_VEXT_VV(vwaddu_wv_b, 2) +GEN_VEXT_VV(vwaddu_wv_h, 4) +GEN_VEXT_VV(vwaddu_wv_w, 8) +GEN_VEXT_VV(vwsubu_wv_b, 2) +GEN_VEXT_VV(vwsubu_wv_h, 4) +GEN_VEXT_VV(vwsubu_wv_w, 8) +GEN_VEXT_VV(vwadd_wv_b, 2) +GEN_VEXT_VV(vwadd_wv_h, 4) +GEN_VEXT_VV(vwadd_wv_w, 8) +GEN_VEXT_VV(vwsub_wv_b, 2) +GEN_VEXT_VV(vwsub_wv_h, 4) +GEN_VEXT_VV(vwsub_wv_w, 8) RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD) RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD) @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR) RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR) RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR) RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR) -GEN_VEXT_VV(vand_vv_b) -GEN_VEXT_VV(vand_vv_h) -GEN_VEXT_VV(vand_vv_w) -GEN_VEXT_VV(vand_vv_d) -GEN_VEXT_VV(vor_vv_b) -GEN_VEXT_VV(vor_vv_h) -GEN_VEXT_VV(vor_vv_w) -GEN_VEXT_VV(vor_vv_d) -GEN_VEXT_VV(vxor_vv_b) -GEN_VEXT_VV(vxor_vv_h) -GEN_VEXT_VV(vxor_vv_w) -GEN_VEXT_VV(vxor_vv_d) +GEN_VEXT_VV(vand_vv_b, 1) +GEN_VEXT_VV(vand_vv_h, 2) +GEN_VEXT_VV(vand_vv_w, 4) +GEN_VEXT_VV(vand_vv_d, 8) +GEN_VEXT_VV(vor_vv_b, 1) +GEN_VEXT_VV(vor_vv_h, 2) +GEN_VEXT_VV(vor_vv_w, 4) +GEN_VEXT_VV(vor_vv_d, 8) +GEN_VEXT_VV(vxor_vv_b, 1) +GEN_VEXT_VV(vxor_vv_h, 2) +GEN_VEXT_VV(vxor_vv_w, 4) +GEN_VEXT_VV(vxor_vv_d, 8) RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND) RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND) @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX) RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX) RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX) RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX) -GEN_VEXT_VV(vminu_vv_b) -GEN_VEXT_VV(vminu_vv_h) -GEN_VEXT_VV(vminu_vv_w) -GEN_VEXT_VV(vminu_vv_d) -GEN_VEXT_VV(vmin_vv_b) -GEN_VEXT_VV(vmin_vv_h) -GEN_VEXT_VV(vmin_vv_w) -GEN_VEXT_VV(vmin_vv_d) -GEN_VEXT_VV(vmaxu_vv_b) -GEN_VEXT_VV(vmaxu_vv_h) -GEN_VEXT_VV(vmaxu_vv_w) -GEN_VEXT_VV(vmaxu_vv_d) -GEN_VEXT_VV(vmax_vv_b) -GEN_VEXT_VV(vmax_vv_h) -GEN_VEXT_VV(vmax_vv_w) -GEN_VEXT_VV(vmax_vv_d) +GEN_VEXT_VV(vminu_vv_b, 1) +GEN_VEXT_VV(vminu_vv_h, 2) +GEN_VEXT_VV(vminu_vv_w, 4) +GEN_VEXT_VV(vminu_vv_d, 8) +GEN_VEXT_VV(vmin_vv_b, 1) +GEN_VEXT_VV(vmin_vv_h, 2) +GEN_VEXT_VV(vmin_vv_w, 4) +GEN_VEXT_VV(vmin_vv_d, 8) +GEN_VEXT_VV(vmaxu_vv_b, 1) +GEN_VEXT_VV(vmaxu_vv_h, 2) +GEN_VEXT_VV(vmaxu_vv_w, 4) +GEN_VEXT_VV(vmaxu_vv_d, 8) +GEN_VEXT_VV(vmax_vv_b, 1) +GEN_VEXT_VV(vmax_vv_h, 2) +GEN_VEXT_VV(vmax_vv_w, 4) +GEN_VEXT_VV(vmax_vv_d, 8) RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN) RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN) @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL) RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL) RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL) RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL) -GEN_VEXT_VV(vmul_vv_b) -GEN_VEXT_VV(vmul_vv_h) -GEN_VEXT_VV(vmul_vv_w) -GEN_VEXT_VV(vmul_vv_d) +GEN_VEXT_VV(vmul_vv_b, 1) +GEN_VEXT_VV(vmul_vv_h, 2) +GEN_VEXT_VV(vmul_vv_w, 4) +GEN_VEXT_VV(vmul_vv_d, 8) static int8_t do_mulh_b(int8_t s2, int8_t s1) { @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b) RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h) RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w) RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d) -GEN_VEXT_VV(vmulh_vv_b) -GEN_VEXT_VV(vmulh_vv_h) -GEN_VEXT_VV(vmulh_vv_w) -GEN_VEXT_VV(vmulh_vv_d) -GEN_VEXT_VV(vmulhu_vv_b) -GEN_VEXT_VV(vmulhu_vv_h) -GEN_VEXT_VV(vmulhu_vv_w) -GEN_VEXT_VV(vmulhu_vv_d) -GEN_VEXT_VV(vmulhsu_vv_b) -GEN_VEXT_VV(vmulhsu_vv_h) -GEN_VEXT_VV(vmulhsu_vv_w) -GEN_VEXT_VV(vmulhsu_vv_d) +GEN_VEXT_VV(vmulh_vv_b, 1) +GEN_VEXT_VV(vmulh_vv_h, 2) +GEN_VEXT_VV(vmulh_vv_w, 4) +GEN_VEXT_VV(vmulh_vv_d, 8) +GEN_VEXT_VV(vmulhu_vv_b, 1) +GEN_VEXT_VV(vmulhu_vv_h, 2) +GEN_VEXT_VV(vmulhu_vv_w, 4) +GEN_VEXT_VV(vmulhu_vv_d, 8) +GEN_VEXT_VV(vmulhsu_vv_b, 1) +GEN_VEXT_VV(vmulhsu_vv_h, 2) +GEN_VEXT_VV(vmulhsu_vv_w, 4) +GEN_VEXT_VV(vmulhsu_vv_d, 8) RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL) RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL) @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM) RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM) RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM) RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM) -GEN_VEXT_VV(vdivu_vv_b) -GEN_VEXT_VV(vdivu_vv_h) -GEN_VEXT_VV(vdivu_vv_w) -GEN_VEXT_VV(vdivu_vv_d) -GEN_VEXT_VV(vdiv_vv_b) -GEN_VEXT_VV(vdiv_vv_h) -GEN_VEXT_VV(vdiv_vv_w) -GEN_VEXT_VV(vdiv_vv_d) -GEN_VEXT_VV(vremu_vv_b) -GEN_VEXT_VV(vremu_vv_h) -GEN_VEXT_VV(vremu_vv_w) -GEN_VEXT_VV(vremu_vv_d) -GEN_VEXT_VV(vrem_vv_b) -GEN_VEXT_VV(vrem_vv_h) -GEN_VEXT_VV(vrem_vv_w) -GEN_VEXT_VV(vrem_vv_d) +GEN_VEXT_VV(vdivu_vv_b, 1) +GEN_VEXT_VV(vdivu_vv_h, 2) +GEN_VEXT_VV(vdivu_vv_w, 4) +GEN_VEXT_VV(vdivu_vv_d, 8) +GEN_VEXT_VV(vdiv_vv_b, 1) +GEN_VEXT_VV(vdiv_vv_h, 2) +GEN_VEXT_VV(vdiv_vv_w, 4) +GEN_VEXT_VV(vdiv_vv_d, 8) +GEN_VEXT_VV(vremu_vv_b, 1) +GEN_VEXT_VV(vremu_vv_h, 2) +GEN_VEXT_VV(vremu_vv_w, 4) +GEN_VEXT_VV(vremu_vv_d, 8) +GEN_VEXT_VV(vrem_vv_b, 1) +GEN_VEXT_VV(vrem_vv_h, 2) +GEN_VEXT_VV(vrem_vv_w, 4) +GEN_VEXT_VV(vrem_vv_d, 8) RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU) RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU) @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL) RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL) RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL) RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL) -GEN_VEXT_VV(vwmul_vv_b) -GEN_VEXT_VV(vwmul_vv_h) -GEN_VEXT_VV(vwmul_vv_w) -GEN_VEXT_VV(vwmulu_vv_b) -GEN_VEXT_VV(vwmulu_vv_h) -GEN_VEXT_VV(vwmulu_vv_w) -GEN_VEXT_VV(vwmulsu_vv_b) -GEN_VEXT_VV(vwmulsu_vv_h) -GEN_VEXT_VV(vwmulsu_vv_w) +GEN_VEXT_VV(vwmul_vv_b, 2) +GEN_VEXT_VV(vwmul_vv_h, 4) +GEN_VEXT_VV(vwmul_vv_w, 8) +GEN_VEXT_VV(vwmulu_vv_b, 2) +GEN_VEXT_VV(vwmulu_vv_h, 4) +GEN_VEXT_VV(vwmulu_vv_w, 8) +GEN_VEXT_VV(vwmulsu_vv_b, 2) +GEN_VEXT_VV(vwmulsu_vv_h, 4) +GEN_VEXT_VV(vwmulsu_vv_w, 8) RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL) RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL) @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB) RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB) RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB) RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB) -GEN_VEXT_VV(vmacc_vv_b) -GEN_VEXT_VV(vmacc_vv_h) -GEN_VEXT_VV(vmacc_vv_w) -GEN_VEXT_VV(vmacc_vv_d) -GEN_VEXT_VV(vnmsac_vv_b) -GEN_VEXT_VV(vnmsac_vv_h) -GEN_VEXT_VV(vnmsac_vv_w) -GEN_VEXT_VV(vnmsac_vv_d) -GEN_VEXT_VV(vmadd_vv_b) -GEN_VEXT_VV(vmadd_vv_h) -GEN_VEXT_VV(vmadd_vv_w) -GEN_VEXT_VV(vmadd_vv_d) -GEN_VEXT_VV(vnmsub_vv_b) -GEN_VEXT_VV(vnmsub_vv_h) -GEN_VEXT_VV(vnmsub_vv_w) -GEN_VEXT_VV(vnmsub_vv_d) +GEN_VEXT_VV(vmacc_vv_b, 1) +GEN_VEXT_VV(vmacc_vv_h, 2) +GEN_VEXT_VV(vmacc_vv_w, 4) +GEN_VEXT_VV(vmacc_vv_d, 8) +GEN_VEXT_VV(vnmsac_vv_b, 1) +GEN_VEXT_VV(vnmsac_vv_h, 2) +GEN_VEXT_VV(vnmsac_vv_w, 4) +GEN_VEXT_VV(vnmsac_vv_d, 8) +GEN_VEXT_VV(vmadd_vv_b, 1) +GEN_VEXT_VV(vmadd_vv_h, 2) +GEN_VEXT_VV(vmadd_vv_w, 4) +GEN_VEXT_VV(vmadd_vv_d, 8) +GEN_VEXT_VV(vnmsub_vv_b, 1) +GEN_VEXT_VV(vnmsub_vv_h, 2) +GEN_VEXT_VV(vnmsub_vv_w, 4) +GEN_VEXT_VV(vnmsub_vv_d, 8) #define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \ @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC) RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC) RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC) RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC) -GEN_VEXT_VV(vwmaccu_vv_b) -GEN_VEXT_VV(vwmaccu_vv_h) -GEN_VEXT_VV(vwmaccu_vv_w) -GEN_VEXT_VV(vwmacc_vv_b) -GEN_VEXT_VV(vwmacc_vv_h) -GEN_VEXT_VV(vwmacc_vv_w) -GEN_VEXT_VV(vwmaccsu_vv_b) -GEN_VEXT_VV(vwmaccsu_vv_h) -GEN_VEXT_VV(vwmaccsu_vv_w) +GEN_VEXT_VV(vwmaccu_vv_b, 2) +GEN_VEXT_VV(vwmaccu_vv_h, 4) +GEN_VEXT_VV(vwmaccu_vv_w, 8) +GEN_VEXT_VV(vwmacc_vv_b, 2) +GEN_VEXT_VV(vwmacc_vv_h, 4) +GEN_VEXT_VV(vwmacc_vv_w, 8) +GEN_VEXT_VV(vwmaccsu_vv_b, 2) +GEN_VEXT_VV(vwmaccsu_vv_h, 4) +GEN_VEXT_VV(vwmaccsu_vv_w, 8) RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC) RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC) diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn_trans/trans_rvv.c.inc +++ b/target/riscv/insn_trans/trans_rvv.c.inc @@ -XXX,XX +XXX,XX @@ do_opivv_gvec(DisasContext *s, arg_rmrr *a, GVecGen3Fn *gvec_fn, tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); - if (a->vm && s->vl_eq_vlmax) { + if (a->vm && s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) { gvec_fn(s->sew, vreg_ofs(s, a->rd), vreg_ofs(s, a->rs2), vreg_ofs(s, a->rs1), MAXSZ(s), MAXSZ(s)); @@ -XXX,XX +XXX,XX @@ do_opivv_gvec(DisasContext *s, arg_rmrr *a, GVecGen3Fn *gvec_fn, data = FIELD_DP32(data, VDATA, VM, a->vm); data = FIELD_DP32(data, VDATA, LMUL, s->lmul); + data = FIELD_DP32(data, VDATA, VTA, s->vta); tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), vreg_ofs(s, a->rs1), vreg_ofs(s, a->rs2), cpu_env, s->cfg_ptr->vlen / 8, -- 2.36.1
From: eopXD <yueh.ting.chen@gmail.com> Destination register of unit-stride mask load and store instructions are always written with a tail-agnostic policy. A vector segment load / store instruction may contain fractional lmul with nf * lmul > 1. The rest of the elements in the last register should be treated as tail elements. Signed-off-by: eop Chen <eop.chen@sifive.com> Reviewed-by: Frank Chang <frank.chang@sifive.com> Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn> Acked-by: Alistair Francis <alistair.francis@wdc.com> Message-Id: <165449614532.19704.7000832880482980398-6@git.sr.ht> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/translate.c | 2 + target/riscv/vector_helper.c | 60 +++++++++++++++++++++++++ target/riscv/insn_trans/trans_rvv.c.inc | 6 +++ 3 files changed, 68 insertions(+) diff --git a/target/riscv/translate.c b/target/riscv/translate.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/translate.c +++ b/target/riscv/translate.c @@ -XXX,XX +XXX,XX @@ typedef struct DisasContext { int8_t lmul; uint8_t sew; uint8_t vta; + bool cfg_vta_all_1s; target_ulong vstart; bool vl_eq_vlmax; uint8_t ntemp; @@ -XXX,XX +XXX,XX @@ static void riscv_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) ctx->sew = FIELD_EX32(tb_flags, TB_FLAGS, SEW); ctx->lmul = sextract32(FIELD_EX32(tb_flags, TB_FLAGS, LMUL), 0, 3); ctx->vta = FIELD_EX32(tb_flags, TB_FLAGS, VTA) && cpu->cfg.rvv_ta_all_1s; + ctx->cfg_vta_all_1s = cpu->cfg.rvv_ta_all_1s; ctx->vstart = env->vstart; ctx->vl_eq_vlmax = FIELD_EX32(tb_flags, TB_FLAGS, VL_EQ_VLMAX); ctx->misa_mxl_max = env->misa_mxl_max; diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -XXX,XX +XXX,XX @@ vext_ldst_stride(void *vd, void *v0, target_ulong base, uint32_t i, k; uint32_t nf = vext_nf(desc); uint32_t max_elems = vext_max_elems(desc, log2_esz); + uint32_t esz = 1 << log2_esz; + uint32_t total_elems = vext_get_total_elems(env, desc, esz); + uint32_t vta = vext_vta(desc); for (i = env->vstart; i < env->vl; i++, env->vstart++) { if (!vm && !vext_elem_mask(v0, i)) { @@ -XXX,XX +XXX,XX @@ vext_ldst_stride(void *vd, void *v0, target_ulong base, } } env->vstart = 0; + /* set tail elements to 1s */ + for (k = 0; k < nf; ++k) { + vext_set_elems_1s(vd, vta, (k * max_elems + env->vl) * esz, + (k * max_elems + max_elems) * esz); + } + if (nf * max_elems % total_elems != 0) { + uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3; + uint32_t registers_used = + ((nf * max_elems) * esz + (vlenb - 1)) / vlenb; + vext_set_elems_1s(vd, vta, (nf * max_elems) * esz, + registers_used * vlenb); + } } #define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN) \ @@ -XXX,XX +XXX,XX @@ vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, uint32_t i, k; uint32_t nf = vext_nf(desc); uint32_t max_elems = vext_max_elems(desc, log2_esz); + uint32_t esz = 1 << log2_esz; + uint32_t total_elems = vext_get_total_elems(env, desc, esz); + uint32_t vta = vext_vta(desc); /* load bytes from guest memory */ for (i = env->vstart; i < evl; i++, env->vstart++) { @@ -XXX,XX +XXX,XX @@ vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, } } env->vstart = 0; + /* set tail elements to 1s */ + for (k = 0; k < nf; ++k) { + vext_set_elems_1s(vd, vta, (k * max_elems + evl) * esz, + (k * max_elems + max_elems) * esz); + } + if (nf * max_elems % total_elems != 0) { + uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3; + uint32_t registers_used = + ((nf * max_elems) * esz + (vlenb - 1)) / vlenb; + vext_set_elems_1s(vd, vta, (nf * max_elems) * esz, + registers_used * vlenb); + } } /* @@ -XXX,XX +XXX,XX @@ vext_ldst_index(void *vd, void *v0, target_ulong base, uint32_t nf = vext_nf(desc); uint32_t vm = vext_vm(desc); uint32_t max_elems = vext_max_elems(desc, log2_esz); + uint32_t esz = 1 << log2_esz; + uint32_t total_elems = vext_get_total_elems(env, desc, esz); + uint32_t vta = vext_vta(desc); /* load bytes from guest memory */ for (i = env->vstart; i < env->vl; i++, env->vstart++) { @@ -XXX,XX +XXX,XX @@ vext_ldst_index(void *vd, void *v0, target_ulong base, } } env->vstart = 0; + /* set tail elements to 1s */ + for (k = 0; k < nf; ++k) { + vext_set_elems_1s(vd, vta, (k * max_elems + env->vl) * esz, + (k * max_elems + max_elems) * esz); + } + if (nf * max_elems % total_elems != 0) { + uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3; + uint32_t registers_used = + ((nf * max_elems) * esz + (vlenb - 1)) / vlenb; + vext_set_elems_1s(vd, vta, (nf * max_elems) * esz, + registers_used * vlenb); + } } #define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN) \ @@ -XXX,XX +XXX,XX @@ vext_ldff(void *vd, void *v0, target_ulong base, uint32_t nf = vext_nf(desc); uint32_t vm = vext_vm(desc); uint32_t max_elems = vext_max_elems(desc, log2_esz); + uint32_t esz = 1 << log2_esz; + uint32_t total_elems = vext_get_total_elems(env, desc, esz); + uint32_t vta = vext_vta(desc); target_ulong addr, offset, remain; /* probe every access*/ @@ -XXX,XX +XXX,XX @@ ProbeSuccess: } } env->vstart = 0; + /* set tail elements to 1s */ + for (k = 0; k < nf; ++k) { + vext_set_elems_1s(vd, vta, (k * max_elems + env->vl) * esz, + (k * max_elems + max_elems) * esz); + } + if (nf * max_elems % total_elems != 0) { + uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3; + uint32_t registers_used = + ((nf * max_elems) * esz + (vlenb - 1)) / vlenb; + vext_set_elems_1s(vd, vta, (nf * max_elems) * esz, + registers_used * vlenb); + } } #define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN) \ diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn_trans/trans_rvv.c.inc +++ b/target/riscv/insn_trans/trans_rvv.c.inc @@ -XXX,XX +XXX,XX @@ static bool ld_us_op(DisasContext *s, arg_r2nfvm *a, uint8_t eew) data = FIELD_DP32(data, VDATA, VM, a->vm); data = FIELD_DP32(data, VDATA, LMUL, emul); data = FIELD_DP32(data, VDATA, NF, a->nf); + data = FIELD_DP32(data, VDATA, VTA, s->vta); return ldst_us_trans(a->rd, a->rs1, data, fn, s, false); } @@ -XXX,XX +XXX,XX @@ static bool ld_us_mask_op(DisasContext *s, arg_vlm_v *a, uint8_t eew) /* EMUL = 1, NFIELDS = 1 */ data = FIELD_DP32(data, VDATA, LMUL, 0); data = FIELD_DP32(data, VDATA, NF, 1); + /* Mask destination register are always tail-agnostic */ + data = FIELD_DP32(data, VDATA, VTA, s->cfg_vta_all_1s); return ldst_us_trans(a->rd, a->rs1, data, fn, s, false); } @@ -XXX,XX +XXX,XX @@ static bool ld_stride_op(DisasContext *s, arg_rnfvm *a, uint8_t eew) data = FIELD_DP32(data, VDATA, VM, a->vm); data = FIELD_DP32(data, VDATA, LMUL, emul); data = FIELD_DP32(data, VDATA, NF, a->nf); + data = FIELD_DP32(data, VDATA, VTA, s->vta); return ldst_stride_trans(a->rd, a->rs1, a->rs2, data, fn, s, false); } @@ -XXX,XX +XXX,XX @@ static bool ld_index_op(DisasContext *s, arg_rnfvm *a, uint8_t eew) data = FIELD_DP32(data, VDATA, VM, a->vm); data = FIELD_DP32(data, VDATA, LMUL, emul); data = FIELD_DP32(data, VDATA, NF, a->nf); + data = FIELD_DP32(data, VDATA, VTA, s->vta); return ldst_index_trans(a->rd, a->rs1, a->rs2, data, fn, s, false); } @@ -XXX,XX +XXX,XX @@ static bool ldff_op(DisasContext *s, arg_r2nfvm *a, uint8_t eew) data = FIELD_DP32(data, VDATA, VM, a->vm); data = FIELD_DP32(data, VDATA, LMUL, emul); data = FIELD_DP32(data, VDATA, NF, a->nf); + data = FIELD_DP32(data, VDATA, VTA, s->vta); return ldff_trans(a->rd, a->rs1, data, fn, s); } -- 2.36.1
From: eopXD <yueh.ting.chen@gmail.com> `vmadc` and `vmsbc` produces a mask value, they always operate with a tail agnostic policy. Signed-off-by: eop Chen <eop.chen@sifive.com> Reviewed-by: Frank Chang <frank.chang@sifive.com> Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn> Acked-by: Alistair Francis <alistair.francis@wdc.com> Message-Id: <165449614532.19704.7000832880482980398-7@git.sr.ht> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/internals.h | 5 +- target/riscv/vector_helper.c | 314 +++++++++++++----------- target/riscv/insn_trans/trans_rvv.c.inc | 13 +- 3 files changed, 190 insertions(+), 142 deletions(-) diff --git a/target/riscv/internals.h b/target/riscv/internals.h index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/internals.h +++ b/target/riscv/internals.h @@ -XXX,XX +XXX,XX @@ FIELD(VDATA, VM, 0, 1) FIELD(VDATA, LMUL, 1, 3) FIELD(VDATA, VTA, 4, 1) -FIELD(VDATA, NF, 5, 4) -FIELD(VDATA, WD, 5, 1) +FIELD(VDATA, VTA_ALL_1S, 5, 1) +FIELD(VDATA, NF, 6, 4) +FIELD(VDATA, WD, 6, 1) /* float point classify helpers */ target_ulong fclass_h(uint64_t frs1); diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -XXX,XX +XXX,XX @@ static inline uint32_t vext_vta(uint32_t desc) return FIELD_EX32(simd_data(desc), VDATA, VTA); } +static inline uint32_t vext_vta_all_1s(uint32_t desc) +{ + return FIELD_EX32(simd_data(desc), VDATA, VTA_ALL_1S); +} + /* * Get the maximum number of elements can be operated. * @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB) static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2, CPURISCVState *env, uint32_t desc, - opivx2_fn fn) + opivx2_fn fn, uint32_t esz) { uint32_t vm = vext_vm(desc); uint32_t vl = env->vl; + uint32_t total_elems = vext_get_total_elems(env, desc, esz); + uint32_t vta = vext_vta(desc); uint32_t i; for (i = env->vstart; i < vl; i++) { @@ -XXX,XX +XXX,XX @@ static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2, fn(vd, s1, vs2, i); } env->vstart = 0; + /* set tail elements to 1s */ + vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); } /* generate the helpers for OPIVX */ -#define GEN_VEXT_VX(NAME) \ +#define GEN_VEXT_VX(NAME, ESZ) \ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ void *vs2, CPURISCVState *env, \ uint32_t desc) \ { \ do_vext_vx(vd, v0, s1, vs2, env, desc, \ - do_##NAME); \ -} - -GEN_VEXT_VX(vadd_vx_b) -GEN_VEXT_VX(vadd_vx_h) -GEN_VEXT_VX(vadd_vx_w) -GEN_VEXT_VX(vadd_vx_d) -GEN_VEXT_VX(vsub_vx_b) -GEN_VEXT_VX(vsub_vx_h) -GEN_VEXT_VX(vsub_vx_w) -GEN_VEXT_VX(vsub_vx_d) -GEN_VEXT_VX(vrsub_vx_b) -GEN_VEXT_VX(vrsub_vx_h) -GEN_VEXT_VX(vrsub_vx_w) -GEN_VEXT_VX(vrsub_vx_d) + do_##NAME, ESZ); \ +} + +GEN_VEXT_VX(vadd_vx_b, 1) +GEN_VEXT_VX(vadd_vx_h, 2) +GEN_VEXT_VX(vadd_vx_w, 4) +GEN_VEXT_VX(vadd_vx_d, 8) +GEN_VEXT_VX(vsub_vx_b, 1) +GEN_VEXT_VX(vsub_vx_h, 2) +GEN_VEXT_VX(vsub_vx_w, 4) +GEN_VEXT_VX(vsub_vx_d, 8) +GEN_VEXT_VX(vrsub_vx_b, 1) +GEN_VEXT_VX(vrsub_vx_h, 2) +GEN_VEXT_VX(vrsub_vx_w, 4) +GEN_VEXT_VX(vrsub_vx_d, 8) void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc) { @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD) RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB) RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB) RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB) -GEN_VEXT_VX(vwaddu_vx_b) -GEN_VEXT_VX(vwaddu_vx_h) -GEN_VEXT_VX(vwaddu_vx_w) -GEN_VEXT_VX(vwsubu_vx_b) -GEN_VEXT_VX(vwsubu_vx_h) -GEN_VEXT_VX(vwsubu_vx_w) -GEN_VEXT_VX(vwadd_vx_b) -GEN_VEXT_VX(vwadd_vx_h) -GEN_VEXT_VX(vwadd_vx_w) -GEN_VEXT_VX(vwsub_vx_b) -GEN_VEXT_VX(vwsub_vx_h) -GEN_VEXT_VX(vwsub_vx_w) -GEN_VEXT_VX(vwaddu_wx_b) -GEN_VEXT_VX(vwaddu_wx_h) -GEN_VEXT_VX(vwaddu_wx_w) -GEN_VEXT_VX(vwsubu_wx_b) -GEN_VEXT_VX(vwsubu_wx_h) -GEN_VEXT_VX(vwsubu_wx_w) -GEN_VEXT_VX(vwadd_wx_b) -GEN_VEXT_VX(vwadd_wx_h) -GEN_VEXT_VX(vwadd_wx_w) -GEN_VEXT_VX(vwsub_wx_b) -GEN_VEXT_VX(vwsub_wx_h) -GEN_VEXT_VX(vwsub_wx_w) +GEN_VEXT_VX(vwaddu_vx_b, 2) +GEN_VEXT_VX(vwaddu_vx_h, 4) +GEN_VEXT_VX(vwaddu_vx_w, 8) +GEN_VEXT_VX(vwsubu_vx_b, 2) +GEN_VEXT_VX(vwsubu_vx_h, 4) +GEN_VEXT_VX(vwsubu_vx_w, 8) +GEN_VEXT_VX(vwadd_vx_b, 2) +GEN_VEXT_VX(vwadd_vx_h, 4) +GEN_VEXT_VX(vwadd_vx_w, 8) +GEN_VEXT_VX(vwsub_vx_b, 2) +GEN_VEXT_VX(vwsub_vx_h, 4) +GEN_VEXT_VX(vwsub_vx_w, 8) +GEN_VEXT_VX(vwaddu_wx_b, 2) +GEN_VEXT_VX(vwaddu_wx_h, 4) +GEN_VEXT_VX(vwaddu_wx_w, 8) +GEN_VEXT_VX(vwsubu_wx_b, 2) +GEN_VEXT_VX(vwsubu_wx_h, 4) +GEN_VEXT_VX(vwsubu_wx_w, 8) +GEN_VEXT_VX(vwadd_wx_b, 2) +GEN_VEXT_VX(vwadd_wx_h, 4) +GEN_VEXT_VX(vwadd_wx_w, 8) +GEN_VEXT_VX(vwsub_wx_b, 2) +GEN_VEXT_VX(vwsub_wx_h, 4) +GEN_VEXT_VX(vwsub_wx_w, 8) /* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */ #define DO_VADC(N, M, C) (N + M + C) @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ CPURISCVState *env, uint32_t desc) \ { \ uint32_t vl = env->vl; \ + uint32_t esz = sizeof(ETYPE); \ + uint32_t total_elems = \ + vext_get_total_elems(env, desc, esz); \ + uint32_t vta = vext_vta(desc); \ uint32_t i; \ \ for (i = env->vstart; i < vl; i++) { \ @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry); \ } \ env->vstart = 0; \ + /* set tail elements to 1s */ \ + vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ } GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t, H1, DO_VADC) @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ CPURISCVState *env, uint32_t desc) \ { \ uint32_t vl = env->vl; \ + uint32_t esz = sizeof(ETYPE); \ + uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ + uint32_t vta = vext_vta(desc); \ uint32_t i; \ \ for (i = env->vstart; i < vl; i++) { \ @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry);\ } \ env->vstart = 0; \ + /* set tail elements to 1s */ \ + vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ } GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t, H1, DO_VADC) @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ { \ uint32_t vl = env->vl; \ uint32_t vm = vext_vm(desc); \ + uint32_t total_elems = env_archcpu(env)->cfg.vlen; \ + uint32_t vta_all_1s = vext_vta_all_1s(desc); \ uint32_t i; \ \ for (i = env->vstart; i < vl; i++) { \ @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry)); \ } \ env->vstart = 0; \ + /* mask destination register are always tail-agnostic */ \ + /* set tail elements to 1s */ \ + if (vta_all_1s) { \ + for (; i < total_elems; i++) { \ + vext_set_elem_mask(vd, i, 1); \ + } \ + } \ } GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t, H1, DO_MADC) @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ { \ uint32_t vl = env->vl; \ uint32_t vm = vext_vm(desc); \ + uint32_t total_elems = env_archcpu(env)->cfg.vlen; \ + uint32_t vta_all_1s = vext_vta_all_1s(desc); \ uint32_t i; \ \ for (i = env->vstart; i < vl; i++) { \ @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ DO_OP(s2, (ETYPE)(target_long)s1, carry)); \ } \ env->vstart = 0; \ + /* mask destination register are always tail-agnostic */ \ + /* set tail elements to 1s */ \ + if (vta_all_1s) { \ + for (; i < total_elems; i++) { \ + vext_set_elem_mask(vd, i, 1); \ + } \ + } \ } GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t, H1, DO_MADC) @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR) RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR) RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR) RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR) -GEN_VEXT_VX(vand_vx_b) -GEN_VEXT_VX(vand_vx_h) -GEN_VEXT_VX(vand_vx_w) -GEN_VEXT_VX(vand_vx_d) -GEN_VEXT_VX(vor_vx_b) -GEN_VEXT_VX(vor_vx_h) -GEN_VEXT_VX(vor_vx_w) -GEN_VEXT_VX(vor_vx_d) -GEN_VEXT_VX(vxor_vx_b) -GEN_VEXT_VX(vxor_vx_h) -GEN_VEXT_VX(vxor_vx_w) -GEN_VEXT_VX(vxor_vx_d) +GEN_VEXT_VX(vand_vx_b, 1) +GEN_VEXT_VX(vand_vx_h, 2) +GEN_VEXT_VX(vand_vx_w, 4) +GEN_VEXT_VX(vand_vx_d, 8) +GEN_VEXT_VX(vor_vx_b, 1) +GEN_VEXT_VX(vor_vx_h, 2) +GEN_VEXT_VX(vor_vx_w, 4) +GEN_VEXT_VX(vor_vx_d, 8) +GEN_VEXT_VX(vxor_vx_b, 1) +GEN_VEXT_VX(vxor_vx_h, 2) +GEN_VEXT_VX(vxor_vx_w, 4) +GEN_VEXT_VX(vxor_vx_d, 8) /* Vector Single-Width Bit Shift Instructions */ #define DO_SLL(N, M) (N << (M)) @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX) RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX) RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX) RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX) -GEN_VEXT_VX(vminu_vx_b) -GEN_VEXT_VX(vminu_vx_h) -GEN_VEXT_VX(vminu_vx_w) -GEN_VEXT_VX(vminu_vx_d) -GEN_VEXT_VX(vmin_vx_b) -GEN_VEXT_VX(vmin_vx_h) -GEN_VEXT_VX(vmin_vx_w) -GEN_VEXT_VX(vmin_vx_d) -GEN_VEXT_VX(vmaxu_vx_b) -GEN_VEXT_VX(vmaxu_vx_h) -GEN_VEXT_VX(vmaxu_vx_w) -GEN_VEXT_VX(vmaxu_vx_d) -GEN_VEXT_VX(vmax_vx_b) -GEN_VEXT_VX(vmax_vx_h) -GEN_VEXT_VX(vmax_vx_w) -GEN_VEXT_VX(vmax_vx_d) +GEN_VEXT_VX(vminu_vx_b, 1) +GEN_VEXT_VX(vminu_vx_h, 2) +GEN_VEXT_VX(vminu_vx_w, 4) +GEN_VEXT_VX(vminu_vx_d, 8) +GEN_VEXT_VX(vmin_vx_b, 1) +GEN_VEXT_VX(vmin_vx_h, 2) +GEN_VEXT_VX(vmin_vx_w, 4) +GEN_VEXT_VX(vmin_vx_d, 8) +GEN_VEXT_VX(vmaxu_vx_b, 1) +GEN_VEXT_VX(vmaxu_vx_h, 2) +GEN_VEXT_VX(vmaxu_vx_w, 4) +GEN_VEXT_VX(vmaxu_vx_d, 8) +GEN_VEXT_VX(vmax_vx_b, 1) +GEN_VEXT_VX(vmax_vx_h, 2) +GEN_VEXT_VX(vmax_vx_w, 4) +GEN_VEXT_VX(vmax_vx_d, 8) /* Vector Single-Width Integer Multiply Instructions */ #define DO_MUL(N, M) (N * M) @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b) RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h) RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w) RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d) -GEN_VEXT_VX(vmul_vx_b) -GEN_VEXT_VX(vmul_vx_h) -GEN_VEXT_VX(vmul_vx_w) -GEN_VEXT_VX(vmul_vx_d) -GEN_VEXT_VX(vmulh_vx_b) -GEN_VEXT_VX(vmulh_vx_h) -GEN_VEXT_VX(vmulh_vx_w) -GEN_VEXT_VX(vmulh_vx_d) -GEN_VEXT_VX(vmulhu_vx_b) -GEN_VEXT_VX(vmulhu_vx_h) -GEN_VEXT_VX(vmulhu_vx_w) -GEN_VEXT_VX(vmulhu_vx_d) -GEN_VEXT_VX(vmulhsu_vx_b) -GEN_VEXT_VX(vmulhsu_vx_h) -GEN_VEXT_VX(vmulhsu_vx_w) -GEN_VEXT_VX(vmulhsu_vx_d) +GEN_VEXT_VX(vmul_vx_b, 1) +GEN_VEXT_VX(vmul_vx_h, 2) +GEN_VEXT_VX(vmul_vx_w, 4) +GEN_VEXT_VX(vmul_vx_d, 8) +GEN_VEXT_VX(vmulh_vx_b, 1) +GEN_VEXT_VX(vmulh_vx_h, 2) +GEN_VEXT_VX(vmulh_vx_w, 4) +GEN_VEXT_VX(vmulh_vx_d, 8) +GEN_VEXT_VX(vmulhu_vx_b, 1) +GEN_VEXT_VX(vmulhu_vx_h, 2) +GEN_VEXT_VX(vmulhu_vx_w, 4) +GEN_VEXT_VX(vmulhu_vx_d, 8) +GEN_VEXT_VX(vmulhsu_vx_b, 1) +GEN_VEXT_VX(vmulhsu_vx_h, 2) +GEN_VEXT_VX(vmulhsu_vx_w, 4) +GEN_VEXT_VX(vmulhsu_vx_d, 8) /* Vector Integer Divide Instructions */ #define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M) @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM) RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM) RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM) RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM) -GEN_VEXT_VX(vdivu_vx_b) -GEN_VEXT_VX(vdivu_vx_h) -GEN_VEXT_VX(vdivu_vx_w) -GEN_VEXT_VX(vdivu_vx_d) -GEN_VEXT_VX(vdiv_vx_b) -GEN_VEXT_VX(vdiv_vx_h) -GEN_VEXT_VX(vdiv_vx_w) -GEN_VEXT_VX(vdiv_vx_d) -GEN_VEXT_VX(vremu_vx_b) -GEN_VEXT_VX(vremu_vx_h) -GEN_VEXT_VX(vremu_vx_w) -GEN_VEXT_VX(vremu_vx_d) -GEN_VEXT_VX(vrem_vx_b) -GEN_VEXT_VX(vrem_vx_h) -GEN_VEXT_VX(vrem_vx_w) -GEN_VEXT_VX(vrem_vx_d) +GEN_VEXT_VX(vdivu_vx_b, 1) +GEN_VEXT_VX(vdivu_vx_h, 2) +GEN_VEXT_VX(vdivu_vx_w, 4) +GEN_VEXT_VX(vdivu_vx_d, 8) +GEN_VEXT_VX(vdiv_vx_b, 1) +GEN_VEXT_VX(vdiv_vx_h, 2) +GEN_VEXT_VX(vdiv_vx_w, 4) +GEN_VEXT_VX(vdiv_vx_d, 8) +GEN_VEXT_VX(vremu_vx_b, 1) +GEN_VEXT_VX(vremu_vx_h, 2) +GEN_VEXT_VX(vremu_vx_w, 4) +GEN_VEXT_VX(vremu_vx_d, 8) +GEN_VEXT_VX(vrem_vx_b, 1) +GEN_VEXT_VX(vrem_vx_h, 2) +GEN_VEXT_VX(vrem_vx_w, 4) +GEN_VEXT_VX(vrem_vx_d, 8) /* Vector Widening Integer Multiply Instructions */ RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL) @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL) RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL) RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL) RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL) -GEN_VEXT_VX(vwmul_vx_b) -GEN_VEXT_VX(vwmul_vx_h) -GEN_VEXT_VX(vwmul_vx_w) -GEN_VEXT_VX(vwmulu_vx_b) -GEN_VEXT_VX(vwmulu_vx_h) -GEN_VEXT_VX(vwmulu_vx_w) -GEN_VEXT_VX(vwmulsu_vx_b) -GEN_VEXT_VX(vwmulsu_vx_h) -GEN_VEXT_VX(vwmulsu_vx_w) +GEN_VEXT_VX(vwmul_vx_b, 2) +GEN_VEXT_VX(vwmul_vx_h, 4) +GEN_VEXT_VX(vwmul_vx_w, 8) +GEN_VEXT_VX(vwmulu_vx_b, 2) +GEN_VEXT_VX(vwmulu_vx_h, 4) +GEN_VEXT_VX(vwmulu_vx_w, 8) +GEN_VEXT_VX(vwmulsu_vx_b, 2) +GEN_VEXT_VX(vwmulsu_vx_h, 4) +GEN_VEXT_VX(vwmulsu_vx_w, 8) /* Vector Single-Width Integer Multiply-Add Instructions */ #define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB) RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB) RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB) RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB) -GEN_VEXT_VX(vmacc_vx_b) -GEN_VEXT_VX(vmacc_vx_h) -GEN_VEXT_VX(vmacc_vx_w) -GEN_VEXT_VX(vmacc_vx_d) -GEN_VEXT_VX(vnmsac_vx_b) -GEN_VEXT_VX(vnmsac_vx_h) -GEN_VEXT_VX(vnmsac_vx_w) -GEN_VEXT_VX(vnmsac_vx_d) -GEN_VEXT_VX(vmadd_vx_b) -GEN_VEXT_VX(vmadd_vx_h) -GEN_VEXT_VX(vmadd_vx_w) -GEN_VEXT_VX(vmadd_vx_d) -GEN_VEXT_VX(vnmsub_vx_b) -GEN_VEXT_VX(vnmsub_vx_h) -GEN_VEXT_VX(vnmsub_vx_w) -GEN_VEXT_VX(vnmsub_vx_d) +GEN_VEXT_VX(vmacc_vx_b, 1) +GEN_VEXT_VX(vmacc_vx_h, 2) +GEN_VEXT_VX(vmacc_vx_w, 4) +GEN_VEXT_VX(vmacc_vx_d, 8) +GEN_VEXT_VX(vnmsac_vx_b, 1) +GEN_VEXT_VX(vnmsac_vx_h, 2) +GEN_VEXT_VX(vnmsac_vx_w, 4) +GEN_VEXT_VX(vnmsac_vx_d, 8) +GEN_VEXT_VX(vmadd_vx_b, 1) +GEN_VEXT_VX(vmadd_vx_h, 2) +GEN_VEXT_VX(vmadd_vx_w, 4) +GEN_VEXT_VX(vmadd_vx_d, 8) +GEN_VEXT_VX(vnmsub_vx_b, 1) +GEN_VEXT_VX(vnmsub_vx_h, 2) +GEN_VEXT_VX(vnmsub_vx_w, 4) +GEN_VEXT_VX(vnmsub_vx_d, 8) /* Vector Widening Integer Multiply-Add Instructions */ RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC) @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC) RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC) RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC) RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC) -GEN_VEXT_VX(vwmaccu_vx_b) -GEN_VEXT_VX(vwmaccu_vx_h) -GEN_VEXT_VX(vwmaccu_vx_w) -GEN_VEXT_VX(vwmacc_vx_b) -GEN_VEXT_VX(vwmacc_vx_h) -GEN_VEXT_VX(vwmacc_vx_w) -GEN_VEXT_VX(vwmaccsu_vx_b) -GEN_VEXT_VX(vwmaccsu_vx_h) -GEN_VEXT_VX(vwmaccsu_vx_w) -GEN_VEXT_VX(vwmaccus_vx_b) -GEN_VEXT_VX(vwmaccus_vx_h) -GEN_VEXT_VX(vwmaccus_vx_w) +GEN_VEXT_VX(vwmaccu_vx_b, 2) +GEN_VEXT_VX(vwmaccu_vx_h, 4) +GEN_VEXT_VX(vwmaccu_vx_w, 8) +GEN_VEXT_VX(vwmacc_vx_b, 2) +GEN_VEXT_VX(vwmacc_vx_h, 4) +GEN_VEXT_VX(vwmacc_vx_w, 8) +GEN_VEXT_VX(vwmaccsu_vx_b, 2) +GEN_VEXT_VX(vwmaccsu_vx_h, 4) +GEN_VEXT_VX(vwmaccsu_vx_w, 8) +GEN_VEXT_VX(vwmaccus_vx_b, 2) +GEN_VEXT_VX(vwmaccus_vx_h, 4) +GEN_VEXT_VX(vwmaccus_vx_w, 8) /* Vector Integer Merge and Move Instructions */ #define GEN_VEXT_VMV_VV(NAME, ETYPE, H) \ diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn_trans/trans_rvv.c.inc +++ b/target/riscv/insn_trans/trans_rvv.c.inc @@ -XXX,XX +XXX,XX @@ static bool opivx_trans(uint32_t vd, uint32_t rs1, uint32_t vs2, uint32_t vm, data = FIELD_DP32(data, VDATA, VM, vm); data = FIELD_DP32(data, VDATA, LMUL, s->lmul); + data = FIELD_DP32(data, VDATA, VTA, s->vta); + data = FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s); desc = tcg_constant_i32(simd_desc(s->cfg_ptr->vlen / 8, s->cfg_ptr->vlen / 8, data)); @@ -XXX,XX +XXX,XX @@ do_opivx_gvec(DisasContext *s, arg_rmrr *a, GVecGen2sFn *gvec_fn, return false; } - if (a->vm && s->vl_eq_vlmax) { + if (a->vm && s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) { TCGv_i64 src1 = tcg_temp_new_i64(); tcg_gen_ext_tl_i64(src1, get_gpr(s, a->rs1, EXT_SIGN)); @@ -XXX,XX +XXX,XX @@ static bool opivi_trans(uint32_t vd, uint32_t imm, uint32_t vs2, uint32_t vm, data = FIELD_DP32(data, VDATA, VM, vm); data = FIELD_DP32(data, VDATA, LMUL, s->lmul); + data = FIELD_DP32(data, VDATA, VTA, s->vta); + data = FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s); desc = tcg_constant_i32(simd_desc(s->cfg_ptr->vlen / 8, s->cfg_ptr->vlen / 8, data)); @@ -XXX,XX +XXX,XX @@ do_opivi_gvec(DisasContext *s, arg_rmrr *a, GVecGen2iFn *gvec_fn, return false; } - if (a->vm && s->vl_eq_vlmax) { + if (a->vm && s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) { gvec_fn(s->sew, vreg_ofs(s, a->rd), vreg_ofs(s, a->rs2), extract_imm(s, a->rs1, imm_mode), MAXSZ(s), MAXSZ(s)); mark_vs_dirty(s); @@ -XXX,XX +XXX,XX @@ static bool do_opivv_widen(DisasContext *s, arg_rmrr *a, data = FIELD_DP32(data, VDATA, VM, a->vm); data = FIELD_DP32(data, VDATA, LMUL, s->lmul); + data = FIELD_DP32(data, VDATA, VTA, s->vta); tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), vreg_ofs(s, a->rs1), vreg_ofs(s, a->rs2), @@ -XXX,XX +XXX,XX @@ static bool do_opiwv_widen(DisasContext *s, arg_rmrr *a, data = FIELD_DP32(data, VDATA, VM, a->vm); data = FIELD_DP32(data, VDATA, LMUL, s->lmul); + data = FIELD_DP32(data, VDATA, VTA, s->vta); tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), vreg_ofs(s, a->rs1), vreg_ofs(s, a->rs2), @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ \ data = FIELD_DP32(data, VDATA, VM, a->vm); \ data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ + data = FIELD_DP32(data, VDATA, VTA, s->vta); \ + data = \ + FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s);\ tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ vreg_ofs(s, a->rs1), \ vreg_ofs(s, a->rs2), cpu_env, \ -- 2.36.1
From: eopXD <yueh.ting.chen@gmail.com> Signed-off-by: eop Chen <eop.chen@sifive.com> Reviewed-by: Frank Chang <frank.chang@sifive.com> Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn> Acked-by: Alistair Francis <alistair.francis@wdc.com> Message-Id: <165449614532.19704.7000832880482980398-8@git.sr.ht> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/vector_helper.c | 11 +++++++++++ target/riscv/insn_trans/trans_rvv.c.inc | 3 ++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, \ { \ uint32_t vm = vext_vm(desc); \ uint32_t vl = env->vl; \ + uint32_t esz = sizeof(TS1); \ + uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ + uint32_t vta = vext_vta(desc); \ uint32_t i; \ \ for (i = env->vstart; i < vl; i++) { \ @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, \ *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK); \ } \ env->vstart = 0; \ + /* set tail elements to 1s */ \ + vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ } GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t, uint8_t, H1, H1, DO_SLL, 0x7) @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ { \ uint32_t vm = vext_vm(desc); \ uint32_t vl = env->vl; \ + uint32_t esz = sizeof(TD); \ + uint32_t total_elems = \ + vext_get_total_elems(env, desc, esz); \ + uint32_t vta = vext_vta(desc); \ uint32_t i; \ \ for (i = env->vstart; i < vl; i++) { \ @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ *((TD *)vd + HD(i)) = OP(s2, s1 & MASK); \ } \ env->vstart = 0; \ + /* set tail elements to 1s */ \ + vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);\ } GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7) diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn_trans/trans_rvv.c.inc +++ b/target/riscv/insn_trans/trans_rvv.c.inc @@ -XXX,XX +XXX,XX @@ do_opivx_gvec_shift(DisasContext *s, arg_rmrr *a, GVecGen2sFn32 *gvec_fn, return false; } - if (a->vm && s->vl_eq_vlmax) { + if (a->vm && s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) { TCGv_i32 src1 = tcg_temp_new_i32(); tcg_gen_trunc_tl_i32(src1, get_gpr(s, a->rs1, EXT_NONE)); @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ \ data = FIELD_DP32(data, VDATA, VM, a->vm); \ data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ + data = FIELD_DP32(data, VDATA, VTA, s->vta); \ tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ vreg_ofs(s, a->rs1), \ vreg_ofs(s, a->rs2), cpu_env, \ -- 2.36.1
From: eopXD <yueh.ting.chen@gmail.com> Compares write mask registers, and so always operate under a tail- agnostic policy. Signed-off-by: eop Chen <eop.chen@sifive.com> Reviewed-by: Frank Chang <frank.chang@sifive.com> Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn> Acked-by: Alistair Francis <alistair.francis@wdc.com> Message-Id: <165449614532.19704.7000832880482980398-9@git.sr.ht> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/vector_helper.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ { \ uint32_t vm = vext_vm(desc); \ uint32_t vl = env->vl; \ + uint32_t total_elems = env_archcpu(env)->cfg.vlen; \ + uint32_t vta_all_1s = vext_vta_all_1s(desc); \ uint32_t i; \ \ for (i = env->vstart; i < vl; i++) { \ @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ vext_set_elem_mask(vd, i, DO_OP(s2, s1)); \ } \ env->vstart = 0; \ + /* mask destination register are always tail-agnostic */ \ + /* set tail elements to 1s */ \ + if (vta_all_1s) { \ + for (; i < total_elems; i++) { \ + vext_set_elem_mask(vd, i, 1); \ + } \ + } \ } GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t, H1, DO_MSEQ) @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ { \ uint32_t vm = vext_vm(desc); \ uint32_t vl = env->vl; \ + uint32_t total_elems = env_archcpu(env)->cfg.vlen; \ + uint32_t vta_all_1s = vext_vta_all_1s(desc); \ uint32_t i; \ \ for (i = env->vstart; i < vl; i++) { \ @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ DO_OP(s2, (ETYPE)(target_long)s1)); \ } \ env->vstart = 0; \ + /* mask destination register are always tail-agnostic */ \ + /* set tail elements to 1s */ \ + if (vta_all_1s) { \ + for (; i < total_elems; i++) { \ + vext_set_elem_mask(vd, i, 1); \ + } \ + } \ } GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t, H1, DO_MSEQ) -- 2.36.1
From: eopXD <yueh.ting.chen@gmail.com> Signed-off-by: eop Chen <eop.chen@sifive.com> Reviewed-by: Frank Chang <frank.chang@sifive.com> Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn> Acked-by: Alistair Francis <alistair.francis@wdc.com> Message-Id: <165449614532.19704.7000832880482980398-10@git.sr.ht> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/vector_helper.c | 20 ++++++++++++++++++++ target/riscv/insn_trans/trans_rvv.c.inc | 12 ++++++++---- 2 files changed, 28 insertions(+), 4 deletions(-) diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env, \ uint32_t desc) \ { \ uint32_t vl = env->vl; \ + uint32_t esz = sizeof(ETYPE); \ + uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ + uint32_t vta = vext_vta(desc); \ uint32_t i; \ \ for (i = env->vstart; i < vl; i++) { \ @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env, \ *((ETYPE *)vd + H(i)) = s1; \ } \ env->vstart = 0; \ + /* set tail elements to 1s */ \ + vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ } GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t, H1) @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env, \ uint32_t desc) \ { \ uint32_t vl = env->vl; \ + uint32_t esz = sizeof(ETYPE); \ + uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ + uint32_t vta = vext_vta(desc); \ uint32_t i; \ \ for (i = env->vstart; i < vl; i++) { \ *((ETYPE *)vd + H(i)) = (ETYPE)s1; \ } \ env->vstart = 0; \ + /* set tail elements to 1s */ \ + vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ } GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t, H1) @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ CPURISCVState *env, uint32_t desc) \ { \ uint32_t vl = env->vl; \ + uint32_t esz = sizeof(ETYPE); \ + uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ + uint32_t vta = vext_vta(desc); \ uint32_t i; \ \ for (i = env->vstart; i < vl; i++) { \ @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ *((ETYPE *)vd + H(i)) = *(vt + H(i)); \ } \ env->vstart = 0; \ + /* set tail elements to 1s */ \ + vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ } GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t, H1) @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ void *vs2, CPURISCVState *env, uint32_t desc) \ { \ uint32_t vl = env->vl; \ + uint32_t esz = sizeof(ETYPE); \ + uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ + uint32_t vta = vext_vta(desc); \ uint32_t i; \ \ for (i = env->vstart; i < vl; i++) { \ @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ *((ETYPE *)vd + H(i)) = d; \ } \ env->vstart = 0; \ + /* set tail elements to 1s */ \ + vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ } GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t, H1) diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn_trans/trans_rvv.c.inc +++ b/target/riscv/insn_trans/trans_rvv.c.inc @@ -XXX,XX +XXX,XX @@ static bool trans_vmv_v_v(DisasContext *s, arg_vmv_v_v *a) vext_check_isa_ill(s) && /* vmv.v.v has rs2 = 0 and vm = 1 */ vext_check_sss(s, a->rd, a->rs1, 0, 1)) { - if (s->vl_eq_vlmax) { + if (s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) { tcg_gen_gvec_mov(s->sew, vreg_ofs(s, a->rd), vreg_ofs(s, a->rs1), MAXSZ(s), MAXSZ(s)); } else { uint32_t data = FIELD_DP32(0, VDATA, LMUL, s->lmul); + data = FIELD_DP32(data, VDATA, VTA, s->vta); static gen_helper_gvec_2_ptr * const fns[4] = { gen_helper_vmv_v_v_b, gen_helper_vmv_v_v_h, gen_helper_vmv_v_v_w, gen_helper_vmv_v_v_d, @@ -XXX,XX +XXX,XX @@ static bool trans_vmv_v_x(DisasContext *s, arg_vmv_v_x *a) s1 = get_gpr(s, a->rs1, EXT_SIGN); - if (s->vl_eq_vlmax) { + if (s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) { tcg_gen_gvec_dup_tl(s->sew, vreg_ofs(s, a->rd), MAXSZ(s), MAXSZ(s), s1); } else { @@ -XXX,XX +XXX,XX @@ static bool trans_vmv_v_x(DisasContext *s, arg_vmv_v_x *a) TCGv_i64 s1_i64 = tcg_temp_new_i64(); TCGv_ptr dest = tcg_temp_new_ptr(); uint32_t data = FIELD_DP32(0, VDATA, LMUL, s->lmul); + data = FIELD_DP32(data, VDATA, VTA, s->vta); static gen_helper_vmv_vx * const fns[4] = { gen_helper_vmv_v_x_b, gen_helper_vmv_v_x_h, gen_helper_vmv_v_x_w, gen_helper_vmv_v_x_d, @@ -XXX,XX +XXX,XX @@ static bool trans_vmv_v_i(DisasContext *s, arg_vmv_v_i *a) /* vmv.v.i has rs2 = 0 and vm = 1 */ vext_check_ss(s, a->rd, 0, 1)) { int64_t simm = sextract64(a->rs1, 0, 5); - if (s->vl_eq_vlmax) { + if (s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) { tcg_gen_gvec_dup_imm(s->sew, vreg_ofs(s, a->rd), MAXSZ(s), MAXSZ(s), simm); mark_vs_dirty(s); @@ -XXX,XX +XXX,XX @@ static bool trans_vmv_v_i(DisasContext *s, arg_vmv_v_i *a) TCGv_i64 s1; TCGv_ptr dest; uint32_t data = FIELD_DP32(0, VDATA, LMUL, s->lmul); + data = FIELD_DP32(data, VDATA, VTA, s->vta); static gen_helper_vmv_vx * const fns[4] = { gen_helper_vmv_v_x_b, gen_helper_vmv_v_x_h, gen_helper_vmv_v_x_w, gen_helper_vmv_v_x_d, @@ -XXX,XX +XXX,XX @@ static bool trans_vfmv_v_f(DisasContext *s, arg_vfmv_v_f *a) TCGv_i64 t1; - if (s->vl_eq_vlmax) { + if (s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) { t1 = tcg_temp_new_i64(); /* NaN-box f[rs1] */ do_nanbox(s, t1, cpu_fpr[a->rs1]); @@ -XXX,XX +XXX,XX @@ static bool trans_vfmv_v_f(DisasContext *s, arg_vfmv_v_f *a) TCGv_ptr dest; TCGv_i32 desc; uint32_t data = FIELD_DP32(0, VDATA, LMUL, s->lmul); + data = FIELD_DP32(data, VDATA, VTA, s->vta); static gen_helper_vmv_vx * const fns[3] = { gen_helper_vmv_v_x_h, gen_helper_vmv_v_x_w, -- 2.36.1
From: eopXD <yueh.ting.chen@gmail.com> Signed-off-by: eop Chen <eop.chen@sifive.com> Reviewed-by: Frank Chang <frank.chang@sifive.com> Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn> Acked-by: Alistair Francis <alistair.francis@wdc.com> Message-Id: <165449614532.19704.7000832880482980398-11@git.sr.ht> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/vector_helper.c | 220 ++++++++++++++++++----------------- 1 file changed, 114 insertions(+), 106 deletions(-) diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -XXX,XX +XXX,XX @@ static inline void vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2, CPURISCVState *env, uint32_t desc, - opivv2_rm_fn *fn) + opivv2_rm_fn *fn, uint32_t esz) { uint32_t vm = vext_vm(desc); uint32_t vl = env->vl; + uint32_t total_elems = vext_get_total_elems(env, desc, esz); + uint32_t vta = vext_vta(desc); switch (env->vxrm) { case 0: /* rnu */ @@ -XXX,XX +XXX,XX @@ vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2, env, vl, vm, 3, fn); break; } + /* set tail elements to 1s */ + vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); } /* generate helpers for fixed point instructions with OPIVV format */ -#define GEN_VEXT_VV_RM(NAME) \ +#define GEN_VEXT_VV_RM(NAME, ESZ) \ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ CPURISCVState *env, uint32_t desc) \ { \ vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, \ - do_##NAME); \ + do_##NAME, ESZ); \ } static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8) RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16) RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32) RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64) -GEN_VEXT_VV_RM(vsaddu_vv_b) -GEN_VEXT_VV_RM(vsaddu_vv_h) -GEN_VEXT_VV_RM(vsaddu_vv_w) -GEN_VEXT_VV_RM(vsaddu_vv_d) +GEN_VEXT_VV_RM(vsaddu_vv_b, 1) +GEN_VEXT_VV_RM(vsaddu_vv_h, 2) +GEN_VEXT_VV_RM(vsaddu_vv_w, 4) +GEN_VEXT_VV_RM(vsaddu_vv_d, 8) typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i, CPURISCVState *env, int vxrm); @@ -XXX,XX +XXX,XX @@ static inline void vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2, CPURISCVState *env, uint32_t desc, - opivx2_rm_fn *fn) + opivx2_rm_fn *fn, uint32_t esz) { uint32_t vm = vext_vm(desc); uint32_t vl = env->vl; + uint32_t total_elems = vext_get_total_elems(env, desc, esz); + uint32_t vta = vext_vta(desc); switch (env->vxrm) { case 0: /* rnu */ @@ -XXX,XX +XXX,XX @@ vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2, env, vl, vm, 3, fn); break; } + /* set tail elements to 1s */ + vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); } /* generate helpers for fixed point instructions with OPIVX format */ -#define GEN_VEXT_VX_RM(NAME) \ +#define GEN_VEXT_VX_RM(NAME, ESZ) \ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ void *vs2, CPURISCVState *env, uint32_t desc) \ { \ vext_vx_rm_2(vd, v0, s1, vs2, env, desc, \ - do_##NAME); \ + do_##NAME, ESZ); \ } RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8) RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16) RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32) RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64) -GEN_VEXT_VX_RM(vsaddu_vx_b) -GEN_VEXT_VX_RM(vsaddu_vx_h) -GEN_VEXT_VX_RM(vsaddu_vx_w) -GEN_VEXT_VX_RM(vsaddu_vx_d) +GEN_VEXT_VX_RM(vsaddu_vx_b, 1) +GEN_VEXT_VX_RM(vsaddu_vx_h, 2) +GEN_VEXT_VX_RM(vsaddu_vx_w, 4) +GEN_VEXT_VX_RM(vsaddu_vx_d, 8) static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) { @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8) RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16) RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32) RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64) -GEN_VEXT_VV_RM(vsadd_vv_b) -GEN_VEXT_VV_RM(vsadd_vv_h) -GEN_VEXT_VV_RM(vsadd_vv_w) -GEN_VEXT_VV_RM(vsadd_vv_d) +GEN_VEXT_VV_RM(vsadd_vv_b, 1) +GEN_VEXT_VV_RM(vsadd_vv_h, 2) +GEN_VEXT_VV_RM(vsadd_vv_w, 4) +GEN_VEXT_VV_RM(vsadd_vv_d, 8) RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8) RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16) RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32) RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64) -GEN_VEXT_VX_RM(vsadd_vx_b) -GEN_VEXT_VX_RM(vsadd_vx_h) -GEN_VEXT_VX_RM(vsadd_vx_w) -GEN_VEXT_VX_RM(vsadd_vx_d) +GEN_VEXT_VX_RM(vsadd_vx_b, 1) +GEN_VEXT_VX_RM(vsadd_vx_h, 2) +GEN_VEXT_VX_RM(vsadd_vx_w, 4) +GEN_VEXT_VX_RM(vsadd_vx_d, 8) static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) { @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8) RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16) RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32) RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64) -GEN_VEXT_VV_RM(vssubu_vv_b) -GEN_VEXT_VV_RM(vssubu_vv_h) -GEN_VEXT_VV_RM(vssubu_vv_w) -GEN_VEXT_VV_RM(vssubu_vv_d) +GEN_VEXT_VV_RM(vssubu_vv_b, 1) +GEN_VEXT_VV_RM(vssubu_vv_h, 2) +GEN_VEXT_VV_RM(vssubu_vv_w, 4) +GEN_VEXT_VV_RM(vssubu_vv_d, 8) RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8) RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16) RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32) RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64) -GEN_VEXT_VX_RM(vssubu_vx_b) -GEN_VEXT_VX_RM(vssubu_vx_h) -GEN_VEXT_VX_RM(vssubu_vx_w) -GEN_VEXT_VX_RM(vssubu_vx_d) +GEN_VEXT_VX_RM(vssubu_vx_b, 1) +GEN_VEXT_VX_RM(vssubu_vx_h, 2) +GEN_VEXT_VX_RM(vssubu_vx_w, 4) +GEN_VEXT_VX_RM(vssubu_vx_d, 8) static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) { @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8) RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16) RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32) RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64) -GEN_VEXT_VV_RM(vssub_vv_b) -GEN_VEXT_VV_RM(vssub_vv_h) -GEN_VEXT_VV_RM(vssub_vv_w) -GEN_VEXT_VV_RM(vssub_vv_d) +GEN_VEXT_VV_RM(vssub_vv_b, 1) +GEN_VEXT_VV_RM(vssub_vv_h, 2) +GEN_VEXT_VV_RM(vssub_vv_w, 4) +GEN_VEXT_VV_RM(vssub_vv_d, 8) RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8) RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16) RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32) RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64) -GEN_VEXT_VX_RM(vssub_vx_b) -GEN_VEXT_VX_RM(vssub_vx_h) -GEN_VEXT_VX_RM(vssub_vx_w) -GEN_VEXT_VX_RM(vssub_vx_d) +GEN_VEXT_VX_RM(vssub_vx_b, 1) +GEN_VEXT_VX_RM(vssub_vx_h, 2) +GEN_VEXT_VX_RM(vssub_vx_w, 4) +GEN_VEXT_VX_RM(vssub_vx_d, 8) /* Vector Single-Width Averaging Add and Subtract */ static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift) @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32) RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32) RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32) RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64) -GEN_VEXT_VV_RM(vaadd_vv_b) -GEN_VEXT_VV_RM(vaadd_vv_h) -GEN_VEXT_VV_RM(vaadd_vv_w) -GEN_VEXT_VV_RM(vaadd_vv_d) +GEN_VEXT_VV_RM(vaadd_vv_b, 1) +GEN_VEXT_VV_RM(vaadd_vv_h, 2) +GEN_VEXT_VV_RM(vaadd_vv_w, 4) +GEN_VEXT_VV_RM(vaadd_vv_d, 8) RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32) RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32) RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32) RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64) -GEN_VEXT_VX_RM(vaadd_vx_b) -GEN_VEXT_VX_RM(vaadd_vx_h) -GEN_VEXT_VX_RM(vaadd_vx_w) -GEN_VEXT_VX_RM(vaadd_vx_d) +GEN_VEXT_VX_RM(vaadd_vx_b, 1) +GEN_VEXT_VX_RM(vaadd_vx_h, 2) +GEN_VEXT_VX_RM(vaadd_vx_w, 4) +GEN_VEXT_VX_RM(vaadd_vx_d, 8) static inline uint32_t aaddu32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b) @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2_RM, vaaddu_vv_b, OP_UUU_B, H1, H1, H1, aaddu32) RVVCALL(OPIVV2_RM, vaaddu_vv_h, OP_UUU_H, H2, H2, H2, aaddu32) RVVCALL(OPIVV2_RM, vaaddu_vv_w, OP_UUU_W, H4, H4, H4, aaddu32) RVVCALL(OPIVV2_RM, vaaddu_vv_d, OP_UUU_D, H8, H8, H8, aaddu64) -GEN_VEXT_VV_RM(vaaddu_vv_b) -GEN_VEXT_VV_RM(vaaddu_vv_h) -GEN_VEXT_VV_RM(vaaddu_vv_w) -GEN_VEXT_VV_RM(vaaddu_vv_d) +GEN_VEXT_VV_RM(vaaddu_vv_b, 1) +GEN_VEXT_VV_RM(vaaddu_vv_h, 2) +GEN_VEXT_VV_RM(vaaddu_vv_w, 4) +GEN_VEXT_VV_RM(vaaddu_vv_d, 8) RVVCALL(OPIVX2_RM, vaaddu_vx_b, OP_UUU_B, H1, H1, aaddu32) RVVCALL(OPIVX2_RM, vaaddu_vx_h, OP_UUU_H, H2, H2, aaddu32) RVVCALL(OPIVX2_RM, vaaddu_vx_w, OP_UUU_W, H4, H4, aaddu32) RVVCALL(OPIVX2_RM, vaaddu_vx_d, OP_UUU_D, H8, H8, aaddu64) -GEN_VEXT_VX_RM(vaaddu_vx_b) -GEN_VEXT_VX_RM(vaaddu_vx_h) -GEN_VEXT_VX_RM(vaaddu_vx_w) -GEN_VEXT_VX_RM(vaaddu_vx_d) +GEN_VEXT_VX_RM(vaaddu_vx_b, 1) +GEN_VEXT_VX_RM(vaaddu_vx_h, 2) +GEN_VEXT_VX_RM(vaaddu_vx_w, 4) +GEN_VEXT_VX_RM(vaaddu_vx_d, 8) static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) { @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32) RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32) RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32) RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64) -GEN_VEXT_VV_RM(vasub_vv_b) -GEN_VEXT_VV_RM(vasub_vv_h) -GEN_VEXT_VV_RM(vasub_vv_w) -GEN_VEXT_VV_RM(vasub_vv_d) +GEN_VEXT_VV_RM(vasub_vv_b, 1) +GEN_VEXT_VV_RM(vasub_vv_h, 2) +GEN_VEXT_VV_RM(vasub_vv_w, 4) +GEN_VEXT_VV_RM(vasub_vv_d, 8) RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32) RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32) RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32) RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64) -GEN_VEXT_VX_RM(vasub_vx_b) -GEN_VEXT_VX_RM(vasub_vx_h) -GEN_VEXT_VX_RM(vasub_vx_w) -GEN_VEXT_VX_RM(vasub_vx_d) +GEN_VEXT_VX_RM(vasub_vx_b, 1) +GEN_VEXT_VX_RM(vasub_vx_h, 2) +GEN_VEXT_VX_RM(vasub_vx_w, 4) +GEN_VEXT_VX_RM(vasub_vx_d, 8) static inline uint32_t asubu32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b) @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2_RM, vasubu_vv_b, OP_UUU_B, H1, H1, H1, asubu32) RVVCALL(OPIVV2_RM, vasubu_vv_h, OP_UUU_H, H2, H2, H2, asubu32) RVVCALL(OPIVV2_RM, vasubu_vv_w, OP_UUU_W, H4, H4, H4, asubu32) RVVCALL(OPIVV2_RM, vasubu_vv_d, OP_UUU_D, H8, H8, H8, asubu64) -GEN_VEXT_VV_RM(vasubu_vv_b) -GEN_VEXT_VV_RM(vasubu_vv_h) -GEN_VEXT_VV_RM(vasubu_vv_w) -GEN_VEXT_VV_RM(vasubu_vv_d) +GEN_VEXT_VV_RM(vasubu_vv_b, 1) +GEN_VEXT_VV_RM(vasubu_vv_h, 2) +GEN_VEXT_VV_RM(vasubu_vv_w, 4) +GEN_VEXT_VV_RM(vasubu_vv_d, 8) RVVCALL(OPIVX2_RM, vasubu_vx_b, OP_UUU_B, H1, H1, asubu32) RVVCALL(OPIVX2_RM, vasubu_vx_h, OP_UUU_H, H2, H2, asubu32) RVVCALL(OPIVX2_RM, vasubu_vx_w, OP_UUU_W, H4, H4, asubu32) RVVCALL(OPIVX2_RM, vasubu_vx_d, OP_UUU_D, H8, H8, asubu64) -GEN_VEXT_VX_RM(vasubu_vx_b) -GEN_VEXT_VX_RM(vasubu_vx_h) -GEN_VEXT_VX_RM(vasubu_vx_w) -GEN_VEXT_VX_RM(vasubu_vx_d) +GEN_VEXT_VX_RM(vasubu_vx_b, 1) +GEN_VEXT_VX_RM(vasubu_vx_h, 2) +GEN_VEXT_VX_RM(vasubu_vx_w, 4) +GEN_VEXT_VX_RM(vasubu_vx_d, 8) /* Vector Single-Width Fractional Multiply with Rounding and Saturation */ static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8) RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16) RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32) RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64) -GEN_VEXT_VV_RM(vsmul_vv_b) -GEN_VEXT_VV_RM(vsmul_vv_h) -GEN_VEXT_VV_RM(vsmul_vv_w) -GEN_VEXT_VV_RM(vsmul_vv_d) +GEN_VEXT_VV_RM(vsmul_vv_b, 1) +GEN_VEXT_VV_RM(vsmul_vv_h, 2) +GEN_VEXT_VV_RM(vsmul_vv_w, 4) +GEN_VEXT_VV_RM(vsmul_vv_d, 8) RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8) RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16) RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32) RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64) -GEN_VEXT_VX_RM(vsmul_vx_b) -GEN_VEXT_VX_RM(vsmul_vx_h) -GEN_VEXT_VX_RM(vsmul_vx_w) -GEN_VEXT_VX_RM(vsmul_vx_d) +GEN_VEXT_VX_RM(vsmul_vx_b, 1) +GEN_VEXT_VX_RM(vsmul_vx_h, 2) +GEN_VEXT_VX_RM(vsmul_vx_w, 4) +GEN_VEXT_VX_RM(vsmul_vx_d, 8) /* Vector Single-Width Scaling Shift Instructions */ static inline uint8_t @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8) RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16) RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32) RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64) -GEN_VEXT_VV_RM(vssrl_vv_b) -GEN_VEXT_VV_RM(vssrl_vv_h) -GEN_VEXT_VV_RM(vssrl_vv_w) -GEN_VEXT_VV_RM(vssrl_vv_d) +GEN_VEXT_VV_RM(vssrl_vv_b, 1) +GEN_VEXT_VV_RM(vssrl_vv_h, 2) +GEN_VEXT_VV_RM(vssrl_vv_w, 4) +GEN_VEXT_VV_RM(vssrl_vv_d, 8) RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8) RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16) RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32) RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64) -GEN_VEXT_VX_RM(vssrl_vx_b) -GEN_VEXT_VX_RM(vssrl_vx_h) -GEN_VEXT_VX_RM(vssrl_vx_w) -GEN_VEXT_VX_RM(vssrl_vx_d) +GEN_VEXT_VX_RM(vssrl_vx_b, 1) +GEN_VEXT_VX_RM(vssrl_vx_h, 2) +GEN_VEXT_VX_RM(vssrl_vx_w, 4) +GEN_VEXT_VX_RM(vssrl_vx_d, 8) static inline int8_t vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8) RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16) RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32) RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64) -GEN_VEXT_VV_RM(vssra_vv_b) -GEN_VEXT_VV_RM(vssra_vv_h) -GEN_VEXT_VV_RM(vssra_vv_w) -GEN_VEXT_VV_RM(vssra_vv_d) +GEN_VEXT_VV_RM(vssra_vv_b, 1) +GEN_VEXT_VV_RM(vssra_vv_h, 2) +GEN_VEXT_VV_RM(vssra_vv_w, 4) +GEN_VEXT_VV_RM(vssra_vv_d, 8) RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8) RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16) RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32) RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64) -GEN_VEXT_VX_RM(vssra_vx_b) -GEN_VEXT_VX_RM(vssra_vx_h) -GEN_VEXT_VX_RM(vssra_vx_w) -GEN_VEXT_VX_RM(vssra_vx_d) +GEN_VEXT_VX_RM(vssra_vx_b, 1) +GEN_VEXT_VX_RM(vssra_vx_h, 2) +GEN_VEXT_VX_RM(vssra_vx_w, 4) +GEN_VEXT_VX_RM(vssra_vx_d, 8) /* Vector Narrowing Fixed-Point Clip Instructions */ static inline int8_t @@ -XXX,XX +XXX,XX @@ vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b) RVVCALL(OPIVV2_RM, vnclip_wv_b, NOP_SSS_B, H1, H2, H1, vnclip8) RVVCALL(OPIVV2_RM, vnclip_wv_h, NOP_SSS_H, H2, H4, H2, vnclip16) RVVCALL(OPIVV2_RM, vnclip_wv_w, NOP_SSS_W, H4, H8, H4, vnclip32) -GEN_VEXT_VV_RM(vnclip_wv_b) -GEN_VEXT_VV_RM(vnclip_wv_h) -GEN_VEXT_VV_RM(vnclip_wv_w) +GEN_VEXT_VV_RM(vnclip_wv_b, 1) +GEN_VEXT_VV_RM(vnclip_wv_h, 2) +GEN_VEXT_VV_RM(vnclip_wv_w, 4) RVVCALL(OPIVX2_RM, vnclip_wx_b, NOP_SSS_B, H1, H2, vnclip8) RVVCALL(OPIVX2_RM, vnclip_wx_h, NOP_SSS_H, H2, H4, vnclip16) RVVCALL(OPIVX2_RM, vnclip_wx_w, NOP_SSS_W, H4, H8, vnclip32) -GEN_VEXT_VX_RM(vnclip_wx_b) -GEN_VEXT_VX_RM(vnclip_wx_h) -GEN_VEXT_VX_RM(vnclip_wx_w) +GEN_VEXT_VX_RM(vnclip_wx_b, 1) +GEN_VEXT_VX_RM(vnclip_wx_h, 2) +GEN_VEXT_VX_RM(vnclip_wx_w, 4) static inline uint8_t vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b) @@ -XXX,XX +XXX,XX @@ vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b) RVVCALL(OPIVV2_RM, vnclipu_wv_b, NOP_UUU_B, H1, H2, H1, vnclipu8) RVVCALL(OPIVV2_RM, vnclipu_wv_h, NOP_UUU_H, H2, H4, H2, vnclipu16) RVVCALL(OPIVV2_RM, vnclipu_wv_w, NOP_UUU_W, H4, H8, H4, vnclipu32) -GEN_VEXT_VV_RM(vnclipu_wv_b) -GEN_VEXT_VV_RM(vnclipu_wv_h) -GEN_VEXT_VV_RM(vnclipu_wv_w) +GEN_VEXT_VV_RM(vnclipu_wv_b, 1) +GEN_VEXT_VV_RM(vnclipu_wv_h, 2) +GEN_VEXT_VV_RM(vnclipu_wv_w, 4) RVVCALL(OPIVX2_RM, vnclipu_wx_b, NOP_UUU_B, H1, H2, vnclipu8) RVVCALL(OPIVX2_RM, vnclipu_wx_h, NOP_UUU_H, H2, H4, vnclipu16) RVVCALL(OPIVX2_RM, vnclipu_wx_w, NOP_UUU_W, H4, H8, vnclipu32) -GEN_VEXT_VX_RM(vnclipu_wx_b) -GEN_VEXT_VX_RM(vnclipu_wx_h) -GEN_VEXT_VX_RM(vnclipu_wx_w) +GEN_VEXT_VX_RM(vnclipu_wx_b, 1) +GEN_VEXT_VX_RM(vnclipu_wx_h, 2) +GEN_VEXT_VX_RM(vnclipu_wx_w, 4) /* *** Vector Float Point Arithmetic Instructions -- 2.36.1
From: eopXD <yueh.ting.chen@gmail.com> Compares write mask registers, and so always operate under a tail- agnostic policy. Signed-off-by: eop Chen <eop.chen@sifive.com> Reviewed-by: Frank Chang <frank.chang@sifive.com> Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn> Acked-by: Alistair Francis <alistair.francis@wdc.com> Message-Id: <165449614532.19704.7000832880482980398-12@git.sr.ht> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/vector_helper.c | 440 +++++++++++++----------- target/riscv/insn_trans/trans_rvv.c.inc | 17 + 2 files changed, 261 insertions(+), 196 deletions(-) diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -XXX,XX +XXX,XX @@ static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status); \ } -#define GEN_VEXT_VV_ENV(NAME) \ +#define GEN_VEXT_VV_ENV(NAME, ESZ) \ void HELPER(NAME)(void *vd, void *v0, void *vs1, \ void *vs2, CPURISCVState *env, \ uint32_t desc) \ { \ uint32_t vm = vext_vm(desc); \ uint32_t vl = env->vl; \ + uint32_t total_elems = \ + vext_get_total_elems(env, desc, ESZ); \ + uint32_t vta = vext_vta(desc); \ uint32_t i; \ \ for (i = env->vstart; i < vl; i++) { \ @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, \ do_##NAME(vd, vs1, vs2, i, env); \ } \ env->vstart = 0; \ + /* set tail elements to 1s */ \ + vext_set_elems_1s(vd, vta, vl * ESZ, \ + total_elems * ESZ); \ } RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add) RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add) RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add) -GEN_VEXT_VV_ENV(vfadd_vv_h) -GEN_VEXT_VV_ENV(vfadd_vv_w) -GEN_VEXT_VV_ENV(vfadd_vv_d) +GEN_VEXT_VV_ENV(vfadd_vv_h, 2) +GEN_VEXT_VV_ENV(vfadd_vv_w, 4) +GEN_VEXT_VV_ENV(vfadd_vv_d, 8) #define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ @@ -XXX,XX +XXX,XX @@ static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, &env->fp_status);\ } -#define GEN_VEXT_VF(NAME) \ +#define GEN_VEXT_VF(NAME, ESZ) \ void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \ void *vs2, CPURISCVState *env, \ uint32_t desc) \ { \ uint32_t vm = vext_vm(desc); \ uint32_t vl = env->vl; \ + uint32_t total_elems = \ + vext_get_total_elems(env, desc, ESZ); \ + uint32_t vta = vext_vta(desc); \ uint32_t i; \ \ for (i = env->vstart; i < vl; i++) { \ @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \ do_##NAME(vd, s1, vs2, i, env); \ } \ env->vstart = 0; \ + /* set tail elements to 1s */ \ + vext_set_elems_1s(vd, vta, vl * ESZ, \ + total_elems * ESZ); \ } RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add) RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add) RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add) -GEN_VEXT_VF(vfadd_vf_h) -GEN_VEXT_VF(vfadd_vf_w) -GEN_VEXT_VF(vfadd_vf_d) +GEN_VEXT_VF(vfadd_vf_h, 2) +GEN_VEXT_VF(vfadd_vf_w, 4) +GEN_VEXT_VF(vfadd_vf_d, 8) RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub) RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub) RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub) -GEN_VEXT_VV_ENV(vfsub_vv_h) -GEN_VEXT_VV_ENV(vfsub_vv_w) -GEN_VEXT_VV_ENV(vfsub_vv_d) +GEN_VEXT_VV_ENV(vfsub_vv_h, 2) +GEN_VEXT_VV_ENV(vfsub_vv_w, 4) +GEN_VEXT_VV_ENV(vfsub_vv_d, 8) RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub) RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub) RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub) -GEN_VEXT_VF(vfsub_vf_h) -GEN_VEXT_VF(vfsub_vf_w) -GEN_VEXT_VF(vfsub_vf_d) +GEN_VEXT_VF(vfsub_vf_h, 2) +GEN_VEXT_VF(vfsub_vf_w, 4) +GEN_VEXT_VF(vfsub_vf_d, 8) static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s) { @@ -XXX,XX +XXX,XX @@ static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s) RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub) RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub) RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub) -GEN_VEXT_VF(vfrsub_vf_h) -GEN_VEXT_VF(vfrsub_vf_w) -GEN_VEXT_VF(vfrsub_vf_d) +GEN_VEXT_VF(vfrsub_vf_h, 2) +GEN_VEXT_VF(vfrsub_vf_w, 4) +GEN_VEXT_VF(vfrsub_vf_d, 8) /* Vector Widening Floating-Point Add/Subtract Instructions */ static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s) @@ -XXX,XX +XXX,XX @@ static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s) RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16) RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32) -GEN_VEXT_VV_ENV(vfwadd_vv_h) -GEN_VEXT_VV_ENV(vfwadd_vv_w) +GEN_VEXT_VV_ENV(vfwadd_vv_h, 4) +GEN_VEXT_VV_ENV(vfwadd_vv_w, 8) RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16) RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32) -GEN_VEXT_VF(vfwadd_vf_h) -GEN_VEXT_VF(vfwadd_vf_w) +GEN_VEXT_VF(vfwadd_vf_h, 4) +GEN_VEXT_VF(vfwadd_vf_w, 8) static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s) { @@ -XXX,XX +XXX,XX @@ static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s) RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16) RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32) -GEN_VEXT_VV_ENV(vfwsub_vv_h) -GEN_VEXT_VV_ENV(vfwsub_vv_w) +GEN_VEXT_VV_ENV(vfwsub_vv_h, 4) +GEN_VEXT_VV_ENV(vfwsub_vv_w, 8) RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16) RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32) -GEN_VEXT_VF(vfwsub_vf_h) -GEN_VEXT_VF(vfwsub_vf_w) +GEN_VEXT_VF(vfwsub_vf_h, 4) +GEN_VEXT_VF(vfwsub_vf_w, 8) static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s) { @@ -XXX,XX +XXX,XX @@ static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s) RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16) RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32) -GEN_VEXT_VV_ENV(vfwadd_wv_h) -GEN_VEXT_VV_ENV(vfwadd_wv_w) +GEN_VEXT_VV_ENV(vfwadd_wv_h, 4) +GEN_VEXT_VV_ENV(vfwadd_wv_w, 8) RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16) RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32) -GEN_VEXT_VF(vfwadd_wf_h) -GEN_VEXT_VF(vfwadd_wf_w) +GEN_VEXT_VF(vfwadd_wf_h, 4) +GEN_VEXT_VF(vfwadd_wf_w, 8) static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s) { @@ -XXX,XX +XXX,XX @@ static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s) RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16) RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32) -GEN_VEXT_VV_ENV(vfwsub_wv_h) -GEN_VEXT_VV_ENV(vfwsub_wv_w) +GEN_VEXT_VV_ENV(vfwsub_wv_h, 4) +GEN_VEXT_VV_ENV(vfwsub_wv_w, 8) RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16) RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32) -GEN_VEXT_VF(vfwsub_wf_h) -GEN_VEXT_VF(vfwsub_wf_w) +GEN_VEXT_VF(vfwsub_wf_h, 4) +GEN_VEXT_VF(vfwsub_wf_w, 8) /* Vector Single-Width Floating-Point Multiply/Divide Instructions */ RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul) RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul) RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul) -GEN_VEXT_VV_ENV(vfmul_vv_h) -GEN_VEXT_VV_ENV(vfmul_vv_w) -GEN_VEXT_VV_ENV(vfmul_vv_d) +GEN_VEXT_VV_ENV(vfmul_vv_h, 2) +GEN_VEXT_VV_ENV(vfmul_vv_w, 4) +GEN_VEXT_VV_ENV(vfmul_vv_d, 8) RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul) RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul) RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul) -GEN_VEXT_VF(vfmul_vf_h) -GEN_VEXT_VF(vfmul_vf_w) -GEN_VEXT_VF(vfmul_vf_d) +GEN_VEXT_VF(vfmul_vf_h, 2) +GEN_VEXT_VF(vfmul_vf_w, 4) +GEN_VEXT_VF(vfmul_vf_d, 8) RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div) RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div) RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div) -GEN_VEXT_VV_ENV(vfdiv_vv_h) -GEN_VEXT_VV_ENV(vfdiv_vv_w) -GEN_VEXT_VV_ENV(vfdiv_vv_d) +GEN_VEXT_VV_ENV(vfdiv_vv_h, 2) +GEN_VEXT_VV_ENV(vfdiv_vv_w, 4) +GEN_VEXT_VV_ENV(vfdiv_vv_d, 8) RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div) RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div) RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div) -GEN_VEXT_VF(vfdiv_vf_h) -GEN_VEXT_VF(vfdiv_vf_w) -GEN_VEXT_VF(vfdiv_vf_d) +GEN_VEXT_VF(vfdiv_vf_h, 2) +GEN_VEXT_VF(vfdiv_vf_w, 4) +GEN_VEXT_VF(vfdiv_vf_d, 8) static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s) { @@ -XXX,XX +XXX,XX @@ static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s) RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv) RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv) RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv) -GEN_VEXT_VF(vfrdiv_vf_h) -GEN_VEXT_VF(vfrdiv_vf_w) -GEN_VEXT_VF(vfrdiv_vf_d) +GEN_VEXT_VF(vfrdiv_vf_h, 2) +GEN_VEXT_VF(vfrdiv_vf_w, 4) +GEN_VEXT_VF(vfrdiv_vf_d, 8) /* Vector Widening Floating-Point Multiply */ static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s) @@ -XXX,XX +XXX,XX @@ static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s) } RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16) RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32) -GEN_VEXT_VV_ENV(vfwmul_vv_h) -GEN_VEXT_VV_ENV(vfwmul_vv_w) +GEN_VEXT_VV_ENV(vfwmul_vv_h, 4) +GEN_VEXT_VV_ENV(vfwmul_vv_w, 8) RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16) RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32) -GEN_VEXT_VF(vfwmul_vf_h) -GEN_VEXT_VF(vfwmul_vf_w) +GEN_VEXT_VF(vfwmul_vf_h, 4) +GEN_VEXT_VF(vfwmul_vf_w, 8) /* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */ #define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ @@ -XXX,XX +XXX,XX @@ static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16) RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32) RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64) -GEN_VEXT_VV_ENV(vfmacc_vv_h) -GEN_VEXT_VV_ENV(vfmacc_vv_w) -GEN_VEXT_VV_ENV(vfmacc_vv_d) +GEN_VEXT_VV_ENV(vfmacc_vv_h, 2) +GEN_VEXT_VV_ENV(vfmacc_vv_w, 4) +GEN_VEXT_VV_ENV(vfmacc_vv_d, 8) #define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ @@ -XXX,XX +XXX,XX @@ static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16) RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32) RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64) -GEN_VEXT_VF(vfmacc_vf_h) -GEN_VEXT_VF(vfmacc_vf_w) -GEN_VEXT_VF(vfmacc_vf_d) +GEN_VEXT_VF(vfmacc_vf_h, 2) +GEN_VEXT_VF(vfmacc_vf_w, 4) +GEN_VEXT_VF(vfmacc_vf_d, 8) static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) { @@ -XXX,XX +XXX,XX @@ static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16) RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32) RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64) -GEN_VEXT_VV_ENV(vfnmacc_vv_h) -GEN_VEXT_VV_ENV(vfnmacc_vv_w) -GEN_VEXT_VV_ENV(vfnmacc_vv_d) +GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2) +GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4) +GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8) RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16) RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32) RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64) -GEN_VEXT_VF(vfnmacc_vf_h) -GEN_VEXT_VF(vfnmacc_vf_w) -GEN_VEXT_VF(vfnmacc_vf_d) +GEN_VEXT_VF(vfnmacc_vf_h, 2) +GEN_VEXT_VF(vfnmacc_vf_w, 4) +GEN_VEXT_VF(vfnmacc_vf_d, 8) static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) { @@ -XXX,XX +XXX,XX @@ static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16) RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32) RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64) -GEN_VEXT_VV_ENV(vfmsac_vv_h) -GEN_VEXT_VV_ENV(vfmsac_vv_w) -GEN_VEXT_VV_ENV(vfmsac_vv_d) +GEN_VEXT_VV_ENV(vfmsac_vv_h, 2) +GEN_VEXT_VV_ENV(vfmsac_vv_w, 4) +GEN_VEXT_VV_ENV(vfmsac_vv_d, 8) RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16) RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32) RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64) -GEN_VEXT_VF(vfmsac_vf_h) -GEN_VEXT_VF(vfmsac_vf_w) -GEN_VEXT_VF(vfmsac_vf_d) +GEN_VEXT_VF(vfmsac_vf_h, 2) +GEN_VEXT_VF(vfmsac_vf_w, 4) +GEN_VEXT_VF(vfmsac_vf_d, 8) static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) { @@ -XXX,XX +XXX,XX @@ static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16) RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32) RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64) -GEN_VEXT_VV_ENV(vfnmsac_vv_h) -GEN_VEXT_VV_ENV(vfnmsac_vv_w) -GEN_VEXT_VV_ENV(vfnmsac_vv_d) +GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2) +GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4) +GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8) RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16) RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32) RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64) -GEN_VEXT_VF(vfnmsac_vf_h) -GEN_VEXT_VF(vfnmsac_vf_w) -GEN_VEXT_VF(vfnmsac_vf_d) +GEN_VEXT_VF(vfnmsac_vf_h, 2) +GEN_VEXT_VF(vfnmsac_vf_w, 4) +GEN_VEXT_VF(vfnmsac_vf_d, 8) static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) { @@ -XXX,XX +XXX,XX @@ static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16) RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32) RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64) -GEN_VEXT_VV_ENV(vfmadd_vv_h) -GEN_VEXT_VV_ENV(vfmadd_vv_w) -GEN_VEXT_VV_ENV(vfmadd_vv_d) +GEN_VEXT_VV_ENV(vfmadd_vv_h, 2) +GEN_VEXT_VV_ENV(vfmadd_vv_w, 4) +GEN_VEXT_VV_ENV(vfmadd_vv_d, 8) RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16) RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32) RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64) -GEN_VEXT_VF(vfmadd_vf_h) -GEN_VEXT_VF(vfmadd_vf_w) -GEN_VEXT_VF(vfmadd_vf_d) +GEN_VEXT_VF(vfmadd_vf_h, 2) +GEN_VEXT_VF(vfmadd_vf_w, 4) +GEN_VEXT_VF(vfmadd_vf_d, 8) static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) { @@ -XXX,XX +XXX,XX @@ static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16) RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32) RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64) -GEN_VEXT_VV_ENV(vfnmadd_vv_h) -GEN_VEXT_VV_ENV(vfnmadd_vv_w) -GEN_VEXT_VV_ENV(vfnmadd_vv_d) +GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2) +GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4) +GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8) RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16) RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32) RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64) -GEN_VEXT_VF(vfnmadd_vf_h) -GEN_VEXT_VF(vfnmadd_vf_w) -GEN_VEXT_VF(vfnmadd_vf_d) +GEN_VEXT_VF(vfnmadd_vf_h, 2) +GEN_VEXT_VF(vfnmadd_vf_w, 4) +GEN_VEXT_VF(vfnmadd_vf_d, 8) static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) { @@ -XXX,XX +XXX,XX @@ static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16) RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32) RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64) -GEN_VEXT_VV_ENV(vfmsub_vv_h) -GEN_VEXT_VV_ENV(vfmsub_vv_w) -GEN_VEXT_VV_ENV(vfmsub_vv_d) +GEN_VEXT_VV_ENV(vfmsub_vv_h, 2) +GEN_VEXT_VV_ENV(vfmsub_vv_w, 4) +GEN_VEXT_VV_ENV(vfmsub_vv_d, 8) RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16) RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32) RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64) -GEN_VEXT_VF(vfmsub_vf_h) -GEN_VEXT_VF(vfmsub_vf_w) -GEN_VEXT_VF(vfmsub_vf_d) +GEN_VEXT_VF(vfmsub_vf_h, 2) +GEN_VEXT_VF(vfmsub_vf_w, 4) +GEN_VEXT_VF(vfmsub_vf_d, 8) static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) { @@ -XXX,XX +XXX,XX @@ static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16) RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32) RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64) -GEN_VEXT_VV_ENV(vfnmsub_vv_h) -GEN_VEXT_VV_ENV(vfnmsub_vv_w) -GEN_VEXT_VV_ENV(vfnmsub_vv_d) +GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2) +GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4) +GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8) RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16) RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32) RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64) -GEN_VEXT_VF(vfnmsub_vf_h) -GEN_VEXT_VF(vfnmsub_vf_w) -GEN_VEXT_VF(vfnmsub_vf_d) +GEN_VEXT_VF(vfnmsub_vf_h, 2) +GEN_VEXT_VF(vfnmsub_vf_w, 4) +GEN_VEXT_VF(vfnmsub_vf_d, 8) /* Vector Widening Floating-Point Fused Multiply-Add Instructions */ static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) @@ -XXX,XX +XXX,XX @@ static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16) RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32) -GEN_VEXT_VV_ENV(vfwmacc_vv_h) -GEN_VEXT_VV_ENV(vfwmacc_vv_w) +GEN_VEXT_VV_ENV(vfwmacc_vv_h, 4) +GEN_VEXT_VV_ENV(vfwmacc_vv_w, 8) RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16) RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32) -GEN_VEXT_VF(vfwmacc_vf_h) -GEN_VEXT_VF(vfwmacc_vf_w) +GEN_VEXT_VF(vfwmacc_vf_h, 4) +GEN_VEXT_VF(vfwmacc_vf_w, 8) static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) { @@ -XXX,XX +XXX,XX @@ static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16) RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32) -GEN_VEXT_VV_ENV(vfwnmacc_vv_h) -GEN_VEXT_VV_ENV(vfwnmacc_vv_w) +GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 4) +GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 8) RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16) RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32) -GEN_VEXT_VF(vfwnmacc_vf_h) -GEN_VEXT_VF(vfwnmacc_vf_w) +GEN_VEXT_VF(vfwnmacc_vf_h, 4) +GEN_VEXT_VF(vfwnmacc_vf_w, 8) static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) { @@ -XXX,XX +XXX,XX @@ static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16) RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32) -GEN_VEXT_VV_ENV(vfwmsac_vv_h) -GEN_VEXT_VV_ENV(vfwmsac_vv_w) +GEN_VEXT_VV_ENV(vfwmsac_vv_h, 4) +GEN_VEXT_VV_ENV(vfwmsac_vv_w, 8) RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16) RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32) -GEN_VEXT_VF(vfwmsac_vf_h) -GEN_VEXT_VF(vfwmsac_vf_w) +GEN_VEXT_VF(vfwmsac_vf_h, 4) +GEN_VEXT_VF(vfwmsac_vf_w, 8) static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) { @@ -XXX,XX +XXX,XX @@ static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16) RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32) -GEN_VEXT_VV_ENV(vfwnmsac_vv_h) -GEN_VEXT_VV_ENV(vfwnmsac_vv_w) +GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 4) +GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 8) RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16) RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32) -GEN_VEXT_VF(vfwnmsac_vf_h) -GEN_VEXT_VF(vfwnmsac_vf_w) +GEN_VEXT_VF(vfwnmsac_vf_h, 4) +GEN_VEXT_VF(vfwnmsac_vf_w, 8) /* Vector Floating-Point Square-Root Instruction */ /* (TD, T2, TX2) */ @@ -XXX,XX +XXX,XX @@ static void do_##NAME(void *vd, void *vs2, int i, \ *((TD *)vd + HD(i)) = OP(s2, &env->fp_status); \ } -#define GEN_VEXT_V_ENV(NAME) \ +#define GEN_VEXT_V_ENV(NAME, ESZ) \ void HELPER(NAME)(void *vd, void *v0, void *vs2, \ CPURISCVState *env, uint32_t desc) \ { \ uint32_t vm = vext_vm(desc); \ uint32_t vl = env->vl; \ + uint32_t total_elems = \ + vext_get_total_elems(env, desc, ESZ); \ + uint32_t vta = vext_vta(desc); \ uint32_t i; \ \ if (vl == 0) { \ @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs2, \ do_##NAME(vd, vs2, i, env); \ } \ env->vstart = 0; \ + vext_set_elems_1s(vd, vta, vl * ESZ, \ + total_elems * ESZ); \ } RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt) RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt) RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt) -GEN_VEXT_V_ENV(vfsqrt_v_h) -GEN_VEXT_V_ENV(vfsqrt_v_w) -GEN_VEXT_V_ENV(vfsqrt_v_d) +GEN_VEXT_V_ENV(vfsqrt_v_h, 2) +GEN_VEXT_V_ENV(vfsqrt_v_w, 4) +GEN_VEXT_V_ENV(vfsqrt_v_d, 8) /* * Vector Floating-Point Reciprocal Square-Root Estimate Instruction @@ -XXX,XX +XXX,XX @@ static float64 frsqrt7_d(float64 f, float_status *s) RVVCALL(OPFVV1, vfrsqrt7_v_h, OP_UU_H, H2, H2, frsqrt7_h) RVVCALL(OPFVV1, vfrsqrt7_v_w, OP_UU_W, H4, H4, frsqrt7_s) RVVCALL(OPFVV1, vfrsqrt7_v_d, OP_UU_D, H8, H8, frsqrt7_d) -GEN_VEXT_V_ENV(vfrsqrt7_v_h) -GEN_VEXT_V_ENV(vfrsqrt7_v_w) -GEN_VEXT_V_ENV(vfrsqrt7_v_d) +GEN_VEXT_V_ENV(vfrsqrt7_v_h, 2) +GEN_VEXT_V_ENV(vfrsqrt7_v_w, 4) +GEN_VEXT_V_ENV(vfrsqrt7_v_d, 8) /* * Vector Floating-Point Reciprocal Estimate Instruction @@ -XXX,XX +XXX,XX @@ static float64 frec7_d(float64 f, float_status *s) RVVCALL(OPFVV1, vfrec7_v_h, OP_UU_H, H2, H2, frec7_h) RVVCALL(OPFVV1, vfrec7_v_w, OP_UU_W, H4, H4, frec7_s) RVVCALL(OPFVV1, vfrec7_v_d, OP_UU_D, H8, H8, frec7_d) -GEN_VEXT_V_ENV(vfrec7_v_h) -GEN_VEXT_V_ENV(vfrec7_v_w) -GEN_VEXT_V_ENV(vfrec7_v_d) +GEN_VEXT_V_ENV(vfrec7_v_h, 2) +GEN_VEXT_V_ENV(vfrec7_v_w, 4) +GEN_VEXT_V_ENV(vfrec7_v_d, 8) /* Vector Floating-Point MIN/MAX Instructions */ RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minimum_number) RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, float32_minimum_number) RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minimum_number) -GEN_VEXT_VV_ENV(vfmin_vv_h) -GEN_VEXT_VV_ENV(vfmin_vv_w) -GEN_VEXT_VV_ENV(vfmin_vv_d) +GEN_VEXT_VV_ENV(vfmin_vv_h, 2) +GEN_VEXT_VV_ENV(vfmin_vv_w, 4) +GEN_VEXT_VV_ENV(vfmin_vv_d, 8) RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minimum_number) RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minimum_number) RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minimum_number) -GEN_VEXT_VF(vfmin_vf_h) -GEN_VEXT_VF(vfmin_vf_w) -GEN_VEXT_VF(vfmin_vf_d) +GEN_VEXT_VF(vfmin_vf_h, 2) +GEN_VEXT_VF(vfmin_vf_w, 4) +GEN_VEXT_VF(vfmin_vf_d, 8) RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maximum_number) RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maximum_number) RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maximum_number) -GEN_VEXT_VV_ENV(vfmax_vv_h) -GEN_VEXT_VV_ENV(vfmax_vv_w) -GEN_VEXT_VV_ENV(vfmax_vv_d) +GEN_VEXT_VV_ENV(vfmax_vv_h, 2) +GEN_VEXT_VV_ENV(vfmax_vv_w, 4) +GEN_VEXT_VV_ENV(vfmax_vv_d, 8) RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maximum_number) RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maximum_number) RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maximum_number) -GEN_VEXT_VF(vfmax_vf_h) -GEN_VEXT_VF(vfmax_vf_w) -GEN_VEXT_VF(vfmax_vf_d) +GEN_VEXT_VF(vfmax_vf_h, 2) +GEN_VEXT_VF(vfmax_vf_w, 4) +GEN_VEXT_VF(vfmax_vf_d, 8) /* Vector Floating-Point Sign-Injection Instructions */ static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s) @@ -XXX,XX +XXX,XX @@ static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s) RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16) RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32) RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64) -GEN_VEXT_VV_ENV(vfsgnj_vv_h) -GEN_VEXT_VV_ENV(vfsgnj_vv_w) -GEN_VEXT_VV_ENV(vfsgnj_vv_d) +GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2) +GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4) +GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8) RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16) RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32) RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64) -GEN_VEXT_VF(vfsgnj_vf_h) -GEN_VEXT_VF(vfsgnj_vf_w) -GEN_VEXT_VF(vfsgnj_vf_d) +GEN_VEXT_VF(vfsgnj_vf_h, 2) +GEN_VEXT_VF(vfsgnj_vf_w, 4) +GEN_VEXT_VF(vfsgnj_vf_d, 8) static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s) { @@ -XXX,XX +XXX,XX @@ static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s) RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16) RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32) RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64) -GEN_VEXT_VV_ENV(vfsgnjn_vv_h) -GEN_VEXT_VV_ENV(vfsgnjn_vv_w) -GEN_VEXT_VV_ENV(vfsgnjn_vv_d) +GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2) +GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4) +GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8) RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16) RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32) RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64) -GEN_VEXT_VF(vfsgnjn_vf_h) -GEN_VEXT_VF(vfsgnjn_vf_w) -GEN_VEXT_VF(vfsgnjn_vf_d) +GEN_VEXT_VF(vfsgnjn_vf_h, 2) +GEN_VEXT_VF(vfsgnjn_vf_w, 4) +GEN_VEXT_VF(vfsgnjn_vf_d, 8) static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s) { @@ -XXX,XX +XXX,XX @@ static uint64_t fsgnjx64(uint64_t a, uint64_t b, float_status *s) RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16) RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32) RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64) -GEN_VEXT_VV_ENV(vfsgnjx_vv_h) -GEN_VEXT_VV_ENV(vfsgnjx_vv_w) -GEN_VEXT_VV_ENV(vfsgnjx_vv_d) +GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2) +GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4) +GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8) RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16) RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32) RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64) -GEN_VEXT_VF(vfsgnjx_vf_h) -GEN_VEXT_VF(vfsgnjx_vf_w) -GEN_VEXT_VF(vfsgnjx_vf_d) +GEN_VEXT_VF(vfsgnjx_vf_h, 2) +GEN_VEXT_VF(vfsgnjx_vf_w, 4) +GEN_VEXT_VF(vfsgnjx_vf_d, 8) /* Vector Floating-Point Compare Instructions */ #define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP) \ @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ { \ uint32_t vm = vext_vm(desc); \ uint32_t vl = env->vl; \ + uint32_t total_elems = env_archcpu(env)->cfg.vlen; \ + uint32_t vta_all_1s = vext_vta_all_1s(desc); \ uint32_t i; \ \ for (i = env->vstart; i < vl; i++) { \ @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ DO_OP(s2, s1, &env->fp_status)); \ } \ env->vstart = 0; \ + /* mask destination register are always tail-agnostic */ \ + /* set tail elements to 1s */ \ + if (vta_all_1s) { \ + for (; i < total_elems; i++) { \ + vext_set_elem_mask(vd, i, 1); \ + } \ + } \ } GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet) @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ { \ uint32_t vm = vext_vm(desc); \ uint32_t vl = env->vl; \ + uint32_t total_elems = env_archcpu(env)->cfg.vlen; \ + uint32_t vta_all_1s = vext_vta_all_1s(desc); \ uint32_t i; \ \ for (i = env->vstart; i < vl; i++) { \ @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ DO_OP(s2, (ETYPE)s1, &env->fp_status)); \ } \ env->vstart = 0; \ + /* mask destination register are always tail-agnostic */ \ + /* set tail elements to 1s */ \ + if (vta_all_1s) { \ + for (; i < total_elems; i++) { \ + vext_set_elem_mask(vd, i, 1); \ + } \ + } \ } GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet) @@ -XXX,XX +XXX,XX @@ static void do_##NAME(void *vd, void *vs2, int i) \ *((TD *)vd + HD(i)) = OP(s2); \ } -#define GEN_VEXT_V(NAME) \ +#define GEN_VEXT_V(NAME, ESZ) \ void HELPER(NAME)(void *vd, void *v0, void *vs2, \ CPURISCVState *env, uint32_t desc) \ { \ uint32_t vm = vext_vm(desc); \ uint32_t vl = env->vl; \ + uint32_t total_elems = \ + vext_get_total_elems(env, desc, ESZ); \ + uint32_t vta = vext_vta(desc); \ uint32_t i; \ \ for (i = env->vstart; i < vl; i++) { \ @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs2, \ do_##NAME(vd, vs2, i); \ } \ env->vstart = 0; \ + /* set tail elements to 1s */ \ + vext_set_elems_1s(vd, vta, vl * ESZ, \ + total_elems * ESZ); \ } target_ulong fclass_h(uint64_t frs1) @@ -XXX,XX +XXX,XX @@ target_ulong fclass_d(uint64_t frs1) RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h) RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s) RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d) -GEN_VEXT_V(vfclass_v_h) -GEN_VEXT_V(vfclass_v_w) -GEN_VEXT_V(vfclass_v_d) +GEN_VEXT_V(vfclass_v_h, 2) +GEN_VEXT_V(vfclass_v_w, 4) +GEN_VEXT_V(vfclass_v_d, 8) /* Vector Floating-Point Merge Instruction */ + #define GEN_VFMERGE_VF(NAME, ETYPE, H) \ void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ CPURISCVState *env, uint32_t desc) \ { \ uint32_t vm = vext_vm(desc); \ uint32_t vl = env->vl; \ + uint32_t esz = sizeof(ETYPE); \ + uint32_t total_elems = \ + vext_get_total_elems(env, desc, esz); \ + uint32_t vta = vext_vta(desc); \ uint32_t i; \ \ for (i = env->vstart; i < vl; i++) { \ @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ = (!vm && !vext_elem_mask(v0, i) ? s2 : s1); \ } \ env->vstart = 0; \ + /* set tail elements to 1s */ \ + vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ } GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2) @@ -XXX,XX +XXX,XX @@ GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8) RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16) RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32) RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64) -GEN_VEXT_V_ENV(vfcvt_xu_f_v_h) -GEN_VEXT_V_ENV(vfcvt_xu_f_v_w) -GEN_VEXT_V_ENV(vfcvt_xu_f_v_d) +GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2) +GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4) +GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8) /* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */ RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16) RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32) RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64) -GEN_VEXT_V_ENV(vfcvt_x_f_v_h) -GEN_VEXT_V_ENV(vfcvt_x_f_v_w) -GEN_VEXT_V_ENV(vfcvt_x_f_v_d) +GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2) +GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4) +GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8) /* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */ RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16) RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32) RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64) -GEN_VEXT_V_ENV(vfcvt_f_xu_v_h) -GEN_VEXT_V_ENV(vfcvt_f_xu_v_w) -GEN_VEXT_V_ENV(vfcvt_f_xu_v_d) +GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2) +GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4) +GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8) /* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */ RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16) RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32) RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64) -GEN_VEXT_V_ENV(vfcvt_f_x_v_h) -GEN_VEXT_V_ENV(vfcvt_f_x_v_w) -GEN_VEXT_V_ENV(vfcvt_f_x_v_d) +GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2) +GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4) +GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8) /* Widening Floating-Point/Integer Type-Convert Instructions */ /* (TD, T2, TX2) */ @@ -XXX,XX +XXX,XX @@ GEN_VEXT_V_ENV(vfcvt_f_x_v_d) /* vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer.*/ RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32) RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64) -GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h) -GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w) +GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 4) +GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 8) /* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */ RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32) RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64) -GEN_VEXT_V_ENV(vfwcvt_x_f_v_h) -GEN_VEXT_V_ENV(vfwcvt_x_f_v_w) +GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 4) +GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 8) /* vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float */ RVVCALL(OPFVV1, vfwcvt_f_xu_v_b, WOP_UU_B, H2, H1, uint8_to_float16) RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32) RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64) -GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b) -GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h) -GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w) +GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b, 2) +GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 4) +GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 8) /* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */ RVVCALL(OPFVV1, vfwcvt_f_x_v_b, WOP_UU_B, H2, H1, int8_to_float16) RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32) RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64) -GEN_VEXT_V_ENV(vfwcvt_f_x_v_b) -GEN_VEXT_V_ENV(vfwcvt_f_x_v_h) -GEN_VEXT_V_ENV(vfwcvt_f_x_v_w) +GEN_VEXT_V_ENV(vfwcvt_f_x_v_b, 2) +GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 4) +GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 8) /* * vfwcvt.f.f.v vd, vs2, vm @@ -XXX,XX +XXX,XX @@ static uint32_t vfwcvtffv16(uint16_t a, float_status *s) RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16) RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64) -GEN_VEXT_V_ENV(vfwcvt_f_f_v_h) -GEN_VEXT_V_ENV(vfwcvt_f_f_v_w) +GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 4) +GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 8) /* Narrowing Floating-Point/Integer Type-Convert Instructions */ /* (TD, T2, TX2) */ @@ -XXX,XX +XXX,XX @@ GEN_VEXT_V_ENV(vfwcvt_f_f_v_w) RVVCALL(OPFVV1, vfncvt_xu_f_w_b, NOP_UU_B, H1, H2, float16_to_uint8) RVVCALL(OPFVV1, vfncvt_xu_f_w_h, NOP_UU_H, H2, H4, float32_to_uint16) RVVCALL(OPFVV1, vfncvt_xu_f_w_w, NOP_UU_W, H4, H8, float64_to_uint32) -GEN_VEXT_V_ENV(vfncvt_xu_f_w_b) -GEN_VEXT_V_ENV(vfncvt_xu_f_w_h) -GEN_VEXT_V_ENV(vfncvt_xu_f_w_w) +GEN_VEXT_V_ENV(vfncvt_xu_f_w_b, 1) +GEN_VEXT_V_ENV(vfncvt_xu_f_w_h, 2) +GEN_VEXT_V_ENV(vfncvt_xu_f_w_w, 4) /* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */ RVVCALL(OPFVV1, vfncvt_x_f_w_b, NOP_UU_B, H1, H2, float16_to_int8) RVVCALL(OPFVV1, vfncvt_x_f_w_h, NOP_UU_H, H2, H4, float32_to_int16) RVVCALL(OPFVV1, vfncvt_x_f_w_w, NOP_UU_W, H4, H8, float64_to_int32) -GEN_VEXT_V_ENV(vfncvt_x_f_w_b) -GEN_VEXT_V_ENV(vfncvt_x_f_w_h) -GEN_VEXT_V_ENV(vfncvt_x_f_w_w) +GEN_VEXT_V_ENV(vfncvt_x_f_w_b, 1) +GEN_VEXT_V_ENV(vfncvt_x_f_w_h, 2) +GEN_VEXT_V_ENV(vfncvt_x_f_w_w, 4) /* vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float */ RVVCALL(OPFVV1, vfncvt_f_xu_w_h, NOP_UU_H, H2, H4, uint32_to_float16) RVVCALL(OPFVV1, vfncvt_f_xu_w_w, NOP_UU_W, H4, H8, uint64_to_float32) -GEN_VEXT_V_ENV(vfncvt_f_xu_w_h) -GEN_VEXT_V_ENV(vfncvt_f_xu_w_w) +GEN_VEXT_V_ENV(vfncvt_f_xu_w_h, 2) +GEN_VEXT_V_ENV(vfncvt_f_xu_w_w, 4) /* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */ RVVCALL(OPFVV1, vfncvt_f_x_w_h, NOP_UU_H, H2, H4, int32_to_float16) RVVCALL(OPFVV1, vfncvt_f_x_w_w, NOP_UU_W, H4, H8, int64_to_float32) -GEN_VEXT_V_ENV(vfncvt_f_x_w_h) -GEN_VEXT_V_ENV(vfncvt_f_x_w_w) +GEN_VEXT_V_ENV(vfncvt_f_x_w_h, 2) +GEN_VEXT_V_ENV(vfncvt_f_x_w_w, 4) /* vfncvt.f.f.v vd, vs2, vm # Convert double float to single-width float. */ static uint16_t vfncvtffv16(uint32_t a, float_status *s) @@ -XXX,XX +XXX,XX @@ static uint16_t vfncvtffv16(uint32_t a, float_status *s) RVVCALL(OPFVV1, vfncvt_f_f_w_h, NOP_UU_H, H2, H4, vfncvtffv16) RVVCALL(OPFVV1, vfncvt_f_f_w_w, NOP_UU_W, H4, H8, float64_to_float32) -GEN_VEXT_V_ENV(vfncvt_f_f_w_h) -GEN_VEXT_V_ENV(vfncvt_f_f_w_w) +GEN_VEXT_V_ENV(vfncvt_f_f_w_h, 2) +GEN_VEXT_V_ENV(vfncvt_f_f_w_w, 4) /* *** Vector Reduction Operations diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn_trans/trans_rvv.c.inc +++ b/target/riscv/insn_trans/trans_rvv.c.inc @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ \ data = FIELD_DP32(data, VDATA, VM, a->vm); \ data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ + data = FIELD_DP32(data, VDATA, VTA, s->vta); \ + data = \ + FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s);\ tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ vreg_ofs(s, a->rs1), \ vreg_ofs(s, a->rs2), cpu_env, \ @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ gen_set_rm(s, RISCV_FRM_DYN); \ data = FIELD_DP32(data, VDATA, VM, a->vm); \ data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ + data = FIELD_DP32(data, VDATA, VTA, s->vta); \ + data = FIELD_DP32(data, VDATA, VTA_ALL_1S, \ + s->cfg_vta_all_1s); \ return opfvf_trans(a->rd, a->rs1, a->rs2, data, \ fns[s->sew - 1], s); \ } \ @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ \ data = FIELD_DP32(data, VDATA, VM, a->vm); \ data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ + data = FIELD_DP32(data, VDATA, VTA, s->vta); \ tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ vreg_ofs(s, a->rs1), \ vreg_ofs(s, a->rs2), cpu_env, \ @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ gen_set_rm(s, RISCV_FRM_DYN); \ data = FIELD_DP32(data, VDATA, VM, a->vm); \ data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ + data = FIELD_DP32(data, VDATA, VTA, s->vta); \ return opfvf_trans(a->rd, a->rs1, a->rs2, data, \ fns[s->sew - 1], s); \ } \ @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ \ data = FIELD_DP32(data, VDATA, VM, a->vm); \ data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ + data = FIELD_DP32(data, VDATA, VTA, s->vta); \ tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ vreg_ofs(s, a->rs1), \ vreg_ofs(s, a->rs2), cpu_env, \ @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ gen_set_rm(s, RISCV_FRM_DYN); \ data = FIELD_DP32(data, VDATA, VM, a->vm); \ data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ + data = FIELD_DP32(data, VDATA, VTA, s->vta); \ return opfvf_trans(a->rd, a->rs1, a->rs2, data, \ fns[s->sew - 1], s); \ } \ @@ -XXX,XX +XXX,XX @@ static bool do_opfv(DisasContext *s, arg_rmr *a, data = FIELD_DP32(data, VDATA, VM, a->vm); data = FIELD_DP32(data, VDATA, LMUL, s->lmul); + data = FIELD_DP32(data, VDATA, VTA, s->vta); tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), vreg_ofs(s, a->rs2), cpu_env, s->cfg_ptr->vlen / 8, @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ \ data = FIELD_DP32(data, VDATA, VM, a->vm); \ data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ + data = FIELD_DP32(data, VDATA, VTA, s->vta); \ tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ vreg_ofs(s, a->rs2), cpu_env, \ s->cfg_ptr->vlen / 8, \ @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ \ data = FIELD_DP32(data, VDATA, VM, a->vm); \ + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ + data = FIELD_DP32(data, VDATA, VTA, s->vta); \ tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ vreg_ofs(s, a->rs2), cpu_env, \ s->cfg_ptr->vlen / 8, \ @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ \ data = FIELD_DP32(data, VDATA, VM, a->vm); \ data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ + data = FIELD_DP32(data, VDATA, VTA, s->vta); \ tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ vreg_ofs(s, a->rs2), cpu_env, \ s->cfg_ptr->vlen / 8, \ @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ \ data = FIELD_DP32(data, VDATA, VM, a->vm); \ + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ + data = FIELD_DP32(data, VDATA, VTA, s->vta); \ tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ vreg_ofs(s, a->rs2), cpu_env, \ s->cfg_ptr->vlen / 8, \ -- 2.36.1
From: eopXD <yueh.ting.chen@gmail.com> Signed-off-by: eop Chen <eop.chen@sifive.com> Reviewed-by: Frank Chang <frank.chang@sifive.com> Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn> Acked-by: Alistair Francis <alistair.francis@wdc.com> Message-Id: <165449614532.19704.7000832880482980398-13@git.sr.ht> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/vector_helper.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, \ { \ uint32_t vm = vext_vm(desc); \ uint32_t vl = env->vl; \ + uint32_t esz = sizeof(TD); \ + uint32_t vlenb = simd_maxsz(desc); \ + uint32_t vta = vext_vta(desc); \ uint32_t i; \ TD s1 = *((TD *)vs1 + HD(0)); \ \ @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, \ } \ *((TD *)vd + HD(0)) = s1; \ env->vstart = 0; \ + /* set tail elements to 1s */ \ + vext_set_elems_1s(vd, vta, esz, vlenb); \ } /* vd[0] = sum(vs1[0], vs2[*]) */ @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, \ { \ uint32_t vm = vext_vm(desc); \ uint32_t vl = env->vl; \ + uint32_t esz = sizeof(TD); \ + uint32_t vlenb = simd_maxsz(desc); \ + uint32_t vta = vext_vta(desc); \ uint32_t i; \ TD s1 = *((TD *)vs1 + HD(0)); \ \ @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, \ } \ *((TD *)vd + HD(0)) = s1; \ env->vstart = 0; \ + /* set tail elements to 1s */ \ + vext_set_elems_1s(vd, vta, esz, vlenb); \ } /* Unordered sum */ @@ -XXX,XX +XXX,XX @@ void HELPER(vfwredsum_vs_h)(void *vd, void *v0, void *vs1, { uint32_t vm = vext_vm(desc); uint32_t vl = env->vl; + uint32_t esz = sizeof(uint32_t); + uint32_t vlenb = simd_maxsz(desc); + uint32_t vta = vext_vta(desc); uint32_t i; uint32_t s1 = *((uint32_t *)vs1 + H4(0)); @@ -XXX,XX +XXX,XX @@ void HELPER(vfwredsum_vs_h)(void *vd, void *v0, void *vs1, } *((uint32_t *)vd + H4(0)) = s1; env->vstart = 0; + /* set tail elements to 1s */ + vext_set_elems_1s(vd, vta, esz, vlenb); } void HELPER(vfwredsum_vs_w)(void *vd, void *v0, void *vs1, @@ -XXX,XX +XXX,XX @@ void HELPER(vfwredsum_vs_w)(void *vd, void *v0, void *vs1, { uint32_t vm = vext_vm(desc); uint32_t vl = env->vl; + uint32_t esz = sizeof(uint64_t); + uint32_t vlenb = simd_maxsz(desc); + uint32_t vta = vext_vta(desc); uint32_t i; uint64_t s1 = *((uint64_t *)vs1); @@ -XXX,XX +XXX,XX @@ void HELPER(vfwredsum_vs_w)(void *vd, void *v0, void *vs1, } *((uint64_t *)vd) = s1; env->vstart = 0; + /* set tail elements to 1s */ + vext_set_elems_1s(vd, vta, esz, vlenb); } /* -- 2.36.1
From: eopXD <yueh.ting.chen@gmail.com> The tail elements in the destination mask register are updated under a tail-agnostic policy. Signed-off-by: eop Chen <eop.chen@sifive.com> Reviewed-by: Frank Chang <frank.chang@sifive.com> Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn> Acked-by: Alistair Francis <alistair.francis@wdc.com> Message-Id: <165449614532.19704.7000832880482980398-14@git.sr.ht> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/vector_helper.c | 30 +++++++++++++++++++++++++ target/riscv/insn_trans/trans_rvv.c.inc | 6 +++++ 2 files changed, 36 insertions(+) diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, \ uint32_t desc) \ { \ uint32_t vl = env->vl; \ + uint32_t total_elems = env_archcpu(env)->cfg.vlen; \ + uint32_t vta_all_1s = vext_vta_all_1s(desc); \ uint32_t i; \ int a, b; \ \ @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, \ vext_set_elem_mask(vd, i, OP(b, a)); \ } \ env->vstart = 0; \ + /* mask destination register are always tail- \ + * agnostic \ + */ \ + /* set tail elements to 1s */ \ + if (vta_all_1s) { \ + for (; i < total_elems; i++) { \ + vext_set_elem_mask(vd, i, 1); \ + } \ + } \ } #define DO_NAND(N, M) (!(N & M)) @@ -XXX,XX +XXX,XX @@ static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env, { uint32_t vm = vext_vm(desc); uint32_t vl = env->vl; + uint32_t total_elems = env_archcpu(env)->cfg.vlen; + uint32_t vta_all_1s = vext_vta_all_1s(desc); int i; bool first_mask_bit = false; @@ -XXX,XX +XXX,XX @@ static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env, } } env->vstart = 0; + /* mask destination register are always tail-agnostic */ + /* set tail elements to 1s */ + if (vta_all_1s) { + for (; i < total_elems; i++) { + vext_set_elem_mask(vd, i, 1); + } + } } void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env, \ { \ uint32_t vm = vext_vm(desc); \ uint32_t vl = env->vl; \ + uint32_t esz = sizeof(ETYPE); \ + uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ + uint32_t vta = vext_vta(desc); \ uint32_t sum = 0; \ int i; \ \ @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env, \ } \ } \ env->vstart = 0; \ + /* set tail elements to 1s */ \ + vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ } GEN_VEXT_VIOTA_M(viota_m_b, uint8_t, H1) @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc) \ { \ uint32_t vm = vext_vm(desc); \ uint32_t vl = env->vl; \ + uint32_t esz = sizeof(ETYPE); \ + uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ + uint32_t vta = vext_vta(desc); \ int i; \ \ for (i = env->vstart; i < vl; i++) { \ @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc) \ *((ETYPE *)vd + H(i)) = i; \ } \ env->vstart = 0; \ + /* set tail elements to 1s */ \ + vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ } GEN_VEXT_VID_V(vid_v_b, uint8_t, H1) diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn_trans/trans_rvv.c.inc +++ b/target/riscv/insn_trans/trans_rvv.c.inc @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_r *a) \ tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ \ data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ + data = \ + FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s);\ tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ vreg_ofs(s, a->rs1), \ vreg_ofs(s, a->rs2), cpu_env, \ @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ \ data = FIELD_DP32(data, VDATA, VM, a->vm); \ data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ + data = \ + FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s);\ tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), \ vreg_ofs(s, 0), vreg_ofs(s, a->rs2), \ cpu_env, s->cfg_ptr->vlen / 8, \ @@ -XXX,XX +XXX,XX @@ static bool trans_viota_m(DisasContext *s, arg_viota_m *a) data = FIELD_DP32(data, VDATA, VM, a->vm); data = FIELD_DP32(data, VDATA, LMUL, s->lmul); + data = FIELD_DP32(data, VDATA, VTA, s->vta); static gen_helper_gvec_3_ptr * const fns[4] = { gen_helper_viota_m_b, gen_helper_viota_m_h, gen_helper_viota_m_w, gen_helper_viota_m_d, @@ -XXX,XX +XXX,XX @@ static bool trans_vid_v(DisasContext *s, arg_vid_v *a) data = FIELD_DP32(data, VDATA, VM, a->vm); data = FIELD_DP32(data, VDATA, LMUL, s->lmul); + data = FIELD_DP32(data, VDATA, VTA, s->vta); static gen_helper_gvec_2_ptr * const fns[4] = { gen_helper_vid_v_b, gen_helper_vid_v_h, gen_helper_vid_v_w, gen_helper_vid_v_d, -- 2.36.1
From: eopXD <yueh.ting.chen@gmail.com> Signed-off-by: eop Chen <eop.chen@sifive.com> Reviewed-by: Frank Chang <frank.chang@sifive.com> Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn> Acked-by: Alistair Francis <alistair.francis@wdc.com> Message-Id: <165449614532.19704.7000832880482980398-15@git.sr.ht> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/vector_helper.c | 40 +++++++++++++++++++++++++ target/riscv/insn_trans/trans_rvv.c.inc | 7 +++-- 2 files changed, 45 insertions(+), 2 deletions(-) diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ { \ uint32_t vm = vext_vm(desc); \ uint32_t vl = env->vl; \ + uint32_t esz = sizeof(ETYPE); \ + uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ + uint32_t vta = vext_vta(desc); \ target_ulong offset = s1, i_min, i; \ \ i_min = MAX(env->vstart, offset); \ @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ } \ *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset)); \ } \ + /* set tail elements to 1s */ \ + vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ } /* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */ @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \ uint32_t vm = vext_vm(desc); \ uint32_t vl = env->vl; \ + uint32_t esz = sizeof(ETYPE); \ + uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ + uint32_t vta = vext_vta(desc); \ target_ulong i_max, i; \ \ i_max = MAX(MIN(s1 < vlmax ? vlmax - s1 : 0, vl), env->vstart); \ @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ } \ \ env->vstart = 0; \ + /* set tail elements to 1s */ \ + vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ } /* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */ @@ -XXX,XX +XXX,XX @@ static void vslide1up_##BITWIDTH(void *vd, void *v0, target_ulong s1, \ typedef uint##BITWIDTH##_t ETYPE; \ uint32_t vm = vext_vm(desc); \ uint32_t vl = env->vl; \ + uint32_t esz = sizeof(ETYPE); \ + uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ + uint32_t vta = vext_vta(desc); \ uint32_t i; \ \ for (i = env->vstart; i < vl; i++) { \ @@ -XXX,XX +XXX,XX @@ static void vslide1up_##BITWIDTH(void *vd, void *v0, target_ulong s1, \ } \ } \ env->vstart = 0; \ + /* set tail elements to 1s */ \ + vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ } GEN_VEXT_VSLIE1UP(8, H1) @@ -XXX,XX +XXX,XX @@ static void vslide1down_##BITWIDTH(void *vd, void *v0, target_ulong s1, \ typedef uint##BITWIDTH##_t ETYPE; \ uint32_t vm = vext_vm(desc); \ uint32_t vl = env->vl; \ + uint32_t esz = sizeof(ETYPE); \ + uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ + uint32_t vta = vext_vta(desc); \ uint32_t i; \ \ for (i = env->vstart; i < vl; i++) { \ @@ -XXX,XX +XXX,XX @@ static void vslide1down_##BITWIDTH(void *vd, void *v0, target_ulong s1, \ } \ } \ env->vstart = 0; \ + /* set tail elements to 1s */ \ + vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ } GEN_VEXT_VSLIDE1DOWN(8, H1) @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(TS2))); \ uint32_t vm = vext_vm(desc); \ uint32_t vl = env->vl; \ + uint32_t esz = sizeof(TS2); \ + uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ + uint32_t vta = vext_vta(desc); \ uint64_t index; \ uint32_t i; \ \ @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ } \ } \ env->vstart = 0; \ + /* set tail elements to 1s */ \ + vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ } /* vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; */ @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \ uint32_t vm = vext_vm(desc); \ uint32_t vl = env->vl; \ + uint32_t esz = sizeof(ETYPE); \ + uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ + uint32_t vta = vext_vta(desc); \ uint64_t index = s1; \ uint32_t i; \ \ @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ } \ } \ env->vstart = 0; \ + /* set tail elements to 1s */ \ + vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ } /* vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[rs1] */ @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ CPURISCVState *env, uint32_t desc) \ { \ uint32_t vl = env->vl; \ + uint32_t esz = sizeof(ETYPE); \ + uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ + uint32_t vta = vext_vta(desc); \ uint32_t num = 0, i; \ \ for (i = env->vstart; i < vl; i++) { \ @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ num++; \ } \ env->vstart = 0; \ + /* set tail elements to 1s */ \ + vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ } /* Compress into vd elements of vs2 where vs1 is enabled */ @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs2, \ { \ uint32_t vl = env->vl; \ uint32_t vm = vext_vm(desc); \ + uint32_t esz = sizeof(ETYPE); \ + uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ + uint32_t vta = vext_vta(desc); \ uint32_t i; \ \ for (i = env->vstart; i < vl; i++) { \ @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, void *vs2, \ *((ETYPE *)vd + HD(i)) = *((DTYPE *)vs2 + HS1(i)); \ } \ env->vstart = 0; \ + /* set tail elements to 1s */ \ + vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ } GEN_VEXT_INT_EXT(vzext_vf2_h, uint16_t, uint8_t, H2, H1) diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn_trans/trans_rvv.c.inc +++ b/target/riscv/insn_trans/trans_rvv.c.inc @@ -XXX,XX +XXX,XX @@ static bool trans_vrgather_vx(DisasContext *s, arg_rmrr *a) return false; } - if (a->vm && s->vl_eq_vlmax) { + if (a->vm && s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) { int scale = s->lmul - (s->sew + 3); int vlmax = s->cfg_ptr->vlen >> -scale; TCGv_i64 dest = tcg_temp_new_i64(); @@ -XXX,XX +XXX,XX @@ static bool trans_vrgather_vi(DisasContext *s, arg_rmrr *a) return false; } - if (a->vm && s->vl_eq_vlmax) { + if (a->vm && s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) { int scale = s->lmul - (s->sew + 3); int vlmax = s->cfg_ptr->vlen >> -scale; if (a->rs1 >= vlmax) { @@ -XXX,XX +XXX,XX @@ static bool trans_vcompress_vm(DisasContext *s, arg_r *a) tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); data = FIELD_DP32(data, VDATA, LMUL, s->lmul); + data = FIELD_DP32(data, VDATA, VTA, s->vta); tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), vreg_ofs(s, a->rs1), vreg_ofs(s, a->rs2), cpu_env, s->cfg_ptr->vlen / 8, @@ -XXX,XX +XXX,XX @@ static bool int_ext_op(DisasContext *s, arg_rmr *a, uint8_t seq) } data = FIELD_DP32(data, VDATA, VM, a->vm); + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); + data = FIELD_DP32(data, VDATA, VTA, s->vta); tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), vreg_ofs(s, a->rs2), cpu_env, -- 2.36.1
From: eopXD <eop.chen@sifive.com> According to v-spec, tail agnostic behavior can be either kept as undisturbed or set elements' bits to all 1s. To distinguish the difference of tail policies, QEMU should be able to simulate the tail agnostic behavior as "set tail elements' bits to all 1s". There are multiple possibility for agnostic elements according to v-spec. The main intent of this patch-set tries to add option that can distinguish between tail policies. Setting agnostic elements to all 1s allows QEMU to express this. This commit adds option 'rvv_ta_all_1s' is added to enable the behavior, it is default as disabled. Signed-off-by: eop Chen <eop.chen@sifive.com> Reviewed-by: Frank Chang <frank.chang@sifive.com> Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Message-Id: <165449614532.19704.7000832880482980398-16@git.sr.ht> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/cpu.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/cpu.c +++ b/target/riscv/cpu.c @@ -XXX,XX +XXX,XX @@ static Property riscv_cpu_properties[] = { DEFINE_PROP_UINT64("resetvec", RISCVCPU, cfg.resetvec, DEFAULT_RSTVEC), DEFINE_PROP_BOOL("short-isa-string", RISCVCPU, cfg.short_isa_string, false), + + DEFINE_PROP_BOOL("rvv_ta_all_1s", RISCVCPU, cfg.rvv_ta_all_1s, false), DEFINE_PROP_END_OF_LIST(), }; -- 2.36.1
From: Alistair Francis <alistair.francis@wdc.com> There are currently two types of RISC-V CPUs: - Generic CPUs (base or any) that allow complete custimisation - "Named" CPUs that match existing hardware Users can use the base CPUs to custimise the extensions that they want, for example -cpu rv64,v=true. We originally exposed these as part of the named CPUs as well, but that was by accident. Exposing the CPU properties to named CPUs means that we accidently enable extensions that don't exist on the CPUs by default. For example the SiFive E CPU currently support the zba extension, which is a bug. This patch instead only exposes the CPU extensions to the generic CPUs. Signed-off-by: Alistair Francis <alistair.francis@wdc.com> Reviewed-by: Bin Meng <bmeng.cn@gmail.com> Message-Id: <20220608061437.314434-1-alistair.francis@opensource.wdc.com> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/cpu.c | 57 +++++++++++++++++++++++++++++++++++++--------- 1 file changed, 46 insertions(+), 11 deletions(-) diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/cpu.c +++ b/target/riscv/cpu.c @@ -XXX,XX +XXX,XX @@ static const char * const riscv_intr_names[] = { "reserved" }; +static void register_cpu_props(DeviceState *dev); + const char *riscv_cpu_get_trap_name(target_ulong cause, bool async) { if (async) { @@ -XXX,XX +XXX,XX @@ static void riscv_any_cpu_init(Object *obj) set_misa(env, MXL_RV64, RVI | RVM | RVA | RVF | RVD | RVC | RVU); #endif set_priv_version(env, PRIV_VERSION_1_12_0); + register_cpu_props(DEVICE(obj)); } #if defined(TARGET_RISCV64) @@ -XXX,XX +XXX,XX @@ static void rv64_base_cpu_init(Object *obj) CPURISCVState *env = &RISCV_CPU(obj)->env; /* We set this in the realise function */ set_misa(env, MXL_RV64, 0); + register_cpu_props(DEVICE(obj)); } static void rv64_sifive_u_cpu_init(Object *obj) @@ -XXX,XX +XXX,XX @@ static void rv64_sifive_u_cpu_init(Object *obj) static void rv64_sifive_e_cpu_init(Object *obj) { CPURISCVState *env = &RISCV_CPU(obj)->env; + RISCVCPU *cpu = RISCV_CPU(obj); + set_misa(env, MXL_RV64, RVI | RVM | RVA | RVC | RVU); set_priv_version(env, PRIV_VERSION_1_10_0); - qdev_prop_set_bit(DEVICE(obj), "mmu", false); + cpu->cfg.mmu = false; } static void rv128_base_cpu_init(Object *obj) @@ -XXX,XX +XXX,XX @@ static void rv128_base_cpu_init(Object *obj) CPURISCVState *env = &RISCV_CPU(obj)->env; /* We set this in the realise function */ set_misa(env, MXL_RV128, 0); + register_cpu_props(DEVICE(obj)); } #else static void rv32_base_cpu_init(Object *obj) @@ -XXX,XX +XXX,XX @@ static void rv32_base_cpu_init(Object *obj) CPURISCVState *env = &RISCV_CPU(obj)->env; /* We set this in the realise function */ set_misa(env, MXL_RV32, 0); + register_cpu_props(DEVICE(obj)); } static void rv32_sifive_u_cpu_init(Object *obj) @@ -XXX,XX +XXX,XX @@ static void rv32_sifive_u_cpu_init(Object *obj) static void rv32_sifive_e_cpu_init(Object *obj) { CPURISCVState *env = &RISCV_CPU(obj)->env; + RISCVCPU *cpu = RISCV_CPU(obj); + set_misa(env, MXL_RV32, RVI | RVM | RVA | RVC | RVU); set_priv_version(env, PRIV_VERSION_1_10_0); - qdev_prop_set_bit(DEVICE(obj), "mmu", false); + cpu->cfg.mmu = false; } static void rv32_ibex_cpu_init(Object *obj) { CPURISCVState *env = &RISCV_CPU(obj)->env; + RISCVCPU *cpu = RISCV_CPU(obj); + set_misa(env, MXL_RV32, RVI | RVM | RVC | RVU); set_priv_version(env, PRIV_VERSION_1_10_0); - qdev_prop_set_bit(DEVICE(obj), "mmu", false); - qdev_prop_set_bit(DEVICE(obj), "x-epmp", true); + cpu->cfg.mmu = false; + cpu->cfg.epmp = true; } static void rv32_imafcu_nommu_cpu_init(Object *obj) { CPURISCVState *env = &RISCV_CPU(obj)->env; + RISCVCPU *cpu = RISCV_CPU(obj); + set_misa(env, MXL_RV32, RVI | RVM | RVA | RVF | RVC | RVU); set_priv_version(env, PRIV_VERSION_1_10_0); set_resetvec(env, DEFAULT_RSTVEC); - qdev_prop_set_bit(DEVICE(obj), "mmu", false); + cpu->cfg.mmu = false; } #endif @@ -XXX,XX +XXX,XX @@ static void riscv_host_cpu_init(Object *obj) #elif defined(TARGET_RISCV64) set_misa(env, MXL_RV64, 0); #endif + register_cpu_props(DEVICE(obj)); } #endif @@ -XXX,XX +XXX,XX @@ static void riscv_cpu_init(Object *obj) { RISCVCPU *cpu = RISCV_CPU(obj); + cpu->cfg.ext_counters = true; + cpu->cfg.ext_ifencei = true; + cpu->cfg.ext_icsr = true; + cpu->cfg.mmu = true; + cpu->cfg.pmp = true; + cpu_set_cpustate_pointers(cpu); #ifndef CONFIG_USER_ONLY @@ -XXX,XX +XXX,XX @@ static void riscv_cpu_init(Object *obj) #endif /* CONFIG_USER_ONLY */ } -static Property riscv_cpu_properties[] = { +static Property riscv_cpu_extensions[] = { /* Defaults for standard extensions */ DEFINE_PROP_BOOL("i", RISCVCPU, cfg.ext_i, true), DEFINE_PROP_BOOL("e", RISCVCPU, cfg.ext_e, false), @@ -XXX,XX +XXX,XX @@ static Property riscv_cpu_properties[] = { DEFINE_PROP_BOOL("Zve64f", RISCVCPU, cfg.ext_zve64f, false), DEFINE_PROP_BOOL("mmu", RISCVCPU, cfg.mmu, true), DEFINE_PROP_BOOL("pmp", RISCVCPU, cfg.pmp, true), - DEFINE_PROP_BOOL("debug", RISCVCPU, cfg.debug, true), DEFINE_PROP_STRING("priv_spec", RISCVCPU, cfg.priv_spec), DEFINE_PROP_STRING("vext_spec", RISCVCPU, cfg.vext_spec), DEFINE_PROP_UINT16("vlen", RISCVCPU, cfg.vlen, 128), DEFINE_PROP_UINT16("elen", RISCVCPU, cfg.elen, 64), - DEFINE_PROP_UINT32("mvendorid", RISCVCPU, cfg.mvendorid, 0), - DEFINE_PROP_UINT64("marchid", RISCVCPU, cfg.marchid, RISCV_CPU_MARCHID), - DEFINE_PROP_UINT64("mimpid", RISCVCPU, cfg.mimpid, RISCV_CPU_MIMPID), - DEFINE_PROP_BOOL("svinval", RISCVCPU, cfg.ext_svinval, false), DEFINE_PROP_BOOL("svnapot", RISCVCPU, cfg.ext_svnapot, false), DEFINE_PROP_BOOL("svpbmt", RISCVCPU, cfg.ext_svpbmt, false), @@ -XXX,XX +XXX,XX @@ static Property riscv_cpu_properties[] = { DEFINE_PROP_BOOL("x-epmp", RISCVCPU, cfg.epmp, false), DEFINE_PROP_BOOL("x-aia", RISCVCPU, cfg.aia, false), + DEFINE_PROP_END_OF_LIST(), +}; + +static void register_cpu_props(DeviceState *dev) +{ + Property *prop; + + for (prop = riscv_cpu_extensions; prop && prop->name; prop++) { + qdev_property_add_static(dev, prop); + } +} + +static Property riscv_cpu_properties[] = { + DEFINE_PROP_BOOL("debug", RISCVCPU, cfg.debug, true), + + DEFINE_PROP_UINT32("mvendorid", RISCVCPU, cfg.mvendorid, 0), + DEFINE_PROP_UINT64("marchid", RISCVCPU, cfg.marchid, RISCV_CPU_MARCHID), + DEFINE_PROP_UINT64("mimpid", RISCVCPU, cfg.mimpid, RISCV_CPU_MIMPID), + DEFINE_PROP_UINT64("resetvec", RISCVCPU, cfg.resetvec, DEFAULT_RSTVEC), DEFINE_PROP_BOOL("short-isa-string", RISCVCPU, cfg.short_isa_string, false), -- 2.36.1
From: Alistair Francis <alistair.francis@wdc.com> When running a 32-bit guest, with a e64 vmv.v.x and vl_eq_vlmax set to true the `tcg_debug_assert(vece <= MO_32)` will be triggered inside tcg_gen_gvec_dup_i32(). This patch checks that condition and instead uses tcg_gen_gvec_dup_i64() is required. Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1028 Suggested-by: Robert Bu <robert.bu@gmail.com> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Message-Id: <20220608234701.369536-1-alistair.francis@opensource.wdc.com> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/insn_trans/trans_rvv.c.inc | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn_trans/trans_rvv.c.inc +++ b/target/riscv/insn_trans/trans_rvv.c.inc @@ -XXX,XX +XXX,XX @@ static bool trans_vmv_v_x(DisasContext *s, arg_vmv_v_x *a) s1 = get_gpr(s, a->rs1, EXT_SIGN); if (s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) { - tcg_gen_gvec_dup_tl(s->sew, vreg_ofs(s, a->rd), - MAXSZ(s), MAXSZ(s), s1); + if (get_xl(s) == MXL_RV32 && s->sew == MO_64) { + TCGv_i64 s1_i64 = tcg_temp_new_i64(); + tcg_gen_ext_tl_i64(s1_i64, s1); + tcg_gen_gvec_dup_i64(s->sew, vreg_ofs(s, a->rd), + MAXSZ(s), MAXSZ(s), s1_i64); + tcg_temp_free_i64(s1_i64); + } else { + tcg_gen_gvec_dup_tl(s->sew, vreg_ofs(s, a->rd), + MAXSZ(s), MAXSZ(s), s1); + } } else { TCGv_i32 desc; TCGv_i64 s1_i64 = tcg_temp_new_i64(); -- 2.36.1
From: Alistair Francis <alistair.francis@wdc.com> The following changes since commit c77283dd5d79149f4e7e9edd00f65416c648ee59: Merge tag 'pull-request-2025-07-02' of https://gitlab.com/thuth/qemu into staging (2025-07-03 06:01:41 -0400) are available in the Git repository at: https://github.com/alistair23/qemu.git tags/pull-riscv-to-apply-20250704 for you to fetch changes up to dc8bffc4eb0a93d3266cea1b17f8848dea5b915c: target: riscv: Add Svrsw60t59b extension support (2025-07-04 21:09:49 +1000) ---------------------------------------------------------------- Second RISC-V PR for 10.1 * sstc extension fixes * Fix zama16b order in isa_edata_arr * Profile handling fixes * Extend PMP region up to 64 * Remove capital 'Z' CPU properties * Add missing named features * Support atomic instruction fetch (Ziccif) * Add max_satp_mode from host cpu * Extend and configure PMP region count * Fix PPN field of Translation-reponse register * Use qemu_chr_fe_write_all() in DBCN_CONSOLE_WRITE_BYTE * Fix fcvt.s.bf16 NaN box checking * Avoid infinite delay of async xmit function * Device tree reg cleanups * Add Kunminghu CPU and platform * Fix missing exit TB flow for ldff_trans * Fix migration failure when aia is configured as aplic-imsic * Fix MEPC/SEPC bit masking for IALIGN * Add a property to set vill bit on reserved usage of vsetvli instruction * Add Svrsw60t59b extension support ---------------------------------------------------------------- Alexandre Ghiti (1): target: riscv: Add Svrsw60t59b extension support Anton Blanchard (1): target/riscv: Fix fcvt.s.bf16 NaN box checking Charalampos Mitrodimas (2): target/riscv: Fix MEPC/SEPC bit masking for IALIGN tests/tcg/riscv64: Add test for MEPC bit masking Daniel Henrique Barboza (9): target/riscv/cpu.c: fix zama16b order in isa_edata_arr[] target/riscv/tcg: restrict satp_mode changes in cpu_set_profile target/riscv/tcg: decouple profile enablement from user prop target/riscv: add profile->present flag target/riscv: remove capital 'Z' CPU properties target/riscv/cpu.c: add 'sdtrig' in riscv,isa target/riscv/cpu.c: add 'ssstrict' to riscv, isa target/riscv/cpu.c: do better with 'named features' doc target/riscv: use qemu_chr_fe_write_all() in DBCN_CONSOLE_WRITE_BYTE Florian Lugou (1): hw/char: sifive_uart: Avoid infinite delay of async xmit function Huang Borong (2): target/riscv: Add BOSC's Xiangshan Kunminghu CPU hw/riscv: Initial support for BOSC's Xiangshan Kunminghu FPGA prototype Jay Chang (2): target/riscv: Extend PMP region up to 64 target/riscv: Make PMP region count configurable Jim Shu (5): target/riscv: Add the checking into stimecmp write function. hw/intc: riscv_aclint: Fix mtime write for sstc extension target/riscv: Fix VSTIP bit in sstc extension. target/riscv: Enable/Disable S/VS-mode Timer when STCE bit is changed target/riscv: support atomic instruction fetch (Ziccif) Joel Stanley (12): hw/riscv/virt: Fix clint base address type hw/riscv/virt: Use setprop_sized_cells for clint hw/riscv/virt: Use setprop_sized_cells for memory hw/riscv/virt: Use setprop_sized_cells for aplic hw/riscv/virt: Use setprop_sized_cells for aclint hw/riscv/virt: Use setprop_sized_cells for plic hw/riscv/virt: Use setprop_sized_cells for virtio hw/riscv/virt: Use setprop_sized_cells for reset hw/riscv/virt: Use setprop_sized_cells for uart hw/riscv/virt: Use setprop_sized_cells for rtc hw/riscv/virt: Use setprop_sized_cells for iommu hw/riscv/virt: Use setprop_sized_cells for pcie Max Chou (1): target/riscv: rvv: Fix missing exit TB flow for ldff_trans Meng Zhuo (1): target/riscv/kvm: add max_satp_mode from host cpu Nutty Liu (1): hw/riscv/riscv-iommu: Fix PPN field of Translation-reponse register Vasilis Liaskovitis (1): target/riscv: Add a property to set vill bit on reserved usage of vsetvli instruction liu.xuemei1@zte.com.cn (1): migration: Fix migration failure when aia is configured as aplic-imsic MAINTAINERS | 7 + docs/system/riscv/xiangshan-kunminghu.rst | 39 +++++ docs/system/target-riscv.rst | 1 + configs/devices/riscv64-softmmu/default.mak | 1 + hw/riscv/riscv-iommu-bits.h | 1 + include/hw/riscv/xiangshan_kmh.h | 68 +++++++++ target/riscv/cpu-qom.h | 1 + target/riscv/cpu.h | 19 ++- target/riscv/cpu_bits.h | 63 +++++++- target/riscv/helper.h | 2 +- target/riscv/internals.h | 27 ++++ target/riscv/time_helper.h | 1 + target/riscv/cpu_cfg_fields.h.inc | 3 + hw/char/sifive_uart.c | 6 +- hw/intc/riscv_aclint.c | 5 + hw/intc/riscv_aplic.c | 12 +- hw/intc/riscv_imsic.c | 10 +- hw/riscv/riscv-iommu.c | 9 +- hw/riscv/virt.c | 66 ++++----- hw/riscv/xiangshan_kmh.c | 220 ++++++++++++++++++++++++++++ target/riscv/cpu.c | 144 +++++++++++++++--- target/riscv/cpu_helper.c | 3 +- target/riscv/csr.c | 192 +++++++++++++++++++++++- target/riscv/fpu_helper.c | 2 +- target/riscv/kvm/kvm-cpu.c | 18 ++- target/riscv/machine.c | 3 +- target/riscv/op_helper.c | 4 +- target/riscv/pmp.c | 28 ++-- target/riscv/riscv-qmp-cmds.c | 2 +- target/riscv/tcg/tcg-cpu.c | 186 +++++++++++------------ target/riscv/time_helper.c | 65 +++++++- target/riscv/translate.c | 46 ++++-- target/riscv/vector_helper.c | 12 +- target/riscv/insn_trans/trans_rvv.c.inc | 10 +- hw/riscv/Kconfig | 9 ++ hw/riscv/meson.build | 1 + tests/data/acpi/riscv64/virt/RHCT | Bin 400 -> 416 bytes tests/tcg/riscv64/Makefile.softmmu-target | 4 + tests/tcg/riscv64/test-mepc-masking.S | 73 +++++++++ 39 files changed, 1151 insertions(+), 212 deletions(-) create mode 100644 docs/system/riscv/xiangshan-kunminghu.rst create mode 100644 include/hw/riscv/xiangshan_kmh.h create mode 100644 hw/riscv/xiangshan_kmh.c create mode 100644 tests/tcg/riscv64/test-mepc-masking.S
From: Jim Shu <jim.shu@sifive.com> Preparation commit to let aclint timer to use stimecmp write function. Aclint timer doesn't call sstc() predicate so we need to check inside the stimecmp write function. Signed-off-by: Jim Shu <jim.shu@sifive.com> Acked-by: Alistair Francis <alistair.francis@wdc.com> Message-ID: <20250519143518.11086-2-jim.shu@sifive.com> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/time_helper.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/target/riscv/time_helper.c b/target/riscv/time_helper.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/time_helper.c +++ b/target/riscv/time_helper.c @@ -XXX,XX +XXX,XX @@ void riscv_timer_write_timecmp(CPURISCVState *env, QEMUTimer *timer, { uint64_t diff, ns_diff, next; RISCVAclintMTimerState *mtimer = env->rdtime_fn_arg; - uint32_t timebase_freq = mtimer->timebase_freq; - uint64_t rtc_r = env->rdtime_fn(env->rdtime_fn_arg) + delta; + uint32_t timebase_freq; + uint64_t rtc_r; + + if (!riscv_cpu_cfg(env)->ext_sstc || !env->rdtime_fn || + !env->rdtime_fn_arg || !get_field(env->menvcfg, MENVCFG_STCE)) { + /* S/VS Timer IRQ depends on sstc extension, rdtime_fn(), and STCE. */ + return; + } + + if (timer_irq == MIP_VSTIP && + (!riscv_has_ext(env, RVH) || !get_field(env->henvcfg, HENVCFG_STCE))) { + /* VS Timer IRQ also depends on RVH and henvcfg.STCE. */ + return; + } + + timebase_freq = mtimer->timebase_freq; + rtc_r = env->rdtime_fn(env->rdtime_fn_arg) + delta; if (timecmp <= rtc_r) { /* -- 2.50.0
From: Jim Shu <jim.shu@sifive.com> When changing the mtime value, the period of [s|vs]timecmp timers should also be updated, similar to the period of mtimecmp timer. The period of the stimecmp timer is the time until the next S-mode timer IRQ. The value is calculated as "stimecmp - time". [1] It is equal to "stimecmp - mtime" since the time CSR is a read-only shadow of the memory-mapped mtime register. Thus, changing mtime value will update the period of stimecmp timer. Similarly, the period of vstimecmp timer is calculated as "vstimecmp - (mtime + htimedelta)" [2], so changing mtime value will update the period of vstimecmp timer. [1] RISC-V Priv spec ch 9.1.1. Supervisor Timer (stimecmp) Register A supervisor timer interrupt becomes pending, as reflected in the STIP bit in the mip and sip registers whenever time contains a value greater than or equal to stimecmp. [2] RISC-V Priv spec ch19.2.1. Virtual Supervisor Timer (vstimecmp) Register A virtual supervisor timer interrupt becomes pending, as reflected in the VSTIP bit in the hip register, whenever (time + htimedelta), truncated to 64 bits, contains a value greater than or equal to vstimecmp Signed-off-by: Jim Shu <jim.shu@sifive.com> Acked-by: Alistair Francis <alistair.francis@wdc.com> Message-ID: <20250519143518.11086-3-jim.shu@sifive.com> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- hw/intc/riscv_aclint.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/hw/intc/riscv_aclint.c b/hw/intc/riscv_aclint.c index XXXXXXX..XXXXXXX 100644 --- a/hw/intc/riscv_aclint.c +++ b/hw/intc/riscv_aclint.c @@ -XXX,XX +XXX,XX @@ #include "qemu/module.h" #include "hw/sysbus.h" #include "target/riscv/cpu.h" +#include "target/riscv/time_helper.h" #include "hw/qdev-properties.h" #include "hw/intc/riscv_aclint.h" #include "qemu/timer.h" @@ -XXX,XX +XXX,XX @@ static void riscv_aclint_mtimer_write(void *opaque, hwaddr addr, riscv_aclint_mtimer_write_timecmp(mtimer, RISCV_CPU(cpu), mtimer->hartid_base + i, mtimer->timecmp[i]); + riscv_timer_write_timecmp(env, env->stimer, env->stimecmp, 0, MIP_STIP); + riscv_timer_write_timecmp(env, env->vstimer, env->vstimecmp, + env->htimedelta, MIP_VSTIP); + } return; } -- 2.50.0
From: Jim Shu <jim.shu@sifive.com> VSTIP is only writable when both [mh]envcfg.STCE is enabled, or it will revert it's defined behavior as if sstc extension is not implemented. Signed-off-by: Jim Shu <jim.shu@sifive.com> Acked-by: Alistair Francis <alistair.francis@wdc.com> Message-ID: <20250519143518.11086-4-jim.shu@sifive.com> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/csr.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/target/riscv/csr.c b/target/riscv/csr.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/csr.c +++ b/target/riscv/csr.c @@ -XXX,XX +XXX,XX @@ static RISCVException rmw_mip64(CPURISCVState *env, int csrno, if (riscv_cpu_cfg(env)->ext_sstc && (env->priv == PRV_M) && get_field(env->menvcfg, MENVCFG_STCE)) { /* sstc extension forbids STIP & VSTIP to be writeable in mip */ - mask = mask & ~(MIP_STIP | MIP_VSTIP); + + /* STIP is not writable when menvcfg.STCE is enabled. */ + mask = mask & ~MIP_STIP; + + /* VSTIP is not writable when both [mh]envcfg.STCE are enabled. */ + if (get_field(env->henvcfg, HENVCFG_STCE)) { + mask = mask & ~MIP_VSTIP; + } } if (mask) { -- 2.50.0
From: Jim Shu <jim.shu@sifive.com> Updating STCE will enable/disable SSTC in S-mode or/and VS-mode, so we also need to update S/VS-mode Timer and S/VSTIP bits in $mip CSR. Signed-off-by: Jim Shu <jim.shu@sifive.com> Acked-by: Alistair Francis <alistair.francis@wdc.com> Message-ID: <20250519143518.11086-5-jim.shu@sifive.com> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/time_helper.h | 1 + target/riscv/csr.c | 46 ++++++++++++++++++++++++++++++++++++++ target/riscv/time_helper.c | 46 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 93 insertions(+) diff --git a/target/riscv/time_helper.h b/target/riscv/time_helper.h index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/time_helper.h +++ b/target/riscv/time_helper.h @@ -XXX,XX +XXX,XX @@ void riscv_timer_write_timecmp(CPURISCVState *env, QEMUTimer *timer, uint64_t timecmp, uint64_t delta, uint32_t timer_irq); +void riscv_timer_stce_changed(CPURISCVState *env, bool is_m_mode, bool enable); void riscv_timer_init(RISCVCPU *cpu); #endif diff --git a/target/riscv/csr.c b/target/riscv/csr.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/csr.c +++ b/target/riscv/csr.c @@ -XXX,XX +XXX,XX @@ static RISCVException write_menvcfg(CPURISCVState *env, int csrno, const RISCVCPUConfig *cfg = riscv_cpu_cfg(env); uint64_t mask = MENVCFG_FIOM | MENVCFG_CBIE | MENVCFG_CBCFE | MENVCFG_CBZE | MENVCFG_CDE; + bool stce_changed = false; if (riscv_cpu_mxl(env) == MXL_RV64) { mask |= (cfg->ext_svpbmt ? MENVCFG_PBMTE : 0) | @@ -XXX,XX +XXX,XX @@ static RISCVException write_menvcfg(CPURISCVState *env, int csrno, if ((val & MENVCFG_DTE) == 0) { env->mstatus &= ~MSTATUS_SDT; } + + if (cfg->ext_sstc && + ((env->menvcfg & MENVCFG_STCE) != (val & MENVCFG_STCE))) { + stce_changed = true; + } } env->menvcfg = (env->menvcfg & ~mask) | (val & mask); + + if (stce_changed) { + riscv_timer_stce_changed(env, true, !!(val & MENVCFG_STCE)); + } + return write_henvcfg(env, CSR_HENVCFG, env->henvcfg, ra); } @@ -XXX,XX +XXX,XX @@ static RISCVException write_menvcfgh(CPURISCVState *env, int csrno, (cfg->ext_smcdeleg ? MENVCFG_CDE : 0) | (cfg->ext_ssdbltrp ? MENVCFG_DTE : 0); uint64_t valh = (uint64_t)val << 32; + bool stce_changed = false; + + if (cfg->ext_sstc && + ((env->menvcfg & MENVCFG_STCE) != (valh & MENVCFG_STCE))) { + stce_changed = true; + } if ((valh & MENVCFG_DTE) == 0) { env->mstatus &= ~MSTATUS_SDT; } env->menvcfg = (env->menvcfg & ~mask) | (valh & mask); + + if (stce_changed) { + riscv_timer_stce_changed(env, true, !!(valh & MENVCFG_STCE)); + } + return write_henvcfgh(env, CSR_HENVCFGH, env->henvcfg >> 32, ra); } @@ -XXX,XX +XXX,XX @@ static RISCVException read_henvcfg(CPURISCVState *env, int csrno, static RISCVException write_henvcfg(CPURISCVState *env, int csrno, target_ulong val, uintptr_t ra) { + const RISCVCPUConfig *cfg = riscv_cpu_cfg(env); uint64_t mask = HENVCFG_FIOM | HENVCFG_CBIE | HENVCFG_CBCFE | HENVCFG_CBZE; RISCVException ret; + bool stce_changed = false; ret = smstateen_acc_ok(env, 0, SMSTATEEN0_HSENVCFG); if (ret != RISCV_EXCP_NONE) { @@ -XXX,XX +XXX,XX @@ static RISCVException write_henvcfg(CPURISCVState *env, int csrno, get_field(val, HENVCFG_PMM) != PMM_FIELD_RESERVED) { mask |= HENVCFG_PMM; } + + if (cfg->ext_sstc && + ((env->henvcfg & HENVCFG_STCE) != (val & HENVCFG_STCE))) { + stce_changed = true; + } } env->henvcfg = val & mask; @@ -XXX,XX +XXX,XX @@ static RISCVException write_henvcfg(CPURISCVState *env, int csrno, env->vsstatus &= ~MSTATUS_SDT; } + if (stce_changed) { + riscv_timer_stce_changed(env, false, !!(val & HENVCFG_STCE)); + } + return RISCV_EXCP_NONE; } @@ -XXX,XX +XXX,XX @@ static RISCVException read_henvcfgh(CPURISCVState *env, int csrno, static RISCVException write_henvcfgh(CPURISCVState *env, int csrno, target_ulong val, uintptr_t ra) { + const RISCVCPUConfig *cfg = riscv_cpu_cfg(env); uint64_t mask = env->menvcfg & (HENVCFG_PBMTE | HENVCFG_STCE | HENVCFG_ADUE | HENVCFG_DTE); uint64_t valh = (uint64_t)val << 32; RISCVException ret; + bool stce_changed = false; ret = smstateen_acc_ok(env, 0, SMSTATEEN0_HSENVCFG); if (ret != RISCV_EXCP_NONE) { return ret; } + + if (cfg->ext_sstc && + ((env->henvcfg & HENVCFG_STCE) != (valh & HENVCFG_STCE))) { + stce_changed = true; + } + env->henvcfg = (env->henvcfg & 0xFFFFFFFF) | (valh & mask); if ((env->henvcfg & HENVCFG_DTE) == 0) { env->vsstatus &= ~MSTATUS_SDT; } + + if (stce_changed) { + riscv_timer_stce_changed(env, false, !!(val & HENVCFG_STCE)); + } + return RISCV_EXCP_NONE; } diff --git a/target/riscv/time_helper.c b/target/riscv/time_helper.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/time_helper.c +++ b/target/riscv/time_helper.c @@ -XXX,XX +XXX,XX @@ void riscv_timer_write_timecmp(CPURISCVState *env, QEMUTimer *timer, timer_mod(timer, next); } +/* + * When disabling xenvcfg.STCE, the S/VS Timer may be disabled at the same time. + * It is safe to call this function regardless of whether the timer has been + * deleted or not. timer_del() will do nothing if the timer has already + * been deleted. + */ +static void riscv_timer_disable_timecmp(CPURISCVState *env, QEMUTimer *timer, + uint32_t timer_irq) +{ + /* Disable S-mode Timer IRQ and HW-based STIP */ + if ((timer_irq == MIP_STIP) && !get_field(env->menvcfg, MENVCFG_STCE)) { + riscv_cpu_update_mip(env, timer_irq, BOOL_TO_MASK(0)); + timer_del(timer); + return; + } + + /* Disable VS-mode Timer IRQ and HW-based VSTIP */ + if ((timer_irq == MIP_VSTIP) && + (!get_field(env->menvcfg, MENVCFG_STCE) || + !get_field(env->henvcfg, HENVCFG_STCE))) { + env->vstime_irq = 0; + riscv_cpu_update_mip(env, 0, BOOL_TO_MASK(0)); + timer_del(timer); + return; + } +} + +/* Enable or disable S/VS-mode Timer when xenvcfg.STCE is changed */ +void riscv_timer_stce_changed(CPURISCVState *env, bool is_m_mode, bool enable) +{ + if (enable) { + riscv_timer_write_timecmp(env, env->vstimer, env->vstimecmp, + env->htimedelta, MIP_VSTIP); + } else { + riscv_timer_disable_timecmp(env, env->vstimer, MIP_VSTIP); + } + + if (is_m_mode) { + if (enable) { + riscv_timer_write_timecmp(env, env->stimer, env->stimecmp, 0, MIP_STIP); + } else { + riscv_timer_disable_timecmp(env, env->stimer, MIP_STIP); + } + } +} + void riscv_timer_init(RISCVCPU *cpu) { CPURISCVState *env; -- 2.50.0
From: Daniel Henrique Barboza <dbarboza@ventanamicro.com> Put it after zalrsc and before zawrs. Cc: qemu-trivial@nongnu.org Fixes: a60ce58fd9 ("target/riscv: Support Zama16b extension") Signed-off-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Message-ID: <20250522113344.823294-1-dbarboza@ventanamicro.com> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/cpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/cpu.c +++ b/target/riscv/cpu.c @@ -XXX,XX +XXX,XX @@ const RISCVIsaExtData isa_edata_arr[] = { ISA_EXT_DATA_ENTRY(zaamo, PRIV_VERSION_1_12_0, ext_zaamo), ISA_EXT_DATA_ENTRY(zabha, PRIV_VERSION_1_13_0, ext_zabha), ISA_EXT_DATA_ENTRY(zacas, PRIV_VERSION_1_12_0, ext_zacas), - ISA_EXT_DATA_ENTRY(zama16b, PRIV_VERSION_1_13_0, ext_zama16b), ISA_EXT_DATA_ENTRY(zalrsc, PRIV_VERSION_1_12_0, ext_zalrsc), + ISA_EXT_DATA_ENTRY(zama16b, PRIV_VERSION_1_13_0, ext_zama16b), ISA_EXT_DATA_ENTRY(zawrs, PRIV_VERSION_1_12_0, ext_zawrs), ISA_EXT_DATA_ENTRY(zfa, PRIV_VERSION_1_12_0, ext_zfa), ISA_EXT_DATA_ENTRY(zfbfmin, PRIV_VERSION_1_12_0, ext_zfbfmin), -- 2.50.0
From: Daniel Henrique Barboza <dbarboza@ventanamicro.com> We're changing 'mmu' to true regardless of whether the profile is being enabled or not, and at the same time we're changing satp_mode to profile->enabled. This will promote a situation where we'll set mmu=on without a virtual memory mode, which is a mistake. Only touch 'mmu' and satp_mode if the profile is being enabled. Suggested-by: Andrew Jones <ajones@ventanamicro.com> Fixes: 55398025e7 ("target/riscv: add satp_mode profile support") Signed-off-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> Reviewed-by: Andrew Jones <ajones@ventanamicro.com> Reviewed-by: Björn Töpel <bjorn@rivosinc.com> Tested-by: Björn Töpel <bjorn@rivosinc.com> Message-ID: <20250528184407.1451983-2-dbarboza@ventanamicro.com> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/tcg/tcg-cpu.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/target/riscv/tcg/tcg-cpu.c b/target/riscv/tcg/tcg-cpu.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/tcg/tcg-cpu.c +++ b/target/riscv/tcg/tcg-cpu.c @@ -XXX,XX +XXX,XX @@ static void cpu_set_profile(Object *obj, Visitor *v, const char *name, if (profile->enabled) { cpu->env.priv_ver = profile->priv_spec; - } #ifndef CONFIG_USER_ONLY - if (profile->satp_mode != RISCV_PROFILE_ATTR_UNUSED) { - object_property_set_bool(obj, "mmu", true, NULL); - const char *satp_prop = satp_mode_str(profile->satp_mode, - riscv_cpu_is_32bit(cpu)); - object_property_set_bool(obj, satp_prop, profile->enabled, NULL); - } + if (profile->satp_mode != RISCV_PROFILE_ATTR_UNUSED) { + object_property_set_bool(obj, "mmu", true, NULL); + const char *satp_prop = satp_mode_str(profile->satp_mode, + riscv_cpu_is_32bit(cpu)); + object_property_set_bool(obj, satp_prop, true, NULL); + } #endif + } for (i = 0; misa_bits[i] != 0; i++) { uint32_t bit = misa_bits[i]; -- 2.50.0
From: Daniel Henrique Barboza <dbarboza@ventanamicro.com> We have code in riscv_cpu_add_profiles() to enable a profile right away in case a CPU chose the profile during its cpu_init(). But we're using the user callback option to do so, setting profile->user_set. Create a new helper that does all the grunt work to enable/disable a given profile. Use this new helper in the cases where we want a CPU to be compatible to a certain profile, leaving the user callback to be used exclusively by users. Fixes: fba92a92e3 ("target/riscv: add 'rva22u64' CPU") Signed-off-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> Reviewed-by: Andrew Jones <ajones@ventanamicro.com> Reviewed-by: Björn Töpel <bjorn@rivosinc.com> Tested-by: Björn Töpel <bjorn@rivosinc.com> Message-ID: <20250528184407.1451983-3-dbarboza@ventanamicro.com> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/tcg/tcg-cpu.c | 127 +++++++++++++++++++------------------ 1 file changed, 67 insertions(+), 60 deletions(-) diff --git a/target/riscv/tcg/tcg-cpu.c b/target/riscv/tcg/tcg-cpu.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/tcg/tcg-cpu.c +++ b/target/riscv/tcg/tcg-cpu.c @@ -XXX,XX +XXX,XX @@ static bool riscv_cpu_is_generic(Object *cpu_obj) return object_dynamic_cast(cpu_obj, TYPE_RISCV_DYNAMIC_CPU) != NULL; } +static void riscv_cpu_set_profile(RISCVCPU *cpu, + RISCVCPUProfile *profile, + bool enabled) +{ + int i, ext_offset; + + if (profile->u_parent != NULL) { + riscv_cpu_set_profile(cpu, profile->u_parent, enabled); + } + + if (profile->s_parent != NULL) { + riscv_cpu_set_profile(cpu, profile->s_parent, enabled); + } + + profile->enabled = enabled; + + if (profile->enabled) { + cpu->env.priv_ver = profile->priv_spec; + +#ifndef CONFIG_USER_ONLY + if (profile->satp_mode != RISCV_PROFILE_ATTR_UNUSED) { + object_property_set_bool(OBJECT(cpu), "mmu", true, NULL); + const char *satp_prop = satp_mode_str(profile->satp_mode, + riscv_cpu_is_32bit(cpu)); + object_property_set_bool(OBJECT(cpu), satp_prop, true, NULL); + } +#endif + } + + for (i = 0; misa_bits[i] != 0; i++) { + uint32_t bit = misa_bits[i]; + + if (!(profile->misa_ext & bit)) { + continue; + } + + if (bit == RVI && !profile->enabled) { + /* + * Disabling profiles will not disable the base + * ISA RV64I. + */ + continue; + } + + cpu_misa_ext_add_user_opt(bit, profile->enabled); + riscv_cpu_write_misa_bit(cpu, bit, profile->enabled); + } + + for (i = 0; profile->ext_offsets[i] != RISCV_PROFILE_EXT_LIST_END; i++) { + ext_offset = profile->ext_offsets[i]; + + if (profile->enabled) { + if (cpu_cfg_offset_is_named_feat(ext_offset)) { + riscv_cpu_enable_named_feat(cpu, ext_offset); + } + + cpu_bump_multi_ext_priv_ver(&cpu->env, ext_offset); + } + + cpu_cfg_ext_add_user_opt(ext_offset, profile->enabled); + isa_ext_update_enabled(cpu, ext_offset, profile->enabled); + } +} + /* * We'll get here via the following path: * @@ -XXX,XX +XXX,XX @@ static void cpu_set_profile(Object *obj, Visitor *v, const char *name, RISCVCPUProfile *profile = opaque; RISCVCPU *cpu = RISCV_CPU(obj); bool value; - int i, ext_offset; if (riscv_cpu_is_vendor(obj)) { error_setg(errp, "Profile %s is not available for vendor CPUs", @@ -XXX,XX +XXX,XX @@ static void cpu_set_profile(Object *obj, Visitor *v, const char *name, } profile->user_set = true; - profile->enabled = value; - - if (profile->u_parent != NULL) { - object_property_set_bool(obj, profile->u_parent->name, - profile->enabled, NULL); - } - - if (profile->s_parent != NULL) { - object_property_set_bool(obj, profile->s_parent->name, - profile->enabled, NULL); - } - - if (profile->enabled) { - cpu->env.priv_ver = profile->priv_spec; - -#ifndef CONFIG_USER_ONLY - if (profile->satp_mode != RISCV_PROFILE_ATTR_UNUSED) { - object_property_set_bool(obj, "mmu", true, NULL); - const char *satp_prop = satp_mode_str(profile->satp_mode, - riscv_cpu_is_32bit(cpu)); - object_property_set_bool(obj, satp_prop, true, NULL); - } -#endif - } - - for (i = 0; misa_bits[i] != 0; i++) { - uint32_t bit = misa_bits[i]; - - if (!(profile->misa_ext & bit)) { - continue; - } - if (bit == RVI && !profile->enabled) { - /* - * Disabling profiles will not disable the base - * ISA RV64I. - */ - continue; - } - - cpu_misa_ext_add_user_opt(bit, profile->enabled); - riscv_cpu_write_misa_bit(cpu, bit, profile->enabled); - } - - for (i = 0; profile->ext_offsets[i] != RISCV_PROFILE_EXT_LIST_END; i++) { - ext_offset = profile->ext_offsets[i]; - - if (profile->enabled) { - if (cpu_cfg_offset_is_named_feat(ext_offset)) { - riscv_cpu_enable_named_feat(cpu, ext_offset); - } - - cpu_bump_multi_ext_priv_ver(&cpu->env, ext_offset); - } - - cpu_cfg_ext_add_user_opt(ext_offset, profile->enabled); - isa_ext_update_enabled(cpu, ext_offset, profile->enabled); - } + riscv_cpu_set_profile(cpu, profile, value); } static void cpu_get_profile(Object *obj, Visitor *v, const char *name, @@ -XXX,XX +XXX,XX @@ static void cpu_get_profile(Object *obj, Visitor *v, const char *name, static void riscv_cpu_add_profiles(Object *cpu_obj) { for (int i = 0; riscv_profiles[i] != NULL; i++) { - const RISCVCPUProfile *profile = riscv_profiles[i]; + RISCVCPUProfile *profile = riscv_profiles[i]; object_property_add(cpu_obj, profile->name, "bool", cpu_get_profile, cpu_set_profile, @@ -XXX,XX +XXX,XX @@ static void riscv_cpu_add_profiles(Object *cpu_obj) * case. */ if (profile->enabled) { - object_property_set_bool(cpu_obj, profile->name, true, NULL); + riscv_cpu_set_profile(RISCV_CPU(cpu_obj), profile, true); } } } -- 2.50.0
From: Daniel Henrique Barboza <dbarboza@ventanamicro.com> Björn reported in [1] a case where a rv64 CPU is going through the profile code path to enable satp mode. In this case,the amount of extensions on top of the rv64 CPU made it compliant with the RVA22S64 profile during the validation of CPU 0. When the subsequent CPUs were initialized the static profile object has the 'enable' flag set, enabling the profile code path for those CPUs. This happens because we are initializing and realizing each CPU before going to the next, i.e. init and realize CPU0, then init and realize CPU1 and so on. If we change any persistent state during the validation of CPU N it will interfere with the init/realization of CPU N+1. We're using the 'enabled' profile flag to do two distinct things: inform cpu_init() that we want profile extensions to be enabled, and telling QMP that a profile is currently enabled in the CPU. We want to be flexible enough to recognize profile support for all CPUs that has the extension prerequisites, but we do not want to force the profile code path if a profile wasn't set too. Add a new 'present' flag for profiles that will coexist with the 'enabled' flag. Enabling a profile means "we want to switch on all its mandatory extensions". A profile is 'present' if we asserted during validation that the CPU has the needed prerequisites. This means that the case reported by Björn now results in RVA22S64.enabled=false and RVA22S64.present=true. QMP will recognize it as a RVA22 compliant CPU and we won't force the CPU into the profile path. [1] https://lore.kernel.org/qemu-riscv/87y0usiz22.fsf@all.your.base.are.belong.to.us/ Reported-by: Björn Töpel <bjorn@kernel.org> Fixes: 2af005d610 ("target/riscv/tcg: validate profiles during finalize") Signed-off-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> Reviewed-by: Andrew Jones <ajones@ventanamicro.com> Reviewed-by: Björn Töpel <bjorn@rivosinc.com> Tested-by: Björn Töpel <bjorn@rivosinc.com> Message-ID: <20250528184407.1451983-4-dbarboza@ventanamicro.com> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/cpu.h | 15 +++++++++++++++ target/riscv/riscv-qmp-cmds.c | 2 +- target/riscv/tcg/tcg-cpu.c | 11 +++-------- 3 files changed, 19 insertions(+), 9 deletions(-) diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/cpu.h +++ b/target/riscv/cpu.h @@ -XXX,XX +XXX,XX @@ typedef struct riscv_cpu_profile { struct riscv_cpu_profile *s_parent; const char *name; uint32_t misa_ext; + /* + * The profile is enabled/disabled via command line or + * via cpu_init(). Enabling a profile will add all its + * mandatory extensions in the CPU during init(). + */ bool enabled; + /* + * The profile is present in the CPU, i.e. the current set of + * CPU extensions complies with it. A profile can be enabled + * and not present (e.g. the user disabled a mandatory extension) + * and the other way around (e.g. all mandatory extensions are + * present in a non-profile CPU). + * + * QMP uses this flag. + */ + bool present; bool user_set; int priv_spec; int satp_mode; diff --git a/target/riscv/riscv-qmp-cmds.c b/target/riscv/riscv-qmp-cmds.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/riscv-qmp-cmds.c +++ b/target/riscv/riscv-qmp-cmds.c @@ -XXX,XX +XXX,XX @@ static void riscv_obj_add_profiles_qdict(Object *obj, QDict *qdict_out) for (int i = 0; riscv_profiles[i] != NULL; i++) { profile = riscv_profiles[i]; - value = QOBJECT(qbool_from_bool(profile->enabled)); + value = QOBJECT(qbool_from_bool(profile->present)); qdict_put_obj(qdict_out, profile->name, value); } diff --git a/target/riscv/tcg/tcg-cpu.c b/target/riscv/tcg/tcg-cpu.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/tcg/tcg-cpu.c +++ b/target/riscv/tcg/tcg-cpu.c @@ -XXX,XX +XXX,XX @@ static void riscv_cpu_check_parent_profile(RISCVCPU *cpu, RISCVCPUProfile *profile, RISCVCPUProfile *parent) { - const char *parent_name; - bool parent_enabled; - - if (!profile->enabled || !parent) { + if (!profile->present || !parent) { return; } - parent_name = parent->name; - parent_enabled = object_property_get_bool(OBJECT(cpu), parent_name, NULL); - profile->enabled = parent_enabled; + profile->present = parent->present; } static void riscv_cpu_validate_profile(RISCVCPU *cpu, @@ -XXX,XX +XXX,XX @@ static void riscv_cpu_validate_profile(RISCVCPU *cpu, } } - profile->enabled = profile_impl; + profile->present = profile_impl; riscv_cpu_check_parent_profile(cpu, profile, profile->u_parent); riscv_cpu_check_parent_profile(cpu, profile, profile->s_parent); -- 2.50.0
From: Jay Chang <jay.chang@sifive.com> According to the RISC-V Privileged Specification (version >1.12), RV32 supports 16 CSRs (pmpcfg0–pmpcfg15) to configure 64 PMP regions (pmpaddr0–pmpaddr63). Signed-off-by: Jay Chang <jay.chang@sifive.com> Reviewed-by: Frank Chang <frank.chang@sifive.com> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> Message-ID: <20250522081236.4050-2-jay.chang@sifive.com> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/cpu_bits.h | 60 +++++++++++++++++++ target/riscv/csr.c | 124 +++++++++++++++++++++++++++++++++++++++- 2 files changed, 182 insertions(+), 2 deletions(-) diff --git a/target/riscv/cpu_bits.h b/target/riscv/cpu_bits.h index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/cpu_bits.h +++ b/target/riscv/cpu_bits.h @@ -XXX,XX +XXX,XX @@ #define CSR_PMPCFG1 0x3a1 #define CSR_PMPCFG2 0x3a2 #define CSR_PMPCFG3 0x3a3 +#define CSR_PMPCFG4 0x3a4 +#define CSR_PMPCFG5 0x3a5 +#define CSR_PMPCFG6 0x3a6 +#define CSR_PMPCFG7 0x3a7 +#define CSR_PMPCFG8 0x3a8 +#define CSR_PMPCFG9 0x3a9 +#define CSR_PMPCFG10 0x3aa +#define CSR_PMPCFG11 0x3ab +#define CSR_PMPCFG12 0x3ac +#define CSR_PMPCFG13 0x3ad +#define CSR_PMPCFG14 0x3ae +#define CSR_PMPCFG15 0x3af #define CSR_PMPADDR0 0x3b0 #define CSR_PMPADDR1 0x3b1 #define CSR_PMPADDR2 0x3b2 @@ -XXX,XX +XXX,XX @@ #define CSR_PMPADDR13 0x3bd #define CSR_PMPADDR14 0x3be #define CSR_PMPADDR15 0x3bf +#define CSR_PMPADDR16 0x3c0 +#define CSR_PMPADDR17 0x3c1 +#define CSR_PMPADDR18 0x3c2 +#define CSR_PMPADDR19 0x3c3 +#define CSR_PMPADDR20 0x3c4 +#define CSR_PMPADDR21 0x3c5 +#define CSR_PMPADDR22 0x3c6 +#define CSR_PMPADDR23 0x3c7 +#define CSR_PMPADDR24 0x3c8 +#define CSR_PMPADDR25 0x3c9 +#define CSR_PMPADDR26 0x3ca +#define CSR_PMPADDR27 0x3cb +#define CSR_PMPADDR28 0x3cc +#define CSR_PMPADDR29 0x3cd +#define CSR_PMPADDR30 0x3ce +#define CSR_PMPADDR31 0x3cf +#define CSR_PMPADDR32 0x3d0 +#define CSR_PMPADDR33 0x3d1 +#define CSR_PMPADDR34 0x3d2 +#define CSR_PMPADDR35 0x3d3 +#define CSR_PMPADDR36 0x3d4 +#define CSR_PMPADDR37 0x3d5 +#define CSR_PMPADDR38 0x3d6 +#define CSR_PMPADDR39 0x3d7 +#define CSR_PMPADDR40 0x3d8 +#define CSR_PMPADDR41 0x3d9 +#define CSR_PMPADDR42 0x3da +#define CSR_PMPADDR43 0x3db +#define CSR_PMPADDR44 0x3dc +#define CSR_PMPADDR45 0x3dd +#define CSR_PMPADDR46 0x3de +#define CSR_PMPADDR47 0x3df +#define CSR_PMPADDR48 0x3e0 +#define CSR_PMPADDR49 0x3e1 +#define CSR_PMPADDR50 0x3e2 +#define CSR_PMPADDR51 0x3e3 +#define CSR_PMPADDR52 0x3e4 +#define CSR_PMPADDR53 0x3e5 +#define CSR_PMPADDR54 0x3e6 +#define CSR_PMPADDR55 0x3e7 +#define CSR_PMPADDR56 0x3e8 +#define CSR_PMPADDR57 0x3e9 +#define CSR_PMPADDR58 0x3ea +#define CSR_PMPADDR59 0x3eb +#define CSR_PMPADDR60 0x3ec +#define CSR_PMPADDR61 0x3ed +#define CSR_PMPADDR62 0x3ee +#define CSR_PMPADDR63 0x3ef /* RNMI */ #define CSR_MNSCRATCH 0x740 diff --git a/target/riscv/csr.c b/target/riscv/csr.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/csr.c +++ b/target/riscv/csr.c @@ -XXX,XX +XXX,XX @@ riscv_csr_operations csr_ops[CSR_TABLE_SIZE] = { [CSR_PMPCFG1] = { "pmpcfg1", pmp, read_pmpcfg, write_pmpcfg }, [CSR_PMPCFG2] = { "pmpcfg2", pmp, read_pmpcfg, write_pmpcfg }, [CSR_PMPCFG3] = { "pmpcfg3", pmp, read_pmpcfg, write_pmpcfg }, + [CSR_PMPCFG4] = { "pmpcfg4", pmp, read_pmpcfg, write_pmpcfg, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPCFG5] = { "pmpcfg5", pmp, read_pmpcfg, write_pmpcfg, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPCFG6] = { "pmpcfg6", pmp, read_pmpcfg, write_pmpcfg, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPCFG7] = { "pmpcfg7", pmp, read_pmpcfg, write_pmpcfg, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPCFG8] = { "pmpcfg8", pmp, read_pmpcfg, write_pmpcfg, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPCFG9] = { "pmpcfg9", pmp, read_pmpcfg, write_pmpcfg, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPCFG10] = { "pmpcfg10", pmp, read_pmpcfg, write_pmpcfg, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPCFG11] = { "pmpcfg11", pmp, read_pmpcfg, write_pmpcfg, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPCFG12] = { "pmpcfg12", pmp, read_pmpcfg, write_pmpcfg, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPCFG13] = { "pmpcfg13", pmp, read_pmpcfg, write_pmpcfg, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPCFG14] = { "pmpcfg14", pmp, read_pmpcfg, write_pmpcfg, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPCFG15] = { "pmpcfg15", pmp, read_pmpcfg, write_pmpcfg, + .min_priv_ver = PRIV_VERSION_1_12_0 }, [CSR_PMPADDR0] = { "pmpaddr0", pmp, read_pmpaddr, write_pmpaddr }, [CSR_PMPADDR1] = { "pmpaddr1", pmp, read_pmpaddr, write_pmpaddr }, [CSR_PMPADDR2] = { "pmpaddr2", pmp, read_pmpaddr, write_pmpaddr }, @@ -XXX,XX +XXX,XX @@ riscv_csr_operations csr_ops[CSR_TABLE_SIZE] = { [CSR_PMPADDR11] = { "pmpaddr11", pmp, read_pmpaddr, write_pmpaddr }, [CSR_PMPADDR12] = { "pmpaddr12", pmp, read_pmpaddr, write_pmpaddr }, [CSR_PMPADDR13] = { "pmpaddr13", pmp, read_pmpaddr, write_pmpaddr }, - [CSR_PMPADDR14] = { "pmpaddr14", pmp, read_pmpaddr, write_pmpaddr }, - [CSR_PMPADDR15] = { "pmpaddr15", pmp, read_pmpaddr, write_pmpaddr }, + [CSR_PMPADDR14] = { "pmpaddr14", pmp, read_pmpaddr, write_pmpaddr }, + [CSR_PMPADDR15] = { "pmpaddr15", pmp, read_pmpaddr, write_pmpaddr }, + [CSR_PMPADDR16] = { "pmpaddr16", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR17] = { "pmpaddr17", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR18] = { "pmpaddr18", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR19] = { "pmpaddr19", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR20] = { "pmpaddr20", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR21] = { "pmpaddr21", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR22] = { "pmpaddr22", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR23] = { "pmpaddr23", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR24] = { "pmpaddr24", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR25] = { "pmpaddr25", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR26] = { "pmpaddr26", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR27] = { "pmpaddr27", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR28] = { "pmpaddr28", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR29] = { "pmpaddr29", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR30] = { "pmpaddr30", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR31] = { "pmpaddr31", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR32] = { "pmpaddr32", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR33] = { "pmpaddr33", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR34] = { "pmpaddr34", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR35] = { "pmpaddr35", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR36] = { "pmpaddr36", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR37] = { "pmpaddr37", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR38] = { "pmpaddr38", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR39] = { "pmpaddr39", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR40] = { "pmpaddr40", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR41] = { "pmpaddr41", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR42] = { "pmpaddr42", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR43] = { "pmpaddr43", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR44] = { "pmpaddr44", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR45] = { "pmpaddr45", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR46] = { "pmpaddr46", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR47] = { "pmpaddr47", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR48] = { "pmpaddr48", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR49] = { "pmpaddr49", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR50] = { "pmpaddr50", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR51] = { "pmpaddr51", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR52] = { "pmpaddr52", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR53] = { "pmpaddr53", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR54] = { "pmpaddr54", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR55] = { "pmpaddr55", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR56] = { "pmpaddr56", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR57] = { "pmpaddr57", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR58] = { "pmpaddr58", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR59] = { "pmpaddr59", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR60] = { "pmpaddr60", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR61] = { "pmpaddr61", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR62] = { "pmpaddr62", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR63] = { "pmpaddr63", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, /* Debug CSRs */ [CSR_TSELECT] = { "tselect", debug, read_tselect, write_tselect }, -- 2.50.0
From: Daniel Henrique Barboza <dbarboza@ventanamicro.com> These properties were deprecated in QEMU 8.2, commit 8043effd9b. Signed-off-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Message-ID: <20250530134608.1806922-1-dbarboza@ventanamicro.com> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/cpu.h | 1 - target/riscv/cpu.c | 17 ----------------- target/riscv/tcg/tcg-cpu.c | 31 +------------------------------ 3 files changed, 1 insertion(+), 48 deletions(-) diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/cpu.h +++ b/target/riscv/cpu.h @@ -XXX,XX +XXX,XX @@ extern const RISCVCPUMultiExtConfig riscv_cpu_extensions[]; extern const RISCVCPUMultiExtConfig riscv_cpu_vendor_exts[]; extern const RISCVCPUMultiExtConfig riscv_cpu_experimental_exts[]; extern const RISCVCPUMultiExtConfig riscv_cpu_named_features[]; -extern const RISCVCPUMultiExtConfig riscv_cpu_deprecated_exts[]; typedef struct isa_ext_data { const char *name; diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/cpu.c +++ b/target/riscv/cpu.c @@ -XXX,XX +XXX,XX @@ const RISCVCPUMultiExtConfig riscv_cpu_named_features[] = { { }, }; -/* Deprecated entries marked for future removal */ -const RISCVCPUMultiExtConfig riscv_cpu_deprecated_exts[] = { - MULTI_EXT_CFG_BOOL("Zifencei", ext_zifencei, true), - MULTI_EXT_CFG_BOOL("Zicsr", ext_zicsr, true), - MULTI_EXT_CFG_BOOL("Zihintntl", ext_zihintntl, true), - MULTI_EXT_CFG_BOOL("Zihintpause", ext_zihintpause, true), - MULTI_EXT_CFG_BOOL("Zawrs", ext_zawrs, true), - MULTI_EXT_CFG_BOOL("Zfa", ext_zfa, true), - MULTI_EXT_CFG_BOOL("Zfh", ext_zfh, false), - MULTI_EXT_CFG_BOOL("Zfhmin", ext_zfhmin, false), - MULTI_EXT_CFG_BOOL("Zve32f", ext_zve32f, false), - MULTI_EXT_CFG_BOOL("Zve64f", ext_zve64f, false), - MULTI_EXT_CFG_BOOL("Zve64d", ext_zve64d, false), - - { }, -}; - static void cpu_set_prop_err(RISCVCPU *cpu, const char *propname, Error **errp) { diff --git a/target/riscv/tcg/tcg-cpu.c b/target/riscv/tcg/tcg-cpu.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/tcg/tcg-cpu.c +++ b/target/riscv/tcg/tcg-cpu.c @@ -XXX,XX +XXX,XX @@ static void riscv_cpu_add_profiles(Object *cpu_obj) } } -static bool cpu_ext_is_deprecated(const char *ext_name) -{ - return isupper(ext_name[0]); -} - -/* - * String will be allocated in the heap. Caller is responsible - * for freeing it. - */ -static char *cpu_ext_to_lower(const char *ext_name) -{ - char *ret = g_malloc0(strlen(ext_name) + 1); - - strcpy(ret, ext_name); - ret[0] = tolower(ret[0]); - - return ret; -} - static void cpu_set_multi_ext_cfg(Object *obj, Visitor *v, const char *name, void *opaque, Error **errp) { @@ -XXX,XX +XXX,XX @@ static void cpu_set_multi_ext_cfg(Object *obj, Visitor *v, const char *name, return; } - if (cpu_ext_is_deprecated(multi_ext_cfg->name)) { - g_autofree char *lower = cpu_ext_to_lower(multi_ext_cfg->name); - - warn_report("CPU property '%s' is deprecated. Please use '%s' instead", - multi_ext_cfg->name, lower); - } - cpu_cfg_ext_add_user_opt(multi_ext_cfg->offset, value); prev_val = isa_ext_is_enabled(cpu, multi_ext_cfg->offset); @@ -XXX,XX +XXX,XX @@ static void cpu_add_multi_ext_prop(Object *cpu_obj, const RISCVCPUMultiExtConfig *multi_cfg) { bool generic_cpu = riscv_cpu_is_generic(cpu_obj); - bool deprecated_ext = cpu_ext_is_deprecated(multi_cfg->name); object_property_add(cpu_obj, multi_cfg->name, "bool", cpu_get_multi_ext_cfg, cpu_set_multi_ext_cfg, NULL, (void *)multi_cfg); - if (!generic_cpu || deprecated_ext) { + if (!generic_cpu) { return; } @@ -XXX,XX +XXX,XX @@ static void riscv_cpu_add_user_properties(Object *obj) riscv_cpu_add_multiext_prop_array(obj, riscv_cpu_vendor_exts); riscv_cpu_add_multiext_prop_array(obj, riscv_cpu_experimental_exts); - riscv_cpu_add_multiext_prop_array(obj, riscv_cpu_deprecated_exts); - riscv_cpu_add_profiles(obj); } -- 2.50.0
From: Daniel Henrique Barboza <dbarboza@ventanamicro.com> We have support for sdtrig for awhile but we are not advertising it. It is enabled by default via the 'debug' flag. Use the same flag to also advertise sdtrig. Add an exception in disable_priv_spec_isa_exts() to avoid spamming warnings for 'sdtrig' for vendor CPUs like sifive_u. Signed-off-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Message-ID: <20250604174329.1147549-2-dbarboza@ventanamicro.com> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/cpu.c | 1 + target/riscv/tcg/tcg-cpu.c | 9 +++++++++ tests/data/acpi/riscv64/virt/RHCT | Bin 400 -> 406 bytes 3 files changed, 10 insertions(+) diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/cpu.c +++ b/target/riscv/cpu.c @@ -XXX,XX +XXX,XX @@ const RISCVIsaExtData isa_edata_arr[] = { ISA_EXT_DATA_ENTRY(zvkt, PRIV_VERSION_1_12_0, ext_zvkt), ISA_EXT_DATA_ENTRY(zhinx, PRIV_VERSION_1_12_0, ext_zhinx), ISA_EXT_DATA_ENTRY(zhinxmin, PRIV_VERSION_1_12_0, ext_zhinxmin), + ISA_EXT_DATA_ENTRY(sdtrig, PRIV_VERSION_1_12_0, debug), ISA_EXT_DATA_ENTRY(shcounterenw, PRIV_VERSION_1_12_0, has_priv_1_12), ISA_EXT_DATA_ENTRY(sha, PRIV_VERSION_1_12_0, ext_sha), ISA_EXT_DATA_ENTRY(shgatpa, PRIV_VERSION_1_12_0, has_priv_1_12), diff --git a/target/riscv/tcg/tcg-cpu.c b/target/riscv/tcg/tcg-cpu.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/tcg/tcg-cpu.c +++ b/target/riscv/tcg/tcg-cpu.c @@ -XXX,XX +XXX,XX @@ static void riscv_cpu_disable_priv_spec_isa_exts(RISCVCPU *cpu) continue; } + /* + * cpu.debug = true is marked as 'sdtrig', priv spec 1.12. + * Skip this warning since existing CPUs with older priv + * spec and debug = true will be impacted. + */ + if (!strcmp(edata->name, "sdtrig")) { + continue; + } + isa_ext_update_enabled(cpu, edata->ext_enable_offset, false); /* diff --git a/tests/data/acpi/riscv64/virt/RHCT b/tests/data/acpi/riscv64/virt/RHCT index XXXXXXX..XXXXXXX 100644 Binary files a/tests/data/acpi/riscv64/virt/RHCT and b/tests/data/acpi/riscv64/virt/RHCT differ -- 2.50.0
From: Daniel Henrique Barboza <dbarboza@ventanamicro.com> 'ssstrict' is a RVA23 profile-defined extension defined as follows: "No non-conforming extensions are present. Attempts to execute unimplemented opcodes or access unimplemented CSRs in the standard or reserved encoding spaces raises an illegal instruction exception that results in a contained trap to the supervisor-mode trap handler." In short, we need to throw an exception when accessing unimplemented CSRs or opcodes. We do that, so let's advertise it. Signed-off-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> Reviewed-by: Andrew Jones <ajones@ventanamicro.com> Message-ID: <20250529202315.1684198-3-dbarboza@ventanamicro.com> Message-ID: <20250604174329.1147549-3-dbarboza@ventanamicro.com> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/cpu.c | 1 + tests/data/acpi/riscv64/virt/RHCT | Bin 406 -> 416 bytes 2 files changed, 1 insertion(+) diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/cpu.c +++ b/target/riscv/cpu.c @@ -XXX,XX +XXX,XX @@ const RISCVIsaExtData isa_edata_arr[] = { ISA_EXT_DATA_ENTRY(ssnpm, PRIV_VERSION_1_13_0, ext_ssnpm), ISA_EXT_DATA_ENTRY(sspm, PRIV_VERSION_1_13_0, ext_sspm), ISA_EXT_DATA_ENTRY(ssstateen, PRIV_VERSION_1_12_0, ext_ssstateen), + ISA_EXT_DATA_ENTRY(ssstrict, PRIV_VERSION_1_12_0, has_priv_1_12), ISA_EXT_DATA_ENTRY(sstc, PRIV_VERSION_1_12_0, ext_sstc), ISA_EXT_DATA_ENTRY(sstvala, PRIV_VERSION_1_12_0, has_priv_1_12), ISA_EXT_DATA_ENTRY(sstvecd, PRIV_VERSION_1_12_0, has_priv_1_12), diff --git a/tests/data/acpi/riscv64/virt/RHCT b/tests/data/acpi/riscv64/virt/RHCT index XXXXXXX..XXXXXXX 100644 Binary files a/tests/data/acpi/riscv64/virt/RHCT and b/tests/data/acpi/riscv64/virt/RHCT differ -- 2.50.0
From: Daniel Henrique Barboza <dbarboza@ventanamicro.com> Most of the named features are added directly in isa_edata_arr[], some of them are also added in riscv_cpu_named_features(). There is a reason for that, and the existing docs can do better explaining it. Signed-off-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> Message-ID: <20250529202315.1684198-4-dbarboza@ventanamicro.com> Acked-by: Alistair Francis <alistair.francis@wdc.com> Message-ID: <20250604174329.1147549-4-dbarboza@ventanamicro.com> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/cpu.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/cpu.c +++ b/target/riscv/cpu.c @@ -XXX,XX +XXX,XX @@ const RISCVCPUMultiExtConfig riscv_cpu_experimental_exts[] = { * 'Named features' is the name we give to extensions that we * don't want to expose to users. They are either immutable * (always enabled/disable) or they'll vary depending on - * the resulting CPU state. They have riscv,isa strings - * and priv_ver like regular extensions. + * the resulting CPU state. + * + * Some of them are always enabled depending on priv version + * of the CPU and are declared directly in isa_edata_arr[]. + * The ones listed here have special checks during finalize() + * time and require their own flags like regular extensions. + * See riscv_cpu_update_named_features() for more info. */ const RISCVCPUMultiExtConfig riscv_cpu_named_features[] = { MULTI_EXT_CFG_BOOL("zic64b", ext_zic64b, true), MULTI_EXT_CFG_BOOL("ssstateen", ext_ssstateen, true), MULTI_EXT_CFG_BOOL("sha", ext_sha, true), + + /* + * 'ziccrse' has its own flag because the KVM driver + * wants to enable/disable it on its own accord. + */ MULTI_EXT_CFG_BOOL("ziccrse", ext_ziccrse, true), { }, -- 2.50.0
From: Jim Shu <jim.shu@sifive.com> Support 4-byte atomic instruction fetch when instruction is natural aligned. Current implementation is not atomic because it loads instruction twice for first and last 2 bytes. We load 4 bytes at once to keep the atomicity. This instruction preload method only applys when instruction is 4-byte aligned. If instruction is unaligned, it could be across pages so that preload will trigger additional page fault. We encounter this issue when doing pressure test of enabling & disabling Linux kernel ftrace. Ftrace with kernel preemption requires concurrent modification and execution of instruction, so non-atomic instruction fetch will cause the race condition. We may fetch the wrong instruction which is the mixing of 2 instructions. Also, RISC-V Profile wants to provide this feature by HW. RVA20U64 Ziccif protects the atomicity of instruction fetch when it is natural aligned. This commit depends on the atomic read support of translator_ld in the commit 6a9dfe1984b0c593fb0ddb52d4e70832e6201dd6. Signed-off-by: Jim Shu <jim.shu@sifive.com> Reviewed-by: Frank Chang <frank.chang@sifive.com> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Message-ID: <20250508094838.19394-1-jim.shu@sifive.com> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/translate.c | 46 +++++++++++++++++++++++++++++----------- 1 file changed, 34 insertions(+), 12 deletions(-) diff --git a/target/riscv/translate.c b/target/riscv/translate.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/translate.c +++ b/target/riscv/translate.c @@ -XXX,XX +XXX,XX @@ const RISCVDecoder decoder_table[] = { const size_t decoder_table_size = ARRAY_SIZE(decoder_table); -static void decode_opc(CPURISCVState *env, DisasContext *ctx, uint16_t opcode) +static void decode_opc(CPURISCVState *env, DisasContext *ctx) { + uint32_t opcode; + bool pc_is_4byte_align = ((ctx->base.pc_next % 4) == 0); + ctx->virt_inst_excp = false; - ctx->cur_insn_len = insn_len(opcode); + if (pc_is_4byte_align) { + /* + * Load 4 bytes at once to make instruction fetch atomically. + * + * Note: When pc is 4-byte aligned, 4-byte instruction wouldn't be + * across pages. We could preload 4 bytes instruction no matter + * real one is 2 or 4 bytes. Instruction preload wouldn't trigger + * additional page fault. + */ + opcode = translator_ldl(env, &ctx->base, ctx->base.pc_next); + } else { + /* + * For unaligned pc, instruction preload may trigger additional + * page fault so we only load 2 bytes here. + */ + opcode = (uint32_t) translator_lduw(env, &ctx->base, ctx->base.pc_next); + } + ctx->ol = ctx->xl; + + ctx->cur_insn_len = insn_len((uint16_t)opcode); /* Check for compressed insn */ if (ctx->cur_insn_len == 2) { - ctx->opcode = opcode; + ctx->opcode = (uint16_t)opcode; /* * The Zca extension is added as way to refer to instructions in the C * extension that do not include the floating-point loads and stores @@ -XXX,XX +XXX,XX @@ static void decode_opc(CPURISCVState *env, DisasContext *ctx, uint16_t opcode) return; } } else { - uint32_t opcode32 = opcode; - opcode32 = deposit32(opcode32, 16, 16, - translator_lduw(env, &ctx->base, - ctx->base.pc_next + 2)); - ctx->opcode = opcode32; + if (!pc_is_4byte_align) { + /* Load last 2 bytes of instruction here */ + opcode = deposit32(opcode, 16, 16, + translator_lduw(env, &ctx->base, + ctx->base.pc_next + 2)); + } + ctx->opcode = opcode; for (guint i = 0; i < ctx->decoders->len; ++i) { riscv_cpu_decode_fn func = g_ptr_array_index(ctx->decoders, i); - if (func(ctx, opcode32)) { + if (func(ctx, opcode)) { return; } } @@ -XXX,XX +XXX,XX @@ static void riscv_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu) { DisasContext *ctx = container_of(dcbase, DisasContext, base); CPURISCVState *env = cpu_env(cpu); - uint16_t opcode16 = translator_lduw(env, &ctx->base, ctx->base.pc_next); - ctx->ol = ctx->xl; - decode_opc(env, ctx, opcode16); + decode_opc(env, ctx); ctx->base.pc_next += ctx->cur_insn_len; /* -- 2.50.0
From: Meng Zhuo <mengzhuo@iscas.ac.cn> This patch adds max_satp_mode from host kvm cpu setting. Tested on: Milkv Megrez (Eswin 7700x) Reviewed-by: Andrew Jones <ajones@ventanamicro.com> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2931 Signed-off-by: Meng Zhuo <mengzhuo@iscas.ac.cn> Message-ID: <20250606034250.181707-1-mengzhuo@iscas.ac.cn> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/kvm/kvm-cpu.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/target/riscv/kvm/kvm-cpu.c b/target/riscv/kvm/kvm-cpu.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/kvm/kvm-cpu.c +++ b/target/riscv/kvm/kvm-cpu.c @@ -XXX,XX +XXX,XX @@ static void kvm_riscv_destroy_scratch_vcpu(KVMScratchCPU *scratch) close(scratch->kvmfd); } +static void kvm_riscv_init_max_satp_mode(RISCVCPU *cpu, KVMScratchCPU *kvmcpu) +{ + struct kvm_one_reg reg; + int ret; + + reg.id = RISCV_CONFIG_REG(satp_mode); + reg.addr = (uint64_t)&cpu->cfg.max_satp_mode; + ret = ioctl(kvmcpu->cpufd, KVM_GET_ONE_REG, ®); + if (ret != 0) { + error_report("Unable to retrieve satp mode from host, error %d", ret); + } +} + static void kvm_riscv_init_machine_ids(RISCVCPU *cpu, KVMScratchCPU *kvmcpu) { struct kvm_one_reg reg; @@ -XXX,XX +XXX,XX @@ static void riscv_init_kvm_registers(Object *cpu_obj) kvm_riscv_init_machine_ids(cpu, &kvmcpu); kvm_riscv_init_misa_ext_mask(cpu, &kvmcpu); kvm_riscv_init_cfg(cpu, &kvmcpu); + kvm_riscv_init_max_satp_mode(cpu, &kvmcpu); kvm_riscv_destroy_scratch_vcpu(&kvmcpu); } @@ -XXX,XX +XXX,XX @@ static bool kvm_cpu_realize(CPUState *cs, Error **errp) } } - return true; + return true; } void riscv_kvm_cpu_finalize_features(RISCVCPU *cpu, Error **errp) -- 2.50.0
From: Jay Chang <jay.chang@sifive.com> Previously, the number of PMP regions was hardcoded to 16 in QEMU. This patch replaces the fixed value with a new `pmp_regions` field, allowing platforms to configure the number of PMP regions. If no specific value is provided, the default number of PMP regions remains 16 to preserve the existing behavior. A new CPU parameter num-pmp-regions has been introduced to the QEMU command line. For example: -cpu rv64, g=true, c=true, pmp=true, num-pmp-regions=8 Signed-off-by: Jay Chang <jay.chang@sifive.com> Reviewed-by: Frank Chang <frank.chang@sifive.com> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Message-ID: <20250606072525.17313-3-jay.chang@sifive.com> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/cpu.h | 3 +- target/riscv/cpu_cfg_fields.h.inc | 1 + target/riscv/cpu.c | 48 +++++++++++++++++++++++++++++-- target/riscv/csr.c | 5 +++- target/riscv/machine.c | 3 +- target/riscv/pmp.c | 28 ++++++++++++------ 6 files changed, 74 insertions(+), 14 deletions(-) diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/cpu.h +++ b/target/riscv/cpu.h @@ -XXX,XX +XXX,XX @@ extern RISCVCPUImpliedExtsRule *riscv_multi_ext_implied_rules[]; #define MMU_USER_IDX 3 -#define MAX_RISCV_PMPS (16) +#define MAX_RISCV_PMPS (64) +#define OLD_MAX_RISCV_PMPS (16) #if !defined(CONFIG_USER_ONLY) #include "pmp.h" diff --git a/target/riscv/cpu_cfg_fields.h.inc b/target/riscv/cpu_cfg_fields.h.inc index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/cpu_cfg_fields.h.inc +++ b/target/riscv/cpu_cfg_fields.h.inc @@ -XXX,XX +XXX,XX @@ TYPED_FIELD(uint16_t, elen, 0) TYPED_FIELD(uint16_t, cbom_blocksize, 0) TYPED_FIELD(uint16_t, cbop_blocksize, 0) TYPED_FIELD(uint16_t, cboz_blocksize, 0) +TYPED_FIELD(uint8_t, pmp_regions, 0) TYPED_FIELD(int8_t, max_satp_mode, -1) diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/cpu.c +++ b/target/riscv/cpu.c @@ -XXX,XX +XXX,XX @@ static void riscv_cpu_init(Object *obj) cpu->cfg.cbom_blocksize = 64; cpu->cfg.cbop_blocksize = 64; cpu->cfg.cboz_blocksize = 64; + cpu->cfg.pmp_regions = 16; cpu->env.vext_ver = VEXT_VERSION_1_00_0; cpu->cfg.max_satp_mode = -1; @@ -XXX,XX +XXX,XX @@ static const PropertyInfo prop_pmp = { .set = prop_pmp_set, }; +static void prop_num_pmp_regions_set(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + RISCVCPU *cpu = RISCV_CPU(obj); + uint8_t value; + + visit_type_uint8(v, name, &value, errp); + + if (cpu->cfg.pmp_regions != value && riscv_cpu_is_vendor(obj)) { + cpu_set_prop_err(cpu, name, errp); + return; + } + + if (cpu->env.priv_ver < PRIV_VERSION_1_12_0 && value > OLD_MAX_RISCV_PMPS) { + error_setg(errp, "Number of PMP regions exceeds maximum available"); + return; + } else if (value > MAX_RISCV_PMPS) { + error_setg(errp, "Number of PMP regions exceeds maximum available"); + return; + } + + cpu_option_add_user_setting(name, value); + cpu->cfg.pmp_regions = value; +} + +static void prop_num_pmp_regions_get(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + uint8_t value = RISCV_CPU(obj)->cfg.pmp_regions; + + visit_type_uint8(v, name, &value, errp); +} + +static const PropertyInfo prop_num_pmp_regions = { + .type = "uint8", + .description = "num-pmp-regions", + .get = prop_num_pmp_regions_get, + .set = prop_num_pmp_regions_set, +}; + static int priv_spec_from_str(const char *priv_spec_str) { int priv_version = -1; @@ -XXX,XX +XXX,XX @@ static const Property riscv_cpu_properties[] = { {.name = "mmu", .info = &prop_mmu}, {.name = "pmp", .info = &prop_pmp}, + {.name = "num-pmp-regions", .info = &prop_num_pmp_regions}, {.name = "priv_spec", .info = &prop_priv_spec}, {.name = "vext_spec", .info = &prop_vext_spec}, @@ -XXX,XX +XXX,XX @@ static const TypeInfo riscv_cpu_type_infos[] = { .cfg.max_satp_mode = VM_1_10_MBARE, .cfg.ext_zifencei = true, .cfg.ext_zicsr = true, - .cfg.pmp = true + .cfg.pmp = true, + .cfg.pmp_regions = 8 ), DEFINE_ABSTRACT_RISCV_CPU(TYPE_RISCV_CPU_SIFIVE_U, TYPE_RISCV_VENDOR_CPU, @@ -XXX,XX +XXX,XX @@ static const TypeInfo riscv_cpu_type_infos[] = { .cfg.ext_zifencei = true, .cfg.ext_zicsr = true, .cfg.mmu = true, - .cfg.pmp = true + .cfg.pmp = true, + .cfg.pmp_regions = 8 ), #if defined(TARGET_RISCV32) || \ diff --git a/target/riscv/csr.c b/target/riscv/csr.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/csr.c +++ b/target/riscv/csr.c @@ -XXX,XX +XXX,XX @@ static RISCVException dbltrp_hmode(CPURISCVState *env, int csrno) static RISCVException pmp(CPURISCVState *env, int csrno) { if (riscv_cpu_cfg(env)->pmp) { - if (csrno <= CSR_PMPCFG3) { + int max_pmpcfg = (env->priv_ver >= PRIV_VERSION_1_12_0) ? ++ CSR_PMPCFG15 : CSR_PMPCFG3; + + if (csrno <= max_pmpcfg) { uint32_t reg_index = csrno - CSR_PMPCFG0; /* TODO: RV128 restriction check */ diff --git a/target/riscv/machine.c b/target/riscv/machine.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/machine.c +++ b/target/riscv/machine.c @@ -XXX,XX +XXX,XX @@ static int pmp_post_load(void *opaque, int version_id) RISCVCPU *cpu = opaque; CPURISCVState *env = &cpu->env; int i; + uint8_t pmp_regions = riscv_cpu_cfg(env)->pmp_regions; - for (i = 0; i < MAX_RISCV_PMPS; i++) { + for (i = 0; i < pmp_regions; i++) { pmp_update_rule_addr(env, i); } pmp_update_rule_nums(env); diff --git a/target/riscv/pmp.c b/target/riscv/pmp.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/pmp.c +++ b/target/riscv/pmp.c @@ -XXX,XX +XXX,XX @@ uint32_t pmp_get_num_rules(CPURISCVState *env) */ static inline uint8_t pmp_read_cfg(CPURISCVState *env, uint32_t pmp_index) { - if (pmp_index < MAX_RISCV_PMPS) { + uint8_t pmp_regions = riscv_cpu_cfg(env)->pmp_regions; + + if (pmp_index < pmp_regions) { return env->pmp_state.pmp[pmp_index].cfg_reg; } @@ -XXX,XX +XXX,XX @@ static inline uint8_t pmp_read_cfg(CPURISCVState *env, uint32_t pmp_index) */ static bool pmp_write_cfg(CPURISCVState *env, uint32_t pmp_index, uint8_t val) { - if (pmp_index < MAX_RISCV_PMPS) { + uint8_t pmp_regions = riscv_cpu_cfg(env)->pmp_regions; + + if (pmp_index < pmp_regions) { if (env->pmp_state.pmp[pmp_index].cfg_reg == val) { /* no change */ return false; @@ -XXX,XX +XXX,XX @@ void pmp_update_rule_addr(CPURISCVState *env, uint32_t pmp_index) void pmp_update_rule_nums(CPURISCVState *env) { int i; + uint8_t pmp_regions = riscv_cpu_cfg(env)->pmp_regions; env->pmp_state.num_rules = 0; - for (i = 0; i < MAX_RISCV_PMPS; i++) { + for (i = 0; i < pmp_regions; i++) { const uint8_t a_field = pmp_get_a_field(env->pmp_state.pmp[i].cfg_reg); if (PMP_AMATCH_OFF != a_field) { @@ -XXX,XX +XXX,XX @@ bool pmp_hart_has_privs(CPURISCVState *env, hwaddr addr, int pmp_size = 0; hwaddr s = 0; hwaddr e = 0; + uint8_t pmp_regions = riscv_cpu_cfg(env)->pmp_regions; /* Short cut if no rules */ if (0 == pmp_get_num_rules(env)) { @@ -XXX,XX +XXX,XX @@ bool pmp_hart_has_privs(CPURISCVState *env, hwaddr addr, * 1.10 draft priv spec states there is an implicit order * from low to high */ - for (i = 0; i < MAX_RISCV_PMPS; i++) { + for (i = 0; i < pmp_regions; i++) { s = pmp_is_in_range(env, i, addr); e = pmp_is_in_range(env, i, addr + pmp_size - 1); @@ -XXX,XX +XXX,XX @@ void pmpaddr_csr_write(CPURISCVState *env, uint32_t addr_index, { trace_pmpaddr_csr_write(env->mhartid, addr_index, val); bool is_next_cfg_tor = false; + uint8_t pmp_regions = riscv_cpu_cfg(env)->pmp_regions; - if (addr_index < MAX_RISCV_PMPS) { + if (addr_index < pmp_regions) { if (env->pmp_state.pmp[addr_index].addr_reg == val) { /* no change */ return; @@ -XXX,XX +XXX,XX @@ void pmpaddr_csr_write(CPURISCVState *env, uint32_t addr_index, * In TOR mode, need to check the lock bit of the next pmp * (if there is a next). */ - if (addr_index + 1 < MAX_RISCV_PMPS) { + if (addr_index + 1 < pmp_regions) { uint8_t pmp_cfg = env->pmp_state.pmp[addr_index + 1].cfg_reg; is_next_cfg_tor = PMP_AMATCH_TOR == pmp_get_a_field(pmp_cfg); @@ -XXX,XX +XXX,XX @@ void pmpaddr_csr_write(CPURISCVState *env, uint32_t addr_index, target_ulong pmpaddr_csr_read(CPURISCVState *env, uint32_t addr_index) { target_ulong val = 0; + uint8_t pmp_regions = riscv_cpu_cfg(env)->pmp_regions; - if (addr_index < MAX_RISCV_PMPS) { + if (addr_index < pmp_regions) { val = env->pmp_state.pmp[addr_index].addr_reg; trace_pmpaddr_csr_read(env->mhartid, addr_index, val); } else { @@ -XXX,XX +XXX,XX @@ void mseccfg_csr_write(CPURISCVState *env, target_ulong val) { int i; uint64_t mask = MSECCFG_MMWP | MSECCFG_MML; + uint8_t pmp_regions = riscv_cpu_cfg(env)->pmp_regions; /* Update PMM field only if the value is valid according to Zjpm v1.0 */ if (riscv_cpu_cfg(env)->ext_smmpm && riscv_cpu_mxl(env) == MXL_RV64 && @@ -XXX,XX +XXX,XX @@ void mseccfg_csr_write(CPURISCVState *env, target_ulong val) /* RLB cannot be enabled if it's already 0 and if any regions are locked */ if (!MSECCFG_RLB_ISSET(env)) { - for (i = 0; i < MAX_RISCV_PMPS; i++) { + for (i = 0; i < pmp_regions; i++) { if (pmp_is_locked(env, i)) { val &= ~MSECCFG_RLB; break; @@ -XXX,XX +XXX,XX @@ target_ulong pmp_get_tlb_size(CPURISCVState *env, hwaddr addr) hwaddr tlb_sa = addr & ~(TARGET_PAGE_SIZE - 1); hwaddr tlb_ea = tlb_sa + TARGET_PAGE_SIZE - 1; int i; + uint8_t pmp_regions = riscv_cpu_cfg(env)->pmp_regions; /* * If PMP is not supported or there are no PMP rules, the TLB page will not @@ -XXX,XX +XXX,XX @@ target_ulong pmp_get_tlb_size(CPURISCVState *env, hwaddr addr) return TARGET_PAGE_SIZE; } - for (i = 0; i < MAX_RISCV_PMPS; i++) { + for (i = 0; i < pmp_regions; i++) { if (pmp_get_a_field(env->pmp_state.pmp[i].cfg_reg) == PMP_AMATCH_OFF) { continue; } -- 2.50.0
From: Nutty Liu <liujingqi@lanxincomputing.com> The original implementation incorrectly performed a bitwise AND operation between the PPN of iova and PPN Mask, leading to an incorrect PPN field in Translation-reponse register. The PPN of iova should be set entirely in the PPN field of Translation-reponse register. Also remove the code that was used to clear S field since this field is already zero. Signed-off-by: Nutty Liu <liujingqi@lanxincomputing.com> Reviewed-by: Tomasz Jeznach <tjeznach@rivosinc.com> Message-ID: <20250605124848.1248-1-liujingqi@lanxincomputing.com> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- hw/riscv/riscv-iommu.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/hw/riscv/riscv-iommu.c b/hw/riscv/riscv-iommu.c index XXXXXXX..XXXXXXX 100644 --- a/hw/riscv/riscv-iommu.c +++ b/hw/riscv/riscv-iommu.c @@ -XXX,XX +XXX,XX @@ static void riscv_iommu_process_dbg(RISCVIOMMUState *s) iova = RISCV_IOMMU_TR_RESPONSE_FAULT | (((uint64_t) fault) << 10); } else { iova = iotlb.translated_addr & ~iotlb.addr_mask; - iova >>= TARGET_PAGE_BITS; - iova &= RISCV_IOMMU_TR_RESPONSE_PPN; - - /* We do not support superpages (> 4kbs) for now */ - iova &= ~RISCV_IOMMU_TR_RESPONSE_S; + iova = set_field(0, RISCV_IOMMU_TR_RESPONSE_PPN, PPN_DOWN(iova)); } riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_TR_RESPONSE, iova); } -- 2.50.0
From: Daniel Henrique Barboza <dbarboza@ventanamicro.com> The SBI spec states, for console write byte: "This is a blocking SBI call and it will only return after writing the specified byte to the debug console. It will also return, with SBI_ERR_FAILED, if there are I/O errors." Being a blocker call will either succeed writing the byte or error out, it's feasible to use the blocking qemu_chr_fe_write_all() instead of qemu_chr_fe_write(). Last but not the least, we will duck possible changes in qemu_chr_fe_write() where ret = 0 will have a 'zero byte written' semantic [1] - something that we're not ready to deal in this current state. [1] https://lore.kernel.org/qemu-devel/CAFEAcA_kEndvNtw4EHySXWwQPoGs029yAzZGGBcV=zGHaj7KUQ@mail.gmail.com/ Signed-off-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> Message-ID: <20250605094456.1385105-2-dbarboza@ventanamicro.com> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/kvm/kvm-cpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/target/riscv/kvm/kvm-cpu.c b/target/riscv/kvm/kvm-cpu.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/kvm/kvm-cpu.c +++ b/target/riscv/kvm/kvm-cpu.c @@ -XXX,XX +XXX,XX @@ static void kvm_riscv_handle_sbi_dbcn(CPUState *cs, struct kvm_run *run) break; case SBI_EXT_DBCN_CONSOLE_WRITE_BYTE: ch = run->riscv_sbi.args[0]; - ret = qemu_chr_fe_write(serial_hd(0)->be, &ch, sizeof(ch)); + ret = qemu_chr_fe_write_all(serial_hd(0)->be, &ch, sizeof(ch)); if (ret < 0) { error_report("SBI_EXT_DBCN_CONSOLE_WRITE_BYTE: error when " -- 2.50.0
From: Anton Blanchard <antonb@tenstorrent.com> fcvt.s.bf16 uses the FP16 check_nanbox_h() which returns an FP16 quiet NaN. Add check_nanbox_bf16() which returns a BF16 quiet NaN. Signed-off-by: Anton Blanchard <antonb@tenstorrent.com> Acked-by: Alistair Francis <alistair.francis@wdc.com> Message-ID: <20250501114253.594887-1-antonb@tenstorrent.com> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/internals.h | 16 ++++++++++++++++ target/riscv/fpu_helper.c | 2 +- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/target/riscv/internals.h b/target/riscv/internals.h index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/internals.h +++ b/target/riscv/internals.h @@ -XXX,XX +XXX,XX @@ static inline float16 check_nanbox_h(CPURISCVState *env, uint64_t f) } } +static inline float16 check_nanbox_bf16(CPURISCVState *env, uint64_t f) +{ + /* Disable nanbox check when enable zfinx */ + if (env_archcpu(env)->cfg.ext_zfinx) { + return (uint16_t)f; + } + + uint64_t mask = MAKE_64BIT_MASK(16, 48); + + if (likely((f & mask) == mask)) { + return (uint16_t)f; + } else { + return 0x7FC0u; /* default qnan */ + } +} + #ifndef CONFIG_USER_ONLY /* Our implementation of SysemuCPUOps::has_work */ bool riscv_cpu_has_work(CPUState *cs); diff --git a/target/riscv/fpu_helper.c b/target/riscv/fpu_helper.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/fpu_helper.c +++ b/target/riscv/fpu_helper.c @@ -XXX,XX +XXX,XX @@ uint64_t helper_fcvt_bf16_s(CPURISCVState *env, uint64_t rs1) uint64_t helper_fcvt_s_bf16(CPURISCVState *env, uint64_t rs1) { - float16 frs1 = check_nanbox_h(env, rs1); + float16 frs1 = check_nanbox_bf16(env, rs1); return nanbox_s(env, bfloat16_to_float32(frs1, &env->fp_status)); } -- 2.50.0
From: Florian Lugou <florian.lugou@provenrun.com> The current handler for TXFIFO writes schedules an async callback to pop characters from the queue. When software writes to TXFIFO faster than the async callback delay (100ns), the timer may be pushed back while the previous character has not be dequeued yet. This happens in particular when using -icount with small shift values. This is especially worrysome when software repetitively issues amoor.w instructions (as suggested by SiFive specification) and the FIFO is full, leading to the callback being infinitly pushed back. This commit fixes the issue by never pushing back the timer, only updating it if it is not already active. Signed-off-by: Florian Lugou <florian.lugou@provenrun.com> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Message-ID: <20250605101255.797162-1-florian.lugou@provenrun.com> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- hw/char/sifive_uart.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/hw/char/sifive_uart.c b/hw/char/sifive_uart.c index XXXXXXX..XXXXXXX 100644 --- a/hw/char/sifive_uart.c +++ b/hw/char/sifive_uart.c @@ -XXX,XX +XXX,XX @@ static void sifive_uart_write_tx_fifo(SiFiveUARTState *s, const uint8_t *buf, s->txfifo |= SIFIVE_UART_TXFIFO_FULL; } - timer_mod(s->fifo_trigger_handle, current_time + - TX_INTERRUPT_TRIGGER_DELAY_NS); + if (!timer_pending(s->fifo_trigger_handle)) { + timer_mod(s->fifo_trigger_handle, current_time + + TX_INTERRUPT_TRIGGER_DELAY_NS); + } } static uint64_t -- 2.50.0
From: Joel Stanley <joel@jms.id.au> The address is a hardware address, so use hwaddr for consistency with the rest of the machine. Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Signed-off-by: Joel Stanley <joel@jms.id.au> Message-ID: <20250604025450.85327-2-joel@jms.id.au> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- hw/riscv/virt.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c index XXXXXXX..XXXXXXX 100644 --- a/hw/riscv/virt.c +++ b/hw/riscv/virt.c @@ -XXX,XX +XXX,XX @@ static void create_fdt_socket_clint(RISCVVirtState *s, int cpu; g_autofree char *clint_name = NULL; g_autofree uint32_t *clint_cells = NULL; - unsigned long clint_addr; + hwaddr clint_addr; MachineState *ms = MACHINE(s); static const char * const clint_compat[2] = { "sifive,clint0", "riscv,clint0" @@ -XXX,XX +XXX,XX @@ static void create_fdt_socket_clint(RISCVVirtState *s, } clint_addr = s->memmap[VIRT_CLINT].base + - (s->memmap[VIRT_CLINT].size * socket); - clint_name = g_strdup_printf("/soc/clint@%lx", clint_addr); + s->memmap[VIRT_CLINT].size * socket; + clint_name = g_strdup_printf("/soc/clint@%"HWADDR_PRIx, clint_addr); qemu_fdt_add_subnode(ms->fdt, clint_name); qemu_fdt_setprop_string_array(ms->fdt, clint_name, "compatible", (char **)&clint_compat, -- 2.50.0
From: Joel Stanley <joel@jms.id.au> The current device tree property uses two cells for the address (and for the size), but assumes the they are less than 32 bits by hard coding the high cell to zero. Use qemu_fdt_setprop_sized_cells to do the job of splitting the upper and lower 32 bits across cells. Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Signed-off-by: Joel Stanley <joel@jms.id.au> Message-ID: <20250604025450.85327-3-joel@jms.id.au> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- hw/riscv/virt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c index XXXXXXX..XXXXXXX 100644 --- a/hw/riscv/virt.c +++ b/hw/riscv/virt.c @@ -XXX,XX +XXX,XX @@ static void create_fdt_socket_clint(RISCVVirtState *s, qemu_fdt_setprop_string_array(ms->fdt, clint_name, "compatible", (char **)&clint_compat, ARRAY_SIZE(clint_compat)); - qemu_fdt_setprop_cells(ms->fdt, clint_name, "reg", - 0x0, clint_addr, 0x0, s->memmap[VIRT_CLINT].size); + qemu_fdt_setprop_sized_cells(ms->fdt, clint_name, "reg", + 2, clint_addr, 2, s->memmap[VIRT_CLINT].size); qemu_fdt_setprop(ms->fdt, clint_name, "interrupts-extended", clint_cells, s->soc[socket].num_harts * sizeof(uint32_t) * 4); riscv_socket_fdt_write_id(ms, clint_name, socket); -- 2.50.0
From: Joel Stanley <joel@jms.id.au> Use qemu_fdt_setprop_sized_cells to do the job of splitting the upper and lower 32 bits across cells. Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Signed-off-by: Joel Stanley <joel@jms.id.au> Message-ID: <20250604025450.85327-4-joel@jms.id.au> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- hw/riscv/virt.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c index XXXXXXX..XXXXXXX 100644 --- a/hw/riscv/virt.c +++ b/hw/riscv/virt.c @@ -XXX,XX +XXX,XX @@ static void create_fdt_socket_memory(RISCVVirtState *s, int socket) size = riscv_socket_mem_size(ms, socket); mem_name = g_strdup_printf("/memory@%"HWADDR_PRIx, addr); qemu_fdt_add_subnode(ms->fdt, mem_name); - qemu_fdt_setprop_cells(ms->fdt, mem_name, "reg", - addr >> 32, addr, size >> 32, size); + qemu_fdt_setprop_sized_cells(ms->fdt, mem_name, "reg", 2, addr, 2, size); qemu_fdt_setprop_string(ms->fdt, mem_name, "device_type", "memory"); riscv_socket_fdt_write_id(ms, mem_name, socket); } -- 2.50.0
From: Joel Stanley <joel@jms.id.au> The current device tree property uses two cells for the address (and for the size), but assumes the they are less than 32 bits by hard coding the high cell to zero. Use qemu_fdt_setprop_sized_cells to do the job of splitting the upper and lower 32 bits across cells. Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Signed-off-by: Joel Stanley <joel@jms.id.au> Message-ID: <20250604025450.85327-5-joel@jms.id.au> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- hw/riscv/virt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c index XXXXXXX..XXXXXXX 100644 --- a/hw/riscv/virt.c +++ b/hw/riscv/virt.c @@ -XXX,XX +XXX,XX @@ static void create_fdt_one_aplic(RISCVVirtState *s, int socket, qemu_fdt_setprop_cell(ms->fdt, aplic_name, "msi-parent", msi_phandle); } - qemu_fdt_setprop_cells(ms->fdt, aplic_name, "reg", - 0x0, aplic_addr, 0x0, aplic_size); + qemu_fdt_setprop_sized_cells(ms->fdt, aplic_name, "reg", + 2, aplic_addr, 2, aplic_size); qemu_fdt_setprop_cell(ms->fdt, aplic_name, "riscv,num-sources", VIRT_IRQCHIP_NUM_SOURCES); -- 2.50.0
From: Joel Stanley <joel@jms.id.au> The current device tree property uses two cells for the address (and for the size), but assumes the they are less than 32 bits by hard coding the high cell to zero. Use qemu_fdt_setprop_sized_cells to do the job of splitting the upper and lower 32 bits across cells. Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Signed-off-by: Joel Stanley <joel@jms.id.au> Message-ID: <20250604025450.85327-6-joel@jms.id.au> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- hw/riscv/virt.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c index XXXXXXX..XXXXXXX 100644 --- a/hw/riscv/virt.c +++ b/hw/riscv/virt.c @@ -XXX,XX +XXX,XX @@ static void create_fdt_socket_aclint(RISCVVirtState *s, qemu_fdt_add_subnode(ms->fdt, name); qemu_fdt_setprop_string(ms->fdt, name, "compatible", "riscv,aclint-mswi"); - qemu_fdt_setprop_cells(ms->fdt, name, "reg", - 0x0, addr, 0x0, RISCV_ACLINT_SWI_SIZE); + qemu_fdt_setprop_sized_cells(ms->fdt, name, "reg", + 2, addr, 2, RISCV_ACLINT_SWI_SIZE); qemu_fdt_setprop(ms->fdt, name, "interrupts-extended", aclint_mswi_cells, aclint_cells_size); qemu_fdt_setprop(ms->fdt, name, "interrupt-controller", NULL, 0); @@ -XXX,XX +XXX,XX @@ static void create_fdt_socket_aclint(RISCVVirtState *s, qemu_fdt_add_subnode(ms->fdt, name); qemu_fdt_setprop_string(ms->fdt, name, "compatible", "riscv,aclint-mtimer"); - qemu_fdt_setprop_cells(ms->fdt, name, "reg", - 0x0, addr + RISCV_ACLINT_DEFAULT_MTIME, - 0x0, size - RISCV_ACLINT_DEFAULT_MTIME, - 0x0, addr + RISCV_ACLINT_DEFAULT_MTIMECMP, - 0x0, RISCV_ACLINT_DEFAULT_MTIME); + qemu_fdt_setprop_sized_cells(ms->fdt, name, "reg", + 2, addr + RISCV_ACLINT_DEFAULT_MTIME, + 2, size - RISCV_ACLINT_DEFAULT_MTIME, + 2, addr + RISCV_ACLINT_DEFAULT_MTIMECMP, + 2, RISCV_ACLINT_DEFAULT_MTIME); qemu_fdt_setprop(ms->fdt, name, "interrupts-extended", aclint_mtimer_cells, aclint_cells_size); riscv_socket_fdt_write_id(ms, name, socket); @@ -XXX,XX +XXX,XX @@ static void create_fdt_socket_aclint(RISCVVirtState *s, qemu_fdt_add_subnode(ms->fdt, name); qemu_fdt_setprop_string(ms->fdt, name, "compatible", "riscv,aclint-sswi"); - qemu_fdt_setprop_cells(ms->fdt, name, "reg", - 0x0, addr, 0x0, s->memmap[VIRT_ACLINT_SSWI].size); + qemu_fdt_setprop_sized_cells(ms->fdt, name, "reg", + 2, addr, 2, s->memmap[VIRT_ACLINT_SSWI].size); qemu_fdt_setprop(ms->fdt, name, "interrupts-extended", aclint_sswi_cells, aclint_cells_size); qemu_fdt_setprop(ms->fdt, name, "interrupt-controller", NULL, 0); -- 2.50.0
From: Joel Stanley <joel@jms.id.au> The current device tree property uses two cells for the address (and for the size), but assumes the they are less than 32 bits by hard coding the high cell to zero. Use qemu_fdt_setprop_sized_cells to do the job of splitting the upper and lower 32 bits across cells. Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Signed-off-by: Joel Stanley <joel@jms.id.au> Message-ID: <20250604025450.85327-7-joel@jms.id.au> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- hw/riscv/virt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c index XXXXXXX..XXXXXXX 100644 --- a/hw/riscv/virt.c +++ b/hw/riscv/virt.c @@ -XXX,XX +XXX,XX @@ static void create_fdt_socket_plic(RISCVVirtState *s, s->soc[socket].num_harts * sizeof(uint32_t) * 4); } - qemu_fdt_setprop_cells(ms->fdt, plic_name, "reg", - 0x0, plic_addr, 0x0, s->memmap[VIRT_PLIC].size); + qemu_fdt_setprop_sized_cells(ms->fdt, plic_name, "reg", + 2, plic_addr, 2, s->memmap[VIRT_PLIC].size); qemu_fdt_setprop_cell(ms->fdt, plic_name, "riscv,ndev", VIRT_IRQCHIP_NUM_SOURCES - 1); riscv_socket_fdt_write_id(ms, plic_name, socket); -- 2.50.0
From: Joel Stanley <joel@jms.id.au> The current device tree property uses two cells for the address (and for the size), but assumes the they are less than 32 bits by hard coding the high cell to zero. Use qemu_fdt_setprop_sized_cells to do the job of splitting the upper and lower 32 bits across cells. Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Signed-off-by: Joel Stanley <joel@jms.id.au> Message-ID: <20250604025450.85327-8-joel@jms.id.au> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- hw/riscv/virt.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c index XXXXXXX..XXXXXXX 100644 --- a/hw/riscv/virt.c +++ b/hw/riscv/virt.c @@ -XXX,XX +XXX,XX @@ static void create_fdt_virtio(RISCVVirtState *s, uint32_t irq_virtio_phandle) qemu_fdt_add_subnode(ms->fdt, name); qemu_fdt_setprop_string(ms->fdt, name, "compatible", "virtio,mmio"); - qemu_fdt_setprop_cells(ms->fdt, name, "reg", - 0x0, addr, - 0x0, size); + qemu_fdt_setprop_sized_cells(ms->fdt, name, "reg", 2, addr, 2, size); qemu_fdt_setprop_cell(ms->fdt, name, "interrupt-parent", irq_virtio_phandle); if (s->aia_type == VIRT_AIA_TYPE_NONE) { -- 2.50.0
From: Joel Stanley <joel@jms.id.au> The current device tree property uses two cells for the address (and for the size), but assumes the they are less than 32 bits by hard coding the high cell to zero. Use qemu_fdt_setprop_sized_cells to do the job of splitting the upper and lower 32 bits across cells. Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Signed-off-by: Joel Stanley <joel@jms.id.au> Message-ID: <20250604025450.85327-9-joel@jms.id.au> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- hw/riscv/virt.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c index XXXXXXX..XXXXXXX 100644 --- a/hw/riscv/virt.c +++ b/hw/riscv/virt.c @@ -XXX,XX +XXX,XX @@ static void create_fdt_reset(RISCVVirtState *s, uint32_t *phandle) qemu_fdt_setprop_string_array(ms->fdt, name, "compatible", (char **)&compat, ARRAY_SIZE(compat)); } - qemu_fdt_setprop_cells(ms->fdt, name, "reg", - 0x0, s->memmap[VIRT_TEST].base, 0x0, s->memmap[VIRT_TEST].size); + qemu_fdt_setprop_sized_cells(ms->fdt, name, "reg", + 2, s->memmap[VIRT_TEST].base, + 2, s->memmap[VIRT_TEST].size); qemu_fdt_setprop_cell(ms->fdt, name, "phandle", test_phandle); test_phandle = qemu_fdt_get_phandle(ms->fdt, name); g_free(name); -- 2.50.0
From: Joel Stanley <joel@jms.id.au> The current device tree property uses two cells for the address (and for the size), but assumes the they are less than 32 bits by hard coding the high cell to zero. Use qemu_fdt_setprop_sized_cells to do the job of splitting the upper and lower 32 bits across cells. Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Signed-off-by: Joel Stanley <joel@jms.id.au> Message-ID: <20250604025450.85327-10-joel@jms.id.au> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- hw/riscv/virt.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c index XXXXXXX..XXXXXXX 100644 --- a/hw/riscv/virt.c +++ b/hw/riscv/virt.c @@ -XXX,XX +XXX,XX @@ static void create_fdt_uart(RISCVVirtState *s, s->memmap[VIRT_UART0].base); qemu_fdt_add_subnode(ms->fdt, name); qemu_fdt_setprop_string(ms->fdt, name, "compatible", "ns16550a"); - qemu_fdt_setprop_cells(ms->fdt, name, "reg", - 0x0, s->memmap[VIRT_UART0].base, - 0x0, s->memmap[VIRT_UART0].size); + qemu_fdt_setprop_sized_cells(ms->fdt, name, "reg", + 2, s->memmap[VIRT_UART0].base, + 2, s->memmap[VIRT_UART0].size); qemu_fdt_setprop_cell(ms->fdt, name, "clock-frequency", 3686400); qemu_fdt_setprop_cell(ms->fdt, name, "interrupt-parent", irq_mmio_phandle); if (s->aia_type == VIRT_AIA_TYPE_NONE) { -- 2.50.0
From: Joel Stanley <joel@jms.id.au> The current device tree property uses two cells for the address (and for the size), but assumes the they are less than 32 bits by hard coding the high cell to zero. Use qemu_fdt_setprop_sized_cells to do the job of splitting the upper and lower 32 bits across cells. Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Signed-off-by: Joel Stanley <joel@jms.id.au> Message-ID: <20250604025450.85327-11-joel@jms.id.au> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- hw/riscv/virt.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c index XXXXXXX..XXXXXXX 100644 --- a/hw/riscv/virt.c +++ b/hw/riscv/virt.c @@ -XXX,XX +XXX,XX @@ static void create_fdt_rtc(RISCVVirtState *s, qemu_fdt_add_subnode(ms->fdt, name); qemu_fdt_setprop_string(ms->fdt, name, "compatible", "google,goldfish-rtc"); - qemu_fdt_setprop_cells(ms->fdt, name, "reg", - 0x0, s->memmap[VIRT_RTC].base, 0x0, s->memmap[VIRT_RTC].size); + qemu_fdt_setprop_sized_cells(ms->fdt, name, "reg", + 2, s->memmap[VIRT_RTC].base, + 2, s->memmap[VIRT_RTC].size); qemu_fdt_setprop_cell(ms->fdt, name, "interrupt-parent", irq_mmio_phandle); if (s->aia_type == VIRT_AIA_TYPE_NONE) { -- 2.50.0
From: Joel Stanley <joel@jms.id.au> The current device tree property uses two cells for the address (and for the size), but assumes the they are less than 32 bits by hard coding the high cell to zero. Use qemu_fdt_setprop_sized_cells to do the job of splitting the upper and lower 32 bits across cells. Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Signed-off-by: Joel Stanley <joel@jms.id.au> Message-ID: <20250604025450.85327-12-joel@jms.id.au> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- hw/riscv/virt.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c index XXXXXXX..XXXXXXX 100644 --- a/hw/riscv/virt.c +++ b/hw/riscv/virt.c @@ -XXX,XX +XXX,XX @@ static void create_fdt_iommu_sys(RISCVVirtState *s, uint32_t irq_chip, qemu_fdt_setprop_cell(fdt, iommu_node, "#iommu-cells", 1); qemu_fdt_setprop_cell(fdt, iommu_node, "phandle", iommu_phandle); - qemu_fdt_setprop_cells(fdt, iommu_node, "reg", - addr >> 32, addr, size >> 32, size); + qemu_fdt_setprop_sized_cells(fdt, iommu_node, "reg", 2, addr, 2, size); qemu_fdt_setprop_cell(fdt, iommu_node, "interrupt-parent", irq_chip); qemu_fdt_setprop_cells(fdt, iommu_node, "interrupts", -- 2.50.0
From: Joel Stanley <joel@jms.id.au> The current device tree property uses two cells for the address (and for the size), but assumes the they are less than 32 bits by hard coding the high cell to zero. Use qemu_fdt_setprop_sized_cells to do the job of splitting the upper and lower 32 bits across cells. Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Signed-off-by: Joel Stanley <joel@jms.id.au> Message-ID: <20250604025450.85327-13-joel@jms.id.au> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- hw/riscv/virt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c index XXXXXXX..XXXXXXX 100644 --- a/hw/riscv/virt.c +++ b/hw/riscv/virt.c @@ -XXX,XX +XXX,XX @@ static void create_fdt_pcie(RISCVVirtState *s, if (s->aia_type == VIRT_AIA_TYPE_APLIC_IMSIC) { qemu_fdt_setprop_cell(ms->fdt, name, "msi-parent", msi_pcie_phandle); } - qemu_fdt_setprop_cells(ms->fdt, name, "reg", 0, - s->memmap[VIRT_PCIE_ECAM].base, 0, s->memmap[VIRT_PCIE_ECAM].size); + qemu_fdt_setprop_sized_cells(ms->fdt, name, "reg", 2, + s->memmap[VIRT_PCIE_ECAM].base, 2, s->memmap[VIRT_PCIE_ECAM].size); qemu_fdt_setprop_sized_cells(ms->fdt, name, "ranges", 1, FDT_PCI_RANGE_IOPORT, 2, 0, 2, s->memmap[VIRT_PCIE_PIO].base, 2, s->memmap[VIRT_PCIE_PIO].size, -- 2.50.0
From: Huang Borong <3543977024@qq.com> Add a CPU entry for the Xiangshan Kunminghu CPU, an open-source, high-performance RISC-V processor. More details can be found at: https://github.com/OpenXiangShan/XiangShan Note: The ISA extensions supported by the Xiangshan Kunminghu CPU are categorized based on four RISC-V specifications: Volume I: Unprivileged Architecture, Volume II: Privileged Architecture, AIA, and RVA23. The extensions within each category are organized according to the chapter order in the specifications. Signed-off-by: Yu Hu <huyu@bosc.ac.cn> Signed-off-by: Ran Wang <wangran@bosc.ac.cn> Signed-off-by: Borong Huang <3543977024@qq.com> Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> Acked-by: Alistair Francis <alistair.francis@wdc.com> Message-ID: <20250425122212.364-1-wangran@bosc.ac.cn> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/cpu-qom.h | 1 + target/riscv/cpu.c | 58 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+) diff --git a/target/riscv/cpu-qom.h b/target/riscv/cpu-qom.h index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/cpu-qom.h +++ b/target/riscv/cpu-qom.h @@ -XXX,XX +XXX,XX @@ #define TYPE_RISCV_CPU_VEYRON_V1 RISCV_CPU_TYPE_NAME("veyron-v1") #define TYPE_RISCV_CPU_TT_ASCALON RISCV_CPU_TYPE_NAME("tt-ascalon") #define TYPE_RISCV_CPU_XIANGSHAN_NANHU RISCV_CPU_TYPE_NAME("xiangshan-nanhu") +#define TYPE_RISCV_CPU_XIANGSHAN_KMH RISCV_CPU_TYPE_NAME("xiangshan-kunminghu") #define TYPE_RISCV_CPU_HOST RISCV_CPU_TYPE_NAME("host") OBJECT_DECLARE_CPU_TYPE(RISCVCPU, RISCVCPUClass, RISCV_CPU) diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/cpu.c +++ b/target/riscv/cpu.c @@ -XXX,XX +XXX,XX @@ static const TypeInfo riscv_cpu_type_infos[] = { .cfg.max_satp_mode = VM_1_10_SV39, ), + DEFINE_RISCV_CPU(TYPE_RISCV_CPU_XIANGSHAN_KMH, TYPE_RISCV_VENDOR_CPU, + .misa_mxl_max = MXL_RV64, + .misa_ext = RVG | RVC | RVB | RVS | RVU | RVH | RVV, + .priv_spec = PRIV_VERSION_1_13_0, + /* + * The RISC-V Instruction Set Manual: Volume I + * Unprivileged Architecture + */ + .cfg.ext_zicntr = true, + .cfg.ext_zihpm = true, + .cfg.ext_zihintntl = true, + .cfg.ext_zihintpause = true, + .cfg.ext_zimop = true, + .cfg.ext_zcmop = true, + .cfg.ext_zicond = true, + .cfg.ext_zawrs = true, + .cfg.ext_zacas = true, + .cfg.ext_zfh = true, + .cfg.ext_zfa = true, + .cfg.ext_zcb = true, + .cfg.ext_zbc = true, + .cfg.ext_zvfh = true, + .cfg.ext_zkn = true, + .cfg.ext_zks = true, + .cfg.ext_zkt = true, + .cfg.ext_zvbb = true, + .cfg.ext_zvkt = true, + /* + * The RISC-V Instruction Set Manual: Volume II + * Privileged Architecture + */ + .cfg.ext_smstateen = true, + .cfg.ext_smcsrind = true, + .cfg.ext_sscsrind = true, + .cfg.ext_svnapot = true, + .cfg.ext_svpbmt = true, + .cfg.ext_svinval = true, + .cfg.ext_sstc = true, + .cfg.ext_sscofpmf = true, + .cfg.ext_ssdbltrp = true, + .cfg.ext_ssnpm = true, + .cfg.ext_smnpm = true, + .cfg.ext_smmpm = true, + .cfg.ext_sspm = true, + .cfg.ext_supm = true, + /* The RISC-V Advanced Interrupt Architecture */ + .cfg.ext_smaia = true, + .cfg.ext_ssaia = true, + /* RVA23 Profiles */ + .cfg.ext_zicbom = true, + .cfg.ext_zicbop = true, + .cfg.ext_zicboz = true, + .cfg.ext_svade = true, + .cfg.mmu = true, + .cfg.pmp = true, + .cfg.max_satp_mode = VM_1_10_SV48, + ), + #if defined(CONFIG_TCG) && !defined(CONFIG_USER_ONLY) DEFINE_RISCV_CPU(TYPE_RISCV_CPU_BASE128, TYPE_RISCV_DYNAMIC_CPU, .cfg.max_satp_mode = VM_1_10_SV57, -- 2.50.0
From: Huang Borong <3543977024@qq.com> This implementation provides emulation for the Xiangshan Kunminghu FPGA prototype platform, including support for UART, CLINT, IMSIC, and APLIC devices. More details can be found at https://github.com/OpenXiangShan/XiangShan Signed-off-by: qinshaoqing <qinshaoqing@bosc.ac.cn> Signed-off-by: Yang Wang <wangyang@bosc.ac.cn> Signed-off-by: Yu Hu <819258943@qq.com> Signed-off-by: Ran Wang <wangran@bosc.ac.cn> Signed-off-by: Borong Huang <3543977024@qq.com> Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> Message-ID: <20250617074222.17618-1-wangran@bosc.ac.cn> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- MAINTAINERS | 7 + docs/system/riscv/xiangshan-kunminghu.rst | 39 ++++ docs/system/target-riscv.rst | 1 + configs/devices/riscv64-softmmu/default.mak | 1 + include/hw/riscv/xiangshan_kmh.h | 68 ++++++ hw/riscv/xiangshan_kmh.c | 220 ++++++++++++++++++++ hw/riscv/Kconfig | 9 + hw/riscv/meson.build | 1 + 8 files changed, 346 insertions(+) create mode 100644 docs/system/riscv/xiangshan-kunminghu.rst create mode 100644 include/hw/riscv/xiangshan_kmh.h create mode 100644 hw/riscv/xiangshan_kmh.c diff --git a/MAINTAINERS b/MAINTAINERS index XXXXXXX..XXXXXXX 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -XXX,XX +XXX,XX @@ S: Maintained F: hw/riscv/microblaze-v-generic.c F: docs/system/riscv/microblaze-v-generic.rst +Xiangshan Kunminghu +M: Ran Wang <wangran@bosc.ac.cn> +S: Maintained +F: docs/system/riscv/xiangshan-kunminghu.rst +F: hw/riscv/xiangshan_kmh.c +F: include/hw/riscv/xiangshan_kmh.h + RX Machines ----------- rx-gdbsim diff --git a/docs/system/riscv/xiangshan-kunminghu.rst b/docs/system/riscv/xiangshan-kunminghu.rst new file mode 100644 index XXXXXXX..XXXXXXX --- /dev/null +++ b/docs/system/riscv/xiangshan-kunminghu.rst @@ -XXX,XX +XXX,XX @@ +BOSC Xiangshan Kunminghu FPGA prototype platform (``xiangshan-kunminghu``) +========================================================================== +The ``xiangshan-kunminghu`` machine is compatible with our FPGA prototype +platform. + +XiangShan is an open-source high-performance RISC-V processor project. +The third generation processor is called Kunminghu. Kunminghu is a 64-bit +RV64GCBSUHV processor core. More information can be found in our Github +repository: +https://github.com/OpenXiangShan/XiangShan + +Supported devices +----------------- +The ``xiangshan-kunminghu`` machine supports the following devices: + +* Up to 16 xiangshan-kunminghu cores +* Core Local Interruptor (CLINT) +* Incoming MSI Controller (IMSIC) +* Advanced Platform-Level Interrupt Controller (APLIC) +* 1 UART + +Boot options +------------ +The ``xiangshan-kunminghu`` machine can start using the standard ``-bios`` +functionality for loading the boot image. You need to compile and link +the firmware, kernel, and Device Tree (FDT) into a single binary file, +such as ``fw_payload.bin``. + +Running +------- +Below is an example command line for running the ``xiangshan-kunminghu`` +machine: + +.. code-block:: bash + + $ qemu-system-riscv64 -machine xiangshan-kunminghu \ + -smp 16 -m 16G \ + -bios path/to/opensbi/platform/generic/firmware/fw_payload.bin \ + -nographic diff --git a/docs/system/target-riscv.rst b/docs/system/target-riscv.rst index XXXXXXX..XXXXXXX 100644 --- a/docs/system/target-riscv.rst +++ b/docs/system/target-riscv.rst @@ -XXX,XX +XXX,XX @@ undocumented; you can get a complete list by running riscv/shakti-c riscv/sifive_u riscv/virt + riscv/xiangshan-kunminghu RISC-V CPU firmware ------------------- diff --git a/configs/devices/riscv64-softmmu/default.mak b/configs/devices/riscv64-softmmu/default.mak index XXXXXXX..XXXXXXX 100644 --- a/configs/devices/riscv64-softmmu/default.mak +++ b/configs/devices/riscv64-softmmu/default.mak @@ -XXX,XX +XXX,XX @@ # CONFIG_RISCV_VIRT=n # CONFIG_MICROCHIP_PFSOC=n # CONFIG_SHAKTI_C=n +# CONFIG_XIANGSHAN_KUNMINGHU=n diff --git a/include/hw/riscv/xiangshan_kmh.h b/include/hw/riscv/xiangshan_kmh.h new file mode 100644 index XXXXXXX..XXXXXXX --- /dev/null +++ b/include/hw/riscv/xiangshan_kmh.h @@ -XXX,XX +XXX,XX @@ +/* SPDX-License-Identifier: BSD-2-Clause */ +/* + * QEMU RISC-V Board Compatible with the Xiangshan Kunminghu + * FPGA prototype platform + * + * Copyright (c) 2025 Beijing Institute of Open Source Chip (BOSC) + * + */ + +#ifndef HW_XIANGSHAN_KMH_H +#define HW_XIANGSHAN_KMH_H + +#include "hw/boards.h" +#include "hw/riscv/riscv_hart.h" + +#define XIANGSHAN_KMH_MAX_CPUS 16 + +typedef struct XiangshanKmhSoCState { + /*< private >*/ + DeviceState parent_obj; + + /*< public >*/ + RISCVHartArrayState cpus; + DeviceState *irqchip; + MemoryRegion rom; +} XiangshanKmhSoCState; + +#define TYPE_XIANGSHAN_KMH_SOC "xiangshan.kunminghu.soc" +DECLARE_INSTANCE_CHECKER(XiangshanKmhSoCState, XIANGSHAN_KMH_SOC, + TYPE_XIANGSHAN_KMH_SOC) + +typedef struct XiangshanKmhState { + /*< private >*/ + MachineState parent_obj; + + /*< public >*/ + XiangshanKmhSoCState soc; +} XiangshanKmhState; + +#define TYPE_XIANGSHAN_KMH_MACHINE MACHINE_TYPE_NAME("xiangshan-kunminghu") +DECLARE_INSTANCE_CHECKER(XiangshanKmhState, XIANGSHAN_KMH_MACHINE, + TYPE_XIANGSHAN_KMH_MACHINE) + +enum { + XIANGSHAN_KMH_ROM, + XIANGSHAN_KMH_UART0, + XIANGSHAN_KMH_CLINT, + XIANGSHAN_KMH_APLIC_M, + XIANGSHAN_KMH_APLIC_S, + XIANGSHAN_KMH_IMSIC_M, + XIANGSHAN_KMH_IMSIC_S, + XIANGSHAN_KMH_DRAM, +}; + +enum { + XIANGSHAN_KMH_UART0_IRQ = 10, +}; + +/* Indicating Timebase-freq (1MHZ) */ +#define XIANGSHAN_KMH_CLINT_TIMEBASE_FREQ 1000000 + +#define XIANGSHAN_KMH_IMSIC_NUM_IDS 255 +#define XIANGSHAN_KMH_IMSIC_NUM_GUESTS 7 +#define XIANGSHAN_KMH_IMSIC_GUEST_BITS 3 + +#define XIANGSHAN_KMH_APLIC_NUM_SOURCES 96 + +#endif diff --git a/hw/riscv/xiangshan_kmh.c b/hw/riscv/xiangshan_kmh.c new file mode 100644 index XXXXXXX..XXXXXXX --- /dev/null +++ b/hw/riscv/xiangshan_kmh.c @@ -XXX,XX +XXX,XX @@ +/* + * QEMU RISC-V Board Compatible with the Xiangshan Kunminghu + * FPGA prototype platform + * + * Copyright (c) 2025 Beijing Institute of Open Source Chip (BOSC) + * SPDX-License-Identifier: GPL-2.0-or-later + * + * Provides a board compatible with the Xiangshan Kunminghu + * FPGA prototype platform: + * + * 0) UART (16550A) + * 1) CLINT (Core-Local Interruptor) + * 2) IMSIC (Incoming MSI Controller) + * 3) APLIC (Advanced Platform-Level Interrupt Controller) + * + * More information can be found in our Github repository: + * https://github.com/OpenXiangShan/XiangShan + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "system/address-spaces.h" +#include "hw/boards.h" +#include "hw/char/serial-mm.h" +#include "hw/intc/riscv_aclint.h" +#include "hw/intc/riscv_aplic.h" +#include "hw/intc/riscv_imsic.h" +#include "hw/qdev-properties.h" +#include "hw/riscv/boot.h" +#include "hw/riscv/xiangshan_kmh.h" +#include "hw/riscv/riscv_hart.h" +#include "system/system.h" + +static const MemMapEntry xiangshan_kmh_memmap[] = { + [XIANGSHAN_KMH_ROM] = { 0x1000, 0xF000 }, + [XIANGSHAN_KMH_UART0] = { 0x310B0000, 0x10000 }, + [XIANGSHAN_KMH_CLINT] = { 0x38000000, 0x10000 }, + [XIANGSHAN_KMH_APLIC_M] = { 0x31100000, 0x4000 }, + [XIANGSHAN_KMH_APLIC_S] = { 0x31120000, 0x4000 }, + [XIANGSHAN_KMH_IMSIC_M] = { 0x3A800000, 0x10000 }, + [XIANGSHAN_KMH_IMSIC_S] = { 0x3B000000, 0x80000 }, + [XIANGSHAN_KMH_DRAM] = { 0x80000000, 0x0 }, +}; + +static DeviceState *xiangshan_kmh_create_aia(uint32_t num_harts) +{ + int i; + const MemMapEntry *memmap = xiangshan_kmh_memmap; + hwaddr addr = 0; + DeviceState *aplic_m = NULL; + + /* M-level IMSICs */ + addr = memmap[XIANGSHAN_KMH_IMSIC_M].base; + for (i = 0; i < num_harts; i++) { + riscv_imsic_create(addr + i * IMSIC_HART_SIZE(0), i, true, + 1, XIANGSHAN_KMH_IMSIC_NUM_IDS); + } + + /* S-level IMSICs */ + addr = memmap[XIANGSHAN_KMH_IMSIC_S].base; + for (i = 0; i < num_harts; i++) { + riscv_imsic_create(addr + + i * IMSIC_HART_SIZE(XIANGSHAN_KMH_IMSIC_GUEST_BITS), + i, false, 1 + XIANGSHAN_KMH_IMSIC_GUEST_BITS, + XIANGSHAN_KMH_IMSIC_NUM_IDS); + } + + /* M-level APLIC */ + aplic_m = riscv_aplic_create(memmap[XIANGSHAN_KMH_APLIC_M].base, + memmap[XIANGSHAN_KMH_APLIC_M].size, + 0, 0, XIANGSHAN_KMH_APLIC_NUM_SOURCES, + 1, true, true, NULL); + + /* S-level APLIC */ + riscv_aplic_create(memmap[XIANGSHAN_KMH_APLIC_S].base, + memmap[XIANGSHAN_KMH_APLIC_S].size, + 0, 0, XIANGSHAN_KMH_APLIC_NUM_SOURCES, + 1, true, false, aplic_m); + + return aplic_m; +} + +static void xiangshan_kmh_soc_realize(DeviceState *dev, Error **errp) +{ + MachineState *ms = MACHINE(qdev_get_machine()); + XiangshanKmhSoCState *s = XIANGSHAN_KMH_SOC(dev); + const MemMapEntry *memmap = xiangshan_kmh_memmap; + MemoryRegion *system_memory = get_system_memory(); + uint32_t num_harts = ms->smp.cpus; + + qdev_prop_set_uint32(DEVICE(&s->cpus), "num-harts", num_harts); + qdev_prop_set_uint32(DEVICE(&s->cpus), "hartid-base", 0); + qdev_prop_set_string(DEVICE(&s->cpus), "cpu-type", + TYPE_RISCV_CPU_XIANGSHAN_KMH); + sysbus_realize(SYS_BUS_DEVICE(&s->cpus), &error_fatal); + + /* AIA */ + s->irqchip = xiangshan_kmh_create_aia(num_harts); + + /* UART */ + serial_mm_init(system_memory, memmap[XIANGSHAN_KMH_UART0].base, 2, + qdev_get_gpio_in(s->irqchip, XIANGSHAN_KMH_UART0_IRQ), + 115200, serial_hd(0), DEVICE_LITTLE_ENDIAN); + + /* CLINT */ + riscv_aclint_swi_create(memmap[XIANGSHAN_KMH_CLINT].base, + 0, num_harts, false); + riscv_aclint_mtimer_create(memmap[XIANGSHAN_KMH_CLINT].base + + RISCV_ACLINT_SWI_SIZE, + RISCV_ACLINT_DEFAULT_MTIMER_SIZE, + 0, num_harts, RISCV_ACLINT_DEFAULT_MTIMECMP, + RISCV_ACLINT_DEFAULT_MTIME, + XIANGSHAN_KMH_CLINT_TIMEBASE_FREQ, true); + + /* ROM */ + memory_region_init_rom(&s->rom, OBJECT(dev), "xiangshan.kunminghu.rom", + memmap[XIANGSHAN_KMH_ROM].size, &error_fatal); + memory_region_add_subregion(system_memory, + memmap[XIANGSHAN_KMH_ROM].base, &s->rom); +} + +static void xiangshan_kmh_soc_class_init(ObjectClass *klass, const void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->realize = xiangshan_kmh_soc_realize; + dc->user_creatable = false; +} + +static void xiangshan_kmh_soc_instance_init(Object *obj) +{ + XiangshanKmhSoCState *s = XIANGSHAN_KMH_SOC(obj); + + object_initialize_child(obj, "cpus", &s->cpus, TYPE_RISCV_HART_ARRAY); +} + +static const TypeInfo xiangshan_kmh_soc_info = { + .name = TYPE_XIANGSHAN_KMH_SOC, + .parent = TYPE_DEVICE, + .instance_size = sizeof(XiangshanKmhSoCState), + .instance_init = xiangshan_kmh_soc_instance_init, + .class_init = xiangshan_kmh_soc_class_init, +}; + +static void xiangshan_kmh_soc_register_types(void) +{ + type_register_static(&xiangshan_kmh_soc_info); +} +type_init(xiangshan_kmh_soc_register_types) + +static void xiangshan_kmh_machine_init(MachineState *machine) +{ + XiangshanKmhState *s = XIANGSHAN_KMH_MACHINE(machine); + const MemMapEntry *memmap = xiangshan_kmh_memmap; + MemoryRegion *system_memory = get_system_memory(); + hwaddr start_addr = memmap[XIANGSHAN_KMH_DRAM].base; + + /* Initialize SoC */ + object_initialize_child(OBJECT(machine), "soc", &s->soc, + TYPE_XIANGSHAN_KMH_SOC); + qdev_realize(DEVICE(&s->soc), NULL, &error_fatal); + + /* Register RAM */ + memory_region_add_subregion(system_memory, + memmap[XIANGSHAN_KMH_DRAM].base, + machine->ram); + + /* ROM reset vector */ + riscv_setup_rom_reset_vec(machine, &s->soc.cpus, + start_addr, + memmap[XIANGSHAN_KMH_ROM].base, + memmap[XIANGSHAN_KMH_ROM].size, 0, 0); + if (machine->firmware) { + riscv_load_firmware(machine->firmware, &start_addr, NULL); + } + + /* Note: dtb has been integrated into firmware(OpenSBI) when compiling */ +} + +static void xiangshan_kmh_machine_class_init(ObjectClass *klass, const void *data) +{ + MachineClass *mc = MACHINE_CLASS(klass); + static const char *const valid_cpu_types[] = { + TYPE_RISCV_CPU_XIANGSHAN_KMH, + NULL + }; + + mc->desc = "RISC-V Board compatible with the Xiangshan " \ + "Kunminghu FPGA prototype platform"; + mc->init = xiangshan_kmh_machine_init; + mc->max_cpus = XIANGSHAN_KMH_MAX_CPUS; + mc->default_cpu_type = TYPE_RISCV_CPU_XIANGSHAN_KMH; + mc->valid_cpu_types = valid_cpu_types; + mc->default_ram_id = "xiangshan.kunminghu.ram"; +} + +static const TypeInfo xiangshan_kmh_machine_info = { + .name = TYPE_XIANGSHAN_KMH_MACHINE, + .parent = TYPE_MACHINE, + .instance_size = sizeof(XiangshanKmhState), + .class_init = xiangshan_kmh_machine_class_init, +}; + +static void xiangshan_kmh_machine_register_types(void) +{ + type_register_static(&xiangshan_kmh_machine_info); +} +type_init(xiangshan_kmh_machine_register_types) diff --git a/hw/riscv/Kconfig b/hw/riscv/Kconfig index XXXXXXX..XXXXXXX 100644 --- a/hw/riscv/Kconfig +++ b/hw/riscv/Kconfig @@ -XXX,XX +XXX,XX @@ config SPIKE select HTIF select RISCV_ACLINT select SIFIVE_PLIC + +config XIANGSHAN_KUNMINGHU + bool + default y + depends on RISCV64 + select RISCV_ACLINT + select RISCV_APLIC + select RISCV_IMSIC + select SERIAL_MM diff --git a/hw/riscv/meson.build b/hw/riscv/meson.build index XXXXXXX..XXXXXXX 100644 --- a/hw/riscv/meson.build +++ b/hw/riscv/meson.build @@ -XXX,XX +XXX,XX @@ riscv_ss.add(when: 'CONFIG_ACPI', if_true: files('virt-acpi-build.c')) riscv_ss.add(when: 'CONFIG_RISCV_IOMMU', if_true: files( 'riscv-iommu.c', 'riscv-iommu-pci.c', 'riscv-iommu-sys.c', 'riscv-iommu-hpm.c')) riscv_ss.add(when: 'CONFIG_MICROBLAZE_V', if_true: files('microblaze-v-generic.c')) +riscv_ss.add(when: 'CONFIG_XIANGSHAN_KUNMINGHU', if_true: files('xiangshan_kmh.c')) hw_arch += {'riscv': riscv_ss} -- 2.50.0
From: Max Chou <max.chou@sifive.com> According to the V spec, the vector fault-only-first load instructions may change the VL CSR. So the ldff_trans TCG translation function should generate the lookup_and_goto_ptr flow as the vsetvl/vsetvli translation function to make sure the vl_eq_vlmax TB flag is correct. Signed-off-by: Max Chou <max.chou@sifive.com> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Message-ID: <20250627133013.443997-1-max.chou@sifive.com> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/insn_trans/trans_rvv.c.inc | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn_trans/trans_rvv.c.inc +++ b/target/riscv/insn_trans/trans_rvv.c.inc @@ -XXX,XX +XXX,XX @@ static bool ldff_trans(uint32_t vd, uint32_t rs1, uint32_t data, fn(dest, mask, base, tcg_env, desc); finalize_rvv_inst(s); + + /* vector unit-stride fault-only-first load may modify vl CSR */ + gen_update_pc(s, s->cur_insn_len); + lookup_and_goto_ptr(s); + s->base.is_jmp = DISAS_NORETURN; + return true; } -- 2.50.0
From: "liu.xuemei1@zte.com.cn" <liu.xuemei1@zte.com.cn> Address an error in migration when aia is configured as 'aplic-imsic' in riscv kvm vm by adding riscv_aplic_state_needed() and riscv_imsic_state_needed() to determine whether the corresponding sates are needed. Previously, the fields in the vmsds of 'riscv_aplic' and 'riscv_imsic' can only be initialized under certain special conditions in commit 95a97b3fd2. However, the corresponding ses of these vmsds are inserted into the savevm_state.handlers unconditionally. This led to migration failure characterized by uninitialized fields when save vm state: qemu-system-riscv64: ../migration/vmstate.c:433: vmstate_save_state_v: Assertion 'first_elem || !n_elems || !size' failed. Fixes: 95a97b3fd2 ("target/riscv: update APLIC and IMSIC to support KVM AIA") Signed-off-by: Xuemei Liu <liu.xuemei1@zte.com.cn> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Message-ID: <20250616150034827wuHs_ffe3Qm8cqFXT7HeW@zte.com.cn> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- hw/intc/riscv_aplic.c | 12 ++++++++++-- hw/intc/riscv_imsic.c | 10 ++++++++-- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/hw/intc/riscv_aplic.c b/hw/intc/riscv_aplic.c index XXXXXXX..XXXXXXX 100644 --- a/hw/intc/riscv_aplic.c +++ b/hw/intc/riscv_aplic.c @@ -XXX,XX +XXX,XX @@ static const Property riscv_aplic_properties[] = { DEFINE_PROP_BOOL("mmode", RISCVAPLICState, mmode, 0), }; +static bool riscv_aplic_state_needed(void *opaque) +{ + RISCVAPLICState *aplic = opaque; + + return riscv_use_emulated_aplic(aplic->msimode); +} + static const VMStateDescription vmstate_riscv_aplic = { .name = "riscv_aplic", - .version_id = 2, - .minimum_version_id = 2, + .version_id = 3, + .minimum_version_id = 3, + .needed = riscv_aplic_state_needed, .fields = (const VMStateField[]) { VMSTATE_UINT32(domaincfg, RISCVAPLICState), VMSTATE_UINT32(mmsicfgaddr, RISCVAPLICState), diff --git a/hw/intc/riscv_imsic.c b/hw/intc/riscv_imsic.c index XXXXXXX..XXXXXXX 100644 --- a/hw/intc/riscv_imsic.c +++ b/hw/intc/riscv_imsic.c @@ -XXX,XX +XXX,XX @@ static const Property riscv_imsic_properties[] = { DEFINE_PROP_UINT32("num-irqs", RISCVIMSICState, num_irqs, 0), }; +static bool riscv_imsic_state_needed(void *opaque) +{ + return !kvm_irqchip_in_kernel(); +} + static const VMStateDescription vmstate_riscv_imsic = { .name = "riscv_imsic", - .version_id = 1, - .minimum_version_id = 1, + .version_id = 2, + .minimum_version_id = 2, + .needed = riscv_imsic_state_needed, .fields = (const VMStateField[]) { VMSTATE_VARRAY_UINT32(eidelivery, RISCVIMSICState, num_pages, 0, -- 2.50.0
From: Charalampos Mitrodimas <charmitro@posteo.net> According to the RISC-V Privileged Architecture specification, the low bit of MEPC/SEPC must always be zero. When IALIGN=32, the two low bits must be zero. This commit fixes the behavior of MEPC/SEPC CSR reads and writes, and the implicit reads by MRET/SRET instructions to properly mask the lowest bit(s) based on whether the C extension is enabled: - When C extension is enabled (IALIGN=16): mask bit 0 - When C extension is disabled (IALIGN=32): mask bits [1:0] Previously, when vectored mode bits from STVEC (which sets bit 0 for vectored mode) were written to MEPC, the bits would not be cleared correctly, causing incorrect behavior on MRET. Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2855 Signed-off-by: Charalampos Mitrodimas <charmitro@posteo.net> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> Message-ID: <20250703182157.281320-2-charmitro@posteo.net> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/internals.h | 11 +++++++++++ target/riscv/csr.c | 8 ++++---- target/riscv/op_helper.c | 4 ++-- 3 files changed, 17 insertions(+), 6 deletions(-) diff --git a/target/riscv/internals.h b/target/riscv/internals.h index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/internals.h +++ b/target/riscv/internals.h @@ -XXX,XX +XXX,XX @@ static inline float16 check_nanbox_bf16(CPURISCVState *env, uint64_t f) } } +static inline target_ulong get_xepc_mask(CPURISCVState *env) +{ + /* When IALIGN=32, both low bits must be zero. + * When IALIGN=16 (has C extension), only bit 0 must be zero. */ + if (riscv_has_ext(env, RVC)) { + return ~(target_ulong)1; + } else { + return ~(target_ulong)3; + } +} + #ifndef CONFIG_USER_ONLY /* Our implementation of SysemuCPUOps::has_work */ bool riscv_cpu_has_work(CPUState *cs); diff --git a/target/riscv/csr.c b/target/riscv/csr.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/csr.c +++ b/target/riscv/csr.c @@ -XXX,XX +XXX,XX @@ static RISCVException write_mscratch(CPURISCVState *env, int csrno, static RISCVException read_mepc(CPURISCVState *env, int csrno, target_ulong *val) { - *val = env->mepc; + *val = env->mepc & get_xepc_mask(env); return RISCV_EXCP_NONE; } static RISCVException write_mepc(CPURISCVState *env, int csrno, target_ulong val, uintptr_t ra) { - env->mepc = val; + env->mepc = val & get_xepc_mask(env); return RISCV_EXCP_NONE; } @@ -XXX,XX +XXX,XX @@ static RISCVException write_sscratch(CPURISCVState *env, int csrno, static RISCVException read_sepc(CPURISCVState *env, int csrno, target_ulong *val) { - *val = env->sepc; + *val = env->sepc & get_xepc_mask(env); return RISCV_EXCP_NONE; } static RISCVException write_sepc(CPURISCVState *env, int csrno, target_ulong val, uintptr_t ra) { - env->sepc = val; + env->sepc = val & get_xepc_mask(env); return RISCV_EXCP_NONE; } diff --git a/target/riscv/op_helper.c b/target/riscv/op_helper.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/op_helper.c +++ b/target/riscv/op_helper.c @@ -XXX,XX +XXX,XX @@ target_ulong helper_sret(CPURISCVState *env) riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); } - target_ulong retpc = env->sepc; + target_ulong retpc = env->sepc & get_xepc_mask(env); if (!riscv_cpu_allow_16bit_insn(&env_archcpu(env)->cfg, env->priv_ver, env->misa_ext) && (retpc & 0x3)) { @@ -XXX,XX +XXX,XX @@ static target_ulong ssdbltrp_mxret(CPURISCVState *env, target_ulong mstatus, target_ulong helper_mret(CPURISCVState *env) { - target_ulong retpc = env->mepc; + target_ulong retpc = env->mepc & get_xepc_mask(env); uint64_t mstatus = env->mstatus; target_ulong prev_priv = get_field(mstatus, MSTATUS_MPP); -- 2.50.0
From: Charalampos Mitrodimas <charmitro@posteo.net> Add a regression test to verify that MEPC properly masks the lower bits when an address with mode bits is written to it, as required by the RISC-V Privileged Architecture specification. The test sets STVEC to an address with bit 0 set (vectored mode), triggers an illegal instruction exception, copies STVEC to MEPC in the trap handler, and verifies that MEPC masks bits [1:0] correctly for IALIGN=32. Without the fix, MEPC retains the mode bits (returns non-zero/FAIL). With the fix, MEPC clears bits [1:0] (returns 0/PASS). Signed-off-by: Charalampos Mitrodimas <charmitro@posteo.net> Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> Message-ID: <20250703182157.281320-3-charmitro@posteo.net> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- tests/tcg/riscv64/Makefile.softmmu-target | 4 ++ tests/tcg/riscv64/test-mepc-masking.S | 73 +++++++++++++++++++++++ 2 files changed, 77 insertions(+) create mode 100644 tests/tcg/riscv64/test-mepc-masking.S diff --git a/tests/tcg/riscv64/Makefile.softmmu-target b/tests/tcg/riscv64/Makefile.softmmu-target index XXXXXXX..XXXXXXX 100644 --- a/tests/tcg/riscv64/Makefile.softmmu-target +++ b/tests/tcg/riscv64/Makefile.softmmu-target @@ -XXX,XX +XXX,XX @@ EXTRA_RUNS += run-issue1060 run-issue1060: issue1060 $(call run-test, $<, $(QEMU) $(QEMU_OPTS)$<) +EXTRA_RUNS += run-test-mepc-masking +run-test-mepc-masking: test-mepc-masking + $(call run-test, $<, $(QEMU) $(QEMU_OPTS)$<) + # We don't currently support the multiarch system tests undefine MULTIARCH_TESTS diff --git a/tests/tcg/riscv64/test-mepc-masking.S b/tests/tcg/riscv64/test-mepc-masking.S new file mode 100644 index XXXXXXX..XXXXXXX --- /dev/null +++ b/tests/tcg/riscv64/test-mepc-masking.S @@ -XXX,XX +XXX,XX @@ +/* + * Test for MEPC masking bug fix + * + * This test verifies that MEPC properly masks the lower bits according + * to the RISC-V specification when vectored mode bits from STVEC are + * written to MEPC. + */ + + .option norvc + + .text + .global _start +_start: + /* Set up machine trap vector */ + lla t0, machine_trap_handler + csrw mtvec, t0 + + /* Set STVEC with vectored mode (mode bits = 01) */ + li t0, 0x80004001 + csrw stvec, t0 + + /* Clear medeleg to handle exceptions in M-mode */ + csrw medeleg, zero + + /* Trigger illegal instruction exception */ + .word 0xffffffff + +test_completed: + /* Exit with result in a0 */ + /* a0 = 0: success (bits [1:0] were masked) */ + /* a0 != 0: failure (some bits were not masked) */ + j _exit + +machine_trap_handler: + /* Check if illegal instruction (mcause = 2) */ + csrr t0, mcause + li t1, 2 + bne t0, t1, skip_test + + /* Test: Copy STVEC (with mode bits) to MEPC */ + csrr t0, stvec /* t0 = 0x80004001 */ + csrw mepc, t0 /* Write to MEPC */ + csrr t1, mepc /* Read back MEPC */ + + /* Check if bits [1:0] are masked (IALIGN=32 without RVC) */ + andi a0, t1, 3 /* a0 = 0 if both bits masked correctly */ + + /* Set correct return address */ + lla t0, test_completed + csrw mepc, t0 + +skip_test: + mret + +/* Exit with semihosting */ +_exit: + lla a1, semiargs + li t0, 0x20026 /* ADP_Stopped_ApplicationExit */ + sd t0, 0(a1) + sd a0, 8(a1) + li a0, 0x20 /* TARGET_SYS_EXIT_EXTENDED */ + + /* Semihosting call sequence */ + .balign 16 + slli zero, zero, 0x1f + ebreak + srai zero, zero, 0x7 + j . + + .data + .balign 8 +semiargs: + .space 16 -- 2.50.0
From: Vasilis Liaskovitis <vliaskovitis@suse.com> Usage of vsetvli instruction is reserved if VLMAX is changed when vsetvli rs1 and rd arguments are x0. In this case, if the new property is true, only the vill bit will be set. See https://github.com/riscv/riscv-isa-manual/blob/main/src/v-st-ext.adoc#avl-encoding According to the spec, the above use cases are reserved, and "Implementations may set vill in either case." Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2422 Signed-off-by: Vasilis Liaskovitis <vliaskovitis@suse.com> Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> Message-ID: <20250618213542.22873-1-vliaskovitis@suse.com> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/helper.h | 2 +- target/riscv/cpu_cfg_fields.h.inc | 1 + target/riscv/cpu.c | 1 + target/riscv/vector_helper.c | 12 +++++++++++- target/riscv/insn_trans/trans_rvv.c.inc | 4 ++-- 5 files changed, 16 insertions(+), 4 deletions(-) diff --git a/target/riscv/helper.h b/target/riscv/helper.h index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/helper.h +++ b/target/riscv/helper.h @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_3(hyp_hsv_d, TCG_CALL_NO_WG, void, env, tl, tl) #endif /* Vector functions */ -DEF_HELPER_3(vsetvl, tl, env, tl, tl) +DEF_HELPER_4(vsetvl, tl, env, tl, tl, tl) DEF_HELPER_5(vle8_v, void, ptr, ptr, tl, env, i32) DEF_HELPER_5(vle16_v, void, ptr, ptr, tl, env, i32) DEF_HELPER_5(vle32_v, void, ptr, ptr, tl, env, i32) diff --git a/target/riscv/cpu_cfg_fields.h.inc b/target/riscv/cpu_cfg_fields.h.inc index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/cpu_cfg_fields.h.inc +++ b/target/riscv/cpu_cfg_fields.h.inc @@ -XXX,XX +XXX,XX @@ BOOL_FIELD(ext_supm) BOOL_FIELD(rvv_ta_all_1s) BOOL_FIELD(rvv_ma_all_1s) BOOL_FIELD(rvv_vl_half_avl) +BOOL_FIELD(rvv_vsetvl_x0_vill) /* Named features */ BOOL_FIELD(ext_svade) BOOL_FIELD(ext_zic64b) diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/cpu.c +++ b/target/riscv/cpu.c @@ -XXX,XX +XXX,XX @@ static const Property riscv_cpu_properties[] = { DEFINE_PROP_BOOL("rvv_ta_all_1s", RISCVCPU, cfg.rvv_ta_all_1s, false), DEFINE_PROP_BOOL("rvv_ma_all_1s", RISCVCPU, cfg.rvv_ma_all_1s, false), DEFINE_PROP_BOOL("rvv_vl_half_avl", RISCVCPU, cfg.rvv_vl_half_avl, false), + DEFINE_PROP_BOOL("rvv_vsetvl_x0_vill", RISCVCPU, cfg.rvv_vsetvl_x0_vill, false), /* * write_misa() is marked as experimental for now so mark diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -XXX,XX +XXX,XX @@ #include <math.h> target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1, - target_ulong s2) + target_ulong s2, target_ulong x0) { int vlmax, vl; RISCVCPU *cpu = env_archcpu(env); @@ -XXX,XX +XXX,XX @@ target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1, } else { vl = vlmax; } + + if (cpu->cfg.rvv_vsetvl_x0_vill && x0 && (env->vl != vl)) { + /* only set vill bit. */ + env->vill = 1; + env->vtype = 0; + env->vl = 0; + env->vstart = 0; + return 0; + } + env->vl = vl; env->vtype = s2; env->vstart = 0; diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn_trans/trans_rvv.c.inc +++ b/target/riscv/insn_trans/trans_rvv.c.inc @@ -XXX,XX +XXX,XX @@ static bool do_vsetvl(DisasContext *s, int rd, int rs1, TCGv s2) s1 = get_gpr(s, rs1, EXT_ZERO); } - gen_helper_vsetvl(dst, tcg_env, s1, s2); + gen_helper_vsetvl(dst, tcg_env, s1, s2, tcg_constant_tl((int) (rd == 0 && rs1 == 0))); gen_set_gpr(s, rd, dst); finalize_rvv_inst(s); @@ -XXX,XX +XXX,XX @@ static bool do_vsetivli(DisasContext *s, int rd, TCGv s1, TCGv s2) dst = dest_gpr(s, rd); - gen_helper_vsetvl(dst, tcg_env, s1, s2); + gen_helper_vsetvl(dst, tcg_env, s1, s2, tcg_constant_tl(0)); gen_set_gpr(s, rd, dst); finalize_rvv_inst(s); gen_update_pc(s, s->cur_insn_len); -- 2.50.0
From: Alexandre Ghiti <alexghiti@rivosinc.com> The Svrsw60t59b extension allows to free the PTE reserved bits 60 and 59 for software to use. Reviewed-by: Deepak Gupta <debug@rivosinc.com> Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com> Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> Reviewed-by: Nutty Liu<liujingqi@lanxincomputing.com> Message-ID: <20250702-dev-alex-svrsw60b59b_v2-v2-1-504ddf0f8530@rivosinc.com> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- hw/riscv/riscv-iommu-bits.h | 1 + target/riscv/cpu_bits.h | 3 ++- target/riscv/cpu_cfg_fields.h.inc | 1 + hw/riscv/riscv-iommu.c | 3 ++- target/riscv/cpu.c | 2 ++ target/riscv/cpu_helper.c | 3 ++- target/riscv/tcg/tcg-cpu.c | 8 ++++++++ 7 files changed, 18 insertions(+), 3 deletions(-) diff --git a/hw/riscv/riscv-iommu-bits.h b/hw/riscv/riscv-iommu-bits.h index XXXXXXX..XXXXXXX 100644 --- a/hw/riscv/riscv-iommu-bits.h +++ b/hw/riscv/riscv-iommu-bits.h @@ -XXX,XX +XXX,XX @@ struct riscv_iommu_pq_record { #define RISCV_IOMMU_CAP_SV39 BIT_ULL(9) #define RISCV_IOMMU_CAP_SV48 BIT_ULL(10) #define RISCV_IOMMU_CAP_SV57 BIT_ULL(11) +#define RISCV_IOMMU_CAP_SVRSW60T59B BIT_ULL(14) #define RISCV_IOMMU_CAP_SV32X4 BIT_ULL(16) #define RISCV_IOMMU_CAP_SV39X4 BIT_ULL(17) #define RISCV_IOMMU_CAP_SV48X4 BIT_ULL(18) diff --git a/target/riscv/cpu_bits.h b/target/riscv/cpu_bits.h index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/cpu_bits.h +++ b/target/riscv/cpu_bits.h @@ -XXX,XX +XXX,XX @@ typedef enum { #define PTE_SOFT 0x300 /* Reserved for Software */ #define PTE_PBMT 0x6000000000000000ULL /* Page-based memory types */ #define PTE_N 0x8000000000000000ULL /* NAPOT translation */ -#define PTE_RESERVED 0x1FC0000000000000ULL /* Reserved bits */ +#define PTE_RESERVED(svrsw60t59b) \ + (svrsw60t59b ? 0x07C0000000000000ULL : 0x1FC0000000000000ULL) /* Reserved bits */ #define PTE_ATTR (PTE_N | PTE_PBMT) /* All attributes bits */ /* Page table PPN shift amount */ diff --git a/target/riscv/cpu_cfg_fields.h.inc b/target/riscv/cpu_cfg_fields.h.inc index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/cpu_cfg_fields.h.inc +++ b/target/riscv/cpu_cfg_fields.h.inc @@ -XXX,XX +XXX,XX @@ BOOL_FIELD(ext_svadu) BOOL_FIELD(ext_svinval) BOOL_FIELD(ext_svnapot) BOOL_FIELD(ext_svpbmt) +BOOL_FIELD(ext_svrsw60t59b) BOOL_FIELD(ext_svvptc) BOOL_FIELD(ext_svukte) BOOL_FIELD(ext_zdinx) diff --git a/hw/riscv/riscv-iommu.c b/hw/riscv/riscv-iommu.c index XXXXXXX..XXXXXXX 100644 --- a/hw/riscv/riscv-iommu.c +++ b/hw/riscv/riscv-iommu.c @@ -XXX,XX +XXX,XX @@ static void riscv_iommu_realize(DeviceState *dev, Error **errp) } if (s->enable_g_stage) { s->cap |= RISCV_IOMMU_CAP_SV32X4 | RISCV_IOMMU_CAP_SV39X4 | - RISCV_IOMMU_CAP_SV48X4 | RISCV_IOMMU_CAP_SV57X4; + RISCV_IOMMU_CAP_SV48X4 | RISCV_IOMMU_CAP_SV57X4 | + RISCV_IOMMU_CAP_SVRSW60T59B; } if (s->hpm_cntrs > 0) { diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/cpu.c +++ b/target/riscv/cpu.c @@ -XXX,XX +XXX,XX @@ const RISCVIsaExtData isa_edata_arr[] = { ISA_EXT_DATA_ENTRY(svinval, PRIV_VERSION_1_12_0, ext_svinval), ISA_EXT_DATA_ENTRY(svnapot, PRIV_VERSION_1_12_0, ext_svnapot), ISA_EXT_DATA_ENTRY(svpbmt, PRIV_VERSION_1_12_0, ext_svpbmt), + ISA_EXT_DATA_ENTRY(svrsw60t59b, PRIV_VERSION_1_13_0, ext_svrsw60t59b), ISA_EXT_DATA_ENTRY(svukte, PRIV_VERSION_1_13_0, ext_svukte), ISA_EXT_DATA_ENTRY(svvptc, PRIV_VERSION_1_13_0, ext_svvptc), ISA_EXT_DATA_ENTRY(xtheadba, PRIV_VERSION_1_11_0, ext_xtheadba), @@ -XXX,XX +XXX,XX @@ const RISCVCPUMultiExtConfig riscv_cpu_extensions[] = { MULTI_EXT_CFG_BOOL("svinval", ext_svinval, false), MULTI_EXT_CFG_BOOL("svnapot", ext_svnapot, false), MULTI_EXT_CFG_BOOL("svpbmt", ext_svpbmt, false), + MULTI_EXT_CFG_BOOL("svrsw60t59b", ext_svrsw60t59b, false), MULTI_EXT_CFG_BOOL("svvptc", ext_svvptc, true), MULTI_EXT_CFG_BOOL("zicntr", ext_zicntr, true), diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/cpu_helper.c +++ b/target/riscv/cpu_helper.c @@ -XXX,XX +XXX,XX @@ static int get_physical_address(CPURISCVState *env, hwaddr *physical, bool svade = riscv_cpu_cfg(env)->ext_svade; bool svadu = riscv_cpu_cfg(env)->ext_svadu; bool adue = svadu ? env->menvcfg & MENVCFG_ADUE : !svade; + bool svrsw60t59b = riscv_cpu_cfg(env)->ext_svrsw60t59b; if (first_stage && two_stage && env->virt_enabled) { pbmte = pbmte && (env->henvcfg & HENVCFG_PBMTE); @@ -XXX,XX +XXX,XX @@ static int get_physical_address(CPURISCVState *env, hwaddr *physical, if (riscv_cpu_sxl(env) == MXL_RV32) { ppn = pte >> PTE_PPN_SHIFT; } else { - if (pte & PTE_RESERVED) { + if (pte & PTE_RESERVED(svrsw60t59b)) { qemu_log_mask(LOG_GUEST_ERROR, "%s: reserved bits set in PTE: " "addr: 0x%" HWADDR_PRIx " pte: 0x" TARGET_FMT_lx "\n", __func__, pte_addr, pte); diff --git a/target/riscv/tcg/tcg-cpu.c b/target/riscv/tcg/tcg-cpu.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/tcg/tcg-cpu.c +++ b/target/riscv/tcg/tcg-cpu.c @@ -XXX,XX +XXX,XX @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp) cpu->cfg.ext_ssctr = false; } + if (cpu->cfg.ext_svrsw60t59b && + (!cpu->cfg.mmu || mcc->def->misa_mxl_max == MXL_RV32)) { + error_setg(errp, "svrsw60t59b is not supported on RV32 and MMU-less platforms"); + return; + } + /* * Disable isa extensions based on priv spec after we * validated and set everything we need. @@ -XXX,XX +XXX,XX @@ static void riscv_init_max_cpu_extensions(Object *obj) if (env->misa_mxl != MXL_RV32) { isa_ext_update_enabled(cpu, CPU_CFG_OFFSET(ext_zcf), false); + } else { + isa_ext_update_enabled(cpu, CPU_CFG_OFFSET(ext_svrsw60t59b), false); } /* -- 2.50.0