:p
atchew
Login
The following changes since commit 553cf5d7c47bee05a3dec9461c1f8430316d516b: Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20200626' into staging (2020-06-26 18:22:36 +0100) are available in the Git repository at: git@github.com:alistair23/qemu.git tags/pull-riscv-to-apply-20200626-1 for you to fetch changes up to b39d59434ea10649fdb9e0a339c30c76e38c5e17: target/riscv: configure and turn on vector extension from command line (2020-06-26 14:22:15 -0700) ---------------------------------------------------------------- This PR contains two patches to improve PLIC support in QEMU. The rest of the PR is adding support for the v0.7.1 RISC-V vector extensions. This is experimental support as the vector extensions are still in a draft state. ---------------------------------------------------------------- Jessica Clarke (2): riscv: plic: Honour source priorities riscv: plic: Add a couple of mising sifive_plic_update calls LIU Zhiwei (61): target/riscv: add vector extension field in CPURISCVState target/riscv: implementation-defined constant parameters target/riscv: support vector extension csr target/riscv: add vector configure instruction target/riscv: add an internals.h header target/riscv: add vector stride load and store instructions target/riscv: add vector index load and store instructions target/riscv: add fault-only-first unit stride load target/riscv: add vector amo operations target/riscv: vector single-width integer add and subtract target/riscv: vector widening integer add and subtract target/riscv: vector integer add-with-carry / subtract-with-borrow instructions target/riscv: vector bitwise logical instructions target/riscv: vector single-width bit shift instructions target/riscv: vector narrowing integer right shift instructions target/riscv: vector integer comparison instructions target/riscv: vector integer min/max instructions target/riscv: vector single-width integer multiply instructions target/riscv: vector integer divide instructions target/riscv: vector widening integer multiply instructions target/riscv: vector single-width integer multiply-add instructions target/riscv: vector widening integer multiply-add instructions target/riscv: vector integer merge and move instructions target/riscv: vector single-width saturating add and subtract target/riscv: vector single-width averaging add and subtract target/riscv: vector single-width fractional multiply with rounding and saturation target/riscv: vector widening saturating scaled multiply-add target/riscv: vector single-width scaling shift instructions target/riscv: vector narrowing fixed-point clip instructions target/riscv: vector single-width floating-point add/subtract instructions target/riscv: vector widening floating-point add/subtract instructions target/riscv: vector single-width floating-point multiply/divide instructions target/riscv: vector widening floating-point multiply target/riscv: vector single-width floating-point fused multiply-add instructions target/riscv: vector widening floating-point fused multiply-add instructions target/riscv: vector floating-point square-root instruction target/riscv: vector floating-point min/max instructions target/riscv: vector floating-point sign-injection instructions target/riscv: vector floating-point compare instructions target/riscv: vector floating-point classify instructions target/riscv: vector floating-point merge instructions target/riscv: vector floating-point/integer type-convert instructions target/riscv: widening floating-point/integer type-convert instructions target/riscv: narrowing floating-point/integer type-convert instructions target/riscv: vector single-width integer reduction instructions target/riscv: vector wideing integer reduction instructions target/riscv: vector single-width floating-point reduction instructions target/riscv: vector widening floating-point reduction instructions target/riscv: vector mask-register logical instructions target/riscv: vector mask population count vmpopc target/riscv: vmfirst find-first-set mask bit target/riscv: set-X-first mask bit target/riscv: vector iota instruction target/riscv: vector element index instruction target/riscv: integer extract instruction target/riscv: integer scalar move instruction target/riscv: floating-point scalar move instructions target/riscv: vector slide instructions target/riscv: vector register gather instruction target/riscv: vector compress instruction target/riscv: configure and turn on vector extension from command line target/riscv/cpu.h | 82 +- target/riscv/cpu_bits.h | 15 + target/riscv/helper.h | 1069 +++++++ target/riscv/internals.h | 41 + target/riscv/insn32-64.decode | 11 + target/riscv/insn32.decode | 372 +++ hw/riscv/sifive_plic.c | 20 +- target/riscv/cpu.c | 50 + target/riscv/csr.c | 75 +- target/riscv/fpu_helper.c | 33 +- target/riscv/insn_trans/trans_rvv.inc.c | 2888 ++++++++++++++++++ target/riscv/translate.c | 27 +- target/riscv/vector_helper.c | 4899 +++++++++++++++++++++++++++++++ target/riscv/Makefile.objs | 2 +- 14 files changed, 9534 insertions(+), 50 deletions(-) create mode 100644 target/riscv/internals.h create mode 100644 target/riscv/insn_trans/trans_rvv.inc.c create mode 100644 target/riscv/vector_helper.c
From: Jessica Clarke <jrtc27@jrtc27.com> The source priorities can be used to order sources with respect to other sources, not just as a way to enable/disable them based off a threshold. We must therefore always claim the highest-priority source, rather than the first source we find. Signed-off-by: Jessica Clarke <jrtc27@jrtc27.com> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Message-Id: <20200618202343.20455-1-jrtc27@jrtc27.com> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- hw/riscv/sifive_plic.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/hw/riscv/sifive_plic.c b/hw/riscv/sifive_plic.c index XXXXXXX..XXXXXXX 100644 --- a/hw/riscv/sifive_plic.c +++ b/hw/riscv/sifive_plic.c @@ -XXX,XX +XXX,XX @@ static void sifive_plic_update(SiFivePLICState *plic) static uint32_t sifive_plic_claim(SiFivePLICState *plic, uint32_t addrid) { int i, j; + uint32_t max_irq = 0; + uint32_t max_prio = plic->target_priority[addrid]; + for (i = 0; i < plic->bitfield_words; i++) { uint32_t pending_enabled_not_claimed = (plic->pending[i] & ~plic->claimed[i]) & @@ -XXX,XX +XXX,XX @@ static uint32_t sifive_plic_claim(SiFivePLICState *plic, uint32_t addrid) int irq = (i << 5) + j; uint32_t prio = plic->source_priority[irq]; int enabled = pending_enabled_not_claimed & (1 << j); - if (enabled && prio > plic->target_priority[addrid]) { - sifive_plic_set_pending(plic, irq, false); - sifive_plic_set_claimed(plic, irq, true); - return irq; + if (enabled && prio > max_prio) { + max_irq = irq; + max_prio = prio; } } } - return 0; + + if (max_irq) { + sifive_plic_set_pending(plic, max_irq, false); + sifive_plic_set_claimed(plic, max_irq, true); + } + return max_irq; } static uint64_t sifive_plic_read(void *opaque, hwaddr addr, unsigned size) -- 2.27.0
From: Jessica Clarke <jrtc27@jrtc27.com> Claiming an interrupt and changing the source priority both potentially affect whether an interrupt is pending, thus we must re-compute xEIP. Note that we don't put the sifive_plic_update inside sifive_plic_claim so that the logging of a claim (and the resulting IRQ) happens before the state update, making the causal effect clear, and that we drop the explicit call to sifive_plic_print_state when claiming since sifive_plic_update already does that automatically at the end for us. This can result in both spurious interrupt storms if you fail to complete an IRQ before enabling interrupts (and no other actions occur that result in a call to sifive_plic_update), but also more importantly lost interrupts if a disabled interrupt is pending and then becomes enabled. Signed-off-by: Jessica Clarke <jrtc27@jrtc27.com> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Message-id: 20200618210649.22451-1-jrtc27@jrtc27.com Message-Id: <20200618210649.22451-1-jrtc27@jrtc27.com> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- hw/riscv/sifive_plic.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hw/riscv/sifive_plic.c b/hw/riscv/sifive_plic.c index XXXXXXX..XXXXXXX 100644 --- a/hw/riscv/sifive_plic.c +++ b/hw/riscv/sifive_plic.c @@ -XXX,XX +XXX,XX @@ static uint64_t sifive_plic_read(void *opaque, hwaddr addr, unsigned size) plic->addr_config[addrid].hartid, mode_to_char(plic->addr_config[addrid].mode), value); - sifive_plic_print_state(plic); } + sifive_plic_update(plic); return value; } } @@ -XXX,XX +XXX,XX @@ static void sifive_plic_write(void *opaque, hwaddr addr, uint64_t value, qemu_log("plic: write priority: irq=%d priority=%d\n", irq, plic->source_priority[irq]); } + sifive_plic_update(plic); return; } else if (addr >= plic->pending_base && /* 1 bit per source */ addr < plic->pending_base + (plic->num_sources >> 3)) -- 2.27.0
From: LIU Zhiwei <zhiwei_liu@c-sky.com> The 32 vector registers will be viewed as a continuous memory block. It avoids the convension between element index and (regno, offset). Thus elements can be directly accessed by offset from the first vector base address. Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> Acked-by: Alistair Francis <alistair.francis@wdc.com> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Message-id: 20200623215920.2594-2-zhiwei_liu@c-sky.com Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/cpu.h | 12 ++++++++++++ target/riscv/translate.c | 3 ++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/cpu.h +++ b/target/riscv/cpu.h @@ -XXX,XX +XXX,XX @@ #define RVA RV('A') #define RVF RV('F') #define RVD RV('D') +#define RVV RV('V') #define RVC RV('C') #define RVS RV('S') #define RVU RV('U') @@ -XXX,XX +XXX,XX @@ typedef struct CPURISCVState CPURISCVState; #include "pmp.h" +#define RV_VLEN_MAX 512 + struct CPURISCVState { target_ulong gpr[32]; uint64_t fpr[32]; /* assume both F and D extensions */ + + /* vector coprocessor state. */ + uint64_t vreg[32 * RV_VLEN_MAX / 64] QEMU_ALIGNED(16); + target_ulong vxrm; + target_ulong vxsat; + target_ulong vl; + target_ulong vstart; + target_ulong vtype; + target_ulong pc; target_ulong load_res; target_ulong load_val; diff --git a/target/riscv/translate.c b/target/riscv/translate.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/translate.c +++ b/target/riscv/translate.c @@ -XXX,XX +XXX,XX @@ #include "instmap.h" /* global register indices */ -static TCGv cpu_gpr[32], cpu_pc; +static TCGv cpu_gpr[32], cpu_pc, cpu_vl; static TCGv_i64 cpu_fpr[32]; /* assume F and D extensions */ static TCGv load_res; static TCGv load_val; @@ -XXX,XX +XXX,XX @@ void riscv_translate_init(void) } cpu_pc = tcg_global_mem_new(cpu_env, offsetof(CPURISCVState, pc), "pc"); + cpu_vl = tcg_global_mem_new(cpu_env, offsetof(CPURISCVState, vl), "vl"); load_res = tcg_global_mem_new(cpu_env, offsetof(CPURISCVState, load_res), "load_res"); load_val = tcg_global_mem_new(cpu_env, offsetof(CPURISCVState, load_val), -- 2.27.0
From: LIU Zhiwei <zhiwei_liu@c-sky.com> vlen is the vector register length in bits. elen is the max element size in bits. vext_spec is the vector specification version, default value is v0.7.1. Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Message-id: 20200623215920.2594-3-zhiwei_liu@c-sky.com Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/cpu.h | 5 +++++ target/riscv/cpu.c | 7 +++++++ 2 files changed, 12 insertions(+) diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/cpu.h +++ b/target/riscv/cpu.h @@ -XXX,XX +XXX,XX @@ enum { #define PRIV_VERSION_1_10_0 0x00011000 #define PRIV_VERSION_1_11_0 0x00011100 +#define VEXT_VERSION_0_07_1 0x00000701 + #define TRANSLATE_PMP_FAIL 2 #define TRANSLATE_FAIL 1 #define TRANSLATE_SUCCESS 0 @@ -XXX,XX +XXX,XX @@ struct CPURISCVState { target_ulong guest_phys_fault_addr; target_ulong priv_ver; + target_ulong vext_ver; target_ulong misa; target_ulong misa_mask; @@ -XXX,XX +XXX,XX @@ typedef struct RISCVCPU { char *priv_spec; char *user_spec; + uint16_t vlen; + uint16_t elen; bool mmu; bool pmp; } cfg; diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/cpu.c +++ b/target/riscv/cpu.c @@ -XXX,XX +XXX,XX @@ static void set_priv_version(CPURISCVState *env, int priv_ver) env->priv_ver = priv_ver; } +static void set_vext_version(CPURISCVState *env, int vext_ver) +{ + env->vext_ver = vext_ver; +} + static void set_feature(CPURISCVState *env, int feature) { env->features |= (1ULL << feature); @@ -XXX,XX +XXX,XX @@ static void riscv_cpu_realize(DeviceState *dev, Error **errp) CPURISCVState *env = &cpu->env; RISCVCPUClass *mcc = RISCV_CPU_GET_CLASS(dev); int priv_version = PRIV_VERSION_1_11_0; + int vext_version = VEXT_VERSION_0_07_1; target_ulong target_misa = 0; Error *local_err = NULL; @@ -XXX,XX +XXX,XX @@ static void riscv_cpu_realize(DeviceState *dev, Error **errp) } set_priv_version(env, priv_version); + set_vext_version(env, vext_version); if (cpu->cfg.mmu) { set_feature(env, RISCV_FEATURE_MMU); -- 2.27.0
From: LIU Zhiwei <zhiwei_liu@c-sky.com> The v0.7.1 specification does not define vector status within mstatus. A future revision will define the privileged portion of the vector status. Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Message-id: 20200623215920.2594-4-zhiwei_liu@c-sky.com Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/cpu_bits.h | 15 +++++++++ target/riscv/csr.c | 75 ++++++++++++++++++++++++++++++++++++++++- 2 files changed, 89 insertions(+), 1 deletion(-) diff --git a/target/riscv/cpu_bits.h b/target/riscv/cpu_bits.h index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/cpu_bits.h +++ b/target/riscv/cpu_bits.h @@ -XXX,XX +XXX,XX @@ #define FSR_NXA (FPEXC_NX << FSR_AEXC_SHIFT) #define FSR_AEXC (FSR_NVA | FSR_OFA | FSR_UFA | FSR_DZA | FSR_NXA) +/* Vector Fixed-Point round model */ +#define FSR_VXRM_SHIFT 9 +#define FSR_VXRM (0x3 << FSR_VXRM_SHIFT) + +/* Vector Fixed-Point saturation flag */ +#define FSR_VXSAT_SHIFT 8 +#define FSR_VXSAT (0x1 << FSR_VXSAT_SHIFT) + /* Control and Status Registers */ /* User Trap Setup */ @@ -XXX,XX +XXX,XX @@ #define CSR_FRM 0x002 #define CSR_FCSR 0x003 +/* User Vector CSRs */ +#define CSR_VSTART 0x008 +#define CSR_VXSAT 0x009 +#define CSR_VXRM 0x00a +#define CSR_VL 0xc20 +#define CSR_VTYPE 0xc21 + /* User Timers and Counters */ #define CSR_CYCLE 0xc00 #define CSR_TIME 0xc01 diff --git a/target/riscv/csr.c b/target/riscv/csr.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/csr.c +++ b/target/riscv/csr.c @@ -XXX,XX +XXX,XX @@ void riscv_set_csr_ops(int csrno, riscv_csr_operations *ops) static int fs(CPURISCVState *env, int csrno) { #if !defined(CONFIG_USER_ONLY) + /* loose check condition for fcsr in vector extension */ + if ((csrno == CSR_FCSR) && (env->misa & RVV)) { + return 0; + } if (!env->debugger && !riscv_cpu_fp_enabled(env)) { return -1; } @@ -XXX,XX +XXX,XX @@ static int fs(CPURISCVState *env, int csrno) return 0; } +static int vs(CPURISCVState *env, int csrno) +{ + if (env->misa & RVV) { + return 0; + } + return -1; +} + static int ctr(CPURISCVState *env, int csrno) { #if !defined(CONFIG_USER_ONLY) @@ -XXX,XX +XXX,XX @@ static int read_fcsr(CPURISCVState *env, int csrno, target_ulong *val) #endif *val = (riscv_cpu_get_fflags(env) << FSR_AEXC_SHIFT) | (env->frm << FSR_RD_SHIFT); + if (vs(env, csrno) >= 0) { + *val |= (env->vxrm << FSR_VXRM_SHIFT) + | (env->vxsat << FSR_VXSAT_SHIFT); + } return 0; } @@ -XXX,XX +XXX,XX @@ static int write_fcsr(CPURISCVState *env, int csrno, target_ulong val) env->mstatus |= MSTATUS_FS; #endif env->frm = (val & FSR_RD) >> FSR_RD_SHIFT; + if (vs(env, csrno) >= 0) { + env->vxrm = (val & FSR_VXRM) >> FSR_VXRM_SHIFT; + env->vxsat = (val & FSR_VXSAT) >> FSR_VXSAT_SHIFT; + } riscv_cpu_set_fflags(env, (val & FSR_AEXC) >> FSR_AEXC_SHIFT); return 0; } +static int read_vtype(CPURISCVState *env, int csrno, target_ulong *val) +{ + *val = env->vtype; + return 0; +} + +static int read_vl(CPURISCVState *env, int csrno, target_ulong *val) +{ + *val = env->vl; + return 0; +} + +static int read_vxrm(CPURISCVState *env, int csrno, target_ulong *val) +{ + *val = env->vxrm; + return 0; +} + +static int write_vxrm(CPURISCVState *env, int csrno, target_ulong val) +{ + env->vxrm = val; + return 0; +} + +static int read_vxsat(CPURISCVState *env, int csrno, target_ulong *val) +{ + *val = env->vxsat; + return 0; +} + +static int write_vxsat(CPURISCVState *env, int csrno, target_ulong val) +{ + env->vxsat = val; + return 0; +} + +static int read_vstart(CPURISCVState *env, int csrno, target_ulong *val) +{ + *val = env->vstart; + return 0; +} + +static int write_vstart(CPURISCVState *env, int csrno, target_ulong val) +{ + env->vstart = val; + return 0; +} + /* User Timers and Counters */ static int read_instret(CPURISCVState *env, int csrno, target_ulong *val) { @@ -XXX,XX +XXX,XX @@ static riscv_csr_operations csr_ops[CSR_TABLE_SIZE] = { [CSR_FFLAGS] = { fs, read_fflags, write_fflags }, [CSR_FRM] = { fs, read_frm, write_frm }, [CSR_FCSR] = { fs, read_fcsr, write_fcsr }, - + /* Vector CSRs */ + [CSR_VSTART] = { vs, read_vstart, write_vstart }, + [CSR_VXSAT] = { vs, read_vxsat, write_vxsat }, + [CSR_VXRM] = { vs, read_vxrm, write_vxrm }, + [CSR_VL] = { vs, read_vl }, + [CSR_VTYPE] = { vs, read_vtype }, /* User Timers and Counters */ [CSR_CYCLE] = { ctr, read_instret }, [CSR_INSTRET] = { ctr, read_instret }, -- 2.27.0
From: LIU Zhiwei <zhiwei_liu@c-sky.com> vsetvl and vsetvli are two configure instructions for vl, vtype. TB flags should update after configure instructions. The (ill, lmul, sew ) of vtype and the bit of (VSTART == 0 && VL == VLMAX) will be placed within tb_flags. Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Message-id: 20200623215920.2594-5-zhiwei_liu@c-sky.com Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/cpu.h | 63 +++++++++++++++++--- target/riscv/helper.h | 3 + target/riscv/insn32.decode | 5 ++ target/riscv/insn_trans/trans_rvv.inc.c | 79 +++++++++++++++++++++++++ target/riscv/translate.c | 17 +++++- target/riscv/vector_helper.c | 53 +++++++++++++++++ target/riscv/Makefile.objs | 2 +- 7 files changed, 210 insertions(+), 12 deletions(-) create mode 100644 target/riscv/insn_trans/trans_rvv.inc.c create mode 100644 target/riscv/vector_helper.c diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/cpu.h +++ b/target/riscv/cpu.h @@ -XXX,XX +XXX,XX @@ #define RISCV_CPU_H #include "hw/core/cpu.h" +#include "hw/registerfields.h" #include "exec/cpu-defs.h" #include "fpu/softfloat-types.h" @@ -XXX,XX +XXX,XX @@ typedef struct CPURISCVState CPURISCVState; #define RV_VLEN_MAX 512 +FIELD(VTYPE, VLMUL, 0, 2) +FIELD(VTYPE, VSEW, 2, 3) +FIELD(VTYPE, VEDIV, 5, 2) +FIELD(VTYPE, RESERVED, 7, sizeof(target_ulong) * 8 - 9) +FIELD(VTYPE, VILL, sizeof(target_ulong) * 8 - 2, 1) + struct CPURISCVState { target_ulong gpr[32]; uint64_t fpr[32]; /* assume both F and D extensions */ @@ -XXX,XX +XXX,XX @@ void riscv_cpu_set_fflags(CPURISCVState *env, target_ulong); #define TB_FLAGS_MMU_MASK 3 #define TB_FLAGS_MSTATUS_FS MSTATUS_FS +typedef CPURISCVState CPUArchState; +typedef RISCVCPU ArchCPU; +#include "exec/cpu-all.h" + +FIELD(TB_FLAGS, VL_EQ_VLMAX, 2, 1) +FIELD(TB_FLAGS, LMUL, 3, 2) +FIELD(TB_FLAGS, SEW, 5, 3) +FIELD(TB_FLAGS, VILL, 8, 1) + +/* + * A simplification for VLMAX + * = (1 << LMUL) * VLEN / (8 * (1 << SEW)) + * = (VLEN << LMUL) / (8 << SEW) + * = (VLEN << LMUL) >> (SEW + 3) + * = VLEN >> (SEW + 3 - LMUL) + */ +static inline uint32_t vext_get_vlmax(RISCVCPU *cpu, target_ulong vtype) +{ + uint8_t sew, lmul; + + sew = FIELD_EX64(vtype, VTYPE, VSEW); + lmul = FIELD_EX64(vtype, VTYPE, VLMUL); + return cpu->cfg.vlen >> (sew + 3 - lmul); +} + static inline void cpu_get_tb_cpu_state(CPURISCVState *env, target_ulong *pc, - target_ulong *cs_base, uint32_t *flags) + target_ulong *cs_base, uint32_t *pflags) { + uint32_t flags = 0; + *pc = env->pc; *cs_base = 0; + + if (riscv_has_ext(env, RVV)) { + uint32_t vlmax = vext_get_vlmax(env_archcpu(env), env->vtype); + bool vl_eq_vlmax = (env->vstart == 0) && (vlmax == env->vl); + flags = FIELD_DP32(flags, TB_FLAGS, VILL, + FIELD_EX64(env->vtype, VTYPE, VILL)); + flags = FIELD_DP32(flags, TB_FLAGS, SEW, + FIELD_EX64(env->vtype, VTYPE, VSEW)); + flags = FIELD_DP32(flags, TB_FLAGS, LMUL, + FIELD_EX64(env->vtype, VTYPE, VLMUL)); + flags = FIELD_DP32(flags, TB_FLAGS, VL_EQ_VLMAX, vl_eq_vlmax); + } else { + flags = FIELD_DP32(flags, TB_FLAGS, VILL, 1); + } + #ifdef CONFIG_USER_ONLY - *flags = TB_FLAGS_MSTATUS_FS; + flags |= TB_FLAGS_MSTATUS_FS; #else - *flags = cpu_mmu_index(env, 0); + flags |= cpu_mmu_index(env, 0); if (riscv_cpu_fp_enabled(env)) { - *flags |= env->mstatus & MSTATUS_FS; + flags |= env->mstatus & MSTATUS_FS; } #endif + *pflags = flags; } int riscv_csrrw(CPURISCVState *env, int csrno, target_ulong *ret_value, @@ -XXX,XX +XXX,XX @@ void riscv_set_csr_ops(int csrno, riscv_csr_operations *ops); void riscv_cpu_register_gdb_regs_for_features(CPUState *cs); -typedef CPURISCVState CPUArchState; -typedef RISCVCPU ArchCPU; - -#include "exec/cpu-all.h" - #endif /* RISCV_CPU_H */ diff --git a/target/riscv/helper.h b/target/riscv/helper.h index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/helper.h +++ b/target/riscv/helper.h @@ -XXX,XX +XXX,XX @@ DEF_HELPER_1(tlb_flush, void, env) #ifndef CONFIG_USER_ONLY DEF_HELPER_1(hyp_tlb_flush, void, env) #endif + +/* Vector functions */ +DEF_HELPER_3(vsetvl, tl, env, tl, tl) diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn32.decode +++ b/target/riscv/insn32.decode @@ -XXX,XX +XXX,XX @@ @r_rm ....... ..... ..... ... ..... ....... %rs2 %rs1 %rm %rd @r2_rm ....... ..... ..... ... ..... ....... %rs1 %rm %rd @r2 ....... ..... ..... ... ..... ....... %rs1 %rd +@r2_zimm . zimm:11 ..... ... ..... ....... %rs1 %rd @hfence_gvma ....... ..... ..... ... ..... ....... %rs2 %rs1 @hfence_vvma ....... ..... ..... ... ..... ....... %rs2 %rs1 @@ -XXX,XX +XXX,XX @@ fcvt_d_wu 1101001 00001 ..... ... ..... 1010011 @r2_rm # *** RV32H Base Instruction Set *** hfence_gvma 0110001 ..... ..... 000 00000 1110011 @hfence_gvma hfence_vvma 0010001 ..... ..... 000 00000 1110011 @hfence_vvma + +# *** RV32V Extension *** +vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm +vsetvl 1000000 ..... ..... 111 ..... 1010111 @r diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c new file mode 100644 index XXXXXXX..XXXXXXX --- /dev/null +++ b/target/riscv/insn_trans/trans_rvv.inc.c @@ -XXX,XX +XXX,XX @@ +/* + * RISC-V translation routines for the RVV Standard Extension. + * + * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. + */ + +static bool trans_vsetvl(DisasContext *ctx, arg_vsetvl *a) +{ + TCGv s1, s2, dst; + + if (!has_ext(ctx, RVV)) { + return false; + } + + s2 = tcg_temp_new(); + dst = tcg_temp_new(); + + /* Using x0 as the rs1 register specifier, encodes an infinite AVL */ + if (a->rs1 == 0) { + /* As the mask is at least one bit, RV_VLEN_MAX is >= VLMAX */ + s1 = tcg_const_tl(RV_VLEN_MAX); + } else { + s1 = tcg_temp_new(); + gen_get_gpr(s1, a->rs1); + } + gen_get_gpr(s2, a->rs2); + gen_helper_vsetvl(dst, cpu_env, s1, s2); + gen_set_gpr(a->rd, dst); + tcg_gen_movi_tl(cpu_pc, ctx->pc_succ_insn); + lookup_and_goto_ptr(ctx); + ctx->base.is_jmp = DISAS_NORETURN; + + tcg_temp_free(s1); + tcg_temp_free(s2); + tcg_temp_free(dst); + return true; +} + +static bool trans_vsetvli(DisasContext *ctx, arg_vsetvli *a) +{ + TCGv s1, s2, dst; + + if (!has_ext(ctx, RVV)) { + return false; + } + + s2 = tcg_const_tl(a->zimm); + dst = tcg_temp_new(); + + /* Using x0 as the rs1 register specifier, encodes an infinite AVL */ + if (a->rs1 == 0) { + /* As the mask is at least one bit, RV_VLEN_MAX is >= VLMAX */ + s1 = tcg_const_tl(RV_VLEN_MAX); + } else { + s1 = tcg_temp_new(); + gen_get_gpr(s1, a->rs1); + } + gen_helper_vsetvl(dst, cpu_env, s1, s2); + gen_set_gpr(a->rd, dst); + gen_goto_tb(ctx, 0, ctx->pc_succ_insn); + ctx->base.is_jmp = DISAS_NORETURN; + + tcg_temp_free(s1); + tcg_temp_free(s2); + tcg_temp_free(dst); + return true; +} diff --git a/target/riscv/translate.c b/target/riscv/translate.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/translate.c +++ b/target/riscv/translate.c @@ -XXX,XX +XXX,XX @@ typedef struct DisasContext { to reset this known value. */ int frm; bool ext_ifencei; + /* vector extension */ + bool vill; + uint8_t lmul; + uint8_t sew; + uint16_t vlen; + bool vl_eq_vlmax; } DisasContext; #ifdef TARGET_RISCV64 @@ -XXX,XX +XXX,XX @@ static bool gen_shift(DisasContext *ctx, arg_r *a, #include "insn_trans/trans_rvf.inc.c" #include "insn_trans/trans_rvd.inc.c" #include "insn_trans/trans_rvh.inc.c" +#include "insn_trans/trans_rvv.inc.c" #include "insn_trans/trans_privileged.inc.c" /* Include the auto-generated decoder for 16 bit insn */ @@ -XXX,XX +XXX,XX @@ static void riscv_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) DisasContext *ctx = container_of(dcbase, DisasContext, base); CPURISCVState *env = cs->env_ptr; RISCVCPU *cpu = RISCV_CPU(cs); + uint32_t tb_flags = ctx->base.tb->flags; ctx->pc_succ_insn = ctx->base.pc_first; - ctx->mem_idx = ctx->base.tb->flags & TB_FLAGS_MMU_MASK; - ctx->mstatus_fs = ctx->base.tb->flags & TB_FLAGS_MSTATUS_FS; + ctx->mem_idx = tb_flags & TB_FLAGS_MMU_MASK; + ctx->mstatus_fs = tb_flags & TB_FLAGS_MSTATUS_FS; ctx->priv_ver = env->priv_ver; #if !defined(CONFIG_USER_ONLY) if (riscv_has_ext(env, RVH)) { @@ -XXX,XX +XXX,XX @@ static void riscv_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) ctx->misa = env->misa; ctx->frm = -1; /* unknown rounding mode */ ctx->ext_ifencei = cpu->cfg.ext_ifencei; + ctx->vlen = cpu->cfg.vlen; + ctx->vill = FIELD_EX32(tb_flags, TB_FLAGS, VILL); + ctx->sew = FIELD_EX32(tb_flags, TB_FLAGS, SEW); + ctx->lmul = FIELD_EX32(tb_flags, TB_FLAGS, LMUL); + ctx->vl_eq_vlmax = FIELD_EX32(tb_flags, TB_FLAGS, VL_EQ_VLMAX); } static void riscv_tr_tb_start(DisasContextBase *db, CPUState *cpu) diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c new file mode 100644 index XXXXXXX..XXXXXXX --- /dev/null +++ b/target/riscv/vector_helper.c @@ -XXX,XX +XXX,XX @@ +/* + * RISC-V Vector Extension Helpers for QEMU. + * + * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "qemu/osdep.h" +#include "cpu.h" +#include "exec/exec-all.h" +#include "exec/helper-proto.h" +#include <math.h> + +target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1, + target_ulong s2) +{ + int vlmax, vl; + RISCVCPU *cpu = env_archcpu(env); + uint16_t sew = 8 << FIELD_EX64(s2, VTYPE, VSEW); + uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV); + bool vill = FIELD_EX64(s2, VTYPE, VILL); + target_ulong reserved = FIELD_EX64(s2, VTYPE, RESERVED); + + if ((sew > cpu->cfg.elen) || vill || (ediv != 0) || (reserved != 0)) { + /* only set vill bit. */ + env->vtype = FIELD_DP64(0, VTYPE, VILL, 1); + env->vl = 0; + env->vstart = 0; + return 0; + } + + vlmax = vext_get_vlmax(cpu, s2); + if (s1 <= vlmax) { + vl = s1; + } else { + vl = vlmax; + } + env->vl = vl; + env->vtype = s2; + env->vstart = 0; + return vl; +} diff --git a/target/riscv/Makefile.objs b/target/riscv/Makefile.objs index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/Makefile.objs +++ b/target/riscv/Makefile.objs @@ -XXX,XX +XXX,XX @@ -obj-y += translate.o op_helper.o cpu_helper.o cpu.o csr.o fpu_helper.o gdbstub.o +obj-y += translate.o op_helper.o cpu_helper.o cpu.o csr.o fpu_helper.o vector_helper.o gdbstub.o obj-$(CONFIG_SOFTMMU) += pmp.o ifeq ($(CONFIG_SOFTMMU),y) -- 2.27.0
From: LIU Zhiwei <zhiwei_liu@c-sky.com> The internals.h keeps things that are not relevant to the actual architecture, only to the implementation, separate. Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Message-id: 20200623215920.2594-6-zhiwei_liu@c-sky.com Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/internals.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 target/riscv/internals.h diff --git a/target/riscv/internals.h b/target/riscv/internals.h new file mode 100644 index XXXXXXX..XXXXXXX --- /dev/null +++ b/target/riscv/internals.h @@ -XXX,XX +XXX,XX @@ +/* + * QEMU RISC-V CPU -- internal functions and types + * + * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef RISCV_CPU_INTERNALS_H +#define RISCV_CPU_INTERNALS_H + +#include "hw/registerfields.h" + +#endif -- 2.27.0
From: LIU Zhiwei <zhiwei_liu@c-sky.com> Vector strided operations access the first memory element at the base address, and then access subsequent elements at address increments given by the byte offset contained in the x register specified by rs2. Vector unit-stride operations access elements stored contiguously in memory starting from the base effective address. It can been seen as a special case of strided operations. Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Message-id: 20200623215920.2594-7-zhiwei_liu@c-sky.com Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/helper.h | 105 ++++++ target/riscv/internals.h | 5 + target/riscv/insn32.decode | 32 ++ target/riscv/insn_trans/trans_rvv.inc.c | 355 ++++++++++++++++++++ target/riscv/translate.c | 7 + target/riscv/vector_helper.c | 410 ++++++++++++++++++++++++ 6 files changed, 914 insertions(+) diff --git a/target/riscv/helper.h b/target/riscv/helper.h index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/helper.h +++ b/target/riscv/helper.h @@ -XXX,XX +XXX,XX @@ DEF_HELPER_1(hyp_tlb_flush, void, env) /* Vector functions */ DEF_HELPER_3(vsetvl, tl, env, tl, tl) +DEF_HELPER_5(vlb_v_b, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlb_v_b_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlb_v_h, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlb_v_h_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlb_v_w, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlb_v_w_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlb_v_d, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlb_v_d_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlh_v_h, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlh_v_h_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlh_v_w, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlh_v_w_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlh_v_d, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlh_v_d_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlw_v_w, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlw_v_w_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlw_v_d, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlw_v_d_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vle_v_b, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vle_v_b_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vle_v_h, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vle_v_h_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vle_v_w, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vle_v_w_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vle_v_d, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vle_v_d_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlbu_v_b, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlbu_v_b_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlbu_v_h, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlbu_v_h_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlbu_v_w, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlbu_v_w_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlbu_v_d, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlbu_v_d_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlhu_v_h, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlhu_v_h_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlhu_v_w, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlhu_v_w_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlhu_v_d, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlhu_v_d_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlwu_v_w, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlwu_v_w_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlwu_v_d, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlwu_v_d_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vsb_v_b, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vsb_v_b_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vsb_v_h, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vsb_v_h_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vsb_v_w, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vsb_v_w_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vsb_v_d, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vsb_v_d_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vsh_v_h, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vsh_v_h_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vsh_v_w, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vsh_v_w_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vsh_v_d, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vsh_v_d_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vsw_v_w, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vsw_v_w_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vsw_v_d, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vsw_v_d_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vse_v_b, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vse_v_b_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vse_v_h, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vse_v_h_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vse_v_w, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vse_v_w_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vse_v_d, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vse_v_d_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_6(vlsb_v_b, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vlsb_v_h, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vlsb_v_w, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vlsb_v_d, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vlsh_v_h, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vlsh_v_w, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vlsh_v_d, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vlsw_v_w, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vlsw_v_d, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vlse_v_b, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vlse_v_h, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vlse_v_w, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vlse_v_d, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vlsbu_v_b, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vlsbu_v_h, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vlsbu_v_w, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vlsbu_v_d, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vlshu_v_h, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vlshu_v_w, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vlshu_v_d, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vlswu_v_w, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vlswu_v_d, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vssb_v_b, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vssb_v_h, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vssb_v_w, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vssb_v_d, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vssh_v_h, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vssh_v_w, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vssh_v_d, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vssw_v_w, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vssw_v_d, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vsse_v_b, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vsse_v_h, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vsse_v_w, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vsse_v_d, void, ptr, ptr, tl, tl, env, i32) diff --git a/target/riscv/internals.h b/target/riscv/internals.h index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/internals.h +++ b/target/riscv/internals.h @@ -XXX,XX +XXX,XX @@ #include "hw/registerfields.h" +/* share data between vector helpers and decode code */ +FIELD(VDATA, MLEN, 0, 8) +FIELD(VDATA, VM, 8, 1) +FIELD(VDATA, LMUL, 9, 2) +FIELD(VDATA, NF, 11, 4) #endif diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn32.decode +++ b/target/riscv/insn32.decode @@ -XXX,XX +XXX,XX @@ %sh10 20:10 %csr 20:12 %rm 12:3 +%nf 29:3 !function=ex_plus_1 # immediates: %imm_i 20:s12 @@ -XXX,XX +XXX,XX @@ &u imm rd &shift shamt rs1 rd &atomic aq rl rs2 rs1 rd +&r2nfvm vm rd rs1 nf +&rnfvm vm rd rs1 rs2 nf # Formats 32: @r ....... ..... ..... ... ..... ....... &r %rs2 %rs1 %rd @@ -XXX,XX +XXX,XX @@ @r_rm ....... ..... ..... ... ..... ....... %rs2 %rs1 %rm %rd @r2_rm ....... ..... ..... ... ..... ....... %rs1 %rm %rd @r2 ....... ..... ..... ... ..... ....... %rs1 %rd +@r2_nfvm ... ... vm:1 ..... ..... ... ..... ....... &r2nfvm %nf %rs1 %rd +@r_nfvm ... ... vm:1 ..... ..... ... ..... ....... &rnfvm %nf %rs2 %rs1 %rd @r2_zimm . zimm:11 ..... ... ..... ....... %rs1 %rd @hfence_gvma ....... ..... ..... ... ..... ....... %rs2 %rs1 @@ -XXX,XX +XXX,XX @@ hfence_gvma 0110001 ..... ..... 000 00000 1110011 @hfence_gvma hfence_vvma 0010001 ..... ..... 000 00000 1110011 @hfence_vvma # *** RV32V Extension *** + +# *** Vector loads and stores are encoded within LOADFP/STORE-FP *** +vlb_v ... 100 . 00000 ..... 000 ..... 0000111 @r2_nfvm +vlh_v ... 100 . 00000 ..... 101 ..... 0000111 @r2_nfvm +vlw_v ... 100 . 00000 ..... 110 ..... 0000111 @r2_nfvm +vle_v ... 000 . 00000 ..... 111 ..... 0000111 @r2_nfvm +vlbu_v ... 000 . 00000 ..... 000 ..... 0000111 @r2_nfvm +vlhu_v ... 000 . 00000 ..... 101 ..... 0000111 @r2_nfvm +vlwu_v ... 000 . 00000 ..... 110 ..... 0000111 @r2_nfvm +vsb_v ... 000 . 00000 ..... 000 ..... 0100111 @r2_nfvm +vsh_v ... 000 . 00000 ..... 101 ..... 0100111 @r2_nfvm +vsw_v ... 000 . 00000 ..... 110 ..... 0100111 @r2_nfvm +vse_v ... 000 . 00000 ..... 111 ..... 0100111 @r2_nfvm + +vlsb_v ... 110 . ..... ..... 000 ..... 0000111 @r_nfvm +vlsh_v ... 110 . ..... ..... 101 ..... 0000111 @r_nfvm +vlsw_v ... 110 . ..... ..... 110 ..... 0000111 @r_nfvm +vlse_v ... 010 . ..... ..... 111 ..... 0000111 @r_nfvm +vlsbu_v ... 010 . ..... ..... 000 ..... 0000111 @r_nfvm +vlshu_v ... 010 . ..... ..... 101 ..... 0000111 @r_nfvm +vlswu_v ... 010 . ..... ..... 110 ..... 0000111 @r_nfvm +vssb_v ... 010 . ..... ..... 000 ..... 0100111 @r_nfvm +vssh_v ... 010 . ..... ..... 101 ..... 0100111 @r_nfvm +vssw_v ... 010 . ..... ..... 110 ..... 0100111 @r_nfvm +vsse_v ... 010 . ..... ..... 111 ..... 0100111 @r_nfvm + +# *** new major opcode OP-V *** vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm vsetvl 1000000 ..... ..... 111 ..... 1010111 @r diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn_trans/trans_rvv.inc.c +++ b/target/riscv/insn_trans/trans_rvv.inc.c @@ -XXX,XX +XXX,XX @@ * You should have received a copy of the GNU General Public License along with * this program. If not, see <http://www.gnu.org/licenses/>. */ +#include "tcg/tcg-op-gvec.h" +#include "tcg/tcg-gvec-desc.h" +#include "internals.h" static bool trans_vsetvl(DisasContext *ctx, arg_vsetvl *a) { @@ -XXX,XX +XXX,XX @@ static bool trans_vsetvli(DisasContext *ctx, arg_vsetvli *a) tcg_temp_free(dst); return true; } + +/* vector register offset from env */ +static uint32_t vreg_ofs(DisasContext *s, int reg) +{ + return offsetof(CPURISCVState, vreg) + reg * s->vlen / 8; +} + +/* check functions */ + +/* + * In cpu_get_tb_cpu_state(), set VILL if RVV was not present. + * So RVV is also be checked in this function. + */ +static bool vext_check_isa_ill(DisasContext *s) +{ + return !s->vill; +} + +/* + * There are two rules check here. + * + * 1. Vector register numbers are multiples of LMUL. (Section 3.2) + * + * 2. For all widening instructions, the destination LMUL value must also be + * a supported LMUL value. (Section 11.2) + */ +static bool vext_check_reg(DisasContext *s, uint32_t reg, bool widen) +{ + /* + * The destination vector register group results are arranged as if both + * SEW and LMUL were at twice their current settings. (Section 11.2). + */ + int legal = widen ? 2 << s->lmul : 1 << s->lmul; + + return !((s->lmul == 0x3 && widen) || (reg % legal)); +} + +/* + * There are two rules check here. + * + * 1. The destination vector register group for a masked vector instruction can + * only overlap the source mask register (v0) when LMUL=1. (Section 5.3) + * + * 2. In widen instructions and some other insturctions, like vslideup.vx, + * there is no need to check whether LMUL=1. + */ +static bool vext_check_overlap_mask(DisasContext *s, uint32_t vd, bool vm, + bool force) +{ + return (vm != 0 || vd != 0) || (!force && (s->lmul == 0)); +} + +/* The LMUL setting must be such that LMUL * NFIELDS <= 8. (Section 7.8) */ +static bool vext_check_nf(DisasContext *s, uint32_t nf) +{ + return (1 << s->lmul) * nf <= 8; +} + +/* common translation macro */ +#define GEN_VEXT_TRANS(NAME, SEQ, ARGTYPE, OP, CHECK) \ +static bool trans_##NAME(DisasContext *s, arg_##ARGTYPE *a)\ +{ \ + if (CHECK(s, a)) { \ + return OP(s, a, SEQ); \ + } \ + return false; \ +} + +/* + *** unit stride load and store + */ +typedef void gen_helper_ldst_us(TCGv_ptr, TCGv_ptr, TCGv, + TCGv_env, TCGv_i32); + +static bool ldst_us_trans(uint32_t vd, uint32_t rs1, uint32_t data, + gen_helper_ldst_us *fn, DisasContext *s) +{ + TCGv_ptr dest, mask; + TCGv base; + TCGv_i32 desc; + + TCGLabel *over = gen_new_label(); + tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + + dest = tcg_temp_new_ptr(); + mask = tcg_temp_new_ptr(); + base = tcg_temp_new(); + + /* + * As simd_desc supports at most 256 bytes, and in this implementation, + * the max vector group length is 2048 bytes. So split it into two parts. + * + * The first part is vlen in bytes, encoded in maxsz of simd_desc. + * The second part is lmul, encoded in data of simd_desc. + */ + desc = tcg_const_i32(simd_desc(0, s->vlen / 8, data)); + + gen_get_gpr(base, rs1); + tcg_gen_addi_ptr(dest, cpu_env, vreg_ofs(s, vd)); + tcg_gen_addi_ptr(mask, cpu_env, vreg_ofs(s, 0)); + + fn(dest, mask, base, cpu_env, desc); + + tcg_temp_free_ptr(dest); + tcg_temp_free_ptr(mask); + tcg_temp_free(base); + tcg_temp_free_i32(desc); + gen_set_label(over); + return true; +} + +static bool ld_us_op(DisasContext *s, arg_r2nfvm *a, uint8_t seq) +{ + uint32_t data = 0; + gen_helper_ldst_us *fn; + static gen_helper_ldst_us * const fns[2][7][4] = { + /* masked unit stride load */ + { { gen_helper_vlb_v_b_mask, gen_helper_vlb_v_h_mask, + gen_helper_vlb_v_w_mask, gen_helper_vlb_v_d_mask }, + { NULL, gen_helper_vlh_v_h_mask, + gen_helper_vlh_v_w_mask, gen_helper_vlh_v_d_mask }, + { NULL, NULL, + gen_helper_vlw_v_w_mask, gen_helper_vlw_v_d_mask }, + { gen_helper_vle_v_b_mask, gen_helper_vle_v_h_mask, + gen_helper_vle_v_w_mask, gen_helper_vle_v_d_mask }, + { gen_helper_vlbu_v_b_mask, gen_helper_vlbu_v_h_mask, + gen_helper_vlbu_v_w_mask, gen_helper_vlbu_v_d_mask }, + { NULL, gen_helper_vlhu_v_h_mask, + gen_helper_vlhu_v_w_mask, gen_helper_vlhu_v_d_mask }, + { NULL, NULL, + gen_helper_vlwu_v_w_mask, gen_helper_vlwu_v_d_mask } }, + /* unmasked unit stride load */ + { { gen_helper_vlb_v_b, gen_helper_vlb_v_h, + gen_helper_vlb_v_w, gen_helper_vlb_v_d }, + { NULL, gen_helper_vlh_v_h, + gen_helper_vlh_v_w, gen_helper_vlh_v_d }, + { NULL, NULL, + gen_helper_vlw_v_w, gen_helper_vlw_v_d }, + { gen_helper_vle_v_b, gen_helper_vle_v_h, + gen_helper_vle_v_w, gen_helper_vle_v_d }, + { gen_helper_vlbu_v_b, gen_helper_vlbu_v_h, + gen_helper_vlbu_v_w, gen_helper_vlbu_v_d }, + { NULL, gen_helper_vlhu_v_h, + gen_helper_vlhu_v_w, gen_helper_vlhu_v_d }, + { NULL, NULL, + gen_helper_vlwu_v_w, gen_helper_vlwu_v_d } } + }; + + fn = fns[a->vm][seq][s->sew]; + if (fn == NULL) { + return false; + } + + data = FIELD_DP32(data, VDATA, MLEN, s->mlen); + data = FIELD_DP32(data, VDATA, VM, a->vm); + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); + data = FIELD_DP32(data, VDATA, NF, a->nf); + return ldst_us_trans(a->rd, a->rs1, data, fn, s); +} + +static bool ld_us_check(DisasContext *s, arg_r2nfvm* a) +{ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, false) && + vext_check_reg(s, a->rd, false) && + vext_check_nf(s, a->nf)); +} + +GEN_VEXT_TRANS(vlb_v, 0, r2nfvm, ld_us_op, ld_us_check) +GEN_VEXT_TRANS(vlh_v, 1, r2nfvm, ld_us_op, ld_us_check) +GEN_VEXT_TRANS(vlw_v, 2, r2nfvm, ld_us_op, ld_us_check) +GEN_VEXT_TRANS(vle_v, 3, r2nfvm, ld_us_op, ld_us_check) +GEN_VEXT_TRANS(vlbu_v, 4, r2nfvm, ld_us_op, ld_us_check) +GEN_VEXT_TRANS(vlhu_v, 5, r2nfvm, ld_us_op, ld_us_check) +GEN_VEXT_TRANS(vlwu_v, 6, r2nfvm, ld_us_op, ld_us_check) + +static bool st_us_op(DisasContext *s, arg_r2nfvm *a, uint8_t seq) +{ + uint32_t data = 0; + gen_helper_ldst_us *fn; + static gen_helper_ldst_us * const fns[2][4][4] = { + /* masked unit stride load and store */ + { { gen_helper_vsb_v_b_mask, gen_helper_vsb_v_h_mask, + gen_helper_vsb_v_w_mask, gen_helper_vsb_v_d_mask }, + { NULL, gen_helper_vsh_v_h_mask, + gen_helper_vsh_v_w_mask, gen_helper_vsh_v_d_mask }, + { NULL, NULL, + gen_helper_vsw_v_w_mask, gen_helper_vsw_v_d_mask }, + { gen_helper_vse_v_b_mask, gen_helper_vse_v_h_mask, + gen_helper_vse_v_w_mask, gen_helper_vse_v_d_mask } }, + /* unmasked unit stride store */ + { { gen_helper_vsb_v_b, gen_helper_vsb_v_h, + gen_helper_vsb_v_w, gen_helper_vsb_v_d }, + { NULL, gen_helper_vsh_v_h, + gen_helper_vsh_v_w, gen_helper_vsh_v_d }, + { NULL, NULL, + gen_helper_vsw_v_w, gen_helper_vsw_v_d }, + { gen_helper_vse_v_b, gen_helper_vse_v_h, + gen_helper_vse_v_w, gen_helper_vse_v_d } } + }; + + fn = fns[a->vm][seq][s->sew]; + if (fn == NULL) { + return false; + } + + data = FIELD_DP32(data, VDATA, MLEN, s->mlen); + data = FIELD_DP32(data, VDATA, VM, a->vm); + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); + data = FIELD_DP32(data, VDATA, NF, a->nf); + return ldst_us_trans(a->rd, a->rs1, data, fn, s); +} + +static bool st_us_check(DisasContext *s, arg_r2nfvm* a) +{ + return (vext_check_isa_ill(s) && + vext_check_reg(s, a->rd, false) && + vext_check_nf(s, a->nf)); +} + +GEN_VEXT_TRANS(vsb_v, 0, r2nfvm, st_us_op, st_us_check) +GEN_VEXT_TRANS(vsh_v, 1, r2nfvm, st_us_op, st_us_check) +GEN_VEXT_TRANS(vsw_v, 2, r2nfvm, st_us_op, st_us_check) +GEN_VEXT_TRANS(vse_v, 3, r2nfvm, st_us_op, st_us_check) + +/* + *** stride load and store + */ +typedef void gen_helper_ldst_stride(TCGv_ptr, TCGv_ptr, TCGv, + TCGv, TCGv_env, TCGv_i32); + +static bool ldst_stride_trans(uint32_t vd, uint32_t rs1, uint32_t rs2, + uint32_t data, gen_helper_ldst_stride *fn, + DisasContext *s) +{ + TCGv_ptr dest, mask; + TCGv base, stride; + TCGv_i32 desc; + + TCGLabel *over = gen_new_label(); + tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + + dest = tcg_temp_new_ptr(); + mask = tcg_temp_new_ptr(); + base = tcg_temp_new(); + stride = tcg_temp_new(); + desc = tcg_const_i32(simd_desc(0, s->vlen / 8, data)); + + gen_get_gpr(base, rs1); + gen_get_gpr(stride, rs2); + tcg_gen_addi_ptr(dest, cpu_env, vreg_ofs(s, vd)); + tcg_gen_addi_ptr(mask, cpu_env, vreg_ofs(s, 0)); + + fn(dest, mask, base, stride, cpu_env, desc); + + tcg_temp_free_ptr(dest); + tcg_temp_free_ptr(mask); + tcg_temp_free(base); + tcg_temp_free(stride); + tcg_temp_free_i32(desc); + gen_set_label(over); + return true; +} + +static bool ld_stride_op(DisasContext *s, arg_rnfvm *a, uint8_t seq) +{ + uint32_t data = 0; + gen_helper_ldst_stride *fn; + static gen_helper_ldst_stride * const fns[7][4] = { + { gen_helper_vlsb_v_b, gen_helper_vlsb_v_h, + gen_helper_vlsb_v_w, gen_helper_vlsb_v_d }, + { NULL, gen_helper_vlsh_v_h, + gen_helper_vlsh_v_w, gen_helper_vlsh_v_d }, + { NULL, NULL, + gen_helper_vlsw_v_w, gen_helper_vlsw_v_d }, + { gen_helper_vlse_v_b, gen_helper_vlse_v_h, + gen_helper_vlse_v_w, gen_helper_vlse_v_d }, + { gen_helper_vlsbu_v_b, gen_helper_vlsbu_v_h, + gen_helper_vlsbu_v_w, gen_helper_vlsbu_v_d }, + { NULL, gen_helper_vlshu_v_h, + gen_helper_vlshu_v_w, gen_helper_vlshu_v_d }, + { NULL, NULL, + gen_helper_vlswu_v_w, gen_helper_vlswu_v_d }, + }; + + fn = fns[seq][s->sew]; + if (fn == NULL) { + return false; + } + + data = FIELD_DP32(data, VDATA, MLEN, s->mlen); + data = FIELD_DP32(data, VDATA, VM, a->vm); + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); + data = FIELD_DP32(data, VDATA, NF, a->nf); + return ldst_stride_trans(a->rd, a->rs1, a->rs2, data, fn, s); +} + +static bool ld_stride_check(DisasContext *s, arg_rnfvm* a) +{ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, false) && + vext_check_reg(s, a->rd, false) && + vext_check_nf(s, a->nf)); +} + +GEN_VEXT_TRANS(vlsb_v, 0, rnfvm, ld_stride_op, ld_stride_check) +GEN_VEXT_TRANS(vlsh_v, 1, rnfvm, ld_stride_op, ld_stride_check) +GEN_VEXT_TRANS(vlsw_v, 2, rnfvm, ld_stride_op, ld_stride_check) +GEN_VEXT_TRANS(vlse_v, 3, rnfvm, ld_stride_op, ld_stride_check) +GEN_VEXT_TRANS(vlsbu_v, 4, rnfvm, ld_stride_op, ld_stride_check) +GEN_VEXT_TRANS(vlshu_v, 5, rnfvm, ld_stride_op, ld_stride_check) +GEN_VEXT_TRANS(vlswu_v, 6, rnfvm, ld_stride_op, ld_stride_check) + +static bool st_stride_op(DisasContext *s, arg_rnfvm *a, uint8_t seq) +{ + uint32_t data = 0; + gen_helper_ldst_stride *fn; + static gen_helper_ldst_stride * const fns[4][4] = { + /* masked stride store */ + { gen_helper_vssb_v_b, gen_helper_vssb_v_h, + gen_helper_vssb_v_w, gen_helper_vssb_v_d }, + { NULL, gen_helper_vssh_v_h, + gen_helper_vssh_v_w, gen_helper_vssh_v_d }, + { NULL, NULL, + gen_helper_vssw_v_w, gen_helper_vssw_v_d }, + { gen_helper_vsse_v_b, gen_helper_vsse_v_h, + gen_helper_vsse_v_w, gen_helper_vsse_v_d } + }; + + data = FIELD_DP32(data, VDATA, MLEN, s->mlen); + data = FIELD_DP32(data, VDATA, VM, a->vm); + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); + data = FIELD_DP32(data, VDATA, NF, a->nf); + fn = fns[seq][s->sew]; + if (fn == NULL) { + return false; + } + + return ldst_stride_trans(a->rd, a->rs1, a->rs2, data, fn, s); +} + +static bool st_stride_check(DisasContext *s, arg_rnfvm* a) +{ + return (vext_check_isa_ill(s) && + vext_check_reg(s, a->rd, false) && + vext_check_nf(s, a->nf)); +} + +GEN_VEXT_TRANS(vssb_v, 0, rnfvm, st_stride_op, st_stride_check) +GEN_VEXT_TRANS(vssh_v, 1, rnfvm, st_stride_op, st_stride_check) +GEN_VEXT_TRANS(vssw_v, 2, rnfvm, st_stride_op, st_stride_check) +GEN_VEXT_TRANS(vsse_v, 3, rnfvm, st_stride_op, st_stride_check) diff --git a/target/riscv/translate.c b/target/riscv/translate.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/translate.c +++ b/target/riscv/translate.c @@ -XXX,XX +XXX,XX @@ typedef struct DisasContext { uint8_t lmul; uint8_t sew; uint16_t vlen; + uint16_t mlen; bool vl_eq_vlmax; } DisasContext; @@ -XXX,XX +XXX,XX @@ static void decode_RV32_64C(DisasContext *ctx, uint16_t opcode) } } +static int ex_plus_1(DisasContext *ctx, int nf) +{ + return nf + 1; +} + #define EX_SH(amount) \ static int ex_shift_##amount(DisasContext *ctx, int imm) \ { \ @@ -XXX,XX +XXX,XX @@ static void riscv_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) ctx->vill = FIELD_EX32(tb_flags, TB_FLAGS, VILL); ctx->sew = FIELD_EX32(tb_flags, TB_FLAGS, SEW); ctx->lmul = FIELD_EX32(tb_flags, TB_FLAGS, LMUL); + ctx->mlen = 1 << (ctx->sew + 3 - ctx->lmul); ctx->vl_eq_vlmax = FIELD_EX32(tb_flags, TB_FLAGS, VL_EQ_VLMAX); } diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -XXX,XX +XXX,XX @@ #include "qemu/osdep.h" #include "cpu.h" +#include "exec/memop.h" #include "exec/exec-all.h" #include "exec/helper-proto.h" +#include "tcg/tcg-gvec-desc.h" +#include "internals.h" #include <math.h> target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1, @@ -XXX,XX +XXX,XX @@ target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1, env->vstart = 0; return vl; } + +/* + * Note that vector data is stored in host-endian 64-bit chunks, + * so addressing units smaller than that needs a host-endian fixup. + */ +#ifdef HOST_WORDS_BIGENDIAN +#define H1(x) ((x) ^ 7) +#define H1_2(x) ((x) ^ 6) +#define H1_4(x) ((x) ^ 4) +#define H2(x) ((x) ^ 3) +#define H4(x) ((x) ^ 1) +#define H8(x) ((x)) +#else +#define H1(x) (x) +#define H1_2(x) (x) +#define H1_4(x) (x) +#define H2(x) (x) +#define H4(x) (x) +#define H8(x) (x) +#endif + +static inline uint32_t vext_nf(uint32_t desc) +{ + return FIELD_EX32(simd_data(desc), VDATA, NF); +} + +static inline uint32_t vext_mlen(uint32_t desc) +{ + return FIELD_EX32(simd_data(desc), VDATA, MLEN); +} + +static inline uint32_t vext_vm(uint32_t desc) +{ + return FIELD_EX32(simd_data(desc), VDATA, VM); +} + +static inline uint32_t vext_lmul(uint32_t desc) +{ + return FIELD_EX32(simd_data(desc), VDATA, LMUL); +} + +/* + * Get vector group length in bytes. Its range is [64, 2048]. + * + * As simd_desc support at most 256, the max vlen is 512 bits. + * So vlen in bytes is encoded as maxsz. + */ +static inline uint32_t vext_maxsz(uint32_t desc) +{ + return simd_maxsz(desc) << vext_lmul(desc); +} + +/* + * This function checks watchpoint before real load operation. + * + * In softmmu mode, the TLB API probe_access is enough for watchpoint check. + * In user mode, there is no watchpoint support now. + * + * It will trigger an exception if there is no mapping in TLB + * and page table walk can't fill the TLB entry. Then the guest + * software can return here after process the exception or never return. + */ +static void probe_pages(CPURISCVState *env, target_ulong addr, + target_ulong len, uintptr_t ra, + MMUAccessType access_type) +{ + target_ulong pagelen = -(addr | TARGET_PAGE_MASK); + target_ulong curlen = MIN(pagelen, len); + + probe_access(env, addr, curlen, access_type, + cpu_mmu_index(env, false), ra); + if (len > curlen) { + addr += curlen; + curlen = len - curlen; + probe_access(env, addr, curlen, access_type, + cpu_mmu_index(env, false), ra); + } +} + +#ifdef HOST_WORDS_BIGENDIAN +static void vext_clear(void *tail, uint32_t cnt, uint32_t tot) +{ + /* + * Split the remaining range to two parts. + * The first part is in the last uint64_t unit. + * The second part start from the next uint64_t unit. + */ + int part1 = 0, part2 = tot - cnt; + if (cnt % 8) { + part1 = 8 - (cnt % 8); + part2 = tot - cnt - part1; + memset(tail & ~(7ULL), 0, part1); + memset((tail + 8) & ~(7ULL), 0, part2); + } else { + memset(tail, 0, part2); + } +} +#else +static void vext_clear(void *tail, uint32_t cnt, uint32_t tot) +{ + memset(tail, 0, tot - cnt); +} +#endif + +static void clearb(void *vd, uint32_t idx, uint32_t cnt, uint32_t tot) +{ + int8_t *cur = ((int8_t *)vd + H1(idx)); + vext_clear(cur, cnt, tot); +} + +static void clearh(void *vd, uint32_t idx, uint32_t cnt, uint32_t tot) +{ + int16_t *cur = ((int16_t *)vd + H2(idx)); + vext_clear(cur, cnt, tot); +} + +static void clearl(void *vd, uint32_t idx, uint32_t cnt, uint32_t tot) +{ + int32_t *cur = ((int32_t *)vd + H4(idx)); + vext_clear(cur, cnt, tot); +} + +static void clearq(void *vd, uint32_t idx, uint32_t cnt, uint32_t tot) +{ + int64_t *cur = (int64_t *)vd + idx; + vext_clear(cur, cnt, tot); +} + + +static inline int vext_elem_mask(void *v0, int mlen, int index) +{ + int idx = (index * mlen) / 64; + int pos = (index * mlen) % 64; + return (((uint64_t *)v0)[idx] >> pos) & 1; +} + +/* elements operations for load and store */ +typedef void vext_ldst_elem_fn(CPURISCVState *env, target_ulong addr, + uint32_t idx, void *vd, uintptr_t retaddr); +typedef void clear_fn(void *vd, uint32_t idx, uint32_t cnt, uint32_t tot); + +#define GEN_VEXT_LD_ELEM(NAME, MTYPE, ETYPE, H, LDSUF) \ +static void NAME(CPURISCVState *env, abi_ptr addr, \ + uint32_t idx, void *vd, uintptr_t retaddr)\ +{ \ + MTYPE data; \ + ETYPE *cur = ((ETYPE *)vd + H(idx)); \ + data = cpu_##LDSUF##_data_ra(env, addr, retaddr); \ + *cur = data; \ +} \ + +GEN_VEXT_LD_ELEM(ldb_b, int8_t, int8_t, H1, ldsb) +GEN_VEXT_LD_ELEM(ldb_h, int8_t, int16_t, H2, ldsb) +GEN_VEXT_LD_ELEM(ldb_w, int8_t, int32_t, H4, ldsb) +GEN_VEXT_LD_ELEM(ldb_d, int8_t, int64_t, H8, ldsb) +GEN_VEXT_LD_ELEM(ldh_h, int16_t, int16_t, H2, ldsw) +GEN_VEXT_LD_ELEM(ldh_w, int16_t, int32_t, H4, ldsw) +GEN_VEXT_LD_ELEM(ldh_d, int16_t, int64_t, H8, ldsw) +GEN_VEXT_LD_ELEM(ldw_w, int32_t, int32_t, H4, ldl) +GEN_VEXT_LD_ELEM(ldw_d, int32_t, int64_t, H8, ldl) +GEN_VEXT_LD_ELEM(lde_b, int8_t, int8_t, H1, ldsb) +GEN_VEXT_LD_ELEM(lde_h, int16_t, int16_t, H2, ldsw) +GEN_VEXT_LD_ELEM(lde_w, int32_t, int32_t, H4, ldl) +GEN_VEXT_LD_ELEM(lde_d, int64_t, int64_t, H8, ldq) +GEN_VEXT_LD_ELEM(ldbu_b, uint8_t, uint8_t, H1, ldub) +GEN_VEXT_LD_ELEM(ldbu_h, uint8_t, uint16_t, H2, ldub) +GEN_VEXT_LD_ELEM(ldbu_w, uint8_t, uint32_t, H4, ldub) +GEN_VEXT_LD_ELEM(ldbu_d, uint8_t, uint64_t, H8, ldub) +GEN_VEXT_LD_ELEM(ldhu_h, uint16_t, uint16_t, H2, lduw) +GEN_VEXT_LD_ELEM(ldhu_w, uint16_t, uint32_t, H4, lduw) +GEN_VEXT_LD_ELEM(ldhu_d, uint16_t, uint64_t, H8, lduw) +GEN_VEXT_LD_ELEM(ldwu_w, uint32_t, uint32_t, H4, ldl) +GEN_VEXT_LD_ELEM(ldwu_d, uint32_t, uint64_t, H8, ldl) + +#define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF) \ +static void NAME(CPURISCVState *env, abi_ptr addr, \ + uint32_t idx, void *vd, uintptr_t retaddr)\ +{ \ + ETYPE data = *((ETYPE *)vd + H(idx)); \ + cpu_##STSUF##_data_ra(env, addr, data, retaddr); \ +} + +GEN_VEXT_ST_ELEM(stb_b, int8_t, H1, stb) +GEN_VEXT_ST_ELEM(stb_h, int16_t, H2, stb) +GEN_VEXT_ST_ELEM(stb_w, int32_t, H4, stb) +GEN_VEXT_ST_ELEM(stb_d, int64_t, H8, stb) +GEN_VEXT_ST_ELEM(sth_h, int16_t, H2, stw) +GEN_VEXT_ST_ELEM(sth_w, int32_t, H4, stw) +GEN_VEXT_ST_ELEM(sth_d, int64_t, H8, stw) +GEN_VEXT_ST_ELEM(stw_w, int32_t, H4, stl) +GEN_VEXT_ST_ELEM(stw_d, int64_t, H8, stl) +GEN_VEXT_ST_ELEM(ste_b, int8_t, H1, stb) +GEN_VEXT_ST_ELEM(ste_h, int16_t, H2, stw) +GEN_VEXT_ST_ELEM(ste_w, int32_t, H4, stl) +GEN_VEXT_ST_ELEM(ste_d, int64_t, H8, stq) + +/* + *** stride: access vector element from strided memory + */ +static void +vext_ldst_stride(void *vd, void *v0, target_ulong base, + target_ulong stride, CPURISCVState *env, + uint32_t desc, uint32_t vm, + vext_ldst_elem_fn *ldst_elem, clear_fn *clear_elem, + uint32_t esz, uint32_t msz, uintptr_t ra, + MMUAccessType access_type) +{ + uint32_t i, k; + uint32_t nf = vext_nf(desc); + uint32_t mlen = vext_mlen(desc); + uint32_t vlmax = vext_maxsz(desc) / esz; + + /* probe every access*/ + for (i = 0; i < env->vl; i++) { + if (!vm && !vext_elem_mask(v0, mlen, i)) { + continue; + } + probe_pages(env, base + stride * i, nf * msz, ra, access_type); + } + /* do real access */ + for (i = 0; i < env->vl; i++) { + k = 0; + if (!vm && !vext_elem_mask(v0, mlen, i)) { + continue; + } + while (k < nf) { + target_ulong addr = base + stride * i + k * msz; + ldst_elem(env, addr, i + k * vlmax, vd, ra); + k++; + } + } + /* clear tail elements */ + if (clear_elem) { + for (k = 0; k < nf; k++) { + clear_elem(vd, env->vl + k * vlmax, env->vl * esz, vlmax * esz); + } + } +} + +#define GEN_VEXT_LD_STRIDE(NAME, MTYPE, ETYPE, LOAD_FN, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void * v0, target_ulong base, \ + target_ulong stride, CPURISCVState *env, \ + uint32_t desc) \ +{ \ + uint32_t vm = vext_vm(desc); \ + vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN, \ + CLEAR_FN, sizeof(ETYPE), sizeof(MTYPE), \ + GETPC(), MMU_DATA_LOAD); \ +} + +GEN_VEXT_LD_STRIDE(vlsb_v_b, int8_t, int8_t, ldb_b, clearb) +GEN_VEXT_LD_STRIDE(vlsb_v_h, int8_t, int16_t, ldb_h, clearh) +GEN_VEXT_LD_STRIDE(vlsb_v_w, int8_t, int32_t, ldb_w, clearl) +GEN_VEXT_LD_STRIDE(vlsb_v_d, int8_t, int64_t, ldb_d, clearq) +GEN_VEXT_LD_STRIDE(vlsh_v_h, int16_t, int16_t, ldh_h, clearh) +GEN_VEXT_LD_STRIDE(vlsh_v_w, int16_t, int32_t, ldh_w, clearl) +GEN_VEXT_LD_STRIDE(vlsh_v_d, int16_t, int64_t, ldh_d, clearq) +GEN_VEXT_LD_STRIDE(vlsw_v_w, int32_t, int32_t, ldw_w, clearl) +GEN_VEXT_LD_STRIDE(vlsw_v_d, int32_t, int64_t, ldw_d, clearq) +GEN_VEXT_LD_STRIDE(vlse_v_b, int8_t, int8_t, lde_b, clearb) +GEN_VEXT_LD_STRIDE(vlse_v_h, int16_t, int16_t, lde_h, clearh) +GEN_VEXT_LD_STRIDE(vlse_v_w, int32_t, int32_t, lde_w, clearl) +GEN_VEXT_LD_STRIDE(vlse_v_d, int64_t, int64_t, lde_d, clearq) +GEN_VEXT_LD_STRIDE(vlsbu_v_b, uint8_t, uint8_t, ldbu_b, clearb) +GEN_VEXT_LD_STRIDE(vlsbu_v_h, uint8_t, uint16_t, ldbu_h, clearh) +GEN_VEXT_LD_STRIDE(vlsbu_v_w, uint8_t, uint32_t, ldbu_w, clearl) +GEN_VEXT_LD_STRIDE(vlsbu_v_d, uint8_t, uint64_t, ldbu_d, clearq) +GEN_VEXT_LD_STRIDE(vlshu_v_h, uint16_t, uint16_t, ldhu_h, clearh) +GEN_VEXT_LD_STRIDE(vlshu_v_w, uint16_t, uint32_t, ldhu_w, clearl) +GEN_VEXT_LD_STRIDE(vlshu_v_d, uint16_t, uint64_t, ldhu_d, clearq) +GEN_VEXT_LD_STRIDE(vlswu_v_w, uint32_t, uint32_t, ldwu_w, clearl) +GEN_VEXT_LD_STRIDE(vlswu_v_d, uint32_t, uint64_t, ldwu_d, clearq) + +#define GEN_VEXT_ST_STRIDE(NAME, MTYPE, ETYPE, STORE_FN) \ +void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ + target_ulong stride, CPURISCVState *env, \ + uint32_t desc) \ +{ \ + uint32_t vm = vext_vm(desc); \ + vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN, \ + NULL, sizeof(ETYPE), sizeof(MTYPE), \ + GETPC(), MMU_DATA_STORE); \ +} + +GEN_VEXT_ST_STRIDE(vssb_v_b, int8_t, int8_t, stb_b) +GEN_VEXT_ST_STRIDE(vssb_v_h, int8_t, int16_t, stb_h) +GEN_VEXT_ST_STRIDE(vssb_v_w, int8_t, int32_t, stb_w) +GEN_VEXT_ST_STRIDE(vssb_v_d, int8_t, int64_t, stb_d) +GEN_VEXT_ST_STRIDE(vssh_v_h, int16_t, int16_t, sth_h) +GEN_VEXT_ST_STRIDE(vssh_v_w, int16_t, int32_t, sth_w) +GEN_VEXT_ST_STRIDE(vssh_v_d, int16_t, int64_t, sth_d) +GEN_VEXT_ST_STRIDE(vssw_v_w, int32_t, int32_t, stw_w) +GEN_VEXT_ST_STRIDE(vssw_v_d, int32_t, int64_t, stw_d) +GEN_VEXT_ST_STRIDE(vsse_v_b, int8_t, int8_t, ste_b) +GEN_VEXT_ST_STRIDE(vsse_v_h, int16_t, int16_t, ste_h) +GEN_VEXT_ST_STRIDE(vsse_v_w, int32_t, int32_t, ste_w) +GEN_VEXT_ST_STRIDE(vsse_v_d, int64_t, int64_t, ste_d) + +/* + *** unit-stride: access elements stored contiguously in memory + */ + +/* unmasked unit-stride load and store operation*/ +static void +vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, + vext_ldst_elem_fn *ldst_elem, clear_fn *clear_elem, + uint32_t esz, uint32_t msz, uintptr_t ra, + MMUAccessType access_type) +{ + uint32_t i, k; + uint32_t nf = vext_nf(desc); + uint32_t vlmax = vext_maxsz(desc) / esz; + + /* probe every access */ + probe_pages(env, base, env->vl * nf * msz, ra, access_type); + /* load bytes from guest memory */ + for (i = 0; i < env->vl; i++) { + k = 0; + while (k < nf) { + target_ulong addr = base + (i * nf + k) * msz; + ldst_elem(env, addr, i + k * vlmax, vd, ra); + k++; + } + } + /* clear tail elements */ + if (clear_elem) { + for (k = 0; k < nf; k++) { + clear_elem(vd, env->vl + k * vlmax, env->vl * esz, vlmax * esz); + } + } +} + +/* + * masked unit-stride load and store operation will be a special case of stride, + * stride = NF * sizeof (MTYPE) + */ + +#define GEN_VEXT_LD_US(NAME, MTYPE, ETYPE, LOAD_FN, CLEAR_FN) \ +void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t stride = vext_nf(desc) * sizeof(MTYPE); \ + vext_ldst_stride(vd, v0, base, stride, env, desc, false, LOAD_FN, \ + CLEAR_FN, sizeof(ETYPE), sizeof(MTYPE), \ + GETPC(), MMU_DATA_LOAD); \ +} \ + \ +void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + vext_ldst_us(vd, base, env, desc, LOAD_FN, CLEAR_FN, \ + sizeof(ETYPE), sizeof(MTYPE), GETPC(), MMU_DATA_LOAD); \ +} + +GEN_VEXT_LD_US(vlb_v_b, int8_t, int8_t, ldb_b, clearb) +GEN_VEXT_LD_US(vlb_v_h, int8_t, int16_t, ldb_h, clearh) +GEN_VEXT_LD_US(vlb_v_w, int8_t, int32_t, ldb_w, clearl) +GEN_VEXT_LD_US(vlb_v_d, int8_t, int64_t, ldb_d, clearq) +GEN_VEXT_LD_US(vlh_v_h, int16_t, int16_t, ldh_h, clearh) +GEN_VEXT_LD_US(vlh_v_w, int16_t, int32_t, ldh_w, clearl) +GEN_VEXT_LD_US(vlh_v_d, int16_t, int64_t, ldh_d, clearq) +GEN_VEXT_LD_US(vlw_v_w, int32_t, int32_t, ldw_w, clearl) +GEN_VEXT_LD_US(vlw_v_d, int32_t, int64_t, ldw_d, clearq) +GEN_VEXT_LD_US(vle_v_b, int8_t, int8_t, lde_b, clearb) +GEN_VEXT_LD_US(vle_v_h, int16_t, int16_t, lde_h, clearh) +GEN_VEXT_LD_US(vle_v_w, int32_t, int32_t, lde_w, clearl) +GEN_VEXT_LD_US(vle_v_d, int64_t, int64_t, lde_d, clearq) +GEN_VEXT_LD_US(vlbu_v_b, uint8_t, uint8_t, ldbu_b, clearb) +GEN_VEXT_LD_US(vlbu_v_h, uint8_t, uint16_t, ldbu_h, clearh) +GEN_VEXT_LD_US(vlbu_v_w, uint8_t, uint32_t, ldbu_w, clearl) +GEN_VEXT_LD_US(vlbu_v_d, uint8_t, uint64_t, ldbu_d, clearq) +GEN_VEXT_LD_US(vlhu_v_h, uint16_t, uint16_t, ldhu_h, clearh) +GEN_VEXT_LD_US(vlhu_v_w, uint16_t, uint32_t, ldhu_w, clearl) +GEN_VEXT_LD_US(vlhu_v_d, uint16_t, uint64_t, ldhu_d, clearq) +GEN_VEXT_LD_US(vlwu_v_w, uint32_t, uint32_t, ldwu_w, clearl) +GEN_VEXT_LD_US(vlwu_v_d, uint32_t, uint64_t, ldwu_d, clearq) + +#define GEN_VEXT_ST_US(NAME, MTYPE, ETYPE, STORE_FN) \ +void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t stride = vext_nf(desc) * sizeof(MTYPE); \ + vext_ldst_stride(vd, v0, base, stride, env, desc, false, STORE_FN, \ + NULL, sizeof(ETYPE), sizeof(MTYPE), \ + GETPC(), MMU_DATA_STORE); \ +} \ + \ +void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + vext_ldst_us(vd, base, env, desc, STORE_FN, NULL, \ + sizeof(ETYPE), sizeof(MTYPE), GETPC(), MMU_DATA_STORE);\ +} + +GEN_VEXT_ST_US(vsb_v_b, int8_t, int8_t , stb_b) +GEN_VEXT_ST_US(vsb_v_h, int8_t, int16_t, stb_h) +GEN_VEXT_ST_US(vsb_v_w, int8_t, int32_t, stb_w) +GEN_VEXT_ST_US(vsb_v_d, int8_t, int64_t, stb_d) +GEN_VEXT_ST_US(vsh_v_h, int16_t, int16_t, sth_h) +GEN_VEXT_ST_US(vsh_v_w, int16_t, int32_t, sth_w) +GEN_VEXT_ST_US(vsh_v_d, int16_t, int64_t, sth_d) +GEN_VEXT_ST_US(vsw_v_w, int32_t, int32_t, stw_w) +GEN_VEXT_ST_US(vsw_v_d, int32_t, int64_t, stw_d) +GEN_VEXT_ST_US(vse_v_b, int8_t, int8_t , ste_b) +GEN_VEXT_ST_US(vse_v_h, int16_t, int16_t, ste_h) +GEN_VEXT_ST_US(vse_v_w, int32_t, int32_t, ste_w) +GEN_VEXT_ST_US(vse_v_d, int64_t, int64_t, ste_d) -- 2.27.0
From: LIU Zhiwei <zhiwei_liu@c-sky.com> Vector indexed operations add the contents of each element of the vector offset operand specified by vs2 to the base effective address to give the effective address of each element. Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Message-id: 20200623215920.2594-8-zhiwei_liu@c-sky.com Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/helper.h | 35 +++++++ target/riscv/insn32.decode | 13 +++ target/riscv/insn_trans/trans_rvv.inc.c | 129 ++++++++++++++++++++++++ target/riscv/vector_helper.c | 116 +++++++++++++++++++++ 4 files changed, 293 insertions(+) diff --git a/target/riscv/helper.h b/target/riscv/helper.h index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/helper.h +++ b/target/riscv/helper.h @@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vsse_v_b, void, ptr, ptr, tl, tl, env, i32) DEF_HELPER_6(vsse_v_h, void, ptr, ptr, tl, tl, env, i32) DEF_HELPER_6(vsse_v_w, void, ptr, ptr, tl, tl, env, i32) DEF_HELPER_6(vsse_v_d, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vlxb_v_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vlxb_v_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vlxb_v_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vlxb_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vlxh_v_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vlxh_v_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vlxh_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vlxw_v_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vlxw_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vlxe_v_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vlxe_v_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vlxe_v_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vlxe_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vlxbu_v_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vlxbu_v_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vlxbu_v_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vlxbu_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vlxhu_v_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vlxhu_v_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vlxhu_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vlxwu_v_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vlxwu_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsxb_v_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsxb_v_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsxb_v_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsxb_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsxh_v_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsxh_v_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsxh_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsxw_v_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsxw_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsxe_v_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsxe_v_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsxe_v_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsxe_v_d, void, ptr, ptr, tl, ptr, env, i32) diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn32.decode +++ b/target/riscv/insn32.decode @@ -XXX,XX +XXX,XX @@ vssh_v ... 010 . ..... ..... 101 ..... 0100111 @r_nfvm vssw_v ... 010 . ..... ..... 110 ..... 0100111 @r_nfvm vsse_v ... 010 . ..... ..... 111 ..... 0100111 @r_nfvm +vlxb_v ... 111 . ..... ..... 000 ..... 0000111 @r_nfvm +vlxh_v ... 111 . ..... ..... 101 ..... 0000111 @r_nfvm +vlxw_v ... 111 . ..... ..... 110 ..... 0000111 @r_nfvm +vlxe_v ... 011 . ..... ..... 111 ..... 0000111 @r_nfvm +vlxbu_v ... 011 . ..... ..... 000 ..... 0000111 @r_nfvm +vlxhu_v ... 011 . ..... ..... 101 ..... 0000111 @r_nfvm +vlxwu_v ... 011 . ..... ..... 110 ..... 0000111 @r_nfvm +# Vector ordered-indexed and unordered-indexed store insns. +vsxb_v ... -11 . ..... ..... 000 ..... 0100111 @r_nfvm +vsxh_v ... -11 . ..... ..... 101 ..... 0100111 @r_nfvm +vsxw_v ... -11 . ..... ..... 110 ..... 0100111 @r_nfvm +vsxe_v ... -11 . ..... ..... 111 ..... 0100111 @r_nfvm + # *** new major opcode OP-V *** vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm vsetvl 1000000 ..... ..... 111 ..... 1010111 @r diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn_trans/trans_rvv.inc.c +++ b/target/riscv/insn_trans/trans_rvv.inc.c @@ -XXX,XX +XXX,XX @@ GEN_VEXT_TRANS(vssb_v, 0, rnfvm, st_stride_op, st_stride_check) GEN_VEXT_TRANS(vssh_v, 1, rnfvm, st_stride_op, st_stride_check) GEN_VEXT_TRANS(vssw_v, 2, rnfvm, st_stride_op, st_stride_check) GEN_VEXT_TRANS(vsse_v, 3, rnfvm, st_stride_op, st_stride_check) + +/* + *** index load and store + */ +typedef void gen_helper_ldst_index(TCGv_ptr, TCGv_ptr, TCGv, + TCGv_ptr, TCGv_env, TCGv_i32); + +static bool ldst_index_trans(uint32_t vd, uint32_t rs1, uint32_t vs2, + uint32_t data, gen_helper_ldst_index *fn, + DisasContext *s) +{ + TCGv_ptr dest, mask, index; + TCGv base; + TCGv_i32 desc; + + TCGLabel *over = gen_new_label(); + tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + + dest = tcg_temp_new_ptr(); + mask = tcg_temp_new_ptr(); + index = tcg_temp_new_ptr(); + base = tcg_temp_new(); + desc = tcg_const_i32(simd_desc(0, s->vlen / 8, data)); + + gen_get_gpr(base, rs1); + tcg_gen_addi_ptr(dest, cpu_env, vreg_ofs(s, vd)); + tcg_gen_addi_ptr(index, cpu_env, vreg_ofs(s, vs2)); + tcg_gen_addi_ptr(mask, cpu_env, vreg_ofs(s, 0)); + + fn(dest, mask, base, index, cpu_env, desc); + + tcg_temp_free_ptr(dest); + tcg_temp_free_ptr(mask); + tcg_temp_free_ptr(index); + tcg_temp_free(base); + tcg_temp_free_i32(desc); + gen_set_label(over); + return true; +} + +static bool ld_index_op(DisasContext *s, arg_rnfvm *a, uint8_t seq) +{ + uint32_t data = 0; + gen_helper_ldst_index *fn; + static gen_helper_ldst_index * const fns[7][4] = { + { gen_helper_vlxb_v_b, gen_helper_vlxb_v_h, + gen_helper_vlxb_v_w, gen_helper_vlxb_v_d }, + { NULL, gen_helper_vlxh_v_h, + gen_helper_vlxh_v_w, gen_helper_vlxh_v_d }, + { NULL, NULL, + gen_helper_vlxw_v_w, gen_helper_vlxw_v_d }, + { gen_helper_vlxe_v_b, gen_helper_vlxe_v_h, + gen_helper_vlxe_v_w, gen_helper_vlxe_v_d }, + { gen_helper_vlxbu_v_b, gen_helper_vlxbu_v_h, + gen_helper_vlxbu_v_w, gen_helper_vlxbu_v_d }, + { NULL, gen_helper_vlxhu_v_h, + gen_helper_vlxhu_v_w, gen_helper_vlxhu_v_d }, + { NULL, NULL, + gen_helper_vlxwu_v_w, gen_helper_vlxwu_v_d }, + }; + + fn = fns[seq][s->sew]; + if (fn == NULL) { + return false; + } + + data = FIELD_DP32(data, VDATA, MLEN, s->mlen); + data = FIELD_DP32(data, VDATA, VM, a->vm); + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); + data = FIELD_DP32(data, VDATA, NF, a->nf); + return ldst_index_trans(a->rd, a->rs1, a->rs2, data, fn, s); +} + +static bool ld_index_check(DisasContext *s, arg_rnfvm* a) +{ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, false) && + vext_check_reg(s, a->rd, false) && + vext_check_reg(s, a->rs2, false) && + vext_check_nf(s, a->nf)); +} + +GEN_VEXT_TRANS(vlxb_v, 0, rnfvm, ld_index_op, ld_index_check) +GEN_VEXT_TRANS(vlxh_v, 1, rnfvm, ld_index_op, ld_index_check) +GEN_VEXT_TRANS(vlxw_v, 2, rnfvm, ld_index_op, ld_index_check) +GEN_VEXT_TRANS(vlxe_v, 3, rnfvm, ld_index_op, ld_index_check) +GEN_VEXT_TRANS(vlxbu_v, 4, rnfvm, ld_index_op, ld_index_check) +GEN_VEXT_TRANS(vlxhu_v, 5, rnfvm, ld_index_op, ld_index_check) +GEN_VEXT_TRANS(vlxwu_v, 6, rnfvm, ld_index_op, ld_index_check) + +static bool st_index_op(DisasContext *s, arg_rnfvm *a, uint8_t seq) +{ + uint32_t data = 0; + gen_helper_ldst_index *fn; + static gen_helper_ldst_index * const fns[4][4] = { + { gen_helper_vsxb_v_b, gen_helper_vsxb_v_h, + gen_helper_vsxb_v_w, gen_helper_vsxb_v_d }, + { NULL, gen_helper_vsxh_v_h, + gen_helper_vsxh_v_w, gen_helper_vsxh_v_d }, + { NULL, NULL, + gen_helper_vsxw_v_w, gen_helper_vsxw_v_d }, + { gen_helper_vsxe_v_b, gen_helper_vsxe_v_h, + gen_helper_vsxe_v_w, gen_helper_vsxe_v_d } + }; + + fn = fns[seq][s->sew]; + if (fn == NULL) { + return false; + } + + data = FIELD_DP32(data, VDATA, MLEN, s->mlen); + data = FIELD_DP32(data, VDATA, VM, a->vm); + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); + data = FIELD_DP32(data, VDATA, NF, a->nf); + return ldst_index_trans(a->rd, a->rs1, a->rs2, data, fn, s); +} + +static bool st_index_check(DisasContext *s, arg_rnfvm* a) +{ + return (vext_check_isa_ill(s) && + vext_check_reg(s, a->rd, false) && + vext_check_reg(s, a->rs2, false) && + vext_check_nf(s, a->nf)); +} + +GEN_VEXT_TRANS(vsxb_v, 0, rnfvm, st_index_op, st_index_check) +GEN_VEXT_TRANS(vsxh_v, 1, rnfvm, st_index_op, st_index_check) +GEN_VEXT_TRANS(vsxw_v, 2, rnfvm, st_index_op, st_index_check) +GEN_VEXT_TRANS(vsxe_v, 3, rnfvm, st_index_op, st_index_check) diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -XXX,XX +XXX,XX @@ GEN_VEXT_ST_US(vse_v_b, int8_t, int8_t , ste_b) GEN_VEXT_ST_US(vse_v_h, int16_t, int16_t, ste_h) GEN_VEXT_ST_US(vse_v_w, int32_t, int32_t, ste_w) GEN_VEXT_ST_US(vse_v_d, int64_t, int64_t, ste_d) + +/* + *** index: access vector element from indexed memory + */ +typedef target_ulong vext_get_index_addr(target_ulong base, + uint32_t idx, void *vs2); + +#define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H) \ +static target_ulong NAME(target_ulong base, \ + uint32_t idx, void *vs2) \ +{ \ + return (base + *((ETYPE *)vs2 + H(idx))); \ +} + +GEN_VEXT_GET_INDEX_ADDR(idx_b, int8_t, H1) +GEN_VEXT_GET_INDEX_ADDR(idx_h, int16_t, H2) +GEN_VEXT_GET_INDEX_ADDR(idx_w, int32_t, H4) +GEN_VEXT_GET_INDEX_ADDR(idx_d, int64_t, H8) + +static inline void +vext_ldst_index(void *vd, void *v0, target_ulong base, + void *vs2, CPURISCVState *env, uint32_t desc, + vext_get_index_addr get_index_addr, + vext_ldst_elem_fn *ldst_elem, + clear_fn *clear_elem, + uint32_t esz, uint32_t msz, uintptr_t ra, + MMUAccessType access_type) +{ + uint32_t i, k; + uint32_t nf = vext_nf(desc); + uint32_t vm = vext_vm(desc); + uint32_t mlen = vext_mlen(desc); + uint32_t vlmax = vext_maxsz(desc) / esz; + + /* probe every access*/ + for (i = 0; i < env->vl; i++) { + if (!vm && !vext_elem_mask(v0, mlen, i)) { + continue; + } + probe_pages(env, get_index_addr(base, i, vs2), nf * msz, ra, + access_type); + } + /* load bytes from guest memory */ + for (i = 0; i < env->vl; i++) { + k = 0; + if (!vm && !vext_elem_mask(v0, mlen, i)) { + continue; + } + while (k < nf) { + abi_ptr addr = get_index_addr(base, i, vs2) + k * msz; + ldst_elem(env, addr, i + k * vlmax, vd, ra); + k++; + } + } + /* clear tail elements */ + if (clear_elem) { + for (k = 0; k < nf; k++) { + clear_elem(vd, env->vl + k * vlmax, env->vl * esz, vlmax * esz); + } + } +} + +#define GEN_VEXT_LD_INDEX(NAME, MTYPE, ETYPE, INDEX_FN, LOAD_FN, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ + void *vs2, CPURISCVState *env, uint32_t desc) \ +{ \ + vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \ + LOAD_FN, CLEAR_FN, sizeof(ETYPE), sizeof(MTYPE), \ + GETPC(), MMU_DATA_LOAD); \ +} + +GEN_VEXT_LD_INDEX(vlxb_v_b, int8_t, int8_t, idx_b, ldb_b, clearb) +GEN_VEXT_LD_INDEX(vlxb_v_h, int8_t, int16_t, idx_h, ldb_h, clearh) +GEN_VEXT_LD_INDEX(vlxb_v_w, int8_t, int32_t, idx_w, ldb_w, clearl) +GEN_VEXT_LD_INDEX(vlxb_v_d, int8_t, int64_t, idx_d, ldb_d, clearq) +GEN_VEXT_LD_INDEX(vlxh_v_h, int16_t, int16_t, idx_h, ldh_h, clearh) +GEN_VEXT_LD_INDEX(vlxh_v_w, int16_t, int32_t, idx_w, ldh_w, clearl) +GEN_VEXT_LD_INDEX(vlxh_v_d, int16_t, int64_t, idx_d, ldh_d, clearq) +GEN_VEXT_LD_INDEX(vlxw_v_w, int32_t, int32_t, idx_w, ldw_w, clearl) +GEN_VEXT_LD_INDEX(vlxw_v_d, int32_t, int64_t, idx_d, ldw_d, clearq) +GEN_VEXT_LD_INDEX(vlxe_v_b, int8_t, int8_t, idx_b, lde_b, clearb) +GEN_VEXT_LD_INDEX(vlxe_v_h, int16_t, int16_t, idx_h, lde_h, clearh) +GEN_VEXT_LD_INDEX(vlxe_v_w, int32_t, int32_t, idx_w, lde_w, clearl) +GEN_VEXT_LD_INDEX(vlxe_v_d, int64_t, int64_t, idx_d, lde_d, clearq) +GEN_VEXT_LD_INDEX(vlxbu_v_b, uint8_t, uint8_t, idx_b, ldbu_b, clearb) +GEN_VEXT_LD_INDEX(vlxbu_v_h, uint8_t, uint16_t, idx_h, ldbu_h, clearh) +GEN_VEXT_LD_INDEX(vlxbu_v_w, uint8_t, uint32_t, idx_w, ldbu_w, clearl) +GEN_VEXT_LD_INDEX(vlxbu_v_d, uint8_t, uint64_t, idx_d, ldbu_d, clearq) +GEN_VEXT_LD_INDEX(vlxhu_v_h, uint16_t, uint16_t, idx_h, ldhu_h, clearh) +GEN_VEXT_LD_INDEX(vlxhu_v_w, uint16_t, uint32_t, idx_w, ldhu_w, clearl) +GEN_VEXT_LD_INDEX(vlxhu_v_d, uint16_t, uint64_t, idx_d, ldhu_d, clearq) +GEN_VEXT_LD_INDEX(vlxwu_v_w, uint32_t, uint32_t, idx_w, ldwu_w, clearl) +GEN_VEXT_LD_INDEX(vlxwu_v_d, uint32_t, uint64_t, idx_d, ldwu_d, clearq) + +#define GEN_VEXT_ST_INDEX(NAME, MTYPE, ETYPE, INDEX_FN, STORE_FN)\ +void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ + void *vs2, CPURISCVState *env, uint32_t desc) \ +{ \ + vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \ + STORE_FN, NULL, sizeof(ETYPE), sizeof(MTYPE),\ + GETPC(), MMU_DATA_STORE); \ +} + +GEN_VEXT_ST_INDEX(vsxb_v_b, int8_t, int8_t, idx_b, stb_b) +GEN_VEXT_ST_INDEX(vsxb_v_h, int8_t, int16_t, idx_h, stb_h) +GEN_VEXT_ST_INDEX(vsxb_v_w, int8_t, int32_t, idx_w, stb_w) +GEN_VEXT_ST_INDEX(vsxb_v_d, int8_t, int64_t, idx_d, stb_d) +GEN_VEXT_ST_INDEX(vsxh_v_h, int16_t, int16_t, idx_h, sth_h) +GEN_VEXT_ST_INDEX(vsxh_v_w, int16_t, int32_t, idx_w, sth_w) +GEN_VEXT_ST_INDEX(vsxh_v_d, int16_t, int64_t, idx_d, sth_d) +GEN_VEXT_ST_INDEX(vsxw_v_w, int32_t, int32_t, idx_w, stw_w) +GEN_VEXT_ST_INDEX(vsxw_v_d, int32_t, int64_t, idx_d, stw_d) +GEN_VEXT_ST_INDEX(vsxe_v_b, int8_t, int8_t, idx_b, ste_b) +GEN_VEXT_ST_INDEX(vsxe_v_h, int16_t, int16_t, idx_h, ste_h) +GEN_VEXT_ST_INDEX(vsxe_v_w, int32_t, int32_t, idx_w, ste_w) +GEN_VEXT_ST_INDEX(vsxe_v_d, int64_t, int64_t, idx_d, ste_d) -- 2.27.0
From: LIU Zhiwei <zhiwei_liu@c-sky.com> The unit-stride fault-only-fault load instructions are used to vectorize loops with data-dependent exit conditions(while loops). These instructions execute as a regular load except that they will only take a trap on element 0. Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Message-id: 20200623215920.2594-9-zhiwei_liu@c-sky.com Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/helper.h | 22 +++++ target/riscv/insn32.decode | 7 ++ target/riscv/insn_trans/trans_rvv.inc.c | 73 ++++++++++++++++ target/riscv/vector_helper.c | 110 ++++++++++++++++++++++++ 4 files changed, 212 insertions(+) diff --git a/target/riscv/helper.h b/target/riscv/helper.h index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/helper.h +++ b/target/riscv/helper.h @@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vsxe_v_b, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vsxe_v_h, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vsxe_v_w, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vsxe_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_5(vlbff_v_b, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlbff_v_h, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlbff_v_w, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlbff_v_d, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlhff_v_h, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlhff_v_w, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlhff_v_d, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlwff_v_w, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlwff_v_d, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vleff_v_b, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vleff_v_h, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vleff_v_w, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vleff_v_d, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlbuff_v_b, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlbuff_v_h, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlbuff_v_w, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlbuff_v_d, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlhuff_v_h, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlhuff_v_w, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlhuff_v_d, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlwuff_v_w, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlwuff_v_d, void, ptr, ptr, tl, env, i32) diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn32.decode +++ b/target/riscv/insn32.decode @@ -XXX,XX +XXX,XX @@ vle_v ... 000 . 00000 ..... 111 ..... 0000111 @r2_nfvm vlbu_v ... 000 . 00000 ..... 000 ..... 0000111 @r2_nfvm vlhu_v ... 000 . 00000 ..... 101 ..... 0000111 @r2_nfvm vlwu_v ... 000 . 00000 ..... 110 ..... 0000111 @r2_nfvm +vlbff_v ... 100 . 10000 ..... 000 ..... 0000111 @r2_nfvm +vlhff_v ... 100 . 10000 ..... 101 ..... 0000111 @r2_nfvm +vlwff_v ... 100 . 10000 ..... 110 ..... 0000111 @r2_nfvm +vleff_v ... 000 . 10000 ..... 111 ..... 0000111 @r2_nfvm +vlbuff_v ... 000 . 10000 ..... 000 ..... 0000111 @r2_nfvm +vlhuff_v ... 000 . 10000 ..... 101 ..... 0000111 @r2_nfvm +vlwuff_v ... 000 . 10000 ..... 110 ..... 0000111 @r2_nfvm vsb_v ... 000 . 00000 ..... 000 ..... 0100111 @r2_nfvm vsh_v ... 000 . 00000 ..... 101 ..... 0100111 @r2_nfvm vsw_v ... 000 . 00000 ..... 110 ..... 0100111 @r2_nfvm diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn_trans/trans_rvv.inc.c +++ b/target/riscv/insn_trans/trans_rvv.inc.c @@ -XXX,XX +XXX,XX @@ GEN_VEXT_TRANS(vsxb_v, 0, rnfvm, st_index_op, st_index_check) GEN_VEXT_TRANS(vsxh_v, 1, rnfvm, st_index_op, st_index_check) GEN_VEXT_TRANS(vsxw_v, 2, rnfvm, st_index_op, st_index_check) GEN_VEXT_TRANS(vsxe_v, 3, rnfvm, st_index_op, st_index_check) + +/* + *** unit stride fault-only-first load + */ +static bool ldff_trans(uint32_t vd, uint32_t rs1, uint32_t data, + gen_helper_ldst_us *fn, DisasContext *s) +{ + TCGv_ptr dest, mask; + TCGv base; + TCGv_i32 desc; + + TCGLabel *over = gen_new_label(); + tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + + dest = tcg_temp_new_ptr(); + mask = tcg_temp_new_ptr(); + base = tcg_temp_new(); + desc = tcg_const_i32(simd_desc(0, s->vlen / 8, data)); + + gen_get_gpr(base, rs1); + tcg_gen_addi_ptr(dest, cpu_env, vreg_ofs(s, vd)); + tcg_gen_addi_ptr(mask, cpu_env, vreg_ofs(s, 0)); + + fn(dest, mask, base, cpu_env, desc); + + tcg_temp_free_ptr(dest); + tcg_temp_free_ptr(mask); + tcg_temp_free(base); + tcg_temp_free_i32(desc); + gen_set_label(over); + return true; +} + +static bool ldff_op(DisasContext *s, arg_r2nfvm *a, uint8_t seq) +{ + uint32_t data = 0; + gen_helper_ldst_us *fn; + static gen_helper_ldst_us * const fns[7][4] = { + { gen_helper_vlbff_v_b, gen_helper_vlbff_v_h, + gen_helper_vlbff_v_w, gen_helper_vlbff_v_d }, + { NULL, gen_helper_vlhff_v_h, + gen_helper_vlhff_v_w, gen_helper_vlhff_v_d }, + { NULL, NULL, + gen_helper_vlwff_v_w, gen_helper_vlwff_v_d }, + { gen_helper_vleff_v_b, gen_helper_vleff_v_h, + gen_helper_vleff_v_w, gen_helper_vleff_v_d }, + { gen_helper_vlbuff_v_b, gen_helper_vlbuff_v_h, + gen_helper_vlbuff_v_w, gen_helper_vlbuff_v_d }, + { NULL, gen_helper_vlhuff_v_h, + gen_helper_vlhuff_v_w, gen_helper_vlhuff_v_d }, + { NULL, NULL, + gen_helper_vlwuff_v_w, gen_helper_vlwuff_v_d } + }; + + fn = fns[seq][s->sew]; + if (fn == NULL) { + return false; + } + + data = FIELD_DP32(data, VDATA, MLEN, s->mlen); + data = FIELD_DP32(data, VDATA, VM, a->vm); + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); + data = FIELD_DP32(data, VDATA, NF, a->nf); + return ldff_trans(a->rd, a->rs1, data, fn, s); +} + +GEN_VEXT_TRANS(vlbff_v, 0, r2nfvm, ldff_op, ld_us_check) +GEN_VEXT_TRANS(vlhff_v, 1, r2nfvm, ldff_op, ld_us_check) +GEN_VEXT_TRANS(vlwff_v, 2, r2nfvm, ldff_op, ld_us_check) +GEN_VEXT_TRANS(vleff_v, 3, r2nfvm, ldff_op, ld_us_check) +GEN_VEXT_TRANS(vlbuff_v, 4, r2nfvm, ldff_op, ld_us_check) +GEN_VEXT_TRANS(vlhuff_v, 5, r2nfvm, ldff_op, ld_us_check) +GEN_VEXT_TRANS(vlwuff_v, 6, r2nfvm, ldff_op, ld_us_check) diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -XXX,XX +XXX,XX @@ GEN_VEXT_ST_INDEX(vsxe_v_b, int8_t, int8_t, idx_b, ste_b) GEN_VEXT_ST_INDEX(vsxe_v_h, int16_t, int16_t, idx_h, ste_h) GEN_VEXT_ST_INDEX(vsxe_v_w, int32_t, int32_t, idx_w, ste_w) GEN_VEXT_ST_INDEX(vsxe_v_d, int64_t, int64_t, idx_d, ste_d) + +/* + *** unit-stride fault-only-fisrt load instructions + */ +static inline void +vext_ldff(void *vd, void *v0, target_ulong base, + CPURISCVState *env, uint32_t desc, + vext_ldst_elem_fn *ldst_elem, + clear_fn *clear_elem, + uint32_t esz, uint32_t msz, uintptr_t ra) +{ + void *host; + uint32_t i, k, vl = 0; + uint32_t mlen = vext_mlen(desc); + uint32_t nf = vext_nf(desc); + uint32_t vm = vext_vm(desc); + uint32_t vlmax = vext_maxsz(desc) / esz; + target_ulong addr, offset, remain; + + /* probe every access*/ + for (i = 0; i < env->vl; i++) { + if (!vm && !vext_elem_mask(v0, mlen, i)) { + continue; + } + addr = base + nf * i * msz; + if (i == 0) { + probe_pages(env, addr, nf * msz, ra, MMU_DATA_LOAD); + } else { + /* if it triggers an exception, no need to check watchpoint */ + remain = nf * msz; + while (remain > 0) { + offset = -(addr | TARGET_PAGE_MASK); + host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD, + cpu_mmu_index(env, false)); + if (host) { +#ifdef CONFIG_USER_ONLY + if (page_check_range(addr, nf * msz, PAGE_READ) < 0) { + vl = i; + goto ProbeSuccess; + } +#else + probe_pages(env, addr, nf * msz, ra, MMU_DATA_LOAD); +#endif + } else { + vl = i; + goto ProbeSuccess; + } + if (remain <= offset) { + break; + } + remain -= offset; + addr += offset; + } + } + } +ProbeSuccess: + /* load bytes from guest memory */ + if (vl != 0) { + env->vl = vl; + } + for (i = 0; i < env->vl; i++) { + k = 0; + if (!vm && !vext_elem_mask(v0, mlen, i)) { + continue; + } + while (k < nf) { + target_ulong addr = base + (i * nf + k) * msz; + ldst_elem(env, addr, i + k * vlmax, vd, ra); + k++; + } + } + /* clear tail elements */ + if (vl != 0) { + return; + } + for (k = 0; k < nf; k++) { + clear_elem(vd, env->vl + k * vlmax, env->vl * esz, vlmax * esz); + } +} + +#define GEN_VEXT_LDFF(NAME, MTYPE, ETYPE, LOAD_FN, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + vext_ldff(vd, v0, base, env, desc, LOAD_FN, CLEAR_FN, \ + sizeof(ETYPE), sizeof(MTYPE), GETPC()); \ +} + +GEN_VEXT_LDFF(vlbff_v_b, int8_t, int8_t, ldb_b, clearb) +GEN_VEXT_LDFF(vlbff_v_h, int8_t, int16_t, ldb_h, clearh) +GEN_VEXT_LDFF(vlbff_v_w, int8_t, int32_t, ldb_w, clearl) +GEN_VEXT_LDFF(vlbff_v_d, int8_t, int64_t, ldb_d, clearq) +GEN_VEXT_LDFF(vlhff_v_h, int16_t, int16_t, ldh_h, clearh) +GEN_VEXT_LDFF(vlhff_v_w, int16_t, int32_t, ldh_w, clearl) +GEN_VEXT_LDFF(vlhff_v_d, int16_t, int64_t, ldh_d, clearq) +GEN_VEXT_LDFF(vlwff_v_w, int32_t, int32_t, ldw_w, clearl) +GEN_VEXT_LDFF(vlwff_v_d, int32_t, int64_t, ldw_d, clearq) +GEN_VEXT_LDFF(vleff_v_b, int8_t, int8_t, lde_b, clearb) +GEN_VEXT_LDFF(vleff_v_h, int16_t, int16_t, lde_h, clearh) +GEN_VEXT_LDFF(vleff_v_w, int32_t, int32_t, lde_w, clearl) +GEN_VEXT_LDFF(vleff_v_d, int64_t, int64_t, lde_d, clearq) +GEN_VEXT_LDFF(vlbuff_v_b, uint8_t, uint8_t, ldbu_b, clearb) +GEN_VEXT_LDFF(vlbuff_v_h, uint8_t, uint16_t, ldbu_h, clearh) +GEN_VEXT_LDFF(vlbuff_v_w, uint8_t, uint32_t, ldbu_w, clearl) +GEN_VEXT_LDFF(vlbuff_v_d, uint8_t, uint64_t, ldbu_d, clearq) +GEN_VEXT_LDFF(vlhuff_v_h, uint16_t, uint16_t, ldhu_h, clearh) +GEN_VEXT_LDFF(vlhuff_v_w, uint16_t, uint32_t, ldhu_w, clearl) +GEN_VEXT_LDFF(vlhuff_v_d, uint16_t, uint64_t, ldhu_d, clearq) +GEN_VEXT_LDFF(vlwuff_v_w, uint32_t, uint32_t, ldwu_w, clearl) +GEN_VEXT_LDFF(vlwuff_v_d, uint32_t, uint64_t, ldwu_d, clearq) -- 2.27.0
From: LIU Zhiwei <zhiwei_liu@c-sky.com> Vector AMOs operate as if aq and rl bits were zero on each element with regard to ordering relative to other instructions in the same hart. Vector AMOs provide no ordering guarantee between element operations in the same vector AMO instruction Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Message-id: 20200623215920.2594-10-zhiwei_liu@c-sky.com Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/helper.h | 29 +++++ target/riscv/internals.h | 1 + target/riscv/insn32-64.decode | 11 ++ target/riscv/insn32.decode | 13 +++ target/riscv/insn_trans/trans_rvv.inc.c | 138 ++++++++++++++++++++++ target/riscv/vector_helper.c | 147 ++++++++++++++++++++++++ 6 files changed, 339 insertions(+) diff --git a/target/riscv/helper.h b/target/riscv/helper.h index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/helper.h +++ b/target/riscv/helper.h @@ -XXX,XX +XXX,XX @@ DEF_HELPER_5(vlhuff_v_w, void, ptr, ptr, tl, env, i32) DEF_HELPER_5(vlhuff_v_d, void, ptr, ptr, tl, env, i32) DEF_HELPER_5(vlwuff_v_w, void, ptr, ptr, tl, env, i32) DEF_HELPER_5(vlwuff_v_d, void, ptr, ptr, tl, env, i32) +#ifdef TARGET_RISCV64 +DEF_HELPER_6(vamoswapw_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamoswapd_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamoaddw_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamoaddd_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamoxorw_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamoxord_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamoandw_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamoandd_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamoorw_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamoord_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamominw_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamomind_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamomaxw_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamomaxd_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamominuw_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamominud_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamomaxuw_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamomaxud_v_d, void, ptr, ptr, tl, ptr, env, i32) +#endif +DEF_HELPER_6(vamoswapw_v_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamoaddw_v_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamoxorw_v_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamoandw_v_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamoorw_v_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamominw_v_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamomaxw_v_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamominuw_v_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamomaxuw_v_w, void, ptr, ptr, tl, ptr, env, i32) diff --git a/target/riscv/internals.h b/target/riscv/internals.h index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/internals.h +++ b/target/riscv/internals.h @@ -XXX,XX +XXX,XX @@ FIELD(VDATA, MLEN, 0, 8) FIELD(VDATA, VM, 8, 1) FIELD(VDATA, LMUL, 9, 2) FIELD(VDATA, NF, 11, 4) +FIELD(VDATA, WD, 11, 1) #endif diff --git a/target/riscv/insn32-64.decode b/target/riscv/insn32-64.decode index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn32-64.decode +++ b/target/riscv/insn32-64.decode @@ -XXX,XX +XXX,XX @@ amomax_d 10100 . . ..... ..... 011 ..... 0101111 @atom_st amominu_d 11000 . . ..... ..... 011 ..... 0101111 @atom_st amomaxu_d 11100 . . ..... ..... 011 ..... 0101111 @atom_st +#*** Vector AMO operations (in addition to Zvamo) *** +vamoswapd_v 00001 . . ..... ..... 111 ..... 0101111 @r_wdvm +vamoaddd_v 00000 . . ..... ..... 111 ..... 0101111 @r_wdvm +vamoxord_v 00100 . . ..... ..... 111 ..... 0101111 @r_wdvm +vamoandd_v 01100 . . ..... ..... 111 ..... 0101111 @r_wdvm +vamoord_v 01000 . . ..... ..... 111 ..... 0101111 @r_wdvm +vamomind_v 10000 . . ..... ..... 111 ..... 0101111 @r_wdvm +vamomaxd_v 10100 . . ..... ..... 111 ..... 0101111 @r_wdvm +vamominud_v 11000 . . ..... ..... 111 ..... 0101111 @r_wdvm +vamomaxud_v 11100 . . ..... ..... 111 ..... 0101111 @r_wdvm + # *** RV64F Standard Extension (in addition to RV32F) *** fcvt_l_s 1100000 00010 ..... ... ..... 1010011 @r2_rm fcvt_lu_s 1100000 00011 ..... ... ..... 1010011 @r2_rm diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn32.decode +++ b/target/riscv/insn32.decode @@ -XXX,XX +XXX,XX @@ &u imm rd &shift shamt rs1 rd &atomic aq rl rs2 rs1 rd +&rwdvm vm wd rd rs1 rs2 &r2nfvm vm rd rs1 nf &rnfvm vm rd rs1 rs2 nf @@ -XXX,XX +XXX,XX @@ @r2 ....... ..... ..... ... ..... ....... %rs1 %rd @r2_nfvm ... ... vm:1 ..... ..... ... ..... ....... &r2nfvm %nf %rs1 %rd @r_nfvm ... ... vm:1 ..... ..... ... ..... ....... &rnfvm %nf %rs2 %rs1 %rd +@r_wdvm ..... wd:1 vm:1 ..... ..... ... ..... ....... &rwdvm %rs2 %rs1 %rd @r2_zimm . zimm:11 ..... ... ..... ....... %rs1 %rd @hfence_gvma ....... ..... ..... ... ..... ....... %rs2 %rs1 @@ -XXX,XX +XXX,XX @@ vsxh_v ... -11 . ..... ..... 101 ..... 0100111 @r_nfvm vsxw_v ... -11 . ..... ..... 110 ..... 0100111 @r_nfvm vsxe_v ... -11 . ..... ..... 111 ..... 0100111 @r_nfvm +#*** Vector AMO operations are encoded under the standard AMO major opcode *** +vamoswapw_v 00001 . . ..... ..... 110 ..... 0101111 @r_wdvm +vamoaddw_v 00000 . . ..... ..... 110 ..... 0101111 @r_wdvm +vamoxorw_v 00100 . . ..... ..... 110 ..... 0101111 @r_wdvm +vamoandw_v 01100 . . ..... ..... 110 ..... 0101111 @r_wdvm +vamoorw_v 01000 . . ..... ..... 110 ..... 0101111 @r_wdvm +vamominw_v 10000 . . ..... ..... 110 ..... 0101111 @r_wdvm +vamomaxw_v 10100 . . ..... ..... 110 ..... 0101111 @r_wdvm +vamominuw_v 11000 . . ..... ..... 110 ..... 0101111 @r_wdvm +vamomaxuw_v 11100 . . ..... ..... 110 ..... 0101111 @r_wdvm + # *** new major opcode OP-V *** vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm vsetvl 1000000 ..... ..... 111 ..... 1010111 @r diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn_trans/trans_rvv.inc.c +++ b/target/riscv/insn_trans/trans_rvv.inc.c @@ -XXX,XX +XXX,XX @@ GEN_VEXT_TRANS(vleff_v, 3, r2nfvm, ldff_op, ld_us_check) GEN_VEXT_TRANS(vlbuff_v, 4, r2nfvm, ldff_op, ld_us_check) GEN_VEXT_TRANS(vlhuff_v, 5, r2nfvm, ldff_op, ld_us_check) GEN_VEXT_TRANS(vlwuff_v, 6, r2nfvm, ldff_op, ld_us_check) + +/* + *** vector atomic operation + */ +typedef void gen_helper_amo(TCGv_ptr, TCGv_ptr, TCGv, TCGv_ptr, + TCGv_env, TCGv_i32); + +static bool amo_trans(uint32_t vd, uint32_t rs1, uint32_t vs2, + uint32_t data, gen_helper_amo *fn, DisasContext *s) +{ + TCGv_ptr dest, mask, index; + TCGv base; + TCGv_i32 desc; + + TCGLabel *over = gen_new_label(); + tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + + dest = tcg_temp_new_ptr(); + mask = tcg_temp_new_ptr(); + index = tcg_temp_new_ptr(); + base = tcg_temp_new(); + desc = tcg_const_i32(simd_desc(0, s->vlen / 8, data)); + + gen_get_gpr(base, rs1); + tcg_gen_addi_ptr(dest, cpu_env, vreg_ofs(s, vd)); + tcg_gen_addi_ptr(index, cpu_env, vreg_ofs(s, vs2)); + tcg_gen_addi_ptr(mask, cpu_env, vreg_ofs(s, 0)); + + fn(dest, mask, base, index, cpu_env, desc); + + tcg_temp_free_ptr(dest); + tcg_temp_free_ptr(mask); + tcg_temp_free_ptr(index); + tcg_temp_free(base); + tcg_temp_free_i32(desc); + gen_set_label(over); + return true; +} + +static bool amo_op(DisasContext *s, arg_rwdvm *a, uint8_t seq) +{ + uint32_t data = 0; + gen_helper_amo *fn; + static gen_helper_amo *const fnsw[9] = { + /* no atomic operation */ + gen_helper_vamoswapw_v_w, + gen_helper_vamoaddw_v_w, + gen_helper_vamoxorw_v_w, + gen_helper_vamoandw_v_w, + gen_helper_vamoorw_v_w, + gen_helper_vamominw_v_w, + gen_helper_vamomaxw_v_w, + gen_helper_vamominuw_v_w, + gen_helper_vamomaxuw_v_w + }; +#ifdef TARGET_RISCV64 + static gen_helper_amo *const fnsd[18] = { + gen_helper_vamoswapw_v_d, + gen_helper_vamoaddw_v_d, + gen_helper_vamoxorw_v_d, + gen_helper_vamoandw_v_d, + gen_helper_vamoorw_v_d, + gen_helper_vamominw_v_d, + gen_helper_vamomaxw_v_d, + gen_helper_vamominuw_v_d, + gen_helper_vamomaxuw_v_d, + gen_helper_vamoswapd_v_d, + gen_helper_vamoaddd_v_d, + gen_helper_vamoxord_v_d, + gen_helper_vamoandd_v_d, + gen_helper_vamoord_v_d, + gen_helper_vamomind_v_d, + gen_helper_vamomaxd_v_d, + gen_helper_vamominud_v_d, + gen_helper_vamomaxud_v_d + }; +#endif + + if (tb_cflags(s->base.tb) & CF_PARALLEL) { + gen_helper_exit_atomic(cpu_env); + s->base.is_jmp = DISAS_NORETURN; + return true; + } else { + if (s->sew == 3) { +#ifdef TARGET_RISCV64 + fn = fnsd[seq]; +#else + /* Check done in amo_check(). */ + g_assert_not_reached(); +#endif + } else { + fn = fnsw[seq]; + } + } + + data = FIELD_DP32(data, VDATA, MLEN, s->mlen); + data = FIELD_DP32(data, VDATA, VM, a->vm); + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); + data = FIELD_DP32(data, VDATA, WD, a->wd); + return amo_trans(a->rd, a->rs1, a->rs2, data, fn, s); +} +/* + * There are two rules check here. + * + * 1. SEW must be at least as wide as the AMO memory element size. + * + * 2. If SEW is greater than XLEN, an illegal instruction exception is raised. + */ +static bool amo_check(DisasContext *s, arg_rwdvm* a) +{ + return (!s->vill && has_ext(s, RVA) && + (!a->wd || vext_check_overlap_mask(s, a->rd, a->vm, false)) && + vext_check_reg(s, a->rd, false) && + vext_check_reg(s, a->rs2, false) && + ((1 << s->sew) <= sizeof(target_ulong)) && + ((1 << s->sew) >= 4)); +} + +GEN_VEXT_TRANS(vamoswapw_v, 0, rwdvm, amo_op, amo_check) +GEN_VEXT_TRANS(vamoaddw_v, 1, rwdvm, amo_op, amo_check) +GEN_VEXT_TRANS(vamoxorw_v, 2, rwdvm, amo_op, amo_check) +GEN_VEXT_TRANS(vamoandw_v, 3, rwdvm, amo_op, amo_check) +GEN_VEXT_TRANS(vamoorw_v, 4, rwdvm, amo_op, amo_check) +GEN_VEXT_TRANS(vamominw_v, 5, rwdvm, amo_op, amo_check) +GEN_VEXT_TRANS(vamomaxw_v, 6, rwdvm, amo_op, amo_check) +GEN_VEXT_TRANS(vamominuw_v, 7, rwdvm, amo_op, amo_check) +GEN_VEXT_TRANS(vamomaxuw_v, 8, rwdvm, amo_op, amo_check) +#ifdef TARGET_RISCV64 +GEN_VEXT_TRANS(vamoswapd_v, 9, rwdvm, amo_op, amo_check) +GEN_VEXT_TRANS(vamoaddd_v, 10, rwdvm, amo_op, amo_check) +GEN_VEXT_TRANS(vamoxord_v, 11, rwdvm, amo_op, amo_check) +GEN_VEXT_TRANS(vamoandd_v, 12, rwdvm, amo_op, amo_check) +GEN_VEXT_TRANS(vamoord_v, 13, rwdvm, amo_op, amo_check) +GEN_VEXT_TRANS(vamomind_v, 14, rwdvm, amo_op, amo_check) +GEN_VEXT_TRANS(vamomaxd_v, 15, rwdvm, amo_op, amo_check) +GEN_VEXT_TRANS(vamominud_v, 16, rwdvm, amo_op, amo_check) +GEN_VEXT_TRANS(vamomaxud_v, 17, rwdvm, amo_op, amo_check) +#endif diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -XXX,XX +XXX,XX @@ static inline uint32_t vext_lmul(uint32_t desc) return FIELD_EX32(simd_data(desc), VDATA, LMUL); } +static uint32_t vext_wd(uint32_t desc) +{ + return (simd_data(desc) >> 11) & 0x1; +} + /* * Get vector group length in bytes. Its range is [64, 2048]. * @@ -XXX,XX +XXX,XX @@ GEN_VEXT_LDFF(vlhuff_v_w, uint16_t, uint32_t, ldhu_w, clearl) GEN_VEXT_LDFF(vlhuff_v_d, uint16_t, uint64_t, ldhu_d, clearq) GEN_VEXT_LDFF(vlwuff_v_w, uint32_t, uint32_t, ldwu_w, clearl) GEN_VEXT_LDFF(vlwuff_v_d, uint32_t, uint64_t, ldwu_d, clearq) + +/* + *** Vector AMO Operations (Zvamo) + */ +typedef void vext_amo_noatomic_fn(void *vs3, target_ulong addr, + uint32_t wd, uint32_t idx, CPURISCVState *env, + uintptr_t retaddr); + +/* no atomic opreation for vector atomic insructions */ +#define DO_SWAP(N, M) (M) +#define DO_AND(N, M) (N & M) +#define DO_XOR(N, M) (N ^ M) +#define DO_OR(N, M) (N | M) +#define DO_ADD(N, M) (N + M) + +#define GEN_VEXT_AMO_NOATOMIC_OP(NAME, ESZ, MSZ, H, DO_OP, SUF) \ +static void \ +vext_##NAME##_noatomic_op(void *vs3, target_ulong addr, \ + uint32_t wd, uint32_t idx, \ + CPURISCVState *env, uintptr_t retaddr)\ +{ \ + typedef int##ESZ##_t ETYPE; \ + typedef int##MSZ##_t MTYPE; \ + typedef uint##MSZ##_t UMTYPE __attribute__((unused)); \ + ETYPE *pe3 = (ETYPE *)vs3 + H(idx); \ + MTYPE a = cpu_ld##SUF##_data(env, addr), b = *pe3; \ + \ + cpu_st##SUF##_data(env, addr, DO_OP(a, b)); \ + if (wd) { \ + *pe3 = a; \ + } \ +} + +/* Signed min/max */ +#define DO_MAX(N, M) ((N) >= (M) ? (N) : (M)) +#define DO_MIN(N, M) ((N) >= (M) ? (M) : (N)) + +/* Unsigned min/max */ +#define DO_MAXU(N, M) DO_MAX((UMTYPE)N, (UMTYPE)M) +#define DO_MINU(N, M) DO_MIN((UMTYPE)N, (UMTYPE)M) + +GEN_VEXT_AMO_NOATOMIC_OP(vamoswapw_v_w, 32, 32, H4, DO_SWAP, l) +GEN_VEXT_AMO_NOATOMIC_OP(vamoaddw_v_w, 32, 32, H4, DO_ADD, l) +GEN_VEXT_AMO_NOATOMIC_OP(vamoxorw_v_w, 32, 32, H4, DO_XOR, l) +GEN_VEXT_AMO_NOATOMIC_OP(vamoandw_v_w, 32, 32, H4, DO_AND, l) +GEN_VEXT_AMO_NOATOMIC_OP(vamoorw_v_w, 32, 32, H4, DO_OR, l) +GEN_VEXT_AMO_NOATOMIC_OP(vamominw_v_w, 32, 32, H4, DO_MIN, l) +GEN_VEXT_AMO_NOATOMIC_OP(vamomaxw_v_w, 32, 32, H4, DO_MAX, l) +GEN_VEXT_AMO_NOATOMIC_OP(vamominuw_v_w, 32, 32, H4, DO_MINU, l) +GEN_VEXT_AMO_NOATOMIC_OP(vamomaxuw_v_w, 32, 32, H4, DO_MAXU, l) +#ifdef TARGET_RISCV64 +GEN_VEXT_AMO_NOATOMIC_OP(vamoswapw_v_d, 64, 32, H8, DO_SWAP, l) +GEN_VEXT_AMO_NOATOMIC_OP(vamoswapd_v_d, 64, 64, H8, DO_SWAP, q) +GEN_VEXT_AMO_NOATOMIC_OP(vamoaddw_v_d, 64, 32, H8, DO_ADD, l) +GEN_VEXT_AMO_NOATOMIC_OP(vamoaddd_v_d, 64, 64, H8, DO_ADD, q) +GEN_VEXT_AMO_NOATOMIC_OP(vamoxorw_v_d, 64, 32, H8, DO_XOR, l) +GEN_VEXT_AMO_NOATOMIC_OP(vamoxord_v_d, 64, 64, H8, DO_XOR, q) +GEN_VEXT_AMO_NOATOMIC_OP(vamoandw_v_d, 64, 32, H8, DO_AND, l) +GEN_VEXT_AMO_NOATOMIC_OP(vamoandd_v_d, 64, 64, H8, DO_AND, q) +GEN_VEXT_AMO_NOATOMIC_OP(vamoorw_v_d, 64, 32, H8, DO_OR, l) +GEN_VEXT_AMO_NOATOMIC_OP(vamoord_v_d, 64, 64, H8, DO_OR, q) +GEN_VEXT_AMO_NOATOMIC_OP(vamominw_v_d, 64, 32, H8, DO_MIN, l) +GEN_VEXT_AMO_NOATOMIC_OP(vamomind_v_d, 64, 64, H8, DO_MIN, q) +GEN_VEXT_AMO_NOATOMIC_OP(vamomaxw_v_d, 64, 32, H8, DO_MAX, l) +GEN_VEXT_AMO_NOATOMIC_OP(vamomaxd_v_d, 64, 64, H8, DO_MAX, q) +GEN_VEXT_AMO_NOATOMIC_OP(vamominuw_v_d, 64, 32, H8, DO_MINU, l) +GEN_VEXT_AMO_NOATOMIC_OP(vamominud_v_d, 64, 64, H8, DO_MINU, q) +GEN_VEXT_AMO_NOATOMIC_OP(vamomaxuw_v_d, 64, 32, H8, DO_MAXU, l) +GEN_VEXT_AMO_NOATOMIC_OP(vamomaxud_v_d, 64, 64, H8, DO_MAXU, q) +#endif + +static inline void +vext_amo_noatomic(void *vs3, void *v0, target_ulong base, + void *vs2, CPURISCVState *env, uint32_t desc, + vext_get_index_addr get_index_addr, + vext_amo_noatomic_fn *noatomic_op, + clear_fn *clear_elem, + uint32_t esz, uint32_t msz, uintptr_t ra) +{ + uint32_t i; + target_long addr; + uint32_t wd = vext_wd(desc); + uint32_t vm = vext_vm(desc); + uint32_t mlen = vext_mlen(desc); + uint32_t vlmax = vext_maxsz(desc) / esz; + + for (i = 0; i < env->vl; i++) { + if (!vm && !vext_elem_mask(v0, mlen, i)) { + continue; + } + probe_pages(env, get_index_addr(base, i, vs2), msz, ra, MMU_DATA_LOAD); + probe_pages(env, get_index_addr(base, i, vs2), msz, ra, MMU_DATA_STORE); + } + for (i = 0; i < env->vl; i++) { + if (!vm && !vext_elem_mask(v0, mlen, i)) { + continue; + } + addr = get_index_addr(base, i, vs2); + noatomic_op(vs3, addr, wd, i, env, ra); + } + clear_elem(vs3, env->vl, env->vl * esz, vlmax * esz); +} + +#define GEN_VEXT_AMO(NAME, MTYPE, ETYPE, INDEX_FN, CLEAR_FN) \ +void HELPER(NAME)(void *vs3, void *v0, target_ulong base, \ + void *vs2, CPURISCVState *env, uint32_t desc) \ +{ \ + vext_amo_noatomic(vs3, v0, base, vs2, env, desc, \ + INDEX_FN, vext_##NAME##_noatomic_op, \ + CLEAR_FN, sizeof(ETYPE), sizeof(MTYPE), \ + GETPC()); \ +} + +#ifdef TARGET_RISCV64 +GEN_VEXT_AMO(vamoswapw_v_d, int32_t, int64_t, idx_d, clearq) +GEN_VEXT_AMO(vamoswapd_v_d, int64_t, int64_t, idx_d, clearq) +GEN_VEXT_AMO(vamoaddw_v_d, int32_t, int64_t, idx_d, clearq) +GEN_VEXT_AMO(vamoaddd_v_d, int64_t, int64_t, idx_d, clearq) +GEN_VEXT_AMO(vamoxorw_v_d, int32_t, int64_t, idx_d, clearq) +GEN_VEXT_AMO(vamoxord_v_d, int64_t, int64_t, idx_d, clearq) +GEN_VEXT_AMO(vamoandw_v_d, int32_t, int64_t, idx_d, clearq) +GEN_VEXT_AMO(vamoandd_v_d, int64_t, int64_t, idx_d, clearq) +GEN_VEXT_AMO(vamoorw_v_d, int32_t, int64_t, idx_d, clearq) +GEN_VEXT_AMO(vamoord_v_d, int64_t, int64_t, idx_d, clearq) +GEN_VEXT_AMO(vamominw_v_d, int32_t, int64_t, idx_d, clearq) +GEN_VEXT_AMO(vamomind_v_d, int64_t, int64_t, idx_d, clearq) +GEN_VEXT_AMO(vamomaxw_v_d, int32_t, int64_t, idx_d, clearq) +GEN_VEXT_AMO(vamomaxd_v_d, int64_t, int64_t, idx_d, clearq) +GEN_VEXT_AMO(vamominuw_v_d, uint32_t, uint64_t, idx_d, clearq) +GEN_VEXT_AMO(vamominud_v_d, uint64_t, uint64_t, idx_d, clearq) +GEN_VEXT_AMO(vamomaxuw_v_d, uint32_t, uint64_t, idx_d, clearq) +GEN_VEXT_AMO(vamomaxud_v_d, uint64_t, uint64_t, idx_d, clearq) +#endif +GEN_VEXT_AMO(vamoswapw_v_w, int32_t, int32_t, idx_w, clearl) +GEN_VEXT_AMO(vamoaddw_v_w, int32_t, int32_t, idx_w, clearl) +GEN_VEXT_AMO(vamoxorw_v_w, int32_t, int32_t, idx_w, clearl) +GEN_VEXT_AMO(vamoandw_v_w, int32_t, int32_t, idx_w, clearl) +GEN_VEXT_AMO(vamoorw_v_w, int32_t, int32_t, idx_w, clearl) +GEN_VEXT_AMO(vamominw_v_w, int32_t, int32_t, idx_w, clearl) +GEN_VEXT_AMO(vamomaxw_v_w, int32_t, int32_t, idx_w, clearl) +GEN_VEXT_AMO(vamominuw_v_w, uint32_t, uint32_t, idx_w, clearl) +GEN_VEXT_AMO(vamomaxuw_v_w, uint32_t, uint32_t, idx_w, clearl) -- 2.27.0
From: LIU Zhiwei <zhiwei_liu@c-sky.com> Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Message-id: 20200623215920.2594-11-zhiwei_liu@c-sky.com Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/helper.h | 25 ++ target/riscv/insn32.decode | 10 + target/riscv/insn_trans/trans_rvv.inc.c | 291 ++++++++++++++++++++++++ target/riscv/vector_helper.c | 183 +++++++++++++++ 4 files changed, 509 insertions(+) diff --git a/target/riscv/helper.h b/target/riscv/helper.h index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/helper.h +++ b/target/riscv/helper.h @@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vamominw_v_w, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vamomaxw_v_w, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vamominuw_v_w, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vamomaxuw_v_w, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vadd_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vadd_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vadd_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vadd_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsub_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsub_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsub_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsub_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vadd_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vadd_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vadd_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vadd_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsub_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsub_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsub_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsub_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vrsub_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vrsub_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vrsub_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vrsub_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_FLAGS_4(vec_rsubs8, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vec_rsubs16, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vec_rsubs32, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vec_rsubs64, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn32.decode +++ b/target/riscv/insn32.decode @@ -XXX,XX +XXX,XX @@ &u imm rd &shift shamt rs1 rd &atomic aq rl rs2 rs1 rd +&rmrr vm rd rs1 rs2 &rwdvm vm wd rd rs1 rs2 &r2nfvm vm rd rs1 nf &rnfvm vm rd rs1 rs2 nf @@ -XXX,XX +XXX,XX @@ @r2 ....... ..... ..... ... ..... ....... %rs1 %rd @r2_nfvm ... ... vm:1 ..... ..... ... ..... ....... &r2nfvm %nf %rs1 %rd @r_nfvm ... ... vm:1 ..... ..... ... ..... ....... &rnfvm %nf %rs2 %rs1 %rd +@r_vm ...... vm:1 ..... ..... ... ..... ....... &rmrr %rs2 %rs1 %rd @r_wdvm ..... wd:1 vm:1 ..... ..... ... ..... ....... &rwdvm %rs2 %rs1 %rd @r2_zimm . zimm:11 ..... ... ..... ....... %rs1 %rd @@ -XXX,XX +XXX,XX @@ vamominuw_v 11000 . . ..... ..... 110 ..... 0101111 @r_wdvm vamomaxuw_v 11100 . . ..... ..... 110 ..... 0101111 @r_wdvm # *** new major opcode OP-V *** +vadd_vv 000000 . ..... ..... 000 ..... 1010111 @r_vm +vadd_vx 000000 . ..... ..... 100 ..... 1010111 @r_vm +vadd_vi 000000 . ..... ..... 011 ..... 1010111 @r_vm +vsub_vv 000010 . ..... ..... 000 ..... 1010111 @r_vm +vsub_vx 000010 . ..... ..... 100 ..... 1010111 @r_vm +vrsub_vx 000011 . ..... ..... 100 ..... 1010111 @r_vm +vrsub_vi 000011 . ..... ..... 011 ..... 1010111 @r_vm + vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm vsetvl 1000000 ..... ..... 111 ..... 1010111 @r diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn_trans/trans_rvv.inc.c +++ b/target/riscv/insn_trans/trans_rvv.inc.c @@ -XXX,XX +XXX,XX @@ GEN_VEXT_TRANS(vamomaxd_v, 15, rwdvm, amo_op, amo_check) GEN_VEXT_TRANS(vamominud_v, 16, rwdvm, amo_op, amo_check) GEN_VEXT_TRANS(vamomaxud_v, 17, rwdvm, amo_op, amo_check) #endif + +/* + *** Vector Integer Arithmetic Instructions + */ +#define MAXSZ(s) (s->vlen >> (3 - s->lmul)) + +static bool opivv_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, false) && + vext_check_reg(s, a->rd, false) && + vext_check_reg(s, a->rs2, false) && + vext_check_reg(s, a->rs1, false)); +} + +typedef void GVecGen3Fn(unsigned, uint32_t, uint32_t, + uint32_t, uint32_t, uint32_t); + +static inline bool +do_opivv_gvec(DisasContext *s, arg_rmrr *a, GVecGen3Fn *gvec_fn, + gen_helper_gvec_4_ptr *fn) +{ + TCGLabel *over = gen_new_label(); + if (!opivv_check(s, a)) { + return false; + } + + tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + + if (a->vm && s->vl_eq_vlmax) { + gvec_fn(s->sew, vreg_ofs(s, a->rd), + vreg_ofs(s, a->rs2), vreg_ofs(s, a->rs1), + MAXSZ(s), MAXSZ(s)); + } else { + uint32_t data = 0; + + data = FIELD_DP32(data, VDATA, MLEN, s->mlen); + data = FIELD_DP32(data, VDATA, VM, a->vm); + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); + tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), + vreg_ofs(s, a->rs1), vreg_ofs(s, a->rs2), + cpu_env, 0, s->vlen / 8, data, fn); + } + gen_set_label(over); + return true; +} + +/* OPIVV with GVEC IR */ +#define GEN_OPIVV_GVEC_TRANS(NAME, SUF) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + static gen_helper_gvec_4_ptr * const fns[4] = { \ + gen_helper_##NAME##_b, gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w, gen_helper_##NAME##_d, \ + }; \ + return do_opivv_gvec(s, a, tcg_gen_gvec_##SUF, fns[s->sew]); \ +} + +GEN_OPIVV_GVEC_TRANS(vadd_vv, add) +GEN_OPIVV_GVEC_TRANS(vsub_vv, sub) + +typedef void gen_helper_opivx(TCGv_ptr, TCGv_ptr, TCGv, TCGv_ptr, + TCGv_env, TCGv_i32); + +static bool opivx_trans(uint32_t vd, uint32_t rs1, uint32_t vs2, uint32_t vm, + gen_helper_opivx *fn, DisasContext *s) +{ + TCGv_ptr dest, src2, mask; + TCGv src1; + TCGv_i32 desc; + uint32_t data = 0; + + TCGLabel *over = gen_new_label(); + tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + + dest = tcg_temp_new_ptr(); + mask = tcg_temp_new_ptr(); + src2 = tcg_temp_new_ptr(); + src1 = tcg_temp_new(); + gen_get_gpr(src1, rs1); + + data = FIELD_DP32(data, VDATA, MLEN, s->mlen); + data = FIELD_DP32(data, VDATA, VM, vm); + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); + desc = tcg_const_i32(simd_desc(0, s->vlen / 8, data)); + + tcg_gen_addi_ptr(dest, cpu_env, vreg_ofs(s, vd)); + tcg_gen_addi_ptr(src2, cpu_env, vreg_ofs(s, vs2)); + tcg_gen_addi_ptr(mask, cpu_env, vreg_ofs(s, 0)); + + fn(dest, mask, src1, src2, cpu_env, desc); + + tcg_temp_free_ptr(dest); + tcg_temp_free_ptr(mask); + tcg_temp_free_ptr(src2); + tcg_temp_free(src1); + tcg_temp_free_i32(desc); + gen_set_label(over); + return true; +} + +static bool opivx_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, false) && + vext_check_reg(s, a->rd, false) && + vext_check_reg(s, a->rs2, false)); +} + +typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t, TCGv_i64, + uint32_t, uint32_t); + +static inline bool +do_opivx_gvec(DisasContext *s, arg_rmrr *a, GVecGen2sFn *gvec_fn, + gen_helper_opivx *fn) +{ + if (!opivx_check(s, a)) { + return false; + } + + if (a->vm && s->vl_eq_vlmax) { + TCGv_i64 src1 = tcg_temp_new_i64(); + TCGv tmp = tcg_temp_new(); + + gen_get_gpr(tmp, a->rs1); + tcg_gen_ext_tl_i64(src1, tmp); + gvec_fn(s->sew, vreg_ofs(s, a->rd), vreg_ofs(s, a->rs2), + src1, MAXSZ(s), MAXSZ(s)); + + tcg_temp_free_i64(src1); + tcg_temp_free(tmp); + return true; + } + return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fn, s); +} + +/* OPIVX with GVEC IR */ +#define GEN_OPIVX_GVEC_TRANS(NAME, SUF) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + static gen_helper_opivx * const fns[4] = { \ + gen_helper_##NAME##_b, gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w, gen_helper_##NAME##_d, \ + }; \ + return do_opivx_gvec(s, a, tcg_gen_gvec_##SUF, fns[s->sew]); \ +} + +GEN_OPIVX_GVEC_TRANS(vadd_vx, adds) +GEN_OPIVX_GVEC_TRANS(vsub_vx, subs) + +static void gen_vec_rsub8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) +{ + tcg_gen_vec_sub8_i64(d, b, a); +} + +static void gen_vec_rsub16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) +{ + tcg_gen_vec_sub8_i64(d, b, a); +} + +static void gen_rsub_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ + tcg_gen_sub_i32(ret, arg2, arg1); +} + +static void gen_rsub_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + tcg_gen_sub_i64(ret, arg2, arg1); +} + +static void gen_rsub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) +{ + tcg_gen_sub_vec(vece, r, b, a); +} + +static void tcg_gen_gvec_rsubs(unsigned vece, uint32_t dofs, uint32_t aofs, + TCGv_i64 c, uint32_t oprsz, uint32_t maxsz) +{ + static const GVecGen2s rsub_op[4] = { + { .fni8 = gen_vec_rsub8_i64, + .fniv = gen_rsub_vec, + .fno = gen_helper_vec_rsubs8, + .vece = MO_8 }, + { .fni8 = gen_vec_rsub16_i64, + .fniv = gen_rsub_vec, + .fno = gen_helper_vec_rsubs16, + .vece = MO_16 }, + { .fni4 = gen_rsub_i32, + .fniv = gen_rsub_vec, + .fno = gen_helper_vec_rsubs32, + .vece = MO_32 }, + { .fni8 = gen_rsub_i64, + .fniv = gen_rsub_vec, + .fno = gen_helper_vec_rsubs64, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .vece = MO_64 }, + }; + + tcg_debug_assert(vece <= MO_64); + tcg_gen_gvec_2s(dofs, aofs, oprsz, maxsz, c, &rsub_op[vece]); +} + +GEN_OPIVX_GVEC_TRANS(vrsub_vx, rsubs) + +static bool opivi_trans(uint32_t vd, uint32_t imm, uint32_t vs2, uint32_t vm, + gen_helper_opivx *fn, DisasContext *s, int zx) +{ + TCGv_ptr dest, src2, mask; + TCGv src1; + TCGv_i32 desc; + uint32_t data = 0; + + TCGLabel *over = gen_new_label(); + tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + + dest = tcg_temp_new_ptr(); + mask = tcg_temp_new_ptr(); + src2 = tcg_temp_new_ptr(); + if (zx) { + src1 = tcg_const_tl(imm); + } else { + src1 = tcg_const_tl(sextract64(imm, 0, 5)); + } + data = FIELD_DP32(data, VDATA, MLEN, s->mlen); + data = FIELD_DP32(data, VDATA, VM, vm); + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); + desc = tcg_const_i32(simd_desc(0, s->vlen / 8, data)); + + tcg_gen_addi_ptr(dest, cpu_env, vreg_ofs(s, vd)); + tcg_gen_addi_ptr(src2, cpu_env, vreg_ofs(s, vs2)); + tcg_gen_addi_ptr(mask, cpu_env, vreg_ofs(s, 0)); + + fn(dest, mask, src1, src2, cpu_env, desc); + + tcg_temp_free_ptr(dest); + tcg_temp_free_ptr(mask); + tcg_temp_free_ptr(src2); + tcg_temp_free(src1); + tcg_temp_free_i32(desc); + gen_set_label(over); + return true; +} + +typedef void GVecGen2iFn(unsigned, uint32_t, uint32_t, int64_t, + uint32_t, uint32_t); + +static inline bool +do_opivi_gvec(DisasContext *s, arg_rmrr *a, GVecGen2iFn *gvec_fn, + gen_helper_opivx *fn, int zx) +{ + if (!opivx_check(s, a)) { + return false; + } + + if (a->vm && s->vl_eq_vlmax) { + if (zx) { + gvec_fn(s->sew, vreg_ofs(s, a->rd), vreg_ofs(s, a->rs2), + extract64(a->rs1, 0, 5), MAXSZ(s), MAXSZ(s)); + } else { + gvec_fn(s->sew, vreg_ofs(s, a->rd), vreg_ofs(s, a->rs2), + sextract64(a->rs1, 0, 5), MAXSZ(s), MAXSZ(s)); + } + } else { + return opivi_trans(a->rd, a->rs1, a->rs2, a->vm, fn, s, zx); + } + return true; +} + +/* OPIVI with GVEC IR */ +#define GEN_OPIVI_GVEC_TRANS(NAME, ZX, OPIVX, SUF) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + static gen_helper_opivx * const fns[4] = { \ + gen_helper_##OPIVX##_b, gen_helper_##OPIVX##_h, \ + gen_helper_##OPIVX##_w, gen_helper_##OPIVX##_d, \ + }; \ + return do_opivi_gvec(s, a, tcg_gen_gvec_##SUF, \ + fns[s->sew], ZX); \ +} + +GEN_OPIVI_GVEC_TRANS(vadd_vi, 0, vadd_vx, addi) + +static void tcg_gen_gvec_rsubi(unsigned vece, uint32_t dofs, uint32_t aofs, + int64_t c, uint32_t oprsz, uint32_t maxsz) +{ + TCGv_i64 tmp = tcg_const_i64(c); + tcg_gen_gvec_rsubs(vece, dofs, aofs, tmp, oprsz, maxsz); + tcg_temp_free_i64(tmp); +} + +GEN_OPIVI_GVEC_TRANS(vrsub_vi, 0, vrsub_vx, rsubi) diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -XXX,XX +XXX,XX @@ GEN_VEXT_AMO(vamominw_v_w, int32_t, int32_t, idx_w, clearl) GEN_VEXT_AMO(vamomaxw_v_w, int32_t, int32_t, idx_w, clearl) GEN_VEXT_AMO(vamominuw_v_w, uint32_t, uint32_t, idx_w, clearl) GEN_VEXT_AMO(vamomaxuw_v_w, uint32_t, uint32_t, idx_w, clearl) + +/* + *** Vector Integer Arithmetic Instructions + */ + +/* expand macro args before macro */ +#define RVVCALL(macro, ...) macro(__VA_ARGS__) + +/* (TD, T1, T2, TX1, TX2) */ +#define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t +#define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t +#define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t +#define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t + +/* operation of two vector elements */ +typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i); + +#define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ +static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \ +{ \ + TX1 s1 = *((T1 *)vs1 + HS1(i)); \ + TX2 s2 = *((T2 *)vs2 + HS2(i)); \ + *((TD *)vd + HD(i)) = OP(s2, s1); \ +} +#define DO_SUB(N, M) (N - M) +#define DO_RSUB(N, M) (M - N) + +RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD) +RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD) +RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD) +RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD) +RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB) +RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB) +RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB) +RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB) + +static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2, + CPURISCVState *env, uint32_t desc, + uint32_t esz, uint32_t dsz, + opivv2_fn *fn, clear_fn *clearfn) +{ + uint32_t vlmax = vext_maxsz(desc) / esz; + uint32_t mlen = vext_mlen(desc); + uint32_t vm = vext_vm(desc); + uint32_t vl = env->vl; + uint32_t i; + + for (i = 0; i < vl; i++) { + if (!vm && !vext_elem_mask(v0, mlen, i)) { + continue; + } + fn(vd, vs1, vs2, i); + } + clearfn(vd, vl, vl * dsz, vlmax * dsz); +} + +/* generate the helpers for OPIVV */ +#define GEN_VEXT_VV(NAME, ESZ, DSZ, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, void *vs1, \ + void *vs2, CPURISCVState *env, \ + uint32_t desc) \ +{ \ + do_vext_vv(vd, v0, vs1, vs2, env, desc, ESZ, DSZ, \ + do_##NAME, CLEAR_FN); \ +} + +GEN_VEXT_VV(vadd_vv_b, 1, 1, clearb) +GEN_VEXT_VV(vadd_vv_h, 2, 2, clearh) +GEN_VEXT_VV(vadd_vv_w, 4, 4, clearl) +GEN_VEXT_VV(vadd_vv_d, 8, 8, clearq) +GEN_VEXT_VV(vsub_vv_b, 1, 1, clearb) +GEN_VEXT_VV(vsub_vv_h, 2, 2, clearh) +GEN_VEXT_VV(vsub_vv_w, 4, 4, clearl) +GEN_VEXT_VV(vsub_vv_d, 8, 8, clearq) + +typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i); + +/* + * (T1)s1 gives the real operator type. + * (TX1)(T1)s1 expands the operator type of widen or narrow operations. + */ +#define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ +static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \ +{ \ + TX2 s2 = *((T2 *)vs2 + HS2(i)); \ + *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1); \ +} + +RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD) +RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD) +RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD) +RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD) +RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB) +RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB) +RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB) +RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB) +RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB) +RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB) +RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB) +RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB) + +static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2, + CPURISCVState *env, uint32_t desc, + uint32_t esz, uint32_t dsz, + opivx2_fn fn, clear_fn *clearfn) +{ + uint32_t vlmax = vext_maxsz(desc) / esz; + uint32_t mlen = vext_mlen(desc); + uint32_t vm = vext_vm(desc); + uint32_t vl = env->vl; + uint32_t i; + + for (i = 0; i < vl; i++) { + if (!vm && !vext_elem_mask(v0, mlen, i)) { + continue; + } + fn(vd, s1, vs2, i); + } + clearfn(vd, vl, vl * dsz, vlmax * dsz); +} + +/* generate the helpers for OPIVX */ +#define GEN_VEXT_VX(NAME, ESZ, DSZ, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ + void *vs2, CPURISCVState *env, \ + uint32_t desc) \ +{ \ + do_vext_vx(vd, v0, s1, vs2, env, desc, ESZ, DSZ, \ + do_##NAME, CLEAR_FN); \ +} + +GEN_VEXT_VX(vadd_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vadd_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vadd_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vadd_vx_d, 8, 8, clearq) +GEN_VEXT_VX(vsub_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vsub_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vsub_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vsub_vx_d, 8, 8, clearq) +GEN_VEXT_VX(vrsub_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vrsub_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vrsub_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vrsub_vx_d, 8, 8, clearq) + +void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc) +{ + intptr_t oprsz = simd_oprsz(desc); + intptr_t i; + + for (i = 0; i < oprsz; i += sizeof(uint8_t)) { + *(uint8_t *)(d + i) = (uint8_t)b - *(uint8_t *)(a + i); + } +} + +void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc) +{ + intptr_t oprsz = simd_oprsz(desc); + intptr_t i; + + for (i = 0; i < oprsz; i += sizeof(uint16_t)) { + *(uint16_t *)(d + i) = (uint16_t)b - *(uint16_t *)(a + i); + } +} + +void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t desc) +{ + intptr_t oprsz = simd_oprsz(desc); + intptr_t i; + + for (i = 0; i < oprsz; i += sizeof(uint32_t)) { + *(uint32_t *)(d + i) = (uint32_t)b - *(uint32_t *)(a + i); + } +} + +void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc) +{ + intptr_t oprsz = simd_oprsz(desc); + intptr_t i; + + for (i = 0; i < oprsz; i += sizeof(uint64_t)) { + *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i); + } +} -- 2.27.0
From: LIU Zhiwei <zhiwei_liu@c-sky.com> Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Message-id: 20200623215920.2594-12-zhiwei_liu@c-sky.com Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/helper.h | 49 +++++++ target/riscv/insn32.decode | 16 ++ target/riscv/insn_trans/trans_rvv.inc.c | 186 ++++++++++++++++++++++++ target/riscv/vector_helper.c | 111 ++++++++++++++ 4 files changed, 362 insertions(+) diff --git a/target/riscv/helper.h b/target/riscv/helper.h index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/helper.h +++ b/target/riscv/helper.h @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(vec_rsubs8, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) DEF_HELPER_FLAGS_4(vec_rsubs16, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) DEF_HELPER_FLAGS_4(vec_rsubs32, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) DEF_HELPER_FLAGS_4(vec_rsubs64, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) + +DEF_HELPER_6(vwaddu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwaddu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwaddu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsubu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsubu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsubu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwadd_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwadd_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwadd_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsub_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsub_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsub_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwaddu_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwaddu_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwaddu_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsubu_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsubu_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsubu_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwadd_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwadd_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwadd_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsub_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsub_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsub_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwaddu_wv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwaddu_wv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwaddu_wv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsubu_wv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsubu_wv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsubu_wv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwadd_wv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwadd_wv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwadd_wv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsub_wv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsub_wv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsub_wv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwaddu_wx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwaddu_wx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwaddu_wx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsubu_wx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsubu_wx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsubu_wx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwadd_wx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwadd_wx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwadd_wx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsub_wx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsub_wx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsub_wx_w, void, ptr, ptr, tl, ptr, env, i32) diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn32.decode +++ b/target/riscv/insn32.decode @@ -XXX,XX +XXX,XX @@ vsub_vv 000010 . ..... ..... 000 ..... 1010111 @r_vm vsub_vx 000010 . ..... ..... 100 ..... 1010111 @r_vm vrsub_vx 000011 . ..... ..... 100 ..... 1010111 @r_vm vrsub_vi 000011 . ..... ..... 011 ..... 1010111 @r_vm +vwaddu_vv 110000 . ..... ..... 010 ..... 1010111 @r_vm +vwaddu_vx 110000 . ..... ..... 110 ..... 1010111 @r_vm +vwadd_vv 110001 . ..... ..... 010 ..... 1010111 @r_vm +vwadd_vx 110001 . ..... ..... 110 ..... 1010111 @r_vm +vwsubu_vv 110010 . ..... ..... 010 ..... 1010111 @r_vm +vwsubu_vx 110010 . ..... ..... 110 ..... 1010111 @r_vm +vwsub_vv 110011 . ..... ..... 010 ..... 1010111 @r_vm +vwsub_vx 110011 . ..... ..... 110 ..... 1010111 @r_vm +vwaddu_wv 110100 . ..... ..... 010 ..... 1010111 @r_vm +vwaddu_wx 110100 . ..... ..... 110 ..... 1010111 @r_vm +vwadd_wv 110101 . ..... ..... 010 ..... 1010111 @r_vm +vwadd_wx 110101 . ..... ..... 110 ..... 1010111 @r_vm +vwsubu_wv 110110 . ..... ..... 010 ..... 1010111 @r_vm +vwsubu_wx 110110 . ..... ..... 110 ..... 1010111 @r_vm +vwsub_wv 110111 . ..... ..... 010 ..... 1010111 @r_vm +vwsub_wx 110111 . ..... ..... 110 ..... 1010111 @r_vm vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm vsetvl 1000000 ..... ..... 111 ..... 1010111 @r diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn_trans/trans_rvv.inc.c +++ b/target/riscv/insn_trans/trans_rvv.inc.c @@ -XXX,XX +XXX,XX @@ static bool vext_check_nf(DisasContext *s, uint32_t nf) return (1 << s->lmul) * nf <= 8; } +/* + * The destination vector register group cannot overlap a source vector register + * group of a different element width. (Section 11.2) + */ +static inline bool vext_check_overlap_group(int rd, int dlen, int rs, int slen) +{ + return ((rd >= rs + slen) || (rs >= rd + dlen)); +} /* common translation macro */ #define GEN_VEXT_TRANS(NAME, SEQ, ARGTYPE, OP, CHECK) \ static bool trans_##NAME(DisasContext *s, arg_##ARGTYPE *a)\ @@ -XXX,XX +XXX,XX @@ static void tcg_gen_gvec_rsubi(unsigned vece, uint32_t dofs, uint32_t aofs, } GEN_OPIVI_GVEC_TRANS(vrsub_vi, 0, vrsub_vx, rsubi) + +/* Vector Widening Integer Add/Subtract */ + +/* OPIVV with WIDEN */ +static bool opivv_widen_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, true) && + vext_check_reg(s, a->rd, true) && + vext_check_reg(s, a->rs2, false) && + vext_check_reg(s, a->rs1, false) && + vext_check_overlap_group(a->rd, 2 << s->lmul, a->rs2, + 1 << s->lmul) && + vext_check_overlap_group(a->rd, 2 << s->lmul, a->rs1, + 1 << s->lmul) && + (s->lmul < 0x3) && (s->sew < 0x3)); +} + +static bool do_opivv_widen(DisasContext *s, arg_rmrr *a, + gen_helper_gvec_4_ptr *fn, + bool (*checkfn)(DisasContext *, arg_rmrr *)) +{ + if (checkfn(s, a)) { + uint32_t data = 0; + TCGLabel *over = gen_new_label(); + tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + + data = FIELD_DP32(data, VDATA, MLEN, s->mlen); + data = FIELD_DP32(data, VDATA, VM, a->vm); + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); + tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), + vreg_ofs(s, a->rs1), + vreg_ofs(s, a->rs2), + cpu_env, 0, s->vlen / 8, + data, fn); + gen_set_label(over); + return true; + } + return false; +} + +#define GEN_OPIVV_WIDEN_TRANS(NAME, CHECK) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + static gen_helper_gvec_4_ptr * const fns[3] = { \ + gen_helper_##NAME##_b, \ + gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w \ + }; \ + return do_opivv_widen(s, a, fns[s->sew], CHECK); \ +} + +GEN_OPIVV_WIDEN_TRANS(vwaddu_vv, opivv_widen_check) +GEN_OPIVV_WIDEN_TRANS(vwadd_vv, opivv_widen_check) +GEN_OPIVV_WIDEN_TRANS(vwsubu_vv, opivv_widen_check) +GEN_OPIVV_WIDEN_TRANS(vwsub_vv, opivv_widen_check) + +/* OPIVX with WIDEN */ +static bool opivx_widen_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, true) && + vext_check_reg(s, a->rd, true) && + vext_check_reg(s, a->rs2, false) && + vext_check_overlap_group(a->rd, 2 << s->lmul, a->rs2, + 1 << s->lmul) && + (s->lmul < 0x3) && (s->sew < 0x3)); +} + +static bool do_opivx_widen(DisasContext *s, arg_rmrr *a, + gen_helper_opivx *fn) +{ + if (opivx_widen_check(s, a)) { + return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fn, s); + } + return true; +} + +#define GEN_OPIVX_WIDEN_TRANS(NAME) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + static gen_helper_opivx * const fns[3] = { \ + gen_helper_##NAME##_b, \ + gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w \ + }; \ + return do_opivx_widen(s, a, fns[s->sew]); \ +} + +GEN_OPIVX_WIDEN_TRANS(vwaddu_vx) +GEN_OPIVX_WIDEN_TRANS(vwadd_vx) +GEN_OPIVX_WIDEN_TRANS(vwsubu_vx) +GEN_OPIVX_WIDEN_TRANS(vwsub_vx) + +/* WIDEN OPIVV with WIDEN */ +static bool opiwv_widen_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, true) && + vext_check_reg(s, a->rd, true) && + vext_check_reg(s, a->rs2, true) && + vext_check_reg(s, a->rs1, false) && + vext_check_overlap_group(a->rd, 2 << s->lmul, a->rs1, + 1 << s->lmul) && + (s->lmul < 0x3) && (s->sew < 0x3)); +} + +static bool do_opiwv_widen(DisasContext *s, arg_rmrr *a, + gen_helper_gvec_4_ptr *fn) +{ + if (opiwv_widen_check(s, a)) { + uint32_t data = 0; + TCGLabel *over = gen_new_label(); + tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + + data = FIELD_DP32(data, VDATA, MLEN, s->mlen); + data = FIELD_DP32(data, VDATA, VM, a->vm); + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); + tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), + vreg_ofs(s, a->rs1), + vreg_ofs(s, a->rs2), + cpu_env, 0, s->vlen / 8, data, fn); + gen_set_label(over); + return true; + } + return false; +} + +#define GEN_OPIWV_WIDEN_TRANS(NAME) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + static gen_helper_gvec_4_ptr * const fns[3] = { \ + gen_helper_##NAME##_b, \ + gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w \ + }; \ + return do_opiwv_widen(s, a, fns[s->sew]); \ +} + +GEN_OPIWV_WIDEN_TRANS(vwaddu_wv) +GEN_OPIWV_WIDEN_TRANS(vwadd_wv) +GEN_OPIWV_WIDEN_TRANS(vwsubu_wv) +GEN_OPIWV_WIDEN_TRANS(vwsub_wv) + +/* WIDEN OPIVX with WIDEN */ +static bool opiwx_widen_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, true) && + vext_check_reg(s, a->rd, true) && + vext_check_reg(s, a->rs2, true) && + (s->lmul < 0x3) && (s->sew < 0x3)); +} + +static bool do_opiwx_widen(DisasContext *s, arg_rmrr *a, + gen_helper_opivx *fn) +{ + if (opiwx_widen_check(s, a)) { + return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fn, s); + } + return false; +} + +#define GEN_OPIWX_WIDEN_TRANS(NAME) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + static gen_helper_opivx * const fns[3] = { \ + gen_helper_##NAME##_b, \ + gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w \ + }; \ + return do_opiwx_widen(s, a, fns[s->sew]); \ +} + +GEN_OPIWX_WIDEN_TRANS(vwaddu_wx) +GEN_OPIWX_WIDEN_TRANS(vwadd_wx) +GEN_OPIWX_WIDEN_TRANS(vwsubu_wx) +GEN_OPIWX_WIDEN_TRANS(vwsub_wx) diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -XXX,XX +XXX,XX @@ void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc) *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i); } } + +/* Vector Widening Integer Add/Subtract */ +#define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t +#define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t +#define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t +#define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t +#define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t +#define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t +#define WOP_WUUU_B uint16_t, uint8_t, uint16_t, uint16_t, uint16_t +#define WOP_WUUU_H uint32_t, uint16_t, uint32_t, uint32_t, uint32_t +#define WOP_WUUU_W uint64_t, uint32_t, uint64_t, uint64_t, uint64_t +#define WOP_WSSS_B int16_t, int8_t, int16_t, int16_t, int16_t +#define WOP_WSSS_H int32_t, int16_t, int32_t, int32_t, int32_t +#define WOP_WSSS_W int64_t, int32_t, int64_t, int64_t, int64_t +RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD) +RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD) +RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD) +RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB) +RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB) +RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB) +RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD) +RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD) +RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD) +RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB) +RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB) +RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB) +RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD) +RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_ADD) +RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD) +RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB) +RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB) +RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB) +RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD) +RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD) +RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD) +RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB) +RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB) +RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB) +GEN_VEXT_VV(vwaddu_vv_b, 1, 2, clearh) +GEN_VEXT_VV(vwaddu_vv_h, 2, 4, clearl) +GEN_VEXT_VV(vwaddu_vv_w, 4, 8, clearq) +GEN_VEXT_VV(vwsubu_vv_b, 1, 2, clearh) +GEN_VEXT_VV(vwsubu_vv_h, 2, 4, clearl) +GEN_VEXT_VV(vwsubu_vv_w, 4, 8, clearq) +GEN_VEXT_VV(vwadd_vv_b, 1, 2, clearh) +GEN_VEXT_VV(vwadd_vv_h, 2, 4, clearl) +GEN_VEXT_VV(vwadd_vv_w, 4, 8, clearq) +GEN_VEXT_VV(vwsub_vv_b, 1, 2, clearh) +GEN_VEXT_VV(vwsub_vv_h, 2, 4, clearl) +GEN_VEXT_VV(vwsub_vv_w, 4, 8, clearq) +GEN_VEXT_VV(vwaddu_wv_b, 1, 2, clearh) +GEN_VEXT_VV(vwaddu_wv_h, 2, 4, clearl) +GEN_VEXT_VV(vwaddu_wv_w, 4, 8, clearq) +GEN_VEXT_VV(vwsubu_wv_b, 1, 2, clearh) +GEN_VEXT_VV(vwsubu_wv_h, 2, 4, clearl) +GEN_VEXT_VV(vwsubu_wv_w, 4, 8, clearq) +GEN_VEXT_VV(vwadd_wv_b, 1, 2, clearh) +GEN_VEXT_VV(vwadd_wv_h, 2, 4, clearl) +GEN_VEXT_VV(vwadd_wv_w, 4, 8, clearq) +GEN_VEXT_VV(vwsub_wv_b, 1, 2, clearh) +GEN_VEXT_VV(vwsub_wv_h, 2, 4, clearl) +GEN_VEXT_VV(vwsub_wv_w, 4, 8, clearq) + +RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD) +RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD) +RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD) +RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB) +RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB) +RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB) +RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD) +RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD) +RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD) +RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB) +RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB) +RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB) +RVVCALL(OPIVX2, vwaddu_wx_b, WOP_WUUU_B, H2, H1, DO_ADD) +RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD) +RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD) +RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB) +RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB) +RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB) +RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD) +RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD) +RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD) +RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB) +RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB) +RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB) +GEN_VEXT_VX(vwaddu_vx_b, 1, 2, clearh) +GEN_VEXT_VX(vwaddu_vx_h, 2, 4, clearl) +GEN_VEXT_VX(vwaddu_vx_w, 4, 8, clearq) +GEN_VEXT_VX(vwsubu_vx_b, 1, 2, clearh) +GEN_VEXT_VX(vwsubu_vx_h, 2, 4, clearl) +GEN_VEXT_VX(vwsubu_vx_w, 4, 8, clearq) +GEN_VEXT_VX(vwadd_vx_b, 1, 2, clearh) +GEN_VEXT_VX(vwadd_vx_h, 2, 4, clearl) +GEN_VEXT_VX(vwadd_vx_w, 4, 8, clearq) +GEN_VEXT_VX(vwsub_vx_b, 1, 2, clearh) +GEN_VEXT_VX(vwsub_vx_h, 2, 4, clearl) +GEN_VEXT_VX(vwsub_vx_w, 4, 8, clearq) +GEN_VEXT_VX(vwaddu_wx_b, 1, 2, clearh) +GEN_VEXT_VX(vwaddu_wx_h, 2, 4, clearl) +GEN_VEXT_VX(vwaddu_wx_w, 4, 8, clearq) +GEN_VEXT_VX(vwsubu_wx_b, 1, 2, clearh) +GEN_VEXT_VX(vwsubu_wx_h, 2, 4, clearl) +GEN_VEXT_VX(vwsubu_wx_w, 4, 8, clearq) +GEN_VEXT_VX(vwadd_wx_b, 1, 2, clearh) +GEN_VEXT_VX(vwadd_wx_h, 2, 4, clearl) +GEN_VEXT_VX(vwadd_wx_w, 4, 8, clearq) +GEN_VEXT_VX(vwsub_wx_b, 1, 2, clearh) +GEN_VEXT_VX(vwsub_wx_h, 2, 4, clearl) +GEN_VEXT_VX(vwsub_wx_w, 4, 8, clearq) -- 2.27.0
From: LIU Zhiwei <zhiwei_liu@c-sky.com> Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Message-id: 20200623215920.2594-13-zhiwei_liu@c-sky.com Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/helper.h | 33 ++++++ target/riscv/insn32.decode | 11 ++ target/riscv/insn_trans/trans_rvv.inc.c | 113 +++++++++++++++++++ target/riscv/vector_helper.c | 137 ++++++++++++++++++++++++ 4 files changed, 294 insertions(+) diff --git a/target/riscv/helper.h b/target/riscv/helper.h index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/helper.h +++ b/target/riscv/helper.h @@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vwadd_wx_w, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vwsub_wx_b, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vwsub_wx_h, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vwsub_wx_w, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vadc_vvm_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vadc_vvm_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vadc_vvm_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vadc_vvm_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsbc_vvm_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsbc_vvm_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsbc_vvm_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsbc_vvm_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmadc_vvm_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmadc_vvm_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmadc_vvm_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmadc_vvm_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmsbc_vvm_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmsbc_vvm_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmsbc_vvm_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmsbc_vvm_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vadc_vxm_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vadc_vxm_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vadc_vxm_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vadc_vxm_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsbc_vxm_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsbc_vxm_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsbc_vxm_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsbc_vxm_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmadc_vxm_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmadc_vxm_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmadc_vxm_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmadc_vxm_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsbc_vxm_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsbc_vxm_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsbc_vxm_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsbc_vxm_d, void, ptr, ptr, tl, ptr, env, i32) diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn32.decode +++ b/target/riscv/insn32.decode @@ -XXX,XX +XXX,XX @@ @r2_nfvm ... ... vm:1 ..... ..... ... ..... ....... &r2nfvm %nf %rs1 %rd @r_nfvm ... ... vm:1 ..... ..... ... ..... ....... &rnfvm %nf %rs2 %rs1 %rd @r_vm ...... vm:1 ..... ..... ... ..... ....... &rmrr %rs2 %rs1 %rd +@r_vm_1 ...... . ..... ..... ... ..... ....... &rmrr vm=1 %rs2 %rs1 %rd @r_wdvm ..... wd:1 vm:1 ..... ..... ... ..... ....... &rwdvm %rs2 %rs1 %rd @r2_zimm . zimm:11 ..... ... ..... ....... %rs1 %rd @@ -XXX,XX +XXX,XX @@ vwsubu_wv 110110 . ..... ..... 010 ..... 1010111 @r_vm vwsubu_wx 110110 . ..... ..... 110 ..... 1010111 @r_vm vwsub_wv 110111 . ..... ..... 010 ..... 1010111 @r_vm vwsub_wx 110111 . ..... ..... 110 ..... 1010111 @r_vm +vadc_vvm 010000 1 ..... ..... 000 ..... 1010111 @r_vm_1 +vadc_vxm 010000 1 ..... ..... 100 ..... 1010111 @r_vm_1 +vadc_vim 010000 1 ..... ..... 011 ..... 1010111 @r_vm_1 +vmadc_vvm 010001 1 ..... ..... 000 ..... 1010111 @r_vm_1 +vmadc_vxm 010001 1 ..... ..... 100 ..... 1010111 @r_vm_1 +vmadc_vim 010001 1 ..... ..... 011 ..... 1010111 @r_vm_1 +vsbc_vvm 010010 1 ..... ..... 000 ..... 1010111 @r_vm_1 +vsbc_vxm 010010 1 ..... ..... 100 ..... 1010111 @r_vm_1 +vmsbc_vvm 010011 1 ..... ..... 000 ..... 1010111 @r_vm_1 +vmsbc_vxm 010011 1 ..... ..... 100 ..... 1010111 @r_vm_1 vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm vsetvl 1000000 ..... ..... 111 ..... 1010111 @r diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn_trans/trans_rvv.inc.c +++ b/target/riscv/insn_trans/trans_rvv.inc.c @@ -XXX,XX +XXX,XX @@ GEN_OPIWX_WIDEN_TRANS(vwaddu_wx) GEN_OPIWX_WIDEN_TRANS(vwadd_wx) GEN_OPIWX_WIDEN_TRANS(vwsubu_wx) GEN_OPIWX_WIDEN_TRANS(vwsub_wx) + +/* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */ +/* OPIVV without GVEC IR */ +#define GEN_OPIVV_TRANS(NAME, CHECK) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + if (CHECK(s, a)) { \ + uint32_t data = 0; \ + static gen_helper_gvec_4_ptr * const fns[4] = { \ + gen_helper_##NAME##_b, gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w, gen_helper_##NAME##_d, \ + }; \ + TCGLabel *over = gen_new_label(); \ + tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ + \ + data = FIELD_DP32(data, VDATA, MLEN, s->mlen); \ + data = FIELD_DP32(data, VDATA, VM, a->vm); \ + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ + tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ + vreg_ofs(s, a->rs1), \ + vreg_ofs(s, a->rs2), cpu_env, 0, \ + s->vlen / 8, data, fns[s->sew]); \ + gen_set_label(over); \ + return true; \ + } \ + return false; \ +} + +/* + * For vadc and vsbc, an illegal instruction exception is raised if the + * destination vector register is v0 and LMUL > 1. (Section 12.3) + */ +static bool opivv_vadc_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_reg(s, a->rd, false) && + vext_check_reg(s, a->rs2, false) && + vext_check_reg(s, a->rs1, false) && + ((a->rd != 0) || (s->lmul == 0))); +} + +GEN_OPIVV_TRANS(vadc_vvm, opivv_vadc_check) +GEN_OPIVV_TRANS(vsbc_vvm, opivv_vadc_check) + +/* + * For vmadc and vmsbc, an illegal instruction exception is raised if the + * destination vector register overlaps a source vector register group. + */ +static bool opivv_vmadc_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_reg(s, a->rs2, false) && + vext_check_reg(s, a->rs1, false) && + vext_check_overlap_group(a->rd, 1, a->rs1, 1 << s->lmul) && + vext_check_overlap_group(a->rd, 1, a->rs2, 1 << s->lmul)); +} + +GEN_OPIVV_TRANS(vmadc_vvm, opivv_vmadc_check) +GEN_OPIVV_TRANS(vmsbc_vvm, opivv_vmadc_check) + +static bool opivx_vadc_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_reg(s, a->rd, false) && + vext_check_reg(s, a->rs2, false) && + ((a->rd != 0) || (s->lmul == 0))); +} + +/* OPIVX without GVEC IR */ +#define GEN_OPIVX_TRANS(NAME, CHECK) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + if (CHECK(s, a)) { \ + static gen_helper_opivx * const fns[4] = { \ + gen_helper_##NAME##_b, gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w, gen_helper_##NAME##_d, \ + }; \ + \ + return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fns[s->sew], s);\ + } \ + return false; \ +} + +GEN_OPIVX_TRANS(vadc_vxm, opivx_vadc_check) +GEN_OPIVX_TRANS(vsbc_vxm, opivx_vadc_check) + +static bool opivx_vmadc_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_reg(s, a->rs2, false) && + vext_check_overlap_group(a->rd, 1, a->rs2, 1 << s->lmul)); +} + +GEN_OPIVX_TRANS(vmadc_vxm, opivx_vmadc_check) +GEN_OPIVX_TRANS(vmsbc_vxm, opivx_vmadc_check) + +/* OPIVI without GVEC IR */ +#define GEN_OPIVI_TRANS(NAME, ZX, OPIVX, CHECK) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + if (CHECK(s, a)) { \ + static gen_helper_opivx * const fns[4] = { \ + gen_helper_##OPIVX##_b, gen_helper_##OPIVX##_h, \ + gen_helper_##OPIVX##_w, gen_helper_##OPIVX##_d, \ + }; \ + return opivi_trans(a->rd, a->rs1, a->rs2, a->vm, \ + fns[s->sew], s, ZX); \ + } \ + return false; \ +} + +GEN_OPIVI_TRANS(vadc_vim, 0, vadc_vxm, opivx_vadc_check) +GEN_OPIVI_TRANS(vmadc_vim, 0, vmadc_vxm, opivx_vmadc_check) diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -XXX,XX +XXX,XX @@ static void clearq(void *vd, uint32_t idx, uint32_t cnt, uint32_t tot) vext_clear(cur, cnt, tot); } +static inline void vext_set_elem_mask(void *v0, int mlen, int index, + uint8_t value) +{ + int idx = (index * mlen) / 64; + int pos = (index * mlen) % 64; + uint64_t old = ((uint64_t *)v0)[idx]; + ((uint64_t *)v0)[idx] = deposit64(old, pos, mlen, value); +} static inline int vext_elem_mask(void *v0, int mlen, int index) { @@ -XXX,XX +XXX,XX @@ GEN_VEXT_VX(vwadd_wx_w, 4, 8, clearq) GEN_VEXT_VX(vwsub_wx_b, 1, 2, clearh) GEN_VEXT_VX(vwsub_wx_h, 2, 4, clearl) GEN_VEXT_VX(vwsub_wx_w, 4, 8, clearq) + +/* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */ +#define DO_VADC(N, M, C) (N + M + C) +#define DO_VSBC(N, M, C) (N - M - C) + +#define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vl = env->vl; \ + uint32_t esz = sizeof(ETYPE); \ + uint32_t vlmax = vext_maxsz(desc) / esz; \ + uint32_t i; \ + \ + for (i = 0; i < vl; i++) { \ + ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ + ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ + uint8_t carry = vext_elem_mask(v0, mlen, i); \ + \ + *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry); \ + } \ + CLEAR_FN(vd, vl, vl * esz, vlmax * esz); \ +} + +GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t, H1, DO_VADC, clearb) +GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC, clearh) +GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC, clearl) +GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC, clearq) + +GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t, H1, DO_VSBC, clearb) +GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC, clearh) +GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC, clearl) +GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC, clearq) + +#define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vl = env->vl; \ + uint32_t esz = sizeof(ETYPE); \ + uint32_t vlmax = vext_maxsz(desc) / esz; \ + uint32_t i; \ + \ + for (i = 0; i < vl; i++) { \ + ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ + uint8_t carry = vext_elem_mask(v0, mlen, i); \ + \ + *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry);\ + } \ + CLEAR_FN(vd, vl, vl * esz, vlmax * esz); \ +} + +GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t, H1, DO_VADC, clearb) +GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, DO_VADC, clearh) +GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC, clearl) +GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC, clearq) + +GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t, H1, DO_VSBC, clearb) +GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC, clearh) +GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC, clearl) +GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC, clearq) + +#define DO_MADC(N, M, C) (C ? (__typeof(N))(N + M + 1) <= N : \ + (__typeof(N))(N + M) < N) +#define DO_MSBC(N, M, C) (C ? N <= M : N < M) + +#define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP) \ +void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vl = env->vl; \ + uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE); \ + uint32_t i; \ + \ + for (i = 0; i < vl; i++) { \ + ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ + ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ + uint8_t carry = vext_elem_mask(v0, mlen, i); \ + \ + vext_set_elem_mask(vd, mlen, i, DO_OP(s2, s1, carry));\ + } \ + for (; i < vlmax; i++) { \ + vext_set_elem_mask(vd, mlen, i, 0); \ + } \ +} + +GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t, H1, DO_MADC) +GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC) +GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC) +GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC) + +GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t, H1, DO_MSBC) +GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC) +GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC) +GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC) + +#define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP) \ +void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ + void *vs2, CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vl = env->vl; \ + uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE); \ + uint32_t i; \ + \ + for (i = 0; i < vl; i++) { \ + ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ + uint8_t carry = vext_elem_mask(v0, mlen, i); \ + \ + vext_set_elem_mask(vd, mlen, i, \ + DO_OP(s2, (ETYPE)(target_long)s1, carry)); \ + } \ + for (; i < vlmax; i++) { \ + vext_set_elem_mask(vd, mlen, i, 0); \ + } \ +} + +GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t, H1, DO_MADC) +GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC) +GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC) +GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC) + +GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t, H1, DO_MSBC) +GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC) +GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC) +GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC) -- 2.27.0
From: LIU Zhiwei <zhiwei_liu@c-sky.com> Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Message-id: 20200623215920.2594-14-zhiwei_liu@c-sky.com Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/helper.h | 25 ++++++++++++ target/riscv/insn32.decode | 9 +++++ target/riscv/insn_trans/trans_rvv.inc.c | 11 ++++++ target/riscv/vector_helper.c | 51 +++++++++++++++++++++++++ 4 files changed, 96 insertions(+) diff --git a/target/riscv/helper.h b/target/riscv/helper.h index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/helper.h +++ b/target/riscv/helper.h @@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vmsbc_vxm_b, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vmsbc_vxm_h, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vmsbc_vxm_w, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vmsbc_vxm_d, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vand_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vand_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vand_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vand_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vor_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vor_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vor_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vor_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vxor_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vxor_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vxor_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vxor_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vand_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vand_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vand_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vand_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vor_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vor_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vor_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vor_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vxor_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vxor_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vxor_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vxor_vx_d, void, ptr, ptr, tl, ptr, env, i32) diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn32.decode +++ b/target/riscv/insn32.decode @@ -XXX,XX +XXX,XX @@ vsbc_vvm 010010 1 ..... ..... 000 ..... 1010111 @r_vm_1 vsbc_vxm 010010 1 ..... ..... 100 ..... 1010111 @r_vm_1 vmsbc_vvm 010011 1 ..... ..... 000 ..... 1010111 @r_vm_1 vmsbc_vxm 010011 1 ..... ..... 100 ..... 1010111 @r_vm_1 +vand_vv 001001 . ..... ..... 000 ..... 1010111 @r_vm +vand_vx 001001 . ..... ..... 100 ..... 1010111 @r_vm +vand_vi 001001 . ..... ..... 011 ..... 1010111 @r_vm +vor_vv 001010 . ..... ..... 000 ..... 1010111 @r_vm +vor_vx 001010 . ..... ..... 100 ..... 1010111 @r_vm +vor_vi 001010 . ..... ..... 011 ..... 1010111 @r_vm +vxor_vv 001011 . ..... ..... 000 ..... 1010111 @r_vm +vxor_vx 001011 . ..... ..... 100 ..... 1010111 @r_vm +vxor_vi 001011 . ..... ..... 011 ..... 1010111 @r_vm vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm vsetvl 1000000 ..... ..... 111 ..... 1010111 @r diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn_trans/trans_rvv.inc.c +++ b/target/riscv/insn_trans/trans_rvv.inc.c @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ GEN_OPIVI_TRANS(vadc_vim, 0, vadc_vxm, opivx_vadc_check) GEN_OPIVI_TRANS(vmadc_vim, 0, vmadc_vxm, opivx_vmadc_check) + +/* Vector Bitwise Logical Instructions */ +GEN_OPIVV_GVEC_TRANS(vand_vv, and) +GEN_OPIVV_GVEC_TRANS(vor_vv, or) +GEN_OPIVV_GVEC_TRANS(vxor_vv, xor) +GEN_OPIVX_GVEC_TRANS(vand_vx, ands) +GEN_OPIVX_GVEC_TRANS(vor_vx, ors) +GEN_OPIVX_GVEC_TRANS(vxor_vx, xors) +GEN_OPIVI_GVEC_TRANS(vand_vi, 0, vand_vx, andi) +GEN_OPIVI_GVEC_TRANS(vor_vi, 0, vor_vx, ori) +GEN_OPIVI_GVEC_TRANS(vxor_vi, 0, vxor_vx, xori) diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -XXX,XX +XXX,XX @@ GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t, H1, DO_MSBC) GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC) GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC) GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC) + +/* Vector Bitwise Logical Instructions */ +RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND) +RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND) +RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND) +RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND) +RVVCALL(OPIVV2, vor_vv_b, OP_SSS_B, H1, H1, H1, DO_OR) +RVVCALL(OPIVV2, vor_vv_h, OP_SSS_H, H2, H2, H2, DO_OR) +RVVCALL(OPIVV2, vor_vv_w, OP_SSS_W, H4, H4, H4, DO_OR) +RVVCALL(OPIVV2, vor_vv_d, OP_SSS_D, H8, H8, H8, DO_OR) +RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR) +RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR) +RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR) +RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR) +GEN_VEXT_VV(vand_vv_b, 1, 1, clearb) +GEN_VEXT_VV(vand_vv_h, 2, 2, clearh) +GEN_VEXT_VV(vand_vv_w, 4, 4, clearl) +GEN_VEXT_VV(vand_vv_d, 8, 8, clearq) +GEN_VEXT_VV(vor_vv_b, 1, 1, clearb) +GEN_VEXT_VV(vor_vv_h, 2, 2, clearh) +GEN_VEXT_VV(vor_vv_w, 4, 4, clearl) +GEN_VEXT_VV(vor_vv_d, 8, 8, clearq) +GEN_VEXT_VV(vxor_vv_b, 1, 1, clearb) +GEN_VEXT_VV(vxor_vv_h, 2, 2, clearh) +GEN_VEXT_VV(vxor_vv_w, 4, 4, clearl) +GEN_VEXT_VV(vxor_vv_d, 8, 8, clearq) + +RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND) +RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND) +RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, DO_AND) +RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND) +RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR) +RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR) +RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR) +RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR) +RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR) +RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR) +RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR) +RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR) +GEN_VEXT_VX(vand_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vand_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vand_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vand_vx_d, 8, 8, clearq) +GEN_VEXT_VX(vor_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vor_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vor_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vor_vx_d, 8, 8, clearq) +GEN_VEXT_VX(vxor_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vxor_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vxor_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vxor_vx_d, 8, 8, clearq) -- 2.27.0
From: LIU Zhiwei <zhiwei_liu@c-sky.com> Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Message-id: 20200623215920.2594-15-zhiwei_liu@c-sky.com Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/helper.h | 25 ++++++++ target/riscv/insn32.decode | 9 +++ target/riscv/insn_trans/trans_rvv.inc.c | 52 ++++++++++++++++ target/riscv/vector_helper.c | 79 +++++++++++++++++++++++++ 4 files changed, 165 insertions(+) diff --git a/target/riscv/helper.h b/target/riscv/helper.h index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/helper.h +++ b/target/riscv/helper.h @@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vxor_vx_b, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vxor_vx_h, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vxor_vx_w, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vxor_vx_d, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vsll_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsll_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsll_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsll_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsrl_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsrl_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsrl_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsrl_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsra_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsra_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsra_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsra_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsll_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsll_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsll_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsll_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsrl_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsrl_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsrl_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsrl_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsra_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsra_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsra_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsra_vx_d, void, ptr, ptr, tl, ptr, env, i32) diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn32.decode +++ b/target/riscv/insn32.decode @@ -XXX,XX +XXX,XX @@ vor_vi 001010 . ..... ..... 011 ..... 1010111 @r_vm vxor_vv 001011 . ..... ..... 000 ..... 1010111 @r_vm vxor_vx 001011 . ..... ..... 100 ..... 1010111 @r_vm vxor_vi 001011 . ..... ..... 011 ..... 1010111 @r_vm +vsll_vv 100101 . ..... ..... 000 ..... 1010111 @r_vm +vsll_vx 100101 . ..... ..... 100 ..... 1010111 @r_vm +vsll_vi 100101 . ..... ..... 011 ..... 1010111 @r_vm +vsrl_vv 101000 . ..... ..... 000 ..... 1010111 @r_vm +vsrl_vx 101000 . ..... ..... 100 ..... 1010111 @r_vm +vsrl_vi 101000 . ..... ..... 011 ..... 1010111 @r_vm +vsra_vv 101001 . ..... ..... 000 ..... 1010111 @r_vm +vsra_vx 101001 . ..... ..... 100 ..... 1010111 @r_vm +vsra_vi 101001 . ..... ..... 011 ..... 1010111 @r_vm vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm vsetvl 1000000 ..... ..... 111 ..... 1010111 @r diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn_trans/trans_rvv.inc.c +++ b/target/riscv/insn_trans/trans_rvv.inc.c @@ -XXX,XX +XXX,XX @@ GEN_OPIVX_GVEC_TRANS(vxor_vx, xors) GEN_OPIVI_GVEC_TRANS(vand_vi, 0, vand_vx, andi) GEN_OPIVI_GVEC_TRANS(vor_vi, 0, vor_vx, ori) GEN_OPIVI_GVEC_TRANS(vxor_vi, 0, vxor_vx, xori) + +/* Vector Single-Width Bit Shift Instructions */ +GEN_OPIVV_GVEC_TRANS(vsll_vv, shlv) +GEN_OPIVV_GVEC_TRANS(vsrl_vv, shrv) +GEN_OPIVV_GVEC_TRANS(vsra_vv, sarv) + +typedef void GVecGen2sFn32(unsigned, uint32_t, uint32_t, TCGv_i32, + uint32_t, uint32_t); + +static inline bool +do_opivx_gvec_shift(DisasContext *s, arg_rmrr *a, GVecGen2sFn32 *gvec_fn, + gen_helper_opivx *fn) +{ + if (!opivx_check(s, a)) { + return false; + } + + if (a->vm && s->vl_eq_vlmax) { + TCGv_i32 src1 = tcg_temp_new_i32(); + TCGv tmp = tcg_temp_new(); + + gen_get_gpr(tmp, a->rs1); + tcg_gen_trunc_tl_i32(src1, tmp); + tcg_gen_extract_i32(src1, src1, 0, s->sew + 3); + gvec_fn(s->sew, vreg_ofs(s, a->rd), vreg_ofs(s, a->rs2), + src1, MAXSZ(s), MAXSZ(s)); + + tcg_temp_free_i32(src1); + tcg_temp_free(tmp); + return true; + } + return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fn, s); +} + +#define GEN_OPIVX_GVEC_SHIFT_TRANS(NAME, SUF) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + static gen_helper_opivx * const fns[4] = { \ + gen_helper_##NAME##_b, gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w, gen_helper_##NAME##_d, \ + }; \ + \ + return do_opivx_gvec_shift(s, a, tcg_gen_gvec_##SUF, fns[s->sew]); \ +} + +GEN_OPIVX_GVEC_SHIFT_TRANS(vsll_vx, shls) +GEN_OPIVX_GVEC_SHIFT_TRANS(vsrl_vx, shrs) +GEN_OPIVX_GVEC_SHIFT_TRANS(vsra_vx, sars) + +GEN_OPIVI_GVEC_TRANS(vsll_vi, 1, vsll_vx, shli) +GEN_OPIVI_GVEC_TRANS(vsrl_vi, 1, vsrl_vx, shri) +GEN_OPIVI_GVEC_TRANS(vsra_vi, 1, vsra_vx, sari) diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -XXX,XX +XXX,XX @@ GEN_VEXT_VX(vxor_vx_b, 1, 1, clearb) GEN_VEXT_VX(vxor_vx_h, 2, 2, clearh) GEN_VEXT_VX(vxor_vx_w, 4, 4, clearl) GEN_VEXT_VX(vxor_vx_d, 8, 8, clearq) + +/* Vector Single-Width Bit Shift Instructions */ +#define DO_SLL(N, M) (N << (M)) +#define DO_SRL(N, M) (N >> (M)) + +/* generate the helpers for shift instructions with two vector operators */ +#define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, void *vs1, \ + void *vs2, CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vm = vext_vm(desc); \ + uint32_t vl = env->vl; \ + uint32_t esz = sizeof(TS1); \ + uint32_t vlmax = vext_maxsz(desc) / esz; \ + uint32_t i; \ + \ + for (i = 0; i < vl; i++) { \ + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ + continue; \ + } \ + TS1 s1 = *((TS1 *)vs1 + HS1(i)); \ + TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ + *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK); \ + } \ + CLEAR_FN(vd, vl, vl * esz, vlmax * esz); \ +} + +GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t, uint8_t, H1, H1, DO_SLL, 0x7, clearb) +GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf, clearh) +GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f, clearl) +GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f, clearq) + +GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7, clearb) +GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf, clearh) +GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f, clearl) +GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f, clearq) + +GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t, int8_t, H1, H1, DO_SRL, 0x7, clearb) +GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf, clearh) +GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, DO_SRL, 0x1f, clearl) +GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f, clearq) + +/* generate the helpers for shift instructions with one vector and one scalar */ +#define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ + void *vs2, CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vm = vext_vm(desc); \ + uint32_t vl = env->vl; \ + uint32_t esz = sizeof(TD); \ + uint32_t vlmax = vext_maxsz(desc) / esz; \ + uint32_t i; \ + \ + for (i = 0; i < vl; i++) { \ + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ + continue; \ + } \ + TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ + *((TD *)vd + HD(i)) = OP(s2, s1 & MASK); \ + } \ + CLEAR_FN(vd, vl, vl * esz, vlmax * esz); \ +} + +GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7, clearb) +GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf, clearh) +GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f, clearl) +GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f, clearq) + +GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7, clearb) +GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf, clearh) +GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f, clearl) +GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f, clearq) + +GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7, clearb) +GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf, clearh) +GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f, clearl) +GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f, clearq) -- 2.27.0
From: LIU Zhiwei <zhiwei_liu@c-sky.com> Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Message-id: 20200623215920.2594-16-zhiwei_liu@c-sky.com Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/helper.h | 13 ++++ target/riscv/insn32.decode | 6 ++ target/riscv/insn_trans/trans_rvv.inc.c | 90 +++++++++++++++++++++++++ target/riscv/vector_helper.c | 14 ++++ 4 files changed, 123 insertions(+) diff --git a/target/riscv/helper.h b/target/riscv/helper.h index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/helper.h +++ b/target/riscv/helper.h @@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vsra_vx_b, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vsra_vx_h, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vsra_vx_w, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vsra_vx_d, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vnsrl_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vnsrl_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vnsrl_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vnsra_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vnsra_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vnsra_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vnsrl_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vnsrl_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vnsrl_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vnsra_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vnsra_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vnsra_vx_w, void, ptr, ptr, tl, ptr, env, i32) diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn32.decode +++ b/target/riscv/insn32.decode @@ -XXX,XX +XXX,XX @@ vsrl_vi 101000 . ..... ..... 011 ..... 1010111 @r_vm vsra_vv 101001 . ..... ..... 000 ..... 1010111 @r_vm vsra_vx 101001 . ..... ..... 100 ..... 1010111 @r_vm vsra_vi 101001 . ..... ..... 011 ..... 1010111 @r_vm +vnsrl_vv 101100 . ..... ..... 000 ..... 1010111 @r_vm +vnsrl_vx 101100 . ..... ..... 100 ..... 1010111 @r_vm +vnsrl_vi 101100 . ..... ..... 011 ..... 1010111 @r_vm +vnsra_vv 101101 . ..... ..... 000 ..... 1010111 @r_vm +vnsra_vx 101101 . ..... ..... 100 ..... 1010111 @r_vm +vnsra_vi 101101 . ..... ..... 011 ..... 1010111 @r_vm vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm vsetvl 1000000 ..... ..... 111 ..... 1010111 @r diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn_trans/trans_rvv.inc.c +++ b/target/riscv/insn_trans/trans_rvv.inc.c @@ -XXX,XX +XXX,XX @@ GEN_OPIVX_GVEC_SHIFT_TRANS(vsra_vx, sars) GEN_OPIVI_GVEC_TRANS(vsll_vi, 1, vsll_vx, shli) GEN_OPIVI_GVEC_TRANS(vsrl_vi, 1, vsrl_vx, shri) GEN_OPIVI_GVEC_TRANS(vsra_vi, 1, vsra_vx, sari) + +/* Vector Narrowing Integer Right Shift Instructions */ +static bool opivv_narrow_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, false) && + vext_check_reg(s, a->rd, false) && + vext_check_reg(s, a->rs2, true) && + vext_check_reg(s, a->rs1, false) && + vext_check_overlap_group(a->rd, 1 << s->lmul, a->rs2, + 2 << s->lmul) && + (s->lmul < 0x3) && (s->sew < 0x3)); +} + +/* OPIVV with NARROW */ +#define GEN_OPIVV_NARROW_TRANS(NAME) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + if (opivv_narrow_check(s, a)) { \ + uint32_t data = 0; \ + static gen_helper_gvec_4_ptr * const fns[3] = { \ + gen_helper_##NAME##_b, \ + gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w, \ + }; \ + TCGLabel *over = gen_new_label(); \ + tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ + \ + data = FIELD_DP32(data, VDATA, MLEN, s->mlen); \ + data = FIELD_DP32(data, VDATA, VM, a->vm); \ + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ + tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ + vreg_ofs(s, a->rs1), \ + vreg_ofs(s, a->rs2), cpu_env, 0, \ + s->vlen / 8, data, fns[s->sew]); \ + gen_set_label(over); \ + return true; \ + } \ + return false; \ +} +GEN_OPIVV_NARROW_TRANS(vnsra_vv) +GEN_OPIVV_NARROW_TRANS(vnsrl_vv) + +static bool opivx_narrow_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, false) && + vext_check_reg(s, a->rd, false) && + vext_check_reg(s, a->rs2, true) && + vext_check_overlap_group(a->rd, 1 << s->lmul, a->rs2, + 2 << s->lmul) && + (s->lmul < 0x3) && (s->sew < 0x3)); +} + +/* OPIVX with NARROW */ +#define GEN_OPIVX_NARROW_TRANS(NAME) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + if (opivx_narrow_check(s, a)) { \ + static gen_helper_opivx * const fns[3] = { \ + gen_helper_##NAME##_b, \ + gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w, \ + }; \ + return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fns[s->sew], s);\ + } \ + return false; \ +} + +GEN_OPIVX_NARROW_TRANS(vnsra_vx) +GEN_OPIVX_NARROW_TRANS(vnsrl_vx) + +/* OPIVI with NARROW */ +#define GEN_OPIVI_NARROW_TRANS(NAME, ZX, OPIVX) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + if (opivx_narrow_check(s, a)) { \ + static gen_helper_opivx * const fns[3] = { \ + gen_helper_##OPIVX##_b, \ + gen_helper_##OPIVX##_h, \ + gen_helper_##OPIVX##_w, \ + }; \ + return opivi_trans(a->rd, a->rs1, a->rs2, a->vm, \ + fns[s->sew], s, ZX); \ + } \ + return false; \ +} + +GEN_OPIVI_NARROW_TRANS(vnsra_vi, 1, vnsra_vx) +GEN_OPIVI_NARROW_TRANS(vnsrl_vi, 1, vnsrl_vx) diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -XXX,XX +XXX,XX @@ GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7, clearb) GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf, clearh) GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f, clearl) GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f, clearq) + +/* Vector Narrowing Integer Right Shift Instructions */ +GEN_VEXT_SHIFT_VV(vnsrl_vv_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf, clearb) +GEN_VEXT_SHIFT_VV(vnsrl_vv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f, clearh) +GEN_VEXT_SHIFT_VV(vnsrl_vv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f, clearl) +GEN_VEXT_SHIFT_VV(vnsra_vv_b, uint8_t, int16_t, H1, H2, DO_SRL, 0xf, clearb) +GEN_VEXT_SHIFT_VV(vnsra_vv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f, clearh) +GEN_VEXT_SHIFT_VV(vnsra_vv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f, clearl) +GEN_VEXT_SHIFT_VX(vnsrl_vx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf, clearb) +GEN_VEXT_SHIFT_VX(vnsrl_vx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f, clearh) +GEN_VEXT_SHIFT_VX(vnsrl_vx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f, clearl) +GEN_VEXT_SHIFT_VX(vnsra_vx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf, clearb) +GEN_VEXT_SHIFT_VX(vnsra_vx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f, clearh) +GEN_VEXT_SHIFT_VX(vnsra_vx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f, clearl) -- 2.27.0
From: LIU Zhiwei <zhiwei_liu@c-sky.com> Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Message-id: 20200623215920.2594-17-zhiwei_liu@c-sky.com Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/helper.h | 57 +++++++++++ target/riscv/insn32.decode | 20 ++++ target/riscv/insn_trans/trans_rvv.inc.c | 46 +++++++++ target/riscv/vector_helper.c | 123 ++++++++++++++++++++++++ 4 files changed, 246 insertions(+) diff --git a/target/riscv/helper.h b/target/riscv/helper.h index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/helper.h +++ b/target/riscv/helper.h @@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vnsrl_vx_w, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vnsra_vx_b, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vnsra_vx_h, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vnsra_vx_w, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vmseq_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmseq_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmseq_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmseq_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmsne_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmsne_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmsne_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmsne_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmsltu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmsltu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmsltu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmsltu_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmslt_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmslt_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmslt_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmslt_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmsleu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmsleu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmsleu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmsleu_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmsle_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmsle_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmsle_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmsle_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmseq_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmseq_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmseq_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmseq_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsne_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsne_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsne_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsne_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsltu_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsltu_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsltu_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsltu_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmslt_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmslt_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmslt_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmslt_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsleu_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsleu_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsleu_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsleu_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsle_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsle_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsle_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsle_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsgtu_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsgtu_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsgtu_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsgtu_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsgt_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsgt_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsgt_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsgt_vx_d, void, ptr, ptr, tl, ptr, env, i32) diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn32.decode +++ b/target/riscv/insn32.decode @@ -XXX,XX +XXX,XX @@ vnsrl_vi 101100 . ..... ..... 011 ..... 1010111 @r_vm vnsra_vv 101101 . ..... ..... 000 ..... 1010111 @r_vm vnsra_vx 101101 . ..... ..... 100 ..... 1010111 @r_vm vnsra_vi 101101 . ..... ..... 011 ..... 1010111 @r_vm +vmseq_vv 011000 . ..... ..... 000 ..... 1010111 @r_vm +vmseq_vx 011000 . ..... ..... 100 ..... 1010111 @r_vm +vmseq_vi 011000 . ..... ..... 011 ..... 1010111 @r_vm +vmsne_vv 011001 . ..... ..... 000 ..... 1010111 @r_vm +vmsne_vx 011001 . ..... ..... 100 ..... 1010111 @r_vm +vmsne_vi 011001 . ..... ..... 011 ..... 1010111 @r_vm +vmsltu_vv 011010 . ..... ..... 000 ..... 1010111 @r_vm +vmsltu_vx 011010 . ..... ..... 100 ..... 1010111 @r_vm +vmslt_vv 011011 . ..... ..... 000 ..... 1010111 @r_vm +vmslt_vx 011011 . ..... ..... 100 ..... 1010111 @r_vm +vmsleu_vv 011100 . ..... ..... 000 ..... 1010111 @r_vm +vmsleu_vx 011100 . ..... ..... 100 ..... 1010111 @r_vm +vmsleu_vi 011100 . ..... ..... 011 ..... 1010111 @r_vm +vmsle_vv 011101 . ..... ..... 000 ..... 1010111 @r_vm +vmsle_vx 011101 . ..... ..... 100 ..... 1010111 @r_vm +vmsle_vi 011101 . ..... ..... 011 ..... 1010111 @r_vm +vmsgtu_vx 011110 . ..... ..... 100 ..... 1010111 @r_vm +vmsgtu_vi 011110 . ..... ..... 011 ..... 1010111 @r_vm +vmsgt_vx 011111 . ..... ..... 100 ..... 1010111 @r_vm +vmsgt_vi 011111 . ..... ..... 011 ..... 1010111 @r_vm vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm vsetvl 1000000 ..... ..... 111 ..... 1010111 @r diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn_trans/trans_rvv.inc.c +++ b/target/riscv/insn_trans/trans_rvv.inc.c @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ GEN_OPIVI_NARROW_TRANS(vnsra_vi, 1, vnsra_vx) GEN_OPIVI_NARROW_TRANS(vnsrl_vi, 1, vnsrl_vx) + +/* Vector Integer Comparison Instructions */ +/* + * For all comparison instructions, an illegal instruction exception is raised + * if the destination vector register overlaps a source vector register group + * and LMUL > 1. + */ +static bool opivv_cmp_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_reg(s, a->rs2, false) && + vext_check_reg(s, a->rs1, false) && + ((vext_check_overlap_group(a->rd, 1, a->rs1, 1 << s->lmul) && + vext_check_overlap_group(a->rd, 1, a->rs2, 1 << s->lmul)) || + (s->lmul == 0))); +} +GEN_OPIVV_TRANS(vmseq_vv, opivv_cmp_check) +GEN_OPIVV_TRANS(vmsne_vv, opivv_cmp_check) +GEN_OPIVV_TRANS(vmsltu_vv, opivv_cmp_check) +GEN_OPIVV_TRANS(vmslt_vv, opivv_cmp_check) +GEN_OPIVV_TRANS(vmsleu_vv, opivv_cmp_check) +GEN_OPIVV_TRANS(vmsle_vv, opivv_cmp_check) + +static bool opivx_cmp_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_reg(s, a->rs2, false) && + (vext_check_overlap_group(a->rd, 1, a->rs2, 1 << s->lmul) || + (s->lmul == 0))); +} + +GEN_OPIVX_TRANS(vmseq_vx, opivx_cmp_check) +GEN_OPIVX_TRANS(vmsne_vx, opivx_cmp_check) +GEN_OPIVX_TRANS(vmsltu_vx, opivx_cmp_check) +GEN_OPIVX_TRANS(vmslt_vx, opivx_cmp_check) +GEN_OPIVX_TRANS(vmsleu_vx, opivx_cmp_check) +GEN_OPIVX_TRANS(vmsle_vx, opivx_cmp_check) +GEN_OPIVX_TRANS(vmsgtu_vx, opivx_cmp_check) +GEN_OPIVX_TRANS(vmsgt_vx, opivx_cmp_check) + +GEN_OPIVI_TRANS(vmseq_vi, 0, vmseq_vx, opivx_cmp_check) +GEN_OPIVI_TRANS(vmsne_vi, 0, vmsne_vx, opivx_cmp_check) +GEN_OPIVI_TRANS(vmsleu_vi, 1, vmsleu_vx, opivx_cmp_check) +GEN_OPIVI_TRANS(vmsle_vi, 0, vmsle_vx, opivx_cmp_check) +GEN_OPIVI_TRANS(vmsgtu_vi, 1, vmsgtu_vx, opivx_cmp_check) +GEN_OPIVI_TRANS(vmsgt_vi, 0, vmsgt_vx, opivx_cmp_check) diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -XXX,XX +XXX,XX @@ GEN_VEXT_SHIFT_VX(vnsrl_vx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f, clearl) GEN_VEXT_SHIFT_VX(vnsra_vx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf, clearb) GEN_VEXT_SHIFT_VX(vnsra_vx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f, clearh) GEN_VEXT_SHIFT_VX(vnsra_vx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f, clearl) + +/* Vector Integer Comparison Instructions */ +#define DO_MSEQ(N, M) (N == M) +#define DO_MSNE(N, M) (N != M) +#define DO_MSLT(N, M) (N < M) +#define DO_MSLE(N, M) (N <= M) +#define DO_MSGT(N, M) (N > M) + +#define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP) \ +void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vm = vext_vm(desc); \ + uint32_t vl = env->vl; \ + uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE); \ + uint32_t i; \ + \ + for (i = 0; i < vl; i++) { \ + ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ + ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ + continue; \ + } \ + vext_set_elem_mask(vd, mlen, i, DO_OP(s2, s1)); \ + } \ + for (; i < vlmax; i++) { \ + vext_set_elem_mask(vd, mlen, i, 0); \ + } \ +} + +GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t, H1, DO_MSEQ) +GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ) +GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ) +GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ) + +GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t, H1, DO_MSNE) +GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE) +GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE) +GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE) + +GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t, H1, DO_MSLT) +GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT) +GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT) +GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT) + +GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t, H1, DO_MSLT) +GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, H2, DO_MSLT) +GEN_VEXT_CMP_VV(vmslt_vv_w, int32_t, H4, DO_MSLT) +GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT) + +GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t, H1, DO_MSLE) +GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE) +GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE) +GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE) + +GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t, H1, DO_MSLE) +GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE) +GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE) +GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE) + +#define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP) \ +void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vm = vext_vm(desc); \ + uint32_t vl = env->vl; \ + uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE); \ + uint32_t i; \ + \ + for (i = 0; i < vl; i++) { \ + ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ + continue; \ + } \ + vext_set_elem_mask(vd, mlen, i, \ + DO_OP(s2, (ETYPE)(target_long)s1)); \ + } \ + for (; i < vlmax; i++) { \ + vext_set_elem_mask(vd, mlen, i, 0); \ + } \ +} + +GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t, H1, DO_MSEQ) +GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ) +GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ) +GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ) + +GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t, H1, DO_MSNE) +GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE) +GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE) +GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, DO_MSNE) + +GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t, H1, DO_MSLT) +GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT) +GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT) +GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT) + +GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t, H1, DO_MSLT) +GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT) +GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, H4, DO_MSLT) +GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT) + +GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t, H1, DO_MSLE) +GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE) +GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE) +GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE) + +GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t, H1, DO_MSLE) +GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE) +GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE) +GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE) + +GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t, H1, DO_MSGT) +GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT) +GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT) +GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT) + +GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t, H1, DO_MSGT) +GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT) +GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT) +GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT) -- 2.27.0
From: LIU Zhiwei <zhiwei_liu@c-sky.com> Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Message-id: 20200623215920.2594-18-zhiwei_liu@c-sky.com Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/helper.h | 33 ++++++++++++ target/riscv/insn32.decode | 8 +++ target/riscv/insn_trans/trans_rvv.inc.c | 10 ++++ target/riscv/vector_helper.c | 71 +++++++++++++++++++++++++ 4 files changed, 122 insertions(+) diff --git a/target/riscv/helper.h b/target/riscv/helper.h index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/helper.h +++ b/target/riscv/helper.h @@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vmsgt_vx_b, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vmsgt_vx_h, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vmsgt_vx_w, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vmsgt_vx_d, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vminu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vminu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vminu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vminu_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmin_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmin_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmin_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmin_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmaxu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmaxu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmaxu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmaxu_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmax_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmax_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmax_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmax_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vminu_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vminu_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vminu_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vminu_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmin_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmin_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmin_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmin_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmaxu_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmaxu_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmaxu_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmaxu_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmax_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmax_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmax_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmax_vx_d, void, ptr, ptr, tl, ptr, env, i32) diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn32.decode +++ b/target/riscv/insn32.decode @@ -XXX,XX +XXX,XX @@ vmsgtu_vx 011110 . ..... ..... 100 ..... 1010111 @r_vm vmsgtu_vi 011110 . ..... ..... 011 ..... 1010111 @r_vm vmsgt_vx 011111 . ..... ..... 100 ..... 1010111 @r_vm vmsgt_vi 011111 . ..... ..... 011 ..... 1010111 @r_vm +vminu_vv 000100 . ..... ..... 000 ..... 1010111 @r_vm +vminu_vx 000100 . ..... ..... 100 ..... 1010111 @r_vm +vmin_vv 000101 . ..... ..... 000 ..... 1010111 @r_vm +vmin_vx 000101 . ..... ..... 100 ..... 1010111 @r_vm +vmaxu_vv 000110 . ..... ..... 000 ..... 1010111 @r_vm +vmaxu_vx 000110 . ..... ..... 100 ..... 1010111 @r_vm +vmax_vv 000111 . ..... ..... 000 ..... 1010111 @r_vm +vmax_vx 000111 . ..... ..... 100 ..... 1010111 @r_vm vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm vsetvl 1000000 ..... ..... 111 ..... 1010111 @r diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn_trans/trans_rvv.inc.c +++ b/target/riscv/insn_trans/trans_rvv.inc.c @@ -XXX,XX +XXX,XX @@ GEN_OPIVI_TRANS(vmsleu_vi, 1, vmsleu_vx, opivx_cmp_check) GEN_OPIVI_TRANS(vmsle_vi, 0, vmsle_vx, opivx_cmp_check) GEN_OPIVI_TRANS(vmsgtu_vi, 1, vmsgtu_vx, opivx_cmp_check) GEN_OPIVI_TRANS(vmsgt_vi, 0, vmsgt_vx, opivx_cmp_check) + +/* Vector Integer Min/Max Instructions */ +GEN_OPIVV_GVEC_TRANS(vminu_vv, umin) +GEN_OPIVV_GVEC_TRANS(vmin_vv, smin) +GEN_OPIVV_GVEC_TRANS(vmaxu_vv, umax) +GEN_OPIVV_GVEC_TRANS(vmax_vv, smax) +GEN_OPIVX_TRANS(vminu_vx, opivx_check) +GEN_OPIVX_TRANS(vmin_vx, opivx_check) +GEN_OPIVX_TRANS(vmaxu_vx, opivx_check) +GEN_OPIVX_TRANS(vmax_vx, opivx_check) diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -XXX,XX +XXX,XX @@ GEN_VEXT_AMO(vamomaxuw_v_w, uint32_t, uint32_t, idx_w, clearl) #define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t #define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t #define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t +#define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t +#define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t +#define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t +#define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t /* operation of two vector elements */ typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i); @@ -XXX,XX +XXX,XX @@ GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t, H1, DO_MSGT) GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT) GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT) GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT) + +/* Vector Integer Min/Max Instructions */ +RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN) +RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, H2, DO_MIN) +RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN) +RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN) +RVVCALL(OPIVV2, vmin_vv_b, OP_SSS_B, H1, H1, H1, DO_MIN) +RVVCALL(OPIVV2, vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN) +RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN) +RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN) +RVVCALL(OPIVV2, vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX) +RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX) +RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX) +RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX) +RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX) +RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX) +RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX) +RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX) +GEN_VEXT_VV(vminu_vv_b, 1, 1, clearb) +GEN_VEXT_VV(vminu_vv_h, 2, 2, clearh) +GEN_VEXT_VV(vminu_vv_w, 4, 4, clearl) +GEN_VEXT_VV(vminu_vv_d, 8, 8, clearq) +GEN_VEXT_VV(vmin_vv_b, 1, 1, clearb) +GEN_VEXT_VV(vmin_vv_h, 2, 2, clearh) +GEN_VEXT_VV(vmin_vv_w, 4, 4, clearl) +GEN_VEXT_VV(vmin_vv_d, 8, 8, clearq) +GEN_VEXT_VV(vmaxu_vv_b, 1, 1, clearb) +GEN_VEXT_VV(vmaxu_vv_h, 2, 2, clearh) +GEN_VEXT_VV(vmaxu_vv_w, 4, 4, clearl) +GEN_VEXT_VV(vmaxu_vv_d, 8, 8, clearq) +GEN_VEXT_VV(vmax_vv_b, 1, 1, clearb) +GEN_VEXT_VV(vmax_vv_h, 2, 2, clearh) +GEN_VEXT_VV(vmax_vv_w, 4, 4, clearl) +GEN_VEXT_VV(vmax_vv_d, 8, 8, clearq) + +RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN) +RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN) +RVVCALL(OPIVX2, vminu_vx_w, OP_UUU_W, H4, H4, DO_MIN) +RVVCALL(OPIVX2, vminu_vx_d, OP_UUU_D, H8, H8, DO_MIN) +RVVCALL(OPIVX2, vmin_vx_b, OP_SSS_B, H1, H1, DO_MIN) +RVVCALL(OPIVX2, vmin_vx_h, OP_SSS_H, H2, H2, DO_MIN) +RVVCALL(OPIVX2, vmin_vx_w, OP_SSS_W, H4, H4, DO_MIN) +RVVCALL(OPIVX2, vmin_vx_d, OP_SSS_D, H8, H8, DO_MIN) +RVVCALL(OPIVX2, vmaxu_vx_b, OP_UUU_B, H1, H1, DO_MAX) +RVVCALL(OPIVX2, vmaxu_vx_h, OP_UUU_H, H2, H2, DO_MAX) +RVVCALL(OPIVX2, vmaxu_vx_w, OP_UUU_W, H4, H4, DO_MAX) +RVVCALL(OPIVX2, vmaxu_vx_d, OP_UUU_D, H8, H8, DO_MAX) +RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX) +RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX) +RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX) +RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX) +GEN_VEXT_VX(vminu_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vminu_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vminu_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vminu_vx_d, 8, 8, clearq) +GEN_VEXT_VX(vmin_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vmin_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vmin_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vmin_vx_d, 8, 8, clearq) +GEN_VEXT_VX(vmaxu_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vmaxu_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vmaxu_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vmaxu_vx_d, 8, 8, clearq) +GEN_VEXT_VX(vmax_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vmax_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vmax_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vmax_vx_d, 8, 8, clearq) -- 2.27.0
From: LIU Zhiwei <zhiwei_liu@c-sky.com> Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Message-id: 20200623215920.2594-19-zhiwei_liu@c-sky.com Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/helper.h | 33 +++++ target/riscv/insn32.decode | 8 ++ target/riscv/insn_trans/trans_rvv.inc.c | 10 ++ target/riscv/vector_helper.c | 163 ++++++++++++++++++++++++ 4 files changed, 214 insertions(+) diff --git a/target/riscv/helper.h b/target/riscv/helper.h index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/helper.h +++ b/target/riscv/helper.h @@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vmax_vx_b, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vmax_vx_h, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vmax_vx_w, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vmax_vx_d, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vmul_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmul_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmul_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmul_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmulh_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmulh_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmulh_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmulh_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmulhu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmulhu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmulhu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmulhu_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmulhsu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmulhsu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmulhsu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmulhsu_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmul_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmul_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmul_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmul_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmulh_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmulh_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmulh_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmulh_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmulhu_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmulhu_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmulhu_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmulhu_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmulhsu_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmulhsu_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmulhsu_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmulhsu_vx_d, void, ptr, ptr, tl, ptr, env, i32) diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn32.decode +++ b/target/riscv/insn32.decode @@ -XXX,XX +XXX,XX @@ vmaxu_vv 000110 . ..... ..... 000 ..... 1010111 @r_vm vmaxu_vx 000110 . ..... ..... 100 ..... 1010111 @r_vm vmax_vv 000111 . ..... ..... 000 ..... 1010111 @r_vm vmax_vx 000111 . ..... ..... 100 ..... 1010111 @r_vm +vmul_vv 100101 . ..... ..... 010 ..... 1010111 @r_vm +vmul_vx 100101 . ..... ..... 110 ..... 1010111 @r_vm +vmulh_vv 100111 . ..... ..... 010 ..... 1010111 @r_vm +vmulh_vx 100111 . ..... ..... 110 ..... 1010111 @r_vm +vmulhu_vv 100100 . ..... ..... 010 ..... 1010111 @r_vm +vmulhu_vx 100100 . ..... ..... 110 ..... 1010111 @r_vm +vmulhsu_vv 100110 . ..... ..... 010 ..... 1010111 @r_vm +vmulhsu_vx 100110 . ..... ..... 110 ..... 1010111 @r_vm vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm vsetvl 1000000 ..... ..... 111 ..... 1010111 @r diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn_trans/trans_rvv.inc.c +++ b/target/riscv/insn_trans/trans_rvv.inc.c @@ -XXX,XX +XXX,XX @@ GEN_OPIVX_TRANS(vminu_vx, opivx_check) GEN_OPIVX_TRANS(vmin_vx, opivx_check) GEN_OPIVX_TRANS(vmaxu_vx, opivx_check) GEN_OPIVX_TRANS(vmax_vx, opivx_check) + +/* Vector Single-Width Integer Multiply Instructions */ +GEN_OPIVV_GVEC_TRANS(vmul_vv, mul) +GEN_OPIVV_TRANS(vmulh_vv, opivv_check) +GEN_OPIVV_TRANS(vmulhu_vv, opivv_check) +GEN_OPIVV_TRANS(vmulhsu_vv, opivv_check) +GEN_OPIVX_GVEC_TRANS(vmul_vx, muls) +GEN_OPIVX_TRANS(vmulh_vx, opivx_check) +GEN_OPIVX_TRANS(vmulhu_vx, opivx_check) +GEN_OPIVX_TRANS(vmulhsu_vx, opivx_check) diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -XXX,XX +XXX,XX @@ GEN_VEXT_AMO(vamomaxuw_v_w, uint32_t, uint32_t, idx_w, clearl) #define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t #define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t #define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t +#define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t +#define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t +#define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t +#define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t /* operation of two vector elements */ typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i); @@ -XXX,XX +XXX,XX @@ GEN_VEXT_VX(vmax_vx_b, 1, 1, clearb) GEN_VEXT_VX(vmax_vx_h, 2, 2, clearh) GEN_VEXT_VX(vmax_vx_w, 4, 4, clearl) GEN_VEXT_VX(vmax_vx_d, 8, 8, clearq) + +/* Vector Single-Width Integer Multiply Instructions */ +#define DO_MUL(N, M) (N * M) +RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL) +RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL) +RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL) +RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL) +GEN_VEXT_VV(vmul_vv_b, 1, 1, clearb) +GEN_VEXT_VV(vmul_vv_h, 2, 2, clearh) +GEN_VEXT_VV(vmul_vv_w, 4, 4, clearl) +GEN_VEXT_VV(vmul_vv_d, 8, 8, clearq) + +static int8_t do_mulh_b(int8_t s2, int8_t s1) +{ + return (int16_t)s2 * (int16_t)s1 >> 8; +} + +static int16_t do_mulh_h(int16_t s2, int16_t s1) +{ + return (int32_t)s2 * (int32_t)s1 >> 16; +} + +static int32_t do_mulh_w(int32_t s2, int32_t s1) +{ + return (int64_t)s2 * (int64_t)s1 >> 32; +} + +static int64_t do_mulh_d(int64_t s2, int64_t s1) +{ + uint64_t hi_64, lo_64; + + muls64(&lo_64, &hi_64, s1, s2); + return hi_64; +} + +static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1) +{ + return (uint16_t)s2 * (uint16_t)s1 >> 8; +} + +static uint16_t do_mulhu_h(uint16_t s2, uint16_t s1) +{ + return (uint32_t)s2 * (uint32_t)s1 >> 16; +} + +static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1) +{ + return (uint64_t)s2 * (uint64_t)s1 >> 32; +} + +static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1) +{ + uint64_t hi_64, lo_64; + + mulu64(&lo_64, &hi_64, s2, s1); + return hi_64; +} + +static int8_t do_mulhsu_b(int8_t s2, uint8_t s1) +{ + return (int16_t)s2 * (uint16_t)s1 >> 8; +} + +static int16_t do_mulhsu_h(int16_t s2, uint16_t s1) +{ + return (int32_t)s2 * (uint32_t)s1 >> 16; +} + +static int32_t do_mulhsu_w(int32_t s2, uint32_t s1) +{ + return (int64_t)s2 * (uint64_t)s1 >> 32; +} + +/* + * Let A = signed operand, + * B = unsigned operand + * P = mulu64(A, B), unsigned product + * + * LET X = 2 ** 64 - A, 2's complement of A + * SP = signed product + * THEN + * IF A < 0 + * SP = -X * B + * = -(2 ** 64 - A) * B + * = A * B - 2 ** 64 * B + * = P - 2 ** 64 * B + * ELSE + * SP = P + * THEN + * HI_P -= (A < 0 ? B : 0) + */ + +static int64_t do_mulhsu_d(int64_t s2, uint64_t s1) +{ + uint64_t hi_64, lo_64; + + mulu64(&lo_64, &hi_64, s2, s1); + + hi_64 -= s2 < 0 ? s1 : 0; + return hi_64; +} + +RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b) +RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h) +RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w) +RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d) +RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b) +RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h) +RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w) +RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d) +RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b) +RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h) +RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w) +RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d) +GEN_VEXT_VV(vmulh_vv_b, 1, 1, clearb) +GEN_VEXT_VV(vmulh_vv_h, 2, 2, clearh) +GEN_VEXT_VV(vmulh_vv_w, 4, 4, clearl) +GEN_VEXT_VV(vmulh_vv_d, 8, 8, clearq) +GEN_VEXT_VV(vmulhu_vv_b, 1, 1, clearb) +GEN_VEXT_VV(vmulhu_vv_h, 2, 2, clearh) +GEN_VEXT_VV(vmulhu_vv_w, 4, 4, clearl) +GEN_VEXT_VV(vmulhu_vv_d, 8, 8, clearq) +GEN_VEXT_VV(vmulhsu_vv_b, 1, 1, clearb) +GEN_VEXT_VV(vmulhsu_vv_h, 2, 2, clearh) +GEN_VEXT_VV(vmulhsu_vv_w, 4, 4, clearl) +GEN_VEXT_VV(vmulhsu_vv_d, 8, 8, clearq) + +RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL) +RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL) +RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL) +RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL) +RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b) +RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h) +RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, H4, H4, do_mulh_w) +RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d) +RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b) +RVVCALL(OPIVX2, vmulhu_vx_h, OP_UUU_H, H2, H2, do_mulhu_h) +RVVCALL(OPIVX2, vmulhu_vx_w, OP_UUU_W, H4, H4, do_mulhu_w) +RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d) +RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b) +RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h) +RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w) +RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d) +GEN_VEXT_VX(vmul_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vmul_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vmul_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vmul_vx_d, 8, 8, clearq) +GEN_VEXT_VX(vmulh_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vmulh_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vmulh_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vmulh_vx_d, 8, 8, clearq) +GEN_VEXT_VX(vmulhu_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vmulhu_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vmulhu_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vmulhu_vx_d, 8, 8, clearq) +GEN_VEXT_VX(vmulhsu_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vmulhsu_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vmulhsu_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vmulhsu_vx_d, 8, 8, clearq) -- 2.27.0
From: LIU Zhiwei <zhiwei_liu@c-sky.com> Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Message-id: 20200623215920.2594-20-zhiwei_liu@c-sky.com Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/helper.h | 33 +++++++++++ target/riscv/insn32.decode | 8 +++ target/riscv/insn_trans/trans_rvv.inc.c | 10 ++++ target/riscv/vector_helper.c | 74 +++++++++++++++++++++++++ 4 files changed, 125 insertions(+) diff --git a/target/riscv/helper.h b/target/riscv/helper.h index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/helper.h +++ b/target/riscv/helper.h @@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vmulhsu_vx_b, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vmulhsu_vx_h, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vmulhsu_vx_w, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vmulhsu_vx_d, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vdivu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vdivu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vdivu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vdivu_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vdiv_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vdiv_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vdiv_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vdiv_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vremu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vremu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vremu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vremu_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vrem_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vrem_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vrem_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vrem_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vdivu_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vdivu_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vdivu_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vdivu_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vdiv_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vdiv_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vdiv_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vdiv_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vremu_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vremu_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vremu_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vremu_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vrem_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vrem_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vrem_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vrem_vx_d, void, ptr, ptr, tl, ptr, env, i32) diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn32.decode +++ b/target/riscv/insn32.decode @@ -XXX,XX +XXX,XX @@ vmulhu_vv 100100 . ..... ..... 010 ..... 1010111 @r_vm vmulhu_vx 100100 . ..... ..... 110 ..... 1010111 @r_vm vmulhsu_vv 100110 . ..... ..... 010 ..... 1010111 @r_vm vmulhsu_vx 100110 . ..... ..... 110 ..... 1010111 @r_vm +vdivu_vv 100000 . ..... ..... 010 ..... 1010111 @r_vm +vdivu_vx 100000 . ..... ..... 110 ..... 1010111 @r_vm +vdiv_vv 100001 . ..... ..... 010 ..... 1010111 @r_vm +vdiv_vx 100001 . ..... ..... 110 ..... 1010111 @r_vm +vremu_vv 100010 . ..... ..... 010 ..... 1010111 @r_vm +vremu_vx 100010 . ..... ..... 110 ..... 1010111 @r_vm +vrem_vv 100011 . ..... ..... 010 ..... 1010111 @r_vm +vrem_vx 100011 . ..... ..... 110 ..... 1010111 @r_vm vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm vsetvl 1000000 ..... ..... 111 ..... 1010111 @r diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn_trans/trans_rvv.inc.c +++ b/target/riscv/insn_trans/trans_rvv.inc.c @@ -XXX,XX +XXX,XX @@ GEN_OPIVX_GVEC_TRANS(vmul_vx, muls) GEN_OPIVX_TRANS(vmulh_vx, opivx_check) GEN_OPIVX_TRANS(vmulhu_vx, opivx_check) GEN_OPIVX_TRANS(vmulhsu_vx, opivx_check) + +/* Vector Integer Divide Instructions */ +GEN_OPIVV_TRANS(vdivu_vv, opivv_check) +GEN_OPIVV_TRANS(vdiv_vv, opivv_check) +GEN_OPIVV_TRANS(vremu_vv, opivv_check) +GEN_OPIVV_TRANS(vrem_vv, opivv_check) +GEN_OPIVX_TRANS(vdivu_vx, opivx_check) +GEN_OPIVX_TRANS(vdiv_vx, opivx_check) +GEN_OPIVX_TRANS(vremu_vx, opivx_check) +GEN_OPIVX_TRANS(vrem_vx, opivx_check) diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -XXX,XX +XXX,XX @@ GEN_VEXT_VX(vmulhsu_vx_b, 1, 1, clearb) GEN_VEXT_VX(vmulhsu_vx_h, 2, 2, clearh) GEN_VEXT_VX(vmulhsu_vx_w, 4, 4, clearl) GEN_VEXT_VX(vmulhsu_vx_d, 8, 8, clearq) + +/* Vector Integer Divide Instructions */ +#define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M) +#define DO_REMU(N, M) (unlikely(M == 0) ? N : N % M) +#define DO_DIV(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) :\ + unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M) +#define DO_REM(N, M) (unlikely(M == 0) ? N :\ + unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 0 : N % M) + +RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU) +RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU) +RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU) +RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU) +RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV) +RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV) +RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV) +RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV) +RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU) +RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU) +RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU) +RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU) +RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM) +RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM) +RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM) +RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM) +GEN_VEXT_VV(vdivu_vv_b, 1, 1, clearb) +GEN_VEXT_VV(vdivu_vv_h, 2, 2, clearh) +GEN_VEXT_VV(vdivu_vv_w, 4, 4, clearl) +GEN_VEXT_VV(vdivu_vv_d, 8, 8, clearq) +GEN_VEXT_VV(vdiv_vv_b, 1, 1, clearb) +GEN_VEXT_VV(vdiv_vv_h, 2, 2, clearh) +GEN_VEXT_VV(vdiv_vv_w, 4, 4, clearl) +GEN_VEXT_VV(vdiv_vv_d, 8, 8, clearq) +GEN_VEXT_VV(vremu_vv_b, 1, 1, clearb) +GEN_VEXT_VV(vremu_vv_h, 2, 2, clearh) +GEN_VEXT_VV(vremu_vv_w, 4, 4, clearl) +GEN_VEXT_VV(vremu_vv_d, 8, 8, clearq) +GEN_VEXT_VV(vrem_vv_b, 1, 1, clearb) +GEN_VEXT_VV(vrem_vv_h, 2, 2, clearh) +GEN_VEXT_VV(vrem_vv_w, 4, 4, clearl) +GEN_VEXT_VV(vrem_vv_d, 8, 8, clearq) + +RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU) +RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU) +RVVCALL(OPIVX2, vdivu_vx_w, OP_UUU_W, H4, H4, DO_DIVU) +RVVCALL(OPIVX2, vdivu_vx_d, OP_UUU_D, H8, H8, DO_DIVU) +RVVCALL(OPIVX2, vdiv_vx_b, OP_SSS_B, H1, H1, DO_DIV) +RVVCALL(OPIVX2, vdiv_vx_h, OP_SSS_H, H2, H2, DO_DIV) +RVVCALL(OPIVX2, vdiv_vx_w, OP_SSS_W, H4, H4, DO_DIV) +RVVCALL(OPIVX2, vdiv_vx_d, OP_SSS_D, H8, H8, DO_DIV) +RVVCALL(OPIVX2, vremu_vx_b, OP_UUU_B, H1, H1, DO_REMU) +RVVCALL(OPIVX2, vremu_vx_h, OP_UUU_H, H2, H2, DO_REMU) +RVVCALL(OPIVX2, vremu_vx_w, OP_UUU_W, H4, H4, DO_REMU) +RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU) +RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM) +RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM) +RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM) +RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM) +GEN_VEXT_VX(vdivu_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vdivu_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vdivu_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vdivu_vx_d, 8, 8, clearq) +GEN_VEXT_VX(vdiv_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vdiv_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vdiv_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vdiv_vx_d, 8, 8, clearq) +GEN_VEXT_VX(vremu_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vremu_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vremu_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vremu_vx_d, 8, 8, clearq) +GEN_VEXT_VX(vrem_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vrem_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vrem_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vrem_vx_d, 8, 8, clearq) -- 2.27.0
From: LIU Zhiwei <zhiwei_liu@c-sky.com> Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Message-id: 20200623215920.2594-21-zhiwei_liu@c-sky.com Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/helper.h | 19 +++++++++ target/riscv/insn32.decode | 6 +++ target/riscv/insn_trans/trans_rvv.inc.c | 8 ++++ target/riscv/vector_helper.c | 51 +++++++++++++++++++++++++ 4 files changed, 84 insertions(+) diff --git a/target/riscv/helper.h b/target/riscv/helper.h index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/helper.h +++ b/target/riscv/helper.h @@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vrem_vx_b, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vrem_vx_h, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vrem_vx_w, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vrem_vx_d, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vwmul_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwmul_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwmul_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwmulu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwmulu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwmulu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwmulsu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwmulsu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwmulsu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwmul_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwmul_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwmul_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwmulu_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwmulu_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwmulu_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwmulsu_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwmulsu_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwmulsu_vx_w, void, ptr, ptr, tl, ptr, env, i32) diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn32.decode +++ b/target/riscv/insn32.decode @@ -XXX,XX +XXX,XX @@ vremu_vv 100010 . ..... ..... 010 ..... 1010111 @r_vm vremu_vx 100010 . ..... ..... 110 ..... 1010111 @r_vm vrem_vv 100011 . ..... ..... 010 ..... 1010111 @r_vm vrem_vx 100011 . ..... ..... 110 ..... 1010111 @r_vm +vwmulu_vv 111000 . ..... ..... 010 ..... 1010111 @r_vm +vwmulu_vx 111000 . ..... ..... 110 ..... 1010111 @r_vm +vwmulsu_vv 111010 . ..... ..... 010 ..... 1010111 @r_vm +vwmulsu_vx 111010 . ..... ..... 110 ..... 1010111 @r_vm +vwmul_vv 111011 . ..... ..... 010 ..... 1010111 @r_vm +vwmul_vx 111011 . ..... ..... 110 ..... 1010111 @r_vm vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm vsetvl 1000000 ..... ..... 111 ..... 1010111 @r diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn_trans/trans_rvv.inc.c +++ b/target/riscv/insn_trans/trans_rvv.inc.c @@ -XXX,XX +XXX,XX @@ GEN_OPIVX_TRANS(vdivu_vx, opivx_check) GEN_OPIVX_TRANS(vdiv_vx, opivx_check) GEN_OPIVX_TRANS(vremu_vx, opivx_check) GEN_OPIVX_TRANS(vrem_vx, opivx_check) + +/* Vector Widening Integer Multiply Instructions */ +GEN_OPIVV_WIDEN_TRANS(vwmul_vv, opivv_widen_check) +GEN_OPIVV_WIDEN_TRANS(vwmulu_vv, opivv_widen_check) +GEN_OPIVV_WIDEN_TRANS(vwmulsu_vv, opivv_widen_check) +GEN_OPIVX_WIDEN_TRANS(vwmul_vx) +GEN_OPIVX_WIDEN_TRANS(vwmulu_vx) +GEN_OPIVX_WIDEN_TRANS(vwmulsu_vx) diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -XXX,XX +XXX,XX @@ GEN_VEXT_AMO(vamomaxuw_v_w, uint32_t, uint32_t, idx_w, clearl) #define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t #define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t #define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t +#define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t +#define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t +#define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t +#define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t +#define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t +#define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t +#define WOP_SUS_B int16_t, uint8_t, int8_t, uint16_t, int16_t +#define WOP_SUS_H int32_t, uint16_t, int16_t, uint32_t, int32_t +#define WOP_SUS_W int64_t, uint32_t, int32_t, uint64_t, int64_t +#define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t +#define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t +#define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t /* operation of two vector elements */ typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i); @@ -XXX,XX +XXX,XX @@ GEN_VEXT_VX(vrem_vx_b, 1, 1, clearb) GEN_VEXT_VX(vrem_vx_h, 2, 2, clearh) GEN_VEXT_VX(vrem_vx_w, 4, 4, clearl) GEN_VEXT_VX(vrem_vx_d, 8, 8, clearq) + +/* Vector Widening Integer Multiply Instructions */ +RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL) +RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, DO_MUL) +RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, DO_MUL) +RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL) +RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL) +RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL) +RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL) +RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL) +RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL) +GEN_VEXT_VV(vwmul_vv_b, 1, 2, clearh) +GEN_VEXT_VV(vwmul_vv_h, 2, 4, clearl) +GEN_VEXT_VV(vwmul_vv_w, 4, 8, clearq) +GEN_VEXT_VV(vwmulu_vv_b, 1, 2, clearh) +GEN_VEXT_VV(vwmulu_vv_h, 2, 4, clearl) +GEN_VEXT_VV(vwmulu_vv_w, 4, 8, clearq) +GEN_VEXT_VV(vwmulsu_vv_b, 1, 2, clearh) +GEN_VEXT_VV(vwmulsu_vv_h, 2, 4, clearl) +GEN_VEXT_VV(vwmulsu_vv_w, 4, 8, clearq) + +RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL) +RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL) +RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL) +RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL) +RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL) +RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL) +RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL) +RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL) +RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL) +GEN_VEXT_VX(vwmul_vx_b, 1, 2, clearh) +GEN_VEXT_VX(vwmul_vx_h, 2, 4, clearl) +GEN_VEXT_VX(vwmul_vx_w, 4, 8, clearq) +GEN_VEXT_VX(vwmulu_vx_b, 1, 2, clearh) +GEN_VEXT_VX(vwmulu_vx_h, 2, 4, clearl) +GEN_VEXT_VX(vwmulu_vx_w, 4, 8, clearq) +GEN_VEXT_VX(vwmulsu_vx_b, 1, 2, clearh) +GEN_VEXT_VX(vwmulsu_vx_h, 2, 4, clearl) +GEN_VEXT_VX(vwmulsu_vx_w, 4, 8, clearq) -- 2.27.0
From: LIU Zhiwei <zhiwei_liu@c-sky.com> Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Message-id: 20200623215920.2594-22-zhiwei_liu@c-sky.com Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/helper.h | 33 ++++++++++ target/riscv/insn32.decode | 8 +++ target/riscv/insn_trans/trans_rvv.inc.c | 10 +++ target/riscv/vector_helper.c | 88 +++++++++++++++++++++++++ 4 files changed, 139 insertions(+) diff --git a/target/riscv/helper.h b/target/riscv/helper.h index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/helper.h +++ b/target/riscv/helper.h @@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vwmulu_vx_w, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vwmulsu_vx_b, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vwmulsu_vx_h, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vwmulsu_vx_w, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vmacc_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmacc_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmacc_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmacc_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vnmsac_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vnmsac_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vnmsac_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vnmsac_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmadd_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmadd_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmadd_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmadd_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vnmsub_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vnmsub_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vnmsub_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vnmsub_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmacc_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmacc_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmacc_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmacc_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vnmsac_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vnmsac_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vnmsac_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vnmsac_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmadd_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmadd_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmadd_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmadd_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vnmsub_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vnmsub_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vnmsub_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vnmsub_vx_d, void, ptr, ptr, tl, ptr, env, i32) diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn32.decode +++ b/target/riscv/insn32.decode @@ -XXX,XX +XXX,XX @@ vwmulsu_vv 111010 . ..... ..... 010 ..... 1010111 @r_vm vwmulsu_vx 111010 . ..... ..... 110 ..... 1010111 @r_vm vwmul_vv 111011 . ..... ..... 010 ..... 1010111 @r_vm vwmul_vx 111011 . ..... ..... 110 ..... 1010111 @r_vm +vmacc_vv 101101 . ..... ..... 010 ..... 1010111 @r_vm +vmacc_vx 101101 . ..... ..... 110 ..... 1010111 @r_vm +vnmsac_vv 101111 . ..... ..... 010 ..... 1010111 @r_vm +vnmsac_vx 101111 . ..... ..... 110 ..... 1010111 @r_vm +vmadd_vv 101001 . ..... ..... 010 ..... 1010111 @r_vm +vmadd_vx 101001 . ..... ..... 110 ..... 1010111 @r_vm +vnmsub_vv 101011 . ..... ..... 010 ..... 1010111 @r_vm +vnmsub_vx 101011 . ..... ..... 110 ..... 1010111 @r_vm vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm vsetvl 1000000 ..... ..... 111 ..... 1010111 @r diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn_trans/trans_rvv.inc.c +++ b/target/riscv/insn_trans/trans_rvv.inc.c @@ -XXX,XX +XXX,XX @@ GEN_OPIVV_WIDEN_TRANS(vwmulsu_vv, opivv_widen_check) GEN_OPIVX_WIDEN_TRANS(vwmul_vx) GEN_OPIVX_WIDEN_TRANS(vwmulu_vx) GEN_OPIVX_WIDEN_TRANS(vwmulsu_vx) + +/* Vector Single-Width Integer Multiply-Add Instructions */ +GEN_OPIVV_TRANS(vmacc_vv, opivv_check) +GEN_OPIVV_TRANS(vnmsac_vv, opivv_check) +GEN_OPIVV_TRANS(vmadd_vv, opivv_check) +GEN_OPIVV_TRANS(vnmsub_vv, opivv_check) +GEN_OPIVX_TRANS(vmacc_vx, opivx_check) +GEN_OPIVX_TRANS(vnmsac_vx, opivx_check) +GEN_OPIVX_TRANS(vmadd_vx, opivx_check) +GEN_OPIVX_TRANS(vnmsub_vx, opivx_check) diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -XXX,XX +XXX,XX @@ GEN_VEXT_VX(vwmulu_vx_w, 4, 8, clearq) GEN_VEXT_VX(vwmulsu_vx_b, 1, 2, clearh) GEN_VEXT_VX(vwmulsu_vx_h, 2, 4, clearl) GEN_VEXT_VX(vwmulsu_vx_w, 4, 8, clearq) + +/* Vector Single-Width Integer Multiply-Add Instructions */ +#define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ +static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \ +{ \ + TX1 s1 = *((T1 *)vs1 + HS1(i)); \ + TX2 s2 = *((T2 *)vs2 + HS2(i)); \ + TD d = *((TD *)vd + HD(i)); \ + *((TD *)vd + HD(i)) = OP(s2, s1, d); \ +} + +#define DO_MACC(N, M, D) (M * N + D) +#define DO_NMSAC(N, M, D) (-(M * N) + D) +#define DO_MADD(N, M, D) (M * D + N) +#define DO_NMSUB(N, M, D) (-(M * D) + N) +RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC) +RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC) +RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, DO_MACC) +RVVCALL(OPIVV3, vmacc_vv_d, OP_SSS_D, H8, H8, H8, DO_MACC) +RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC) +RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC) +RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC) +RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC) +RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, H1, H1, DO_MADD) +RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD) +RVVCALL(OPIVV3, vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD) +RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD) +RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB) +RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB) +RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB) +RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB) +GEN_VEXT_VV(vmacc_vv_b, 1, 1, clearb) +GEN_VEXT_VV(vmacc_vv_h, 2, 2, clearh) +GEN_VEXT_VV(vmacc_vv_w, 4, 4, clearl) +GEN_VEXT_VV(vmacc_vv_d, 8, 8, clearq) +GEN_VEXT_VV(vnmsac_vv_b, 1, 1, clearb) +GEN_VEXT_VV(vnmsac_vv_h, 2, 2, clearh) +GEN_VEXT_VV(vnmsac_vv_w, 4, 4, clearl) +GEN_VEXT_VV(vnmsac_vv_d, 8, 8, clearq) +GEN_VEXT_VV(vmadd_vv_b, 1, 1, clearb) +GEN_VEXT_VV(vmadd_vv_h, 2, 2, clearh) +GEN_VEXT_VV(vmadd_vv_w, 4, 4, clearl) +GEN_VEXT_VV(vmadd_vv_d, 8, 8, clearq) +GEN_VEXT_VV(vnmsub_vv_b, 1, 1, clearb) +GEN_VEXT_VV(vnmsub_vv_h, 2, 2, clearh) +GEN_VEXT_VV(vnmsub_vv_w, 4, 4, clearl) +GEN_VEXT_VV(vnmsub_vv_d, 8, 8, clearq) + +#define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ +static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \ +{ \ + TX2 s2 = *((T2 *)vs2 + HS2(i)); \ + TD d = *((TD *)vd + HD(i)); \ + *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d); \ +} + +RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC) +RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC) +RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC) +RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC) +RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC) +RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC) +RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC) +RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC) +RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD) +RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD) +RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD) +RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD) +RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB) +RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB) +RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB) +RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB) +GEN_VEXT_VX(vmacc_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vmacc_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vmacc_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vmacc_vx_d, 8, 8, clearq) +GEN_VEXT_VX(vnmsac_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vnmsac_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vnmsac_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vnmsac_vx_d, 8, 8, clearq) +GEN_VEXT_VX(vmadd_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vmadd_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vmadd_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vmadd_vx_d, 8, 8, clearq) +GEN_VEXT_VX(vnmsub_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vnmsub_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vnmsub_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vnmsub_vx_d, 8, 8, clearq) -- 2.27.0
From: LIU Zhiwei <zhiwei_liu@c-sky.com> Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Message-id: 20200623215920.2594-23-zhiwei_liu@c-sky.com Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/helper.h | 22 ++++++++++++ target/riscv/insn32.decode | 7 ++++ target/riscv/insn_trans/trans_rvv.inc.c | 9 +++++ target/riscv/vector_helper.c | 45 +++++++++++++++++++++++++ 4 files changed, 83 insertions(+) diff --git a/target/riscv/helper.h b/target/riscv/helper.h index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/helper.h +++ b/target/riscv/helper.h @@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vnmsub_vx_b, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vnmsub_vx_h, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vnmsub_vx_w, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vnmsub_vx_d, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vwmaccu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwmaccu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwmaccu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwmacc_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwmacc_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwmacc_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwmaccsu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwmaccsu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwmaccsu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwmaccu_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwmaccu_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwmaccu_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwmacc_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwmacc_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwmacc_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwmaccsu_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwmaccsu_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwmaccsu_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwmaccus_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwmaccus_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwmaccus_vx_w, void, ptr, ptr, tl, ptr, env, i32) diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn32.decode +++ b/target/riscv/insn32.decode @@ -XXX,XX +XXX,XX @@ vmadd_vv 101001 . ..... ..... 010 ..... 1010111 @r_vm vmadd_vx 101001 . ..... ..... 110 ..... 1010111 @r_vm vnmsub_vv 101011 . ..... ..... 010 ..... 1010111 @r_vm vnmsub_vx 101011 . ..... ..... 110 ..... 1010111 @r_vm +vwmaccu_vv 111100 . ..... ..... 010 ..... 1010111 @r_vm +vwmaccu_vx 111100 . ..... ..... 110 ..... 1010111 @r_vm +vwmacc_vv 111101 . ..... ..... 010 ..... 1010111 @r_vm +vwmacc_vx 111101 . ..... ..... 110 ..... 1010111 @r_vm +vwmaccsu_vv 111110 . ..... ..... 010 ..... 1010111 @r_vm +vwmaccsu_vx 111110 . ..... ..... 110 ..... 1010111 @r_vm +vwmaccus_vx 111111 . ..... ..... 110 ..... 1010111 @r_vm vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm vsetvl 1000000 ..... ..... 111 ..... 1010111 @r diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn_trans/trans_rvv.inc.c +++ b/target/riscv/insn_trans/trans_rvv.inc.c @@ -XXX,XX +XXX,XX @@ GEN_OPIVX_TRANS(vmacc_vx, opivx_check) GEN_OPIVX_TRANS(vnmsac_vx, opivx_check) GEN_OPIVX_TRANS(vmadd_vx, opivx_check) GEN_OPIVX_TRANS(vnmsub_vx, opivx_check) + +/* Vector Widening Integer Multiply-Add Instructions */ +GEN_OPIVV_WIDEN_TRANS(vwmaccu_vv, opivv_widen_check) +GEN_OPIVV_WIDEN_TRANS(vwmacc_vv, opivv_widen_check) +GEN_OPIVV_WIDEN_TRANS(vwmaccsu_vv, opivv_widen_check) +GEN_OPIVX_WIDEN_TRANS(vwmaccu_vx) +GEN_OPIVX_WIDEN_TRANS(vwmacc_vx) +GEN_OPIVX_WIDEN_TRANS(vwmaccsu_vx) +GEN_OPIVX_WIDEN_TRANS(vwmaccus_vx) diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -XXX,XX +XXX,XX @@ GEN_VEXT_VX(vnmsub_vx_b, 1, 1, clearb) GEN_VEXT_VX(vnmsub_vx_h, 2, 2, clearh) GEN_VEXT_VX(vnmsub_vx_w, 4, 4, clearl) GEN_VEXT_VX(vnmsub_vx_d, 8, 8, clearq) + +/* Vector Widening Integer Multiply-Add Instructions */ +RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC) +RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC) +RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC) +RVVCALL(OPIVV3, vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC) +RVVCALL(OPIVV3, vwmacc_vv_h, WOP_SSS_H, H4, H2, H2, DO_MACC) +RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC) +RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC) +RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC) +RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC) +GEN_VEXT_VV(vwmaccu_vv_b, 1, 2, clearh) +GEN_VEXT_VV(vwmaccu_vv_h, 2, 4, clearl) +GEN_VEXT_VV(vwmaccu_vv_w, 4, 8, clearq) +GEN_VEXT_VV(vwmacc_vv_b, 1, 2, clearh) +GEN_VEXT_VV(vwmacc_vv_h, 2, 4, clearl) +GEN_VEXT_VV(vwmacc_vv_w, 4, 8, clearq) +GEN_VEXT_VV(vwmaccsu_vv_b, 1, 2, clearh) +GEN_VEXT_VV(vwmaccsu_vv_h, 2, 4, clearl) +GEN_VEXT_VV(vwmaccsu_vv_w, 4, 8, clearq) + +RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC) +RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC) +RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC) +RVVCALL(OPIVX3, vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC) +RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC) +RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC) +RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, DO_MACC) +RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, DO_MACC) +RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC) +RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC) +RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC) +RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC) +GEN_VEXT_VX(vwmaccu_vx_b, 1, 2, clearh) +GEN_VEXT_VX(vwmaccu_vx_h, 2, 4, clearl) +GEN_VEXT_VX(vwmaccu_vx_w, 4, 8, clearq) +GEN_VEXT_VX(vwmacc_vx_b, 1, 2, clearh) +GEN_VEXT_VX(vwmacc_vx_h, 2, 4, clearl) +GEN_VEXT_VX(vwmacc_vx_w, 4, 8, clearq) +GEN_VEXT_VX(vwmaccsu_vx_b, 1, 2, clearh) +GEN_VEXT_VX(vwmaccsu_vx_h, 2, 4, clearl) +GEN_VEXT_VX(vwmaccsu_vx_w, 4, 8, clearq) +GEN_VEXT_VX(vwmaccus_vx_b, 1, 2, clearh) +GEN_VEXT_VX(vwmaccus_vx_h, 2, 4, clearl) +GEN_VEXT_VX(vwmaccus_vx_w, 4, 8, clearq) -- 2.27.0
From: LIU Zhiwei <zhiwei_liu@c-sky.com> Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Message-id: 20200623215920.2594-24-zhiwei_liu@c-sky.com Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/helper.h | 17 ++++ target/riscv/insn32.decode | 7 ++ target/riscv/insn_trans/trans_rvv.inc.c | 113 ++++++++++++++++++++++++ target/riscv/vector_helper.c | 88 ++++++++++++++++++ 4 files changed, 225 insertions(+) diff --git a/target/riscv/helper.h b/target/riscv/helper.h index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/helper.h +++ b/target/riscv/helper.h @@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vwmaccsu_vx_w, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vwmaccus_vx_b, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vwmaccus_vx_h, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vwmaccus_vx_w, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vmerge_vvm_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmerge_vvm_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmerge_vvm_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmerge_vvm_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmerge_vxm_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmerge_vxm_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmerge_vxm_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmerge_vxm_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_4(vmv_v_v_b, void, ptr, ptr, env, i32) +DEF_HELPER_4(vmv_v_v_h, void, ptr, ptr, env, i32) +DEF_HELPER_4(vmv_v_v_w, void, ptr, ptr, env, i32) +DEF_HELPER_4(vmv_v_v_d, void, ptr, ptr, env, i32) +DEF_HELPER_4(vmv_v_x_b, void, ptr, i64, env, i32) +DEF_HELPER_4(vmv_v_x_h, void, ptr, i64, env, i32) +DEF_HELPER_4(vmv_v_x_w, void, ptr, i64, env, i32) +DEF_HELPER_4(vmv_v_x_d, void, ptr, i64, env, i32) diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn32.decode +++ b/target/riscv/insn32.decode @@ -XXX,XX +XXX,XX @@ @r_nfvm ... ... vm:1 ..... ..... ... ..... ....... &rnfvm %nf %rs2 %rs1 %rd @r_vm ...... vm:1 ..... ..... ... ..... ....... &rmrr %rs2 %rs1 %rd @r_vm_1 ...... . ..... ..... ... ..... ....... &rmrr vm=1 %rs2 %rs1 %rd +@r_vm_0 ...... . ..... ..... ... ..... ....... &rmrr vm=0 %rs2 %rs1 %rd @r_wdvm ..... wd:1 vm:1 ..... ..... ... ..... ....... &rwdvm %rs2 %rs1 %rd @r2_zimm . zimm:11 ..... ... ..... ....... %rs1 %rd @@ -XXX,XX +XXX,XX @@ vwmacc_vx 111101 . ..... ..... 110 ..... 1010111 @r_vm vwmaccsu_vv 111110 . ..... ..... 010 ..... 1010111 @r_vm vwmaccsu_vx 111110 . ..... ..... 110 ..... 1010111 @r_vm vwmaccus_vx 111111 . ..... ..... 110 ..... 1010111 @r_vm +vmv_v_v 010111 1 00000 ..... 000 ..... 1010111 @r2 +vmv_v_x 010111 1 00000 ..... 100 ..... 1010111 @r2 +vmv_v_i 010111 1 00000 ..... 011 ..... 1010111 @r2 +vmerge_vvm 010111 0 ..... ..... 000 ..... 1010111 @r_vm_0 +vmerge_vxm 010111 0 ..... ..... 100 ..... 1010111 @r_vm_0 +vmerge_vim 010111 0 ..... ..... 011 ..... 1010111 @r_vm_0 vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm vsetvl 1000000 ..... ..... 111 ..... 1010111 @r diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn_trans/trans_rvv.inc.c +++ b/target/riscv/insn_trans/trans_rvv.inc.c @@ -XXX,XX +XXX,XX @@ GEN_OPIVX_WIDEN_TRANS(vwmaccu_vx) GEN_OPIVX_WIDEN_TRANS(vwmacc_vx) GEN_OPIVX_WIDEN_TRANS(vwmaccsu_vx) GEN_OPIVX_WIDEN_TRANS(vwmaccus_vx) + +/* Vector Integer Merge and Move Instructions */ +static bool trans_vmv_v_v(DisasContext *s, arg_vmv_v_v *a) +{ + if (vext_check_isa_ill(s) && + vext_check_reg(s, a->rd, false) && + vext_check_reg(s, a->rs1, false)) { + + if (s->vl_eq_vlmax) { + tcg_gen_gvec_mov(s->sew, vreg_ofs(s, a->rd), + vreg_ofs(s, a->rs1), + MAXSZ(s), MAXSZ(s)); + } else { + uint32_t data = FIELD_DP32(0, VDATA, LMUL, s->lmul); + static gen_helper_gvec_2_ptr * const fns[4] = { + gen_helper_vmv_v_v_b, gen_helper_vmv_v_v_h, + gen_helper_vmv_v_v_w, gen_helper_vmv_v_v_d, + }; + TCGLabel *over = gen_new_label(); + tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + + tcg_gen_gvec_2_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, a->rs1), + cpu_env, 0, s->vlen / 8, data, fns[s->sew]); + gen_set_label(over); + } + return true; + } + return false; +} + +typedef void gen_helper_vmv_vx(TCGv_ptr, TCGv_i64, TCGv_env, TCGv_i32); +static bool trans_vmv_v_x(DisasContext *s, arg_vmv_v_x *a) +{ + if (vext_check_isa_ill(s) && + vext_check_reg(s, a->rd, false)) { + + TCGv s1; + TCGLabel *over = gen_new_label(); + tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + + s1 = tcg_temp_new(); + gen_get_gpr(s1, a->rs1); + + if (s->vl_eq_vlmax) { + tcg_gen_gvec_dup_tl(s->sew, vreg_ofs(s, a->rd), + MAXSZ(s), MAXSZ(s), s1); + } else { + TCGv_i32 desc ; + TCGv_i64 s1_i64 = tcg_temp_new_i64(); + TCGv_ptr dest = tcg_temp_new_ptr(); + uint32_t data = FIELD_DP32(0, VDATA, LMUL, s->lmul); + static gen_helper_vmv_vx * const fns[4] = { + gen_helper_vmv_v_x_b, gen_helper_vmv_v_x_h, + gen_helper_vmv_v_x_w, gen_helper_vmv_v_x_d, + }; + + tcg_gen_ext_tl_i64(s1_i64, s1); + desc = tcg_const_i32(simd_desc(0, s->vlen / 8, data)); + tcg_gen_addi_ptr(dest, cpu_env, vreg_ofs(s, a->rd)); + fns[s->sew](dest, s1_i64, cpu_env, desc); + + tcg_temp_free_ptr(dest); + tcg_temp_free_i32(desc); + tcg_temp_free_i64(s1_i64); + } + + tcg_temp_free(s1); + gen_set_label(over); + return true; + } + return false; +} + +static bool trans_vmv_v_i(DisasContext *s, arg_vmv_v_i *a) +{ + if (vext_check_isa_ill(s) && + vext_check_reg(s, a->rd, false)) { + + int64_t simm = sextract64(a->rs1, 0, 5); + if (s->vl_eq_vlmax) { + tcg_gen_gvec_dup_imm(s->sew, vreg_ofs(s, a->rd), + MAXSZ(s), MAXSZ(s), simm); + } else { + TCGv_i32 desc; + TCGv_i64 s1; + TCGv_ptr dest; + uint32_t data = FIELD_DP32(0, VDATA, LMUL, s->lmul); + static gen_helper_vmv_vx * const fns[4] = { + gen_helper_vmv_v_x_b, gen_helper_vmv_v_x_h, + gen_helper_vmv_v_x_w, gen_helper_vmv_v_x_d, + }; + TCGLabel *over = gen_new_label(); + tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + + s1 = tcg_const_i64(simm); + dest = tcg_temp_new_ptr(); + desc = tcg_const_i32(simd_desc(0, s->vlen / 8, data)); + tcg_gen_addi_ptr(dest, cpu_env, vreg_ofs(s, a->rd)); + fns[s->sew](dest, s1, cpu_env, desc); + + tcg_temp_free_ptr(dest); + tcg_temp_free_i32(desc); + tcg_temp_free_i64(s1); + gen_set_label(over); + } + return true; + } + return false; +} + +GEN_OPIVV_TRANS(vmerge_vvm, opivv_vadc_check) +GEN_OPIVX_TRANS(vmerge_vxm, opivx_vadc_check) +GEN_OPIVI_TRANS(vmerge_vim, 0, vmerge_vxm, opivx_vadc_check) diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -XXX,XX +XXX,XX @@ GEN_VEXT_VX(vwmaccsu_vx_w, 4, 8, clearq) GEN_VEXT_VX(vwmaccus_vx_b, 1, 2, clearh) GEN_VEXT_VX(vwmaccus_vx_h, 2, 4, clearl) GEN_VEXT_VX(vwmaccus_vx_w, 4, 8, clearq) + +/* Vector Integer Merge and Move Instructions */ +#define GEN_VEXT_VMV_VV(NAME, ETYPE, H, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env, \ + uint32_t desc) \ +{ \ + uint32_t vl = env->vl; \ + uint32_t esz = sizeof(ETYPE); \ + uint32_t vlmax = vext_maxsz(desc) / esz; \ + uint32_t i; \ + \ + for (i = 0; i < vl; i++) { \ + ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ + *((ETYPE *)vd + H(i)) = s1; \ + } \ + CLEAR_FN(vd, vl, vl * esz, vlmax * esz); \ +} + +GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t, H1, clearb) +GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2, clearh) +GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4, clearl) +GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8, clearq) + +#define GEN_VEXT_VMV_VX(NAME, ETYPE, H, CLEAR_FN) \ +void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env, \ + uint32_t desc) \ +{ \ + uint32_t vl = env->vl; \ + uint32_t esz = sizeof(ETYPE); \ + uint32_t vlmax = vext_maxsz(desc) / esz; \ + uint32_t i; \ + \ + for (i = 0; i < vl; i++) { \ + *((ETYPE *)vd + H(i)) = (ETYPE)s1; \ + } \ + CLEAR_FN(vd, vl, vl * esz, vlmax * esz); \ +} + +GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t, H1, clearb) +GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2, clearh) +GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4, clearl) +GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8, clearq) + +#define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vl = env->vl; \ + uint32_t esz = sizeof(ETYPE); \ + uint32_t vlmax = vext_maxsz(desc) / esz; \ + uint32_t i; \ + \ + for (i = 0; i < vl; i++) { \ + ETYPE *vt = (!vext_elem_mask(v0, mlen, i) ? vs2 : vs1); \ + *((ETYPE *)vd + H(i)) = *(vt + H(i)); \ + } \ + CLEAR_FN(vd, vl, vl * esz, vlmax * esz); \ +} + +GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t, H1, clearb) +GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2, clearh) +GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4, clearl) +GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8, clearq) + +#define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ + void *vs2, CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vl = env->vl; \ + uint32_t esz = sizeof(ETYPE); \ + uint32_t vlmax = vext_maxsz(desc) / esz; \ + uint32_t i; \ + \ + for (i = 0; i < vl; i++) { \ + ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ + ETYPE d = (!vext_elem_mask(v0, mlen, i) ? s2 : \ + (ETYPE)(target_long)s1); \ + *((ETYPE *)vd + H(i)) = d; \ + } \ + CLEAR_FN(vd, vl, vl * esz, vlmax * esz); \ +} + +GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t, H1, clearb) +GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2, clearh) +GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4, clearl) +GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8, clearq) -- 2.27.0
From: LIU Zhiwei <zhiwei_liu@c-sky.com> Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Message-id: 20200623215920.2594-25-zhiwei_liu@c-sky.com Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/helper.h | 33 ++ target/riscv/insn32.decode | 10 + target/riscv/insn_trans/trans_rvv.inc.c | 16 + target/riscv/vector_helper.c | 385 ++++++++++++++++++++++++ 4 files changed, 444 insertions(+) diff --git a/target/riscv/helper.h b/target/riscv/helper.h index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/helper.h +++ b/target/riscv/helper.h @@ -XXX,XX +XXX,XX @@ DEF_HELPER_4(vmv_v_x_b, void, ptr, i64, env, i32) DEF_HELPER_4(vmv_v_x_h, void, ptr, i64, env, i32) DEF_HELPER_4(vmv_v_x_w, void, ptr, i64, env, i32) DEF_HELPER_4(vmv_v_x_d, void, ptr, i64, env, i32) + +DEF_HELPER_6(vsaddu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsaddu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsaddu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsaddu_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsadd_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsadd_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsadd_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsadd_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vssubu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vssubu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vssubu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vssubu_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vssub_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vssub_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vssub_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vssub_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsaddu_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsaddu_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsaddu_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsaddu_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsadd_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsadd_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsadd_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsadd_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vssubu_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vssubu_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vssubu_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vssubu_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vssub_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vssub_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vssub_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vssub_vx_d, void, ptr, ptr, tl, ptr, env, i32) diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn32.decode +++ b/target/riscv/insn32.decode @@ -XXX,XX +XXX,XX @@ vmv_v_i 010111 1 00000 ..... 011 ..... 1010111 @r2 vmerge_vvm 010111 0 ..... ..... 000 ..... 1010111 @r_vm_0 vmerge_vxm 010111 0 ..... ..... 100 ..... 1010111 @r_vm_0 vmerge_vim 010111 0 ..... ..... 011 ..... 1010111 @r_vm_0 +vsaddu_vv 100000 . ..... ..... 000 ..... 1010111 @r_vm +vsaddu_vx 100000 . ..... ..... 100 ..... 1010111 @r_vm +vsaddu_vi 100000 . ..... ..... 011 ..... 1010111 @r_vm +vsadd_vv 100001 . ..... ..... 000 ..... 1010111 @r_vm +vsadd_vx 100001 . ..... ..... 100 ..... 1010111 @r_vm +vsadd_vi 100001 . ..... ..... 011 ..... 1010111 @r_vm +vssubu_vv 100010 . ..... ..... 000 ..... 1010111 @r_vm +vssubu_vx 100010 . ..... ..... 100 ..... 1010111 @r_vm +vssub_vv 100011 . ..... ..... 000 ..... 1010111 @r_vm +vssub_vx 100011 . ..... ..... 100 ..... 1010111 @r_vm vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm vsetvl 1000000 ..... ..... 111 ..... 1010111 @r diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn_trans/trans_rvv.inc.c +++ b/target/riscv/insn_trans/trans_rvv.inc.c @@ -XXX,XX +XXX,XX @@ static bool trans_vmv_v_i(DisasContext *s, arg_vmv_v_i *a) GEN_OPIVV_TRANS(vmerge_vvm, opivv_vadc_check) GEN_OPIVX_TRANS(vmerge_vxm, opivx_vadc_check) GEN_OPIVI_TRANS(vmerge_vim, 0, vmerge_vxm, opivx_vadc_check) + +/* + *** Vector Fixed-Point Arithmetic Instructions + */ + +/* Vector Single-Width Saturating Add and Subtract */ +GEN_OPIVV_TRANS(vsaddu_vv, opivv_check) +GEN_OPIVV_TRANS(vsadd_vv, opivv_check) +GEN_OPIVV_TRANS(vssubu_vv, opivv_check) +GEN_OPIVV_TRANS(vssub_vv, opivv_check) +GEN_OPIVX_TRANS(vsaddu_vx, opivx_check) +GEN_OPIVX_TRANS(vsadd_vx, opivx_check) +GEN_OPIVX_TRANS(vssubu_vx, opivx_check) +GEN_OPIVX_TRANS(vssub_vx, opivx_check) +GEN_OPIVI_TRANS(vsaddu_vi, 1, vsaddu_vx, opivx_check) +GEN_OPIVI_TRANS(vsadd_vi, 0, vsadd_vx, opivx_check) diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -XXX,XX +XXX,XX @@ GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t, H1, clearb) GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2, clearh) GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4, clearl) GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8, clearq) + +/* + *** Vector Fixed-Point Arithmetic Instructions + */ + +/* Vector Single-Width Saturating Add and Subtract */ + +/* + * As fixed point instructions probably have round mode and saturation, + * define common macros for fixed point here. + */ +typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i, + CPURISCVState *env, int vxrm); + +#define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ +static inline void \ +do_##NAME(void *vd, void *vs1, void *vs2, int i, \ + CPURISCVState *env, int vxrm) \ +{ \ + TX1 s1 = *((T1 *)vs1 + HS1(i)); \ + TX2 s2 = *((T2 *)vs2 + HS2(i)); \ + *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1); \ +} + +static inline void +vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2, + CPURISCVState *env, + uint32_t vl, uint32_t vm, uint32_t mlen, int vxrm, + opivv2_rm_fn *fn) +{ + for (uint32_t i = 0; i < vl; i++) { + if (!vm && !vext_elem_mask(v0, mlen, i)) { + continue; + } + fn(vd, vs1, vs2, i, env, vxrm); + } +} + +static inline void +vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2, + CPURISCVState *env, + uint32_t desc, uint32_t esz, uint32_t dsz, + opivv2_rm_fn *fn, clear_fn *clearfn) +{ + uint32_t vlmax = vext_maxsz(desc) / esz; + uint32_t mlen = vext_mlen(desc); + uint32_t vm = vext_vm(desc); + uint32_t vl = env->vl; + + switch (env->vxrm) { + case 0: /* rnu */ + vext_vv_rm_1(vd, v0, vs1, vs2, + env, vl, vm, mlen, 0, fn); + break; + case 1: /* rne */ + vext_vv_rm_1(vd, v0, vs1, vs2, + env, vl, vm, mlen, 1, fn); + break; + case 2: /* rdn */ + vext_vv_rm_1(vd, v0, vs1, vs2, + env, vl, vm, mlen, 2, fn); + break; + default: /* rod */ + vext_vv_rm_1(vd, v0, vs1, vs2, + env, vl, vm, mlen, 3, fn); + break; + } + + clearfn(vd, vl, vl * dsz, vlmax * dsz); +} + +/* generate helpers for fixed point instructions with OPIVV format */ +#define GEN_VEXT_VV_RM(NAME, ESZ, DSZ, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, ESZ, DSZ, \ + do_##NAME, CLEAR_FN); \ +} + +static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) +{ + uint8_t res = a + b; + if (res < a) { + res = UINT8_MAX; + env->vxsat = 0x1; + } + return res; +} + +static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a, + uint16_t b) +{ + uint16_t res = a + b; + if (res < a) { + res = UINT16_MAX; + env->vxsat = 0x1; + } + return res; +} + +static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a, + uint32_t b) +{ + uint32_t res = a + b; + if (res < a) { + res = UINT32_MAX; + env->vxsat = 0x1; + } + return res; +} + +static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a, + uint64_t b) +{ + uint64_t res = a + b; + if (res < a) { + res = UINT64_MAX; + env->vxsat = 0x1; + } + return res; +} + +RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8) +RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16) +RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32) +RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64) +GEN_VEXT_VV_RM(vsaddu_vv_b, 1, 1, clearb) +GEN_VEXT_VV_RM(vsaddu_vv_h, 2, 2, clearh) +GEN_VEXT_VV_RM(vsaddu_vv_w, 4, 4, clearl) +GEN_VEXT_VV_RM(vsaddu_vv_d, 8, 8, clearq) + +typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i, + CPURISCVState *env, int vxrm); + +#define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ +static inline void \ +do_##NAME(void *vd, target_long s1, void *vs2, int i, \ + CPURISCVState *env, int vxrm) \ +{ \ + TX2 s2 = *((T2 *)vs2 + HS2(i)); \ + *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1); \ +} + +static inline void +vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2, + CPURISCVState *env, + uint32_t vl, uint32_t vm, uint32_t mlen, int vxrm, + opivx2_rm_fn *fn) +{ + for (uint32_t i = 0; i < vl; i++) { + if (!vm && !vext_elem_mask(v0, mlen, i)) { + continue; + } + fn(vd, s1, vs2, i, env, vxrm); + } +} + +static inline void +vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2, + CPURISCVState *env, + uint32_t desc, uint32_t esz, uint32_t dsz, + opivx2_rm_fn *fn, clear_fn *clearfn) +{ + uint32_t vlmax = vext_maxsz(desc) / esz; + uint32_t mlen = vext_mlen(desc); + uint32_t vm = vext_vm(desc); + uint32_t vl = env->vl; + + switch (env->vxrm) { + case 0: /* rnu */ + vext_vx_rm_1(vd, v0, s1, vs2, + env, vl, vm, mlen, 0, fn); + break; + case 1: /* rne */ + vext_vx_rm_1(vd, v0, s1, vs2, + env, vl, vm, mlen, 1, fn); + break; + case 2: /* rdn */ + vext_vx_rm_1(vd, v0, s1, vs2, + env, vl, vm, mlen, 2, fn); + break; + default: /* rod */ + vext_vx_rm_1(vd, v0, s1, vs2, + env, vl, vm, mlen, 3, fn); + break; + } + + clearfn(vd, vl, vl * dsz, vlmax * dsz); +} + +/* generate helpers for fixed point instructions with OPIVX format */ +#define GEN_VEXT_VX_RM(NAME, ESZ, DSZ, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ + void *vs2, CPURISCVState *env, uint32_t desc) \ +{ \ + vext_vx_rm_2(vd, v0, s1, vs2, env, desc, ESZ, DSZ, \ + do_##NAME, CLEAR_FN); \ +} + +RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8) +RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16) +RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32) +RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64) +GEN_VEXT_VX_RM(vsaddu_vx_b, 1, 1, clearb) +GEN_VEXT_VX_RM(vsaddu_vx_h, 2, 2, clearh) +GEN_VEXT_VX_RM(vsaddu_vx_w, 4, 4, clearl) +GEN_VEXT_VX_RM(vsaddu_vx_d, 8, 8, clearq) + +static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) +{ + int8_t res = a + b; + if ((res ^ a) & (res ^ b) & INT8_MIN) { + res = a > 0 ? INT8_MAX : INT8_MIN; + env->vxsat = 0x1; + } + return res; +} + +static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) +{ + int16_t res = a + b; + if ((res ^ a) & (res ^ b) & INT16_MIN) { + res = a > 0 ? INT16_MAX : INT16_MIN; + env->vxsat = 0x1; + } + return res; +} + +static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) +{ + int32_t res = a + b; + if ((res ^ a) & (res ^ b) & INT32_MIN) { + res = a > 0 ? INT32_MAX : INT32_MIN; + env->vxsat = 0x1; + } + return res; +} + +static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) +{ + int64_t res = a + b; + if ((res ^ a) & (res ^ b) & INT64_MIN) { + res = a > 0 ? INT64_MAX : INT64_MIN; + env->vxsat = 0x1; + } + return res; +} + +RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8) +RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16) +RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32) +RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64) +GEN_VEXT_VV_RM(vsadd_vv_b, 1, 1, clearb) +GEN_VEXT_VV_RM(vsadd_vv_h, 2, 2, clearh) +GEN_VEXT_VV_RM(vsadd_vv_w, 4, 4, clearl) +GEN_VEXT_VV_RM(vsadd_vv_d, 8, 8, clearq) + +RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8) +RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16) +RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32) +RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64) +GEN_VEXT_VX_RM(vsadd_vx_b, 1, 1, clearb) +GEN_VEXT_VX_RM(vsadd_vx_h, 2, 2, clearh) +GEN_VEXT_VX_RM(vsadd_vx_w, 4, 4, clearl) +GEN_VEXT_VX_RM(vsadd_vx_d, 8, 8, clearq) + +static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) +{ + uint8_t res = a - b; + if (res > a) { + res = 0; + env->vxsat = 0x1; + } + return res; +} + +static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a, + uint16_t b) +{ + uint16_t res = a - b; + if (res > a) { + res = 0; + env->vxsat = 0x1; + } + return res; +} + +static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a, + uint32_t b) +{ + uint32_t res = a - b; + if (res > a) { + res = 0; + env->vxsat = 0x1; + } + return res; +} + +static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a, + uint64_t b) +{ + uint64_t res = a - b; + if (res > a) { + res = 0; + env->vxsat = 0x1; + } + return res; +} + +RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8) +RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16) +RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32) +RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64) +GEN_VEXT_VV_RM(vssubu_vv_b, 1, 1, clearb) +GEN_VEXT_VV_RM(vssubu_vv_h, 2, 2, clearh) +GEN_VEXT_VV_RM(vssubu_vv_w, 4, 4, clearl) +GEN_VEXT_VV_RM(vssubu_vv_d, 8, 8, clearq) + +RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8) +RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16) +RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32) +RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64) +GEN_VEXT_VX_RM(vssubu_vx_b, 1, 1, clearb) +GEN_VEXT_VX_RM(vssubu_vx_h, 2, 2, clearh) +GEN_VEXT_VX_RM(vssubu_vx_w, 4, 4, clearl) +GEN_VEXT_VX_RM(vssubu_vx_d, 8, 8, clearq) + +static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) +{ + int8_t res = a - b; + if ((res ^ a) & (a ^ b) & INT8_MIN) { + res = a > 0 ? INT8_MAX : INT8_MIN; + env->vxsat = 0x1; + } + return res; +} + +static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) +{ + int16_t res = a - b; + if ((res ^ a) & (a ^ b) & INT16_MIN) { + res = a > 0 ? INT16_MAX : INT16_MIN; + env->vxsat = 0x1; + } + return res; +} + +static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) +{ + int32_t res = a - b; + if ((res ^ a) & (a ^ b) & INT32_MIN) { + res = a > 0 ? INT32_MAX : INT32_MIN; + env->vxsat = 0x1; + } + return res; +} + +static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) +{ + int64_t res = a - b; + if ((res ^ a) & (a ^ b) & INT64_MIN) { + res = a > 0 ? INT64_MAX : INT64_MIN; + env->vxsat = 0x1; + } + return res; +} + +RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8) +RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16) +RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32) +RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64) +GEN_VEXT_VV_RM(vssub_vv_b, 1, 1, clearb) +GEN_VEXT_VV_RM(vssub_vv_h, 2, 2, clearh) +GEN_VEXT_VV_RM(vssub_vv_w, 4, 4, clearl) +GEN_VEXT_VV_RM(vssub_vv_d, 8, 8, clearq) + +RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8) +RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16) +RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32) +RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64) +GEN_VEXT_VX_RM(vssub_vx_b, 1, 1, clearb) +GEN_VEXT_VX_RM(vssub_vx_h, 2, 2, clearh) +GEN_VEXT_VX_RM(vssub_vx_w, 4, 4, clearl) +GEN_VEXT_VX_RM(vssub_vx_d, 8, 8, clearq) -- 2.27.0
From: LIU Zhiwei <zhiwei_liu@c-sky.com> Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Message-id: 20200623215920.2594-26-zhiwei_liu@c-sky.com Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/helper.h | 17 ++++ target/riscv/insn32.decode | 5 ++ target/riscv/insn_trans/trans_rvv.inc.c | 7 ++ target/riscv/vector_helper.c | 100 ++++++++++++++++++++++++ 4 files changed, 129 insertions(+) diff --git a/target/riscv/helper.h b/target/riscv/helper.h index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/helper.h +++ b/target/riscv/helper.h @@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vssub_vx_b, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vssub_vx_h, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vssub_vx_w, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vssub_vx_d, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vaadd_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vaadd_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vaadd_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vaadd_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vasub_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vasub_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vasub_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vasub_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vaadd_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vaadd_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vaadd_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vaadd_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vasub_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vasub_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vasub_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vasub_vx_d, void, ptr, ptr, tl, ptr, env, i32) diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn32.decode +++ b/target/riscv/insn32.decode @@ -XXX,XX +XXX,XX @@ vssubu_vv 100010 . ..... ..... 000 ..... 1010111 @r_vm vssubu_vx 100010 . ..... ..... 100 ..... 1010111 @r_vm vssub_vv 100011 . ..... ..... 000 ..... 1010111 @r_vm vssub_vx 100011 . ..... ..... 100 ..... 1010111 @r_vm +vaadd_vv 100100 . ..... ..... 000 ..... 1010111 @r_vm +vaadd_vx 100100 . ..... ..... 100 ..... 1010111 @r_vm +vaadd_vi 100100 . ..... ..... 011 ..... 1010111 @r_vm +vasub_vv 100110 . ..... ..... 000 ..... 1010111 @r_vm +vasub_vx 100110 . ..... ..... 100 ..... 1010111 @r_vm vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm vsetvl 1000000 ..... ..... 111 ..... 1010111 @r diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn_trans/trans_rvv.inc.c +++ b/target/riscv/insn_trans/trans_rvv.inc.c @@ -XXX,XX +XXX,XX @@ GEN_OPIVX_TRANS(vssubu_vx, opivx_check) GEN_OPIVX_TRANS(vssub_vx, opivx_check) GEN_OPIVI_TRANS(vsaddu_vi, 1, vsaddu_vx, opivx_check) GEN_OPIVI_TRANS(vsadd_vi, 0, vsadd_vx, opivx_check) + +/* Vector Single-Width Averaging Add and Subtract */ +GEN_OPIVV_TRANS(vaadd_vv, opivv_check) +GEN_OPIVV_TRANS(vasub_vv, opivv_check) +GEN_OPIVX_TRANS(vaadd_vx, opivx_check) +GEN_OPIVX_TRANS(vasub_vx, opivx_check) +GEN_OPIVI_TRANS(vaadd_vi, 0, vaadd_vx, opivx_check) diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -XXX,XX +XXX,XX @@ GEN_VEXT_VX_RM(vssub_vx_b, 1, 1, clearb) GEN_VEXT_VX_RM(vssub_vx_h, 2, 2, clearh) GEN_VEXT_VX_RM(vssub_vx_w, 4, 4, clearl) GEN_VEXT_VX_RM(vssub_vx_d, 8, 8, clearq) + +/* Vector Single-Width Averaging Add and Subtract */ +static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift) +{ + uint8_t d = extract64(v, shift, 1); + uint8_t d1; + uint64_t D1, D2; + + if (shift == 0 || shift > 64) { + return 0; + } + + d1 = extract64(v, shift - 1, 1); + D1 = extract64(v, 0, shift); + if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */ + return d1; + } else if (vxrm == 1) { /* round-to-nearest-even */ + if (shift > 1) { + D2 = extract64(v, 0, shift - 1); + return d1 & ((D2 != 0) | d); + } else { + return d1 & d; + } + } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */ + return !d & (D1 != 0); + } + return 0; /* round-down (truncate) */ +} + +static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) +{ + int64_t res = (int64_t)a + b; + uint8_t round = get_round(vxrm, res, 1); + + return (res >> 1) + round; +} + +static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) +{ + int64_t res = a + b; + uint8_t round = get_round(vxrm, res, 1); + int64_t over = (res ^ a) & (res ^ b) & INT64_MIN; + + /* With signed overflow, bit 64 is inverse of bit 63. */ + return ((res >> 1) ^ over) + round; +} + +RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32) +RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32) +RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32) +RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64) +GEN_VEXT_VV_RM(vaadd_vv_b, 1, 1, clearb) +GEN_VEXT_VV_RM(vaadd_vv_h, 2, 2, clearh) +GEN_VEXT_VV_RM(vaadd_vv_w, 4, 4, clearl) +GEN_VEXT_VV_RM(vaadd_vv_d, 8, 8, clearq) + +RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32) +RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32) +RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32) +RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64) +GEN_VEXT_VX_RM(vaadd_vx_b, 1, 1, clearb) +GEN_VEXT_VX_RM(vaadd_vx_h, 2, 2, clearh) +GEN_VEXT_VX_RM(vaadd_vx_w, 4, 4, clearl) +GEN_VEXT_VX_RM(vaadd_vx_d, 8, 8, clearq) + +static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) +{ + int64_t res = (int64_t)a - b; + uint8_t round = get_round(vxrm, res, 1); + + return (res >> 1) + round; +} + +static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) +{ + int64_t res = (int64_t)a - b; + uint8_t round = get_round(vxrm, res, 1); + int64_t over = (res ^ a) & (a ^ b) & INT64_MIN; + + /* With signed overflow, bit 64 is inverse of bit 63. */ + return ((res >> 1) ^ over) + round; +} + +RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32) +RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32) +RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32) +RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64) +GEN_VEXT_VV_RM(vasub_vv_b, 1, 1, clearb) +GEN_VEXT_VV_RM(vasub_vv_h, 2, 2, clearh) +GEN_VEXT_VV_RM(vasub_vv_w, 4, 4, clearl) +GEN_VEXT_VV_RM(vasub_vv_d, 8, 8, clearq) + +RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32) +RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32) +RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32) +RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64) +GEN_VEXT_VX_RM(vasub_vx_b, 1, 1, clearb) +GEN_VEXT_VX_RM(vasub_vx_h, 2, 2, clearh) +GEN_VEXT_VX_RM(vasub_vx_w, 4, 4, clearl) +GEN_VEXT_VX_RM(vasub_vx_d, 8, 8, clearq) -- 2.27.0
From: LIU Zhiwei <zhiwei_liu@c-sky.com> Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Message-id: 20200623215920.2594-27-zhiwei_liu@c-sky.com Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/helper.h | 9 ++ target/riscv/insn32.decode | 2 + target/riscv/insn_trans/trans_rvv.inc.c | 4 + target/riscv/vector_helper.c | 107 ++++++++++++++++++++++++ 4 files changed, 122 insertions(+) diff --git a/target/riscv/helper.h b/target/riscv/helper.h index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/helper.h +++ b/target/riscv/helper.h @@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vasub_vx_b, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vasub_vx_h, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vasub_vx_w, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vasub_vx_d, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vsmul_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsmul_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsmul_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsmul_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsmul_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsmul_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsmul_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsmul_vx_d, void, ptr, ptr, tl, ptr, env, i32) diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn32.decode +++ b/target/riscv/insn32.decode @@ -XXX,XX +XXX,XX @@ vaadd_vx 100100 . ..... ..... 100 ..... 1010111 @r_vm vaadd_vi 100100 . ..... ..... 011 ..... 1010111 @r_vm vasub_vv 100110 . ..... ..... 000 ..... 1010111 @r_vm vasub_vx 100110 . ..... ..... 100 ..... 1010111 @r_vm +vsmul_vv 100111 . ..... ..... 000 ..... 1010111 @r_vm +vsmul_vx 100111 . ..... ..... 100 ..... 1010111 @r_vm vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm vsetvl 1000000 ..... ..... 111 ..... 1010111 @r diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn_trans/trans_rvv.inc.c +++ b/target/riscv/insn_trans/trans_rvv.inc.c @@ -XXX,XX +XXX,XX @@ GEN_OPIVV_TRANS(vasub_vv, opivv_check) GEN_OPIVX_TRANS(vaadd_vx, opivx_check) GEN_OPIVX_TRANS(vasub_vx, opivx_check) GEN_OPIVI_TRANS(vaadd_vi, 0, vaadd_vx, opivx_check) + +/* Vector Single-Width Fractional Multiply with Rounding and Saturation */ +GEN_OPIVV_TRANS(vsmul_vv, opivv_check) +GEN_OPIVX_TRANS(vsmul_vx, opivx_check) diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -XXX,XX +XXX,XX @@ GEN_VEXT_VX_RM(vasub_vx_b, 1, 1, clearb) GEN_VEXT_VX_RM(vasub_vx_h, 2, 2, clearh) GEN_VEXT_VX_RM(vasub_vx_w, 4, 4, clearl) GEN_VEXT_VX_RM(vasub_vx_d, 8, 8, clearq) + +/* Vector Single-Width Fractional Multiply with Rounding and Saturation */ +static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) +{ + uint8_t round; + int16_t res; + + res = (int16_t)a * (int16_t)b; + round = get_round(vxrm, res, 7); + res = (res >> 7) + round; + + if (res > INT8_MAX) { + env->vxsat = 0x1; + return INT8_MAX; + } else if (res < INT8_MIN) { + env->vxsat = 0x1; + return INT8_MIN; + } else { + return res; + } +} + +static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) +{ + uint8_t round; + int32_t res; + + res = (int32_t)a * (int32_t)b; + round = get_round(vxrm, res, 15); + res = (res >> 15) + round; + + if (res > INT16_MAX) { + env->vxsat = 0x1; + return INT16_MAX; + } else if (res < INT16_MIN) { + env->vxsat = 0x1; + return INT16_MIN; + } else { + return res; + } +} + +static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) +{ + uint8_t round; + int64_t res; + + res = (int64_t)a * (int64_t)b; + round = get_round(vxrm, res, 31); + res = (res >> 31) + round; + + if (res > INT32_MAX) { + env->vxsat = 0x1; + return INT32_MAX; + } else if (res < INT32_MIN) { + env->vxsat = 0x1; + return INT32_MIN; + } else { + return res; + } +} + +static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) +{ + uint8_t round; + uint64_t hi_64, lo_64; + int64_t res; + + if (a == INT64_MIN && b == INT64_MIN) { + env->vxsat = 1; + return INT64_MAX; + } + + muls64(&lo_64, &hi_64, a, b); + round = get_round(vxrm, lo_64, 63); + /* + * Cannot overflow, as there are always + * 2 sign bits after multiply. + */ + res = (hi_64 << 1) | (lo_64 >> 63); + if (round) { + if (res == INT64_MAX) { + env->vxsat = 1; + } else { + res += 1; + } + } + return res; +} + +RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8) +RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16) +RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32) +RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64) +GEN_VEXT_VV_RM(vsmul_vv_b, 1, 1, clearb) +GEN_VEXT_VV_RM(vsmul_vv_h, 2, 2, clearh) +GEN_VEXT_VV_RM(vsmul_vv_w, 4, 4, clearl) +GEN_VEXT_VV_RM(vsmul_vv_d, 8, 8, clearq) + +RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8) +RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16) +RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32) +RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64) +GEN_VEXT_VX_RM(vsmul_vx_b, 1, 1, clearb) +GEN_VEXT_VX_RM(vsmul_vx_h, 2, 2, clearh) +GEN_VEXT_VX_RM(vsmul_vx_w, 4, 4, clearl) +GEN_VEXT_VX_RM(vsmul_vx_d, 8, 8, clearq) -- 2.27.0
From: LIU Zhiwei <zhiwei_liu@c-sky.com> Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Message-id: 20200623215920.2594-28-zhiwei_liu@c-sky.com Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/helper.h | 22 +++ target/riscv/insn32.decode | 7 + target/riscv/insn_trans/trans_rvv.inc.c | 9 ++ target/riscv/vector_helper.c | 205 ++++++++++++++++++++++++ 4 files changed, 243 insertions(+) diff --git a/target/riscv/helper.h b/target/riscv/helper.h index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/helper.h +++ b/target/riscv/helper.h @@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vsmul_vx_b, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vsmul_vx_h, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vsmul_vx_w, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vsmul_vx_d, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vwsmaccu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsmaccu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsmaccu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsmacc_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsmacc_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsmacc_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsmaccsu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsmaccsu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsmaccsu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsmaccu_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsmaccu_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsmaccu_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsmacc_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsmacc_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsmacc_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsmaccsu_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsmaccsu_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsmaccsu_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsmaccus_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsmaccus_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsmaccus_vx_w, void, ptr, ptr, tl, ptr, env, i32) diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn32.decode +++ b/target/riscv/insn32.decode @@ -XXX,XX +XXX,XX @@ vasub_vv 100110 . ..... ..... 000 ..... 1010111 @r_vm vasub_vx 100110 . ..... ..... 100 ..... 1010111 @r_vm vsmul_vv 100111 . ..... ..... 000 ..... 1010111 @r_vm vsmul_vx 100111 . ..... ..... 100 ..... 1010111 @r_vm +vwsmaccu_vv 111100 . ..... ..... 000 ..... 1010111 @r_vm +vwsmaccu_vx 111100 . ..... ..... 100 ..... 1010111 @r_vm +vwsmacc_vv 111101 . ..... ..... 000 ..... 1010111 @r_vm +vwsmacc_vx 111101 . ..... ..... 100 ..... 1010111 @r_vm +vwsmaccsu_vv 111110 . ..... ..... 000 ..... 1010111 @r_vm +vwsmaccsu_vx 111110 . ..... ..... 100 ..... 1010111 @r_vm +vwsmaccus_vx 111111 . ..... ..... 100 ..... 1010111 @r_vm vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm vsetvl 1000000 ..... ..... 111 ..... 1010111 @r diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn_trans/trans_rvv.inc.c +++ b/target/riscv/insn_trans/trans_rvv.inc.c @@ -XXX,XX +XXX,XX @@ GEN_OPIVI_TRANS(vaadd_vi, 0, vaadd_vx, opivx_check) /* Vector Single-Width Fractional Multiply with Rounding and Saturation */ GEN_OPIVV_TRANS(vsmul_vv, opivv_check) GEN_OPIVX_TRANS(vsmul_vx, opivx_check) + +/* Vector Widening Saturating Scaled Multiply-Add */ +GEN_OPIVV_WIDEN_TRANS(vwsmaccu_vv, opivv_widen_check) +GEN_OPIVV_WIDEN_TRANS(vwsmacc_vv, opivv_widen_check) +GEN_OPIVV_WIDEN_TRANS(vwsmaccsu_vv, opivv_widen_check) +GEN_OPIVX_WIDEN_TRANS(vwsmaccu_vx) +GEN_OPIVX_WIDEN_TRANS(vwsmacc_vx) +GEN_OPIVX_WIDEN_TRANS(vwsmaccsu_vx) +GEN_OPIVX_WIDEN_TRANS(vwsmaccus_vx) diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -XXX,XX +XXX,XX @@ GEN_VEXT_VX_RM(vsmul_vx_b, 1, 1, clearb) GEN_VEXT_VX_RM(vsmul_vx_h, 2, 2, clearh) GEN_VEXT_VX_RM(vsmul_vx_w, 4, 4, clearl) GEN_VEXT_VX_RM(vsmul_vx_d, 8, 8, clearq) + +/* Vector Widening Saturating Scaled Multiply-Add */ +static inline uint16_t +vwsmaccu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b, + uint16_t c) +{ + uint8_t round; + uint16_t res = (uint16_t)a * b; + + round = get_round(vxrm, res, 4); + res = (res >> 4) + round; + return saddu16(env, vxrm, c, res); +} + +static inline uint32_t +vwsmaccu16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b, + uint32_t c) +{ + uint8_t round; + uint32_t res = (uint32_t)a * b; + + round = get_round(vxrm, res, 8); + res = (res >> 8) + round; + return saddu32(env, vxrm, c, res); +} + +static inline uint64_t +vwsmaccu32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b, + uint64_t c) +{ + uint8_t round; + uint64_t res = (uint64_t)a * b; + + round = get_round(vxrm, res, 16); + res = (res >> 16) + round; + return saddu64(env, vxrm, c, res); +} + +#define OPIVV3_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ +static inline void \ +do_##NAME(void *vd, void *vs1, void *vs2, int i, \ + CPURISCVState *env, int vxrm) \ +{ \ + TX1 s1 = *((T1 *)vs1 + HS1(i)); \ + TX2 s2 = *((T2 *)vs2 + HS2(i)); \ + TD d = *((TD *)vd + HD(i)); \ + *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1, d); \ +} + +RVVCALL(OPIVV3_RM, vwsmaccu_vv_b, WOP_UUU_B, H2, H1, H1, vwsmaccu8) +RVVCALL(OPIVV3_RM, vwsmaccu_vv_h, WOP_UUU_H, H4, H2, H2, vwsmaccu16) +RVVCALL(OPIVV3_RM, vwsmaccu_vv_w, WOP_UUU_W, H8, H4, H4, vwsmaccu32) +GEN_VEXT_VV_RM(vwsmaccu_vv_b, 1, 2, clearh) +GEN_VEXT_VV_RM(vwsmaccu_vv_h, 2, 4, clearl) +GEN_VEXT_VV_RM(vwsmaccu_vv_w, 4, 8, clearq) + +#define OPIVX3_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ +static inline void \ +do_##NAME(void *vd, target_long s1, void *vs2, int i, \ + CPURISCVState *env, int vxrm) \ +{ \ + TX2 s2 = *((T2 *)vs2 + HS2(i)); \ + TD d = *((TD *)vd + HD(i)); \ + *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1, d); \ +} + +RVVCALL(OPIVX3_RM, vwsmaccu_vx_b, WOP_UUU_B, H2, H1, vwsmaccu8) +RVVCALL(OPIVX3_RM, vwsmaccu_vx_h, WOP_UUU_H, H4, H2, vwsmaccu16) +RVVCALL(OPIVX3_RM, vwsmaccu_vx_w, WOP_UUU_W, H8, H4, vwsmaccu32) +GEN_VEXT_VX_RM(vwsmaccu_vx_b, 1, 2, clearh) +GEN_VEXT_VX_RM(vwsmaccu_vx_h, 2, 4, clearl) +GEN_VEXT_VX_RM(vwsmaccu_vx_w, 4, 8, clearq) + +static inline int16_t +vwsmacc8(CPURISCVState *env, int vxrm, int8_t a, int8_t b, int16_t c) +{ + uint8_t round; + int16_t res = (int16_t)a * b; + + round = get_round(vxrm, res, 4); + res = (res >> 4) + round; + return sadd16(env, vxrm, c, res); +} + +static inline int32_t +vwsmacc16(CPURISCVState *env, int vxrm, int16_t a, int16_t b, int32_t c) +{ + uint8_t round; + int32_t res = (int32_t)a * b; + + round = get_round(vxrm, res, 8); + res = (res >> 8) + round; + return sadd32(env, vxrm, c, res); + +} + +static inline int64_t +vwsmacc32(CPURISCVState *env, int vxrm, int32_t a, int32_t b, int64_t c) +{ + uint8_t round; + int64_t res = (int64_t)a * b; + + round = get_round(vxrm, res, 16); + res = (res >> 16) + round; + return sadd64(env, vxrm, c, res); +} + +RVVCALL(OPIVV3_RM, vwsmacc_vv_b, WOP_SSS_B, H2, H1, H1, vwsmacc8) +RVVCALL(OPIVV3_RM, vwsmacc_vv_h, WOP_SSS_H, H4, H2, H2, vwsmacc16) +RVVCALL(OPIVV3_RM, vwsmacc_vv_w, WOP_SSS_W, H8, H4, H4, vwsmacc32) +GEN_VEXT_VV_RM(vwsmacc_vv_b, 1, 2, clearh) +GEN_VEXT_VV_RM(vwsmacc_vv_h, 2, 4, clearl) +GEN_VEXT_VV_RM(vwsmacc_vv_w, 4, 8, clearq) +RVVCALL(OPIVX3_RM, vwsmacc_vx_b, WOP_SSS_B, H2, H1, vwsmacc8) +RVVCALL(OPIVX3_RM, vwsmacc_vx_h, WOP_SSS_H, H4, H2, vwsmacc16) +RVVCALL(OPIVX3_RM, vwsmacc_vx_w, WOP_SSS_W, H8, H4, vwsmacc32) +GEN_VEXT_VX_RM(vwsmacc_vx_b, 1, 2, clearh) +GEN_VEXT_VX_RM(vwsmacc_vx_h, 2, 4, clearl) +GEN_VEXT_VX_RM(vwsmacc_vx_w, 4, 8, clearq) + +static inline int16_t +vwsmaccsu8(CPURISCVState *env, int vxrm, uint8_t a, int8_t b, int16_t c) +{ + uint8_t round; + int16_t res = a * (int16_t)b; + + round = get_round(vxrm, res, 4); + res = (res >> 4) + round; + return ssub16(env, vxrm, c, res); +} + +static inline int32_t +vwsmaccsu16(CPURISCVState *env, int vxrm, uint16_t a, int16_t b, uint32_t c) +{ + uint8_t round; + int32_t res = a * (int32_t)b; + + round = get_round(vxrm, res, 8); + res = (res >> 8) + round; + return ssub32(env, vxrm, c, res); +} + +static inline int64_t +vwsmaccsu32(CPURISCVState *env, int vxrm, uint32_t a, int32_t b, int64_t c) +{ + uint8_t round; + int64_t res = a * (int64_t)b; + + round = get_round(vxrm, res, 16); + res = (res >> 16) + round; + return ssub64(env, vxrm, c, res); +} + +RVVCALL(OPIVV3_RM, vwsmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, vwsmaccsu8) +RVVCALL(OPIVV3_RM, vwsmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, vwsmaccsu16) +RVVCALL(OPIVV3_RM, vwsmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, vwsmaccsu32) +GEN_VEXT_VV_RM(vwsmaccsu_vv_b, 1, 2, clearh) +GEN_VEXT_VV_RM(vwsmaccsu_vv_h, 2, 4, clearl) +GEN_VEXT_VV_RM(vwsmaccsu_vv_w, 4, 8, clearq) +RVVCALL(OPIVX3_RM, vwsmaccsu_vx_b, WOP_SSU_B, H2, H1, vwsmaccsu8) +RVVCALL(OPIVX3_RM, vwsmaccsu_vx_h, WOP_SSU_H, H4, H2, vwsmaccsu16) +RVVCALL(OPIVX3_RM, vwsmaccsu_vx_w, WOP_SSU_W, H8, H4, vwsmaccsu32) +GEN_VEXT_VX_RM(vwsmaccsu_vx_b, 1, 2, clearh) +GEN_VEXT_VX_RM(vwsmaccsu_vx_h, 2, 4, clearl) +GEN_VEXT_VX_RM(vwsmaccsu_vx_w, 4, 8, clearq) + +static inline int16_t +vwsmaccus8(CPURISCVState *env, int vxrm, int8_t a, uint8_t b, int16_t c) +{ + uint8_t round; + int16_t res = (int16_t)a * b; + + round = get_round(vxrm, res, 4); + res = (res >> 4) + round; + return ssub16(env, vxrm, c, res); +} + +static inline int32_t +vwsmaccus16(CPURISCVState *env, int vxrm, int16_t a, uint16_t b, int32_t c) +{ + uint8_t round; + int32_t res = (int32_t)a * b; + + round = get_round(vxrm, res, 8); + res = (res >> 8) + round; + return ssub32(env, vxrm, c, res); +} + +static inline int64_t +vwsmaccus32(CPURISCVState *env, int vxrm, int32_t a, uint32_t b, int64_t c) +{ + uint8_t round; + int64_t res = (int64_t)a * b; + + round = get_round(vxrm, res, 16); + res = (res >> 16) + round; + return ssub64(env, vxrm, c, res); +} + +RVVCALL(OPIVX3_RM, vwsmaccus_vx_b, WOP_SUS_B, H2, H1, vwsmaccus8) +RVVCALL(OPIVX3_RM, vwsmaccus_vx_h, WOP_SUS_H, H4, H2, vwsmaccus16) +RVVCALL(OPIVX3_RM, vwsmaccus_vx_w, WOP_SUS_W, H8, H4, vwsmaccus32) +GEN_VEXT_VX_RM(vwsmaccus_vx_b, 1, 2, clearh) +GEN_VEXT_VX_RM(vwsmaccus_vx_h, 2, 4, clearl) +GEN_VEXT_VX_RM(vwsmaccus_vx_w, 4, 8, clearq) -- 2.27.0
From: LIU Zhiwei <zhiwei_liu@c-sky.com> Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Message-id: 20200623215920.2594-29-zhiwei_liu@c-sky.com Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/helper.h | 17 ++++ target/riscv/insn32.decode | 6 ++ target/riscv/insn_trans/trans_rvv.inc.c | 8 ++ target/riscv/vector_helper.c | 117 ++++++++++++++++++++++++ 4 files changed, 148 insertions(+) diff --git a/target/riscv/helper.h b/target/riscv/helper.h index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/helper.h +++ b/target/riscv/helper.h @@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vwsmaccsu_vx_w, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vwsmaccus_vx_b, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vwsmaccus_vx_h, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vwsmaccus_vx_w, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vssrl_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vssrl_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vssrl_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vssrl_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vssra_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vssra_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vssra_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vssra_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vssrl_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vssrl_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vssrl_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vssrl_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vssra_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vssra_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vssra_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vssra_vx_d, void, ptr, ptr, tl, ptr, env, i32) diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn32.decode +++ b/target/riscv/insn32.decode @@ -XXX,XX +XXX,XX @@ vwsmacc_vx 111101 . ..... ..... 100 ..... 1010111 @r_vm vwsmaccsu_vv 111110 . ..... ..... 000 ..... 1010111 @r_vm vwsmaccsu_vx 111110 . ..... ..... 100 ..... 1010111 @r_vm vwsmaccus_vx 111111 . ..... ..... 100 ..... 1010111 @r_vm +vssrl_vv 101010 . ..... ..... 000 ..... 1010111 @r_vm +vssrl_vx 101010 . ..... ..... 100 ..... 1010111 @r_vm +vssrl_vi 101010 . ..... ..... 011 ..... 1010111 @r_vm +vssra_vv 101011 . ..... ..... 000 ..... 1010111 @r_vm +vssra_vx 101011 . ..... ..... 100 ..... 1010111 @r_vm +vssra_vi 101011 . ..... ..... 011 ..... 1010111 @r_vm vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm vsetvl 1000000 ..... ..... 111 ..... 1010111 @r diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn_trans/trans_rvv.inc.c +++ b/target/riscv/insn_trans/trans_rvv.inc.c @@ -XXX,XX +XXX,XX @@ GEN_OPIVX_WIDEN_TRANS(vwsmaccu_vx) GEN_OPIVX_WIDEN_TRANS(vwsmacc_vx) GEN_OPIVX_WIDEN_TRANS(vwsmaccsu_vx) GEN_OPIVX_WIDEN_TRANS(vwsmaccus_vx) + +/* Vector Single-Width Scaling Shift Instructions */ +GEN_OPIVV_TRANS(vssrl_vv, opivv_check) +GEN_OPIVV_TRANS(vssra_vv, opivv_check) +GEN_OPIVX_TRANS(vssrl_vx, opivx_check) +GEN_OPIVX_TRANS(vssra_vx, opivx_check) +GEN_OPIVI_TRANS(vssrl_vi, 1, vssrl_vx, opivx_check) +GEN_OPIVI_TRANS(vssra_vi, 0, vssra_vx, opivx_check) diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVX3_RM, vwsmaccus_vx_w, WOP_SUS_W, H8, H4, vwsmaccus32) GEN_VEXT_VX_RM(vwsmaccus_vx_b, 1, 2, clearh) GEN_VEXT_VX_RM(vwsmaccus_vx_h, 2, 4, clearl) GEN_VEXT_VX_RM(vwsmaccus_vx_w, 4, 8, clearq) + +/* Vector Single-Width Scaling Shift Instructions */ +static inline uint8_t +vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) +{ + uint8_t round, shift = b & 0x7; + uint8_t res; + + round = get_round(vxrm, a, shift); + res = (a >> shift) + round; + return res; +} +static inline uint16_t +vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b) +{ + uint8_t round, shift = b & 0xf; + uint16_t res; + + round = get_round(vxrm, a, shift); + res = (a >> shift) + round; + return res; +} +static inline uint32_t +vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b) +{ + uint8_t round, shift = b & 0x1f; + uint32_t res; + + round = get_round(vxrm, a, shift); + res = (a >> shift) + round; + return res; +} +static inline uint64_t +vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b) +{ + uint8_t round, shift = b & 0x3f; + uint64_t res; + + round = get_round(vxrm, a, shift); + res = (a >> shift) + round; + return res; +} +RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8) +RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16) +RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32) +RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64) +GEN_VEXT_VV_RM(vssrl_vv_b, 1, 1, clearb) +GEN_VEXT_VV_RM(vssrl_vv_h, 2, 2, clearh) +GEN_VEXT_VV_RM(vssrl_vv_w, 4, 4, clearl) +GEN_VEXT_VV_RM(vssrl_vv_d, 8, 8, clearq) + +RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8) +RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16) +RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32) +RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64) +GEN_VEXT_VX_RM(vssrl_vx_b, 1, 1, clearb) +GEN_VEXT_VX_RM(vssrl_vx_h, 2, 2, clearh) +GEN_VEXT_VX_RM(vssrl_vx_w, 4, 4, clearl) +GEN_VEXT_VX_RM(vssrl_vx_d, 8, 8, clearq) + +static inline int8_t +vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) +{ + uint8_t round, shift = b & 0x7; + int8_t res; + + round = get_round(vxrm, a, shift); + res = (a >> shift) + round; + return res; +} +static inline int16_t +vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) +{ + uint8_t round, shift = b & 0xf; + int16_t res; + + round = get_round(vxrm, a, shift); + res = (a >> shift) + round; + return res; +} +static inline int32_t +vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) +{ + uint8_t round, shift = b & 0x1f; + int32_t res; + + round = get_round(vxrm, a, shift); + res = (a >> shift) + round; + return res; +} +static inline int64_t +vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) +{ + uint8_t round, shift = b & 0x3f; + int64_t res; + + round = get_round(vxrm, a, shift); + res = (a >> shift) + round; + return res; +} +RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8) +RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16) +RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32) +RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64) +GEN_VEXT_VV_RM(vssra_vv_b, 1, 1, clearb) +GEN_VEXT_VV_RM(vssra_vv_h, 2, 2, clearh) +GEN_VEXT_VV_RM(vssra_vv_w, 4, 4, clearl) +GEN_VEXT_VV_RM(vssra_vv_d, 8, 8, clearq) + +RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8) +RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16) +RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32) +RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64) +GEN_VEXT_VX_RM(vssra_vx_b, 1, 1, clearb) +GEN_VEXT_VX_RM(vssra_vx_h, 2, 2, clearh) +GEN_VEXT_VX_RM(vssra_vx_w, 4, 4, clearl) +GEN_VEXT_VX_RM(vssra_vx_d, 8, 8, clearq) -- 2.27.0
From: LIU Zhiwei <zhiwei_liu@c-sky.com> Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Message-id: 20200623215920.2594-30-zhiwei_liu@c-sky.com Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/helper.h | 13 +++ target/riscv/insn32.decode | 6 + target/riscv/insn_trans/trans_rvv.inc.c | 8 ++ target/riscv/vector_helper.c | 141 ++++++++++++++++++++++++ 4 files changed, 168 insertions(+) diff --git a/target/riscv/helper.h b/target/riscv/helper.h index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/helper.h +++ b/target/riscv/helper.h @@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vssra_vx_b, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vssra_vx_h, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vssra_vx_w, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vssra_vx_d, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vnclip_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vnclip_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vnclip_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vnclipu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vnclipu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vnclipu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vnclipu_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vnclipu_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vnclipu_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vnclip_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vnclip_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vnclip_vx_w, void, ptr, ptr, tl, ptr, env, i32) diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn32.decode +++ b/target/riscv/insn32.decode @@ -XXX,XX +XXX,XX @@ vssrl_vi 101010 . ..... ..... 011 ..... 1010111 @r_vm vssra_vv 101011 . ..... ..... 000 ..... 1010111 @r_vm vssra_vx 101011 . ..... ..... 100 ..... 1010111 @r_vm vssra_vi 101011 . ..... ..... 011 ..... 1010111 @r_vm +vnclipu_vv 101110 . ..... ..... 000 ..... 1010111 @r_vm +vnclipu_vx 101110 . ..... ..... 100 ..... 1010111 @r_vm +vnclipu_vi 101110 . ..... ..... 011 ..... 1010111 @r_vm +vnclip_vv 101111 . ..... ..... 000 ..... 1010111 @r_vm +vnclip_vx 101111 . ..... ..... 100 ..... 1010111 @r_vm +vnclip_vi 101111 . ..... ..... 011 ..... 1010111 @r_vm vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm vsetvl 1000000 ..... ..... 111 ..... 1010111 @r diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn_trans/trans_rvv.inc.c +++ b/target/riscv/insn_trans/trans_rvv.inc.c @@ -XXX,XX +XXX,XX @@ GEN_OPIVX_TRANS(vssrl_vx, opivx_check) GEN_OPIVX_TRANS(vssra_vx, opivx_check) GEN_OPIVI_TRANS(vssrl_vi, 1, vssrl_vx, opivx_check) GEN_OPIVI_TRANS(vssra_vi, 0, vssra_vx, opivx_check) + +/* Vector Narrowing Fixed-Point Clip Instructions */ +GEN_OPIVV_NARROW_TRANS(vnclipu_vv) +GEN_OPIVV_NARROW_TRANS(vnclip_vv) +GEN_OPIVX_NARROW_TRANS(vnclipu_vx) +GEN_OPIVX_NARROW_TRANS(vnclip_vx) +GEN_OPIVI_NARROW_TRANS(vnclipu_vi, 1, vnclipu_vx) +GEN_OPIVI_NARROW_TRANS(vnclip_vi, 1, vnclip_vx) diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -XXX,XX +XXX,XX @@ GEN_VEXT_AMO(vamomaxuw_v_w, uint32_t, uint32_t, idx_w, clearl) #define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t #define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t #define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t +#define NOP_SSS_B int8_t, int8_t, int16_t, int8_t, int16_t +#define NOP_SSS_H int16_t, int16_t, int32_t, int16_t, int32_t +#define NOP_SSS_W int32_t, int32_t, int64_t, int32_t, int64_t +#define NOP_UUU_B uint8_t, uint8_t, uint16_t, uint8_t, uint16_t +#define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t +#define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t /* operation of two vector elements */ typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i); @@ -XXX,XX +XXX,XX @@ vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) res = (a >> shift) + round; return res; } + RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8) RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16) RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32) @@ -XXX,XX +XXX,XX @@ GEN_VEXT_VX_RM(vssra_vx_b, 1, 1, clearb) GEN_VEXT_VX_RM(vssra_vx_h, 2, 2, clearh) GEN_VEXT_VX_RM(vssra_vx_w, 4, 4, clearl) GEN_VEXT_VX_RM(vssra_vx_d, 8, 8, clearq) + +/* Vector Narrowing Fixed-Point Clip Instructions */ +static inline int8_t +vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b) +{ + uint8_t round, shift = b & 0xf; + int16_t res; + + round = get_round(vxrm, a, shift); + res = (a >> shift) + round; + if (res > INT8_MAX) { + env->vxsat = 0x1; + return INT8_MAX; + } else if (res < INT8_MIN) { + env->vxsat = 0x1; + return INT8_MIN; + } else { + return res; + } +} + +static inline int16_t +vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b) +{ + uint8_t round, shift = b & 0x1f; + int32_t res; + + round = get_round(vxrm, a, shift); + res = (a >> shift) + round; + if (res > INT16_MAX) { + env->vxsat = 0x1; + return INT16_MAX; + } else if (res < INT16_MIN) { + env->vxsat = 0x1; + return INT16_MIN; + } else { + return res; + } +} + +static inline int32_t +vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b) +{ + uint8_t round, shift = b & 0x3f; + int64_t res; + + round = get_round(vxrm, a, shift); + res = (a >> shift) + round; + if (res > INT32_MAX) { + env->vxsat = 0x1; + return INT32_MAX; + } else if (res < INT32_MIN) { + env->vxsat = 0x1; + return INT32_MIN; + } else { + return res; + } +} + +RVVCALL(OPIVV2_RM, vnclip_vv_b, NOP_SSS_B, H1, H2, H1, vnclip8) +RVVCALL(OPIVV2_RM, vnclip_vv_h, NOP_SSS_H, H2, H4, H2, vnclip16) +RVVCALL(OPIVV2_RM, vnclip_vv_w, NOP_SSS_W, H4, H8, H4, vnclip32) +GEN_VEXT_VV_RM(vnclip_vv_b, 1, 1, clearb) +GEN_VEXT_VV_RM(vnclip_vv_h, 2, 2, clearh) +GEN_VEXT_VV_RM(vnclip_vv_w, 4, 4, clearl) + +RVVCALL(OPIVX2_RM, vnclip_vx_b, NOP_SSS_B, H1, H2, vnclip8) +RVVCALL(OPIVX2_RM, vnclip_vx_h, NOP_SSS_H, H2, H4, vnclip16) +RVVCALL(OPIVX2_RM, vnclip_vx_w, NOP_SSS_W, H4, H8, vnclip32) +GEN_VEXT_VX_RM(vnclip_vx_b, 1, 1, clearb) +GEN_VEXT_VX_RM(vnclip_vx_h, 2, 2, clearh) +GEN_VEXT_VX_RM(vnclip_vx_w, 4, 4, clearl) + +static inline uint8_t +vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b) +{ + uint8_t round, shift = b & 0xf; + uint16_t res; + + round = get_round(vxrm, a, shift); + res = (a >> shift) + round; + if (res > UINT8_MAX) { + env->vxsat = 0x1; + return UINT8_MAX; + } else { + return res; + } +} + +static inline uint16_t +vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b) +{ + uint8_t round, shift = b & 0x1f; + uint32_t res; + + round = get_round(vxrm, a, shift); + res = (a >> shift) + round; + if (res > UINT16_MAX) { + env->vxsat = 0x1; + return UINT16_MAX; + } else { + return res; + } +} + +static inline uint32_t +vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b) +{ + uint8_t round, shift = b & 0x3f; + int64_t res; + + round = get_round(vxrm, a, shift); + res = (a >> shift) + round; + if (res > UINT32_MAX) { + env->vxsat = 0x1; + return UINT32_MAX; + } else { + return res; + } +} + +RVVCALL(OPIVV2_RM, vnclipu_vv_b, NOP_UUU_B, H1, H2, H1, vnclipu8) +RVVCALL(OPIVV2_RM, vnclipu_vv_h, NOP_UUU_H, H2, H4, H2, vnclipu16) +RVVCALL(OPIVV2_RM, vnclipu_vv_w, NOP_UUU_W, H4, H8, H4, vnclipu32) +GEN_VEXT_VV_RM(vnclipu_vv_b, 1, 1, clearb) +GEN_VEXT_VV_RM(vnclipu_vv_h, 2, 2, clearh) +GEN_VEXT_VV_RM(vnclipu_vv_w, 4, 4, clearl) + +RVVCALL(OPIVX2_RM, vnclipu_vx_b, NOP_UUU_B, H1, H2, vnclipu8) +RVVCALL(OPIVX2_RM, vnclipu_vx_h, NOP_UUU_H, H2, H4, vnclipu16) +RVVCALL(OPIVX2_RM, vnclipu_vx_w, NOP_UUU_W, H4, H8, vnclipu32) +GEN_VEXT_VX_RM(vnclipu_vx_b, 1, 1, clearb) +GEN_VEXT_VX_RM(vnclipu_vx_h, 2, 2, clearh) +GEN_VEXT_VX_RM(vnclipu_vx_w, 4, 4, clearl) -- 2.27.0
From: LIU Zhiwei <zhiwei_liu@c-sky.com> Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Message-id: 20200623215920.2594-31-zhiwei_liu@c-sky.com Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/helper.h | 16 ++++ target/riscv/insn32.decode | 5 + target/riscv/insn_trans/trans_rvv.inc.c | 118 ++++++++++++++++++++++++ target/riscv/vector_helper.c | 111 ++++++++++++++++++++++ 4 files changed, 250 insertions(+) diff --git a/target/riscv/helper.h b/target/riscv/helper.h index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/helper.h +++ b/target/riscv/helper.h @@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vnclipu_vx_w, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vnclip_vx_b, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vnclip_vx_h, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vnclip_vx_w, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vfadd_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfadd_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfadd_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfsub_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfsub_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfsub_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfadd_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfadd_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfadd_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfsub_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfsub_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfsub_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfrsub_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfrsub_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfrsub_vf_d, void, ptr, ptr, i64, ptr, env, i32) diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn32.decode +++ b/target/riscv/insn32.decode @@ -XXX,XX +XXX,XX @@ vnclipu_vi 101110 . ..... ..... 011 ..... 1010111 @r_vm vnclip_vv 101111 . ..... ..... 000 ..... 1010111 @r_vm vnclip_vx 101111 . ..... ..... 100 ..... 1010111 @r_vm vnclip_vi 101111 . ..... ..... 011 ..... 1010111 @r_vm +vfadd_vv 000000 . ..... ..... 001 ..... 1010111 @r_vm +vfadd_vf 000000 . ..... ..... 101 ..... 1010111 @r_vm +vfsub_vv 000010 . ..... ..... 001 ..... 1010111 @r_vm +vfsub_vf 000010 . ..... ..... 101 ..... 1010111 @r_vm +vfrsub_vf 100111 . ..... ..... 101 ..... 1010111 @r_vm vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm vsetvl 1000000 ..... ..... 111 ..... 1010111 @r diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn_trans/trans_rvv.inc.c +++ b/target/riscv/insn_trans/trans_rvv.inc.c @@ -XXX,XX +XXX,XX @@ GEN_OPIVX_NARROW_TRANS(vnclipu_vx) GEN_OPIVX_NARROW_TRANS(vnclip_vx) GEN_OPIVI_NARROW_TRANS(vnclipu_vi, 1, vnclipu_vx) GEN_OPIVI_NARROW_TRANS(vnclip_vi, 1, vnclip_vx) + +/* + *** Vector Float Point Arithmetic Instructions + */ +/* Vector Single-Width Floating-Point Add/Subtract Instructions */ + +/* + * If the current SEW does not correspond to a supported IEEE floating-point + * type, an illegal instruction exception is raised. + */ +static bool opfvv_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, false) && + vext_check_reg(s, a->rd, false) && + vext_check_reg(s, a->rs2, false) && + vext_check_reg(s, a->rs1, false) && + (s->sew != 0)); +} + +/* OPFVV without GVEC IR */ +#define GEN_OPFVV_TRANS(NAME, CHECK) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + if (CHECK(s, a)) { \ + uint32_t data = 0; \ + static gen_helper_gvec_4_ptr * const fns[3] = { \ + gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w, \ + gen_helper_##NAME##_d, \ + }; \ + TCGLabel *over = gen_new_label(); \ + gen_set_rm(s, 7); \ + tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ + \ + data = FIELD_DP32(data, VDATA, MLEN, s->mlen); \ + data = FIELD_DP32(data, VDATA, VM, a->vm); \ + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ + tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ + vreg_ofs(s, a->rs1), \ + vreg_ofs(s, a->rs2), cpu_env, 0, \ + s->vlen / 8, data, fns[s->sew - 1]); \ + gen_set_label(over); \ + return true; \ + } \ + return false; \ +} +GEN_OPFVV_TRANS(vfadd_vv, opfvv_check) +GEN_OPFVV_TRANS(vfsub_vv, opfvv_check) + +typedef void gen_helper_opfvf(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_ptr, + TCGv_env, TCGv_i32); + +static bool opfvf_trans(uint32_t vd, uint32_t rs1, uint32_t vs2, + uint32_t data, gen_helper_opfvf *fn, DisasContext *s) +{ + TCGv_ptr dest, src2, mask; + TCGv_i32 desc; + + TCGLabel *over = gen_new_label(); + tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + + dest = tcg_temp_new_ptr(); + mask = tcg_temp_new_ptr(); + src2 = tcg_temp_new_ptr(); + desc = tcg_const_i32(simd_desc(0, s->vlen / 8, data)); + + tcg_gen_addi_ptr(dest, cpu_env, vreg_ofs(s, vd)); + tcg_gen_addi_ptr(src2, cpu_env, vreg_ofs(s, vs2)); + tcg_gen_addi_ptr(mask, cpu_env, vreg_ofs(s, 0)); + + fn(dest, mask, cpu_fpr[rs1], src2, cpu_env, desc); + + tcg_temp_free_ptr(dest); + tcg_temp_free_ptr(mask); + tcg_temp_free_ptr(src2); + tcg_temp_free_i32(desc); + gen_set_label(over); + return true; +} + +static bool opfvf_check(DisasContext *s, arg_rmrr *a) +{ +/* + * If the current SEW does not correspond to a supported IEEE floating-point + * type, an illegal instruction exception is raised + */ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, false) && + vext_check_reg(s, a->rd, false) && + vext_check_reg(s, a->rs2, false) && + (s->sew != 0)); +} + +/* OPFVF without GVEC IR */ +#define GEN_OPFVF_TRANS(NAME, CHECK) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + if (CHECK(s, a)) { \ + uint32_t data = 0; \ + static gen_helper_opfvf *const fns[3] = { \ + gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w, \ + gen_helper_##NAME##_d, \ + }; \ + gen_set_rm(s, 7); \ + data = FIELD_DP32(data, VDATA, MLEN, s->mlen); \ + data = FIELD_DP32(data, VDATA, VM, a->vm); \ + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ + return opfvf_trans(a->rd, a->rs1, a->rs2, data, \ + fns[s->sew - 1], s); \ + } \ + return false; \ +} + +GEN_OPFVF_TRANS(vfadd_vf, opfvf_check) +GEN_OPFVF_TRANS(vfsub_vf, opfvf_check) +GEN_OPFVF_TRANS(vfrsub_vf, opfvf_check) diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -XXX,XX +XXX,XX @@ #include "exec/memop.h" #include "exec/exec-all.h" #include "exec/helper-proto.h" +#include "fpu/softfloat.h" #include "tcg/tcg-gvec-desc.h" #include "internals.h" #include <math.h> @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVX2_RM, vnclipu_vx_w, NOP_UUU_W, H4, H8, vnclipu32) GEN_VEXT_VX_RM(vnclipu_vx_b, 1, 1, clearb) GEN_VEXT_VX_RM(vnclipu_vx_h, 2, 2, clearh) GEN_VEXT_VX_RM(vnclipu_vx_w, 4, 4, clearl) + +/* + *** Vector Float Point Arithmetic Instructions + */ +/* Vector Single-Width Floating-Point Add/Subtract Instructions */ +#define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ +static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ + CPURISCVState *env) \ +{ \ + TX1 s1 = *((T1 *)vs1 + HS1(i)); \ + TX2 s2 = *((T2 *)vs2 + HS2(i)); \ + *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status); \ +} + +#define GEN_VEXT_VV_ENV(NAME, ESZ, DSZ, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, void *vs1, \ + void *vs2, CPURISCVState *env, \ + uint32_t desc) \ +{ \ + uint32_t vlmax = vext_maxsz(desc) / ESZ; \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vm = vext_vm(desc); \ + uint32_t vl = env->vl; \ + uint32_t i; \ + \ + for (i = 0; i < vl; i++) { \ + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ + continue; \ + } \ + do_##NAME(vd, vs1, vs2, i, env); \ + } \ + CLEAR_FN(vd, vl, vl * DSZ, vlmax * DSZ); \ +} + +RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add) +RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add) +RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add) +GEN_VEXT_VV_ENV(vfadd_vv_h, 2, 2, clearh) +GEN_VEXT_VV_ENV(vfadd_vv_w, 4, 4, clearl) +GEN_VEXT_VV_ENV(vfadd_vv_d, 8, 8, clearq) + +#define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ +static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ + CPURISCVState *env) \ +{ \ + TX2 s2 = *((T2 *)vs2 + HS2(i)); \ + *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, &env->fp_status);\ +} + +#define GEN_VEXT_VF(NAME, ESZ, DSZ, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \ + void *vs2, CPURISCVState *env, \ + uint32_t desc) \ +{ \ + uint32_t vlmax = vext_maxsz(desc) / ESZ; \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vm = vext_vm(desc); \ + uint32_t vl = env->vl; \ + uint32_t i; \ + \ + for (i = 0; i < vl; i++) { \ + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ + continue; \ + } \ + do_##NAME(vd, s1, vs2, i, env); \ + } \ + CLEAR_FN(vd, vl, vl * DSZ, vlmax * DSZ); \ +} + +RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add) +RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add) +RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add) +GEN_VEXT_VF(vfadd_vf_h, 2, 2, clearh) +GEN_VEXT_VF(vfadd_vf_w, 4, 4, clearl) +GEN_VEXT_VF(vfadd_vf_d, 8, 8, clearq) + +RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub) +RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub) +RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub) +GEN_VEXT_VV_ENV(vfsub_vv_h, 2, 2, clearh) +GEN_VEXT_VV_ENV(vfsub_vv_w, 4, 4, clearl) +GEN_VEXT_VV_ENV(vfsub_vv_d, 8, 8, clearq) +RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub) +RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub) +RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub) +GEN_VEXT_VF(vfsub_vf_h, 2, 2, clearh) +GEN_VEXT_VF(vfsub_vf_w, 4, 4, clearl) +GEN_VEXT_VF(vfsub_vf_d, 8, 8, clearq) + +static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s) +{ + return float16_sub(b, a, s); +} + +static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s) +{ + return float32_sub(b, a, s); +} + +static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s) +{ + return float64_sub(b, a, s); +} + +RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub) +RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub) +RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub) +GEN_VEXT_VF(vfrsub_vf_h, 2, 2, clearh) +GEN_VEXT_VF(vfrsub_vf_w, 4, 4, clearl) +GEN_VEXT_VF(vfrsub_vf_d, 8, 8, clearq) -- 2.27.0
From: LIU Zhiwei <zhiwei_liu@c-sky.com> Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Message-id: 20200623215920.2594-32-zhiwei_liu@c-sky.com Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/helper.h | 17 +++ target/riscv/insn32.decode | 8 ++ target/riscv/insn_trans/trans_rvv.inc.c | 149 ++++++++++++++++++++++++ target/riscv/vector_helper.c | 83 +++++++++++++ 4 files changed, 257 insertions(+) diff --git a/target/riscv/helper.h b/target/riscv/helper.h index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/helper.h +++ b/target/riscv/helper.h @@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vfsub_vf_d, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vfrsub_vf_h, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vfrsub_vf_w, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vfrsub_vf_d, void, ptr, ptr, i64, ptr, env, i32) + +DEF_HELPER_6(vfwadd_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfwadd_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfwsub_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfwsub_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfwadd_wv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfwadd_wv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfwsub_wv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfwsub_wv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfwadd_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfwadd_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfwsub_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfwsub_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfwadd_wf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfwadd_wf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfwsub_wf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfwsub_wf_w, void, ptr, ptr, i64, ptr, env, i32) diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn32.decode +++ b/target/riscv/insn32.decode @@ -XXX,XX +XXX,XX @@ vfadd_vf 000000 . ..... ..... 101 ..... 1010111 @r_vm vfsub_vv 000010 . ..... ..... 001 ..... 1010111 @r_vm vfsub_vf 000010 . ..... ..... 101 ..... 1010111 @r_vm vfrsub_vf 100111 . ..... ..... 101 ..... 1010111 @r_vm +vfwadd_vv 110000 . ..... ..... 001 ..... 1010111 @r_vm +vfwadd_vf 110000 . ..... ..... 101 ..... 1010111 @r_vm +vfwadd_wv 110100 . ..... ..... 001 ..... 1010111 @r_vm +vfwadd_wf 110100 . ..... ..... 101 ..... 1010111 @r_vm +vfwsub_vv 110010 . ..... ..... 001 ..... 1010111 @r_vm +vfwsub_vf 110010 . ..... ..... 101 ..... 1010111 @r_vm +vfwsub_wv 110110 . ..... ..... 001 ..... 1010111 @r_vm +vfwsub_wf 110110 . ..... ..... 101 ..... 1010111 @r_vm vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm vsetvl 1000000 ..... ..... 111 ..... 1010111 @r diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn_trans/trans_rvv.inc.c +++ b/target/riscv/insn_trans/trans_rvv.inc.c @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ GEN_OPFVF_TRANS(vfadd_vf, opfvf_check) GEN_OPFVF_TRANS(vfsub_vf, opfvf_check) GEN_OPFVF_TRANS(vfrsub_vf, opfvf_check) + +/* Vector Widening Floating-Point Add/Subtract Instructions */ +static bool opfvv_widen_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, true) && + vext_check_reg(s, a->rd, true) && + vext_check_reg(s, a->rs2, false) && + vext_check_reg(s, a->rs1, false) && + vext_check_overlap_group(a->rd, 2 << s->lmul, a->rs2, + 1 << s->lmul) && + vext_check_overlap_group(a->rd, 2 << s->lmul, a->rs1, + 1 << s->lmul) && + (s->lmul < 0x3) && (s->sew < 0x3) && (s->sew != 0)); +} + +/* OPFVV with WIDEN */ +#define GEN_OPFVV_WIDEN_TRANS(NAME, CHECK) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + if (CHECK(s, a)) { \ + uint32_t data = 0; \ + static gen_helper_gvec_4_ptr * const fns[2] = { \ + gen_helper_##NAME##_h, gen_helper_##NAME##_w, \ + }; \ + TCGLabel *over = gen_new_label(); \ + gen_set_rm(s, 7); \ + tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ + \ + data = FIELD_DP32(data, VDATA, MLEN, s->mlen); \ + data = FIELD_DP32(data, VDATA, VM, a->vm); \ + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ + tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ + vreg_ofs(s, a->rs1), \ + vreg_ofs(s, a->rs2), cpu_env, 0, \ + s->vlen / 8, data, fns[s->sew - 1]); \ + gen_set_label(over); \ + return true; \ + } \ + return false; \ +} + +GEN_OPFVV_WIDEN_TRANS(vfwadd_vv, opfvv_widen_check) +GEN_OPFVV_WIDEN_TRANS(vfwsub_vv, opfvv_widen_check) + +static bool opfvf_widen_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, true) && + vext_check_reg(s, a->rd, true) && + vext_check_reg(s, a->rs2, false) && + vext_check_overlap_group(a->rd, 2 << s->lmul, a->rs2, + 1 << s->lmul) && + (s->lmul < 0x3) && (s->sew < 0x3) && (s->sew != 0)); +} + +/* OPFVF with WIDEN */ +#define GEN_OPFVF_WIDEN_TRANS(NAME) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + if (opfvf_widen_check(s, a)) { \ + uint32_t data = 0; \ + static gen_helper_opfvf *const fns[2] = { \ + gen_helper_##NAME##_h, gen_helper_##NAME##_w, \ + }; \ + gen_set_rm(s, 7); \ + data = FIELD_DP32(data, VDATA, MLEN, s->mlen); \ + data = FIELD_DP32(data, VDATA, VM, a->vm); \ + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ + return opfvf_trans(a->rd, a->rs1, a->rs2, data, \ + fns[s->sew - 1], s); \ + } \ + return false; \ +} + +GEN_OPFVF_WIDEN_TRANS(vfwadd_vf) +GEN_OPFVF_WIDEN_TRANS(vfwsub_vf) + +static bool opfwv_widen_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, true) && + vext_check_reg(s, a->rd, true) && + vext_check_reg(s, a->rs2, true) && + vext_check_reg(s, a->rs1, false) && + vext_check_overlap_group(a->rd, 2 << s->lmul, a->rs1, + 1 << s->lmul) && + (s->lmul < 0x3) && (s->sew < 0x3) && (s->sew != 0)); +} + +/* WIDEN OPFVV with WIDEN */ +#define GEN_OPFWV_WIDEN_TRANS(NAME) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + if (opfwv_widen_check(s, a)) { \ + uint32_t data = 0; \ + static gen_helper_gvec_4_ptr * const fns[2] = { \ + gen_helper_##NAME##_h, gen_helper_##NAME##_w, \ + }; \ + TCGLabel *over = gen_new_label(); \ + gen_set_rm(s, 7); \ + tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ + \ + data = FIELD_DP32(data, VDATA, MLEN, s->mlen); \ + data = FIELD_DP32(data, VDATA, VM, a->vm); \ + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ + tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ + vreg_ofs(s, a->rs1), \ + vreg_ofs(s, a->rs2), cpu_env, 0, \ + s->vlen / 8, data, fns[s->sew - 1]); \ + gen_set_label(over); \ + return true; \ + } \ + return false; \ +} + +GEN_OPFWV_WIDEN_TRANS(vfwadd_wv) +GEN_OPFWV_WIDEN_TRANS(vfwsub_wv) + +static bool opfwf_widen_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, true) && + vext_check_reg(s, a->rd, true) && + vext_check_reg(s, a->rs2, true) && + (s->lmul < 0x3) && (s->sew < 0x3) && (s->sew != 0)); +} + +/* WIDEN OPFVF with WIDEN */ +#define GEN_OPFWF_WIDEN_TRANS(NAME) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + if (opfwf_widen_check(s, a)) { \ + uint32_t data = 0; \ + static gen_helper_opfvf *const fns[2] = { \ + gen_helper_##NAME##_h, gen_helper_##NAME##_w, \ + }; \ + gen_set_rm(s, 7); \ + data = FIELD_DP32(data, VDATA, MLEN, s->mlen); \ + data = FIELD_DP32(data, VDATA, VM, a->vm); \ + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ + return opfvf_trans(a->rd, a->rs1, a->rs2, data, \ + fns[s->sew - 1], s); \ + } \ + return false; \ +} + +GEN_OPFWF_WIDEN_TRANS(vfwadd_wf) +GEN_OPFWF_WIDEN_TRANS(vfwsub_wf) diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -XXX,XX +XXX,XX @@ RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub) GEN_VEXT_VF(vfrsub_vf_h, 2, 2, clearh) GEN_VEXT_VF(vfrsub_vf_w, 4, 4, clearl) GEN_VEXT_VF(vfrsub_vf_d, 8, 8, clearq) + +/* Vector Widening Floating-Point Add/Subtract Instructions */ +static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s) +{ + return float32_add(float16_to_float32(a, true, s), + float16_to_float32(b, true, s), s); +} + +static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s) +{ + return float64_add(float32_to_float64(a, s), + float32_to_float64(b, s), s); + +} + +RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16) +RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32) +GEN_VEXT_VV_ENV(vfwadd_vv_h, 2, 4, clearl) +GEN_VEXT_VV_ENV(vfwadd_vv_w, 4, 8, clearq) +RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16) +RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32) +GEN_VEXT_VF(vfwadd_vf_h, 2, 4, clearl) +GEN_VEXT_VF(vfwadd_vf_w, 4, 8, clearq) + +static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s) +{ + return float32_sub(float16_to_float32(a, true, s), + float16_to_float32(b, true, s), s); +} + +static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s) +{ + return float64_sub(float32_to_float64(a, s), + float32_to_float64(b, s), s); + +} + +RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16) +RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32) +GEN_VEXT_VV_ENV(vfwsub_vv_h, 2, 4, clearl) +GEN_VEXT_VV_ENV(vfwsub_vv_w, 4, 8, clearq) +RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16) +RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32) +GEN_VEXT_VF(vfwsub_vf_h, 2, 4, clearl) +GEN_VEXT_VF(vfwsub_vf_w, 4, 8, clearq) + +static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s) +{ + return float32_add(a, float16_to_float32(b, true, s), s); +} + +static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s) +{ + return float64_add(a, float32_to_float64(b, s), s); +} + +RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16) +RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32) +GEN_VEXT_VV_ENV(vfwadd_wv_h, 2, 4, clearl) +GEN_VEXT_VV_ENV(vfwadd_wv_w, 4, 8, clearq) +RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16) +RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32) +GEN_VEXT_VF(vfwadd_wf_h, 2, 4, clearl) +GEN_VEXT_VF(vfwadd_wf_w, 4, 8, clearq) + +static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s) +{ + return float32_sub(a, float16_to_float32(b, true, s), s); +} + +static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s) +{ + return float64_sub(a, float32_to_float64(b, s), s); +} + +RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16) +RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32) +GEN_VEXT_VV_ENV(vfwsub_wv_h, 2, 4, clearl) +GEN_VEXT_VV_ENV(vfwsub_wv_w, 4, 8, clearq) +RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16) +RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32) +GEN_VEXT_VF(vfwsub_wf_h, 2, 4, clearl) +GEN_VEXT_VF(vfwsub_wf_w, 4, 8, clearq) -- 2.27.0
From: LIU Zhiwei <zhiwei_liu@c-sky.com> Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Message-id: 20200623215920.2594-33-zhiwei_liu@c-sky.com Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/helper.h | 16 ++++++++ target/riscv/insn32.decode | 5 +++ target/riscv/insn_trans/trans_rvv.inc.c | 7 ++++ target/riscv/vector_helper.c | 49 +++++++++++++++++++++++++ 4 files changed, 77 insertions(+) diff --git a/target/riscv/helper.h b/target/riscv/helper.h index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/helper.h +++ b/target/riscv/helper.h @@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vfwadd_wf_h, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vfwadd_wf_w, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vfwsub_wf_h, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vfwsub_wf_w, void, ptr, ptr, i64, ptr, env, i32) + +DEF_HELPER_6(vfmul_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfmul_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfmul_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfdiv_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfdiv_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfdiv_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfmul_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfmul_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfmul_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfdiv_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfdiv_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfdiv_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfrdiv_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfrdiv_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfrdiv_vf_d, void, ptr, ptr, i64, ptr, env, i32) diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn32.decode +++ b/target/riscv/insn32.decode @@ -XXX,XX +XXX,XX @@ vfwsub_vv 110010 . ..... ..... 001 ..... 1010111 @r_vm vfwsub_vf 110010 . ..... ..... 101 ..... 1010111 @r_vm vfwsub_wv 110110 . ..... ..... 001 ..... 1010111 @r_vm vfwsub_wf 110110 . ..... ..... 101 ..... 1010111 @r_vm +vfmul_vv 100100 . ..... ..... 001 ..... 1010111 @r_vm +vfmul_vf 100100 . ..... ..... 101 ..... 1010111 @r_vm +vfdiv_vv 100000 . ..... ..... 001 ..... 1010111 @r_vm +vfdiv_vf 100000 . ..... ..... 101 ..... 1010111 @r_vm +vfrdiv_vf 100001 . ..... ..... 101 ..... 1010111 @r_vm vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm vsetvl 1000000 ..... ..... 111 ..... 1010111 @r diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn_trans/trans_rvv.inc.c +++ b/target/riscv/insn_trans/trans_rvv.inc.c @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ GEN_OPFWF_WIDEN_TRANS(vfwadd_wf) GEN_OPFWF_WIDEN_TRANS(vfwsub_wf) + +/* Vector Single-Width Floating-Point Multiply/Divide Instructions */ +GEN_OPFVV_TRANS(vfmul_vv, opfvv_check) +GEN_OPFVV_TRANS(vfdiv_vv, opfvv_check) +GEN_OPFVF_TRANS(vfmul_vf, opfvf_check) +GEN_OPFVF_TRANS(vfdiv_vf, opfvf_check) +GEN_OPFVF_TRANS(vfrdiv_vf, opfvf_check) diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -XXX,XX +XXX,XX @@ RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16) RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32) GEN_VEXT_VF(vfwsub_wf_h, 2, 4, clearl) GEN_VEXT_VF(vfwsub_wf_w, 4, 8, clearq) + +/* Vector Single-Width Floating-Point Multiply/Divide Instructions */ +RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul) +RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul) +RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul) +GEN_VEXT_VV_ENV(vfmul_vv_h, 2, 2, clearh) +GEN_VEXT_VV_ENV(vfmul_vv_w, 4, 4, clearl) +GEN_VEXT_VV_ENV(vfmul_vv_d, 8, 8, clearq) +RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul) +RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul) +RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul) +GEN_VEXT_VF(vfmul_vf_h, 2, 2, clearh) +GEN_VEXT_VF(vfmul_vf_w, 4, 4, clearl) +GEN_VEXT_VF(vfmul_vf_d, 8, 8, clearq) + +RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div) +RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div) +RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div) +GEN_VEXT_VV_ENV(vfdiv_vv_h, 2, 2, clearh) +GEN_VEXT_VV_ENV(vfdiv_vv_w, 4, 4, clearl) +GEN_VEXT_VV_ENV(vfdiv_vv_d, 8, 8, clearq) +RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div) +RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div) +RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div) +GEN_VEXT_VF(vfdiv_vf_h, 2, 2, clearh) +GEN_VEXT_VF(vfdiv_vf_w, 4, 4, clearl) +GEN_VEXT_VF(vfdiv_vf_d, 8, 8, clearq) + +static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s) +{ + return float16_div(b, a, s); +} + +static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s) +{ + return float32_div(b, a, s); +} + +static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s) +{ + return float64_div(b, a, s); +} + +RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv) +RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv) +RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv) +GEN_VEXT_VF(vfrdiv_vf_h, 2, 2, clearh) +GEN_VEXT_VF(vfrdiv_vf_w, 4, 4, clearl) +GEN_VEXT_VF(vfrdiv_vf_d, 8, 8, clearq) -- 2.27.0
From: LIU Zhiwei <zhiwei_liu@c-sky.com> Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Message-id: 20200623215920.2594-34-zhiwei_liu@c-sky.com Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/helper.h | 5 +++++ target/riscv/insn32.decode | 2 ++ target/riscv/insn_trans/trans_rvv.inc.c | 4 ++++ target/riscv/vector_helper.c | 22 ++++++++++++++++++++++ 4 files changed, 33 insertions(+) diff --git a/target/riscv/helper.h b/target/riscv/helper.h index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/helper.h +++ b/target/riscv/helper.h @@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vfdiv_vf_d, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vfrdiv_vf_h, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vfrdiv_vf_w, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vfrdiv_vf_d, void, ptr, ptr, i64, ptr, env, i32) + +DEF_HELPER_6(vfwmul_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfwmul_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfwmul_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfwmul_vf_w, void, ptr, ptr, i64, ptr, env, i32) diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn32.decode +++ b/target/riscv/insn32.decode @@ -XXX,XX +XXX,XX @@ vfmul_vf 100100 . ..... ..... 101 ..... 1010111 @r_vm vfdiv_vv 100000 . ..... ..... 001 ..... 1010111 @r_vm vfdiv_vf 100000 . ..... ..... 101 ..... 1010111 @r_vm vfrdiv_vf 100001 . ..... ..... 101 ..... 1010111 @r_vm +vfwmul_vv 111000 . ..... ..... 001 ..... 1010111 @r_vm +vfwmul_vf 111000 . ..... ..... 101 ..... 1010111 @r_vm vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm vsetvl 1000000 ..... ..... 111 ..... 1010111 @r diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn_trans/trans_rvv.inc.c +++ b/target/riscv/insn_trans/trans_rvv.inc.c @@ -XXX,XX +XXX,XX @@ GEN_OPFVV_TRANS(vfdiv_vv, opfvv_check) GEN_OPFVF_TRANS(vfmul_vf, opfvf_check) GEN_OPFVF_TRANS(vfdiv_vf, opfvf_check) GEN_OPFVF_TRANS(vfrdiv_vf, opfvf_check) + +/* Vector Widening Floating-Point Multiply */ +GEN_OPFVV_WIDEN_TRANS(vfwmul_vv, opfvv_widen_check) +GEN_OPFVF_WIDEN_TRANS(vfwmul_vf) diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -XXX,XX +XXX,XX @@ RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv) GEN_VEXT_VF(vfrdiv_vf_h, 2, 2, clearh) GEN_VEXT_VF(vfrdiv_vf_w, 4, 4, clearl) GEN_VEXT_VF(vfrdiv_vf_d, 8, 8, clearq) + +/* Vector Widening Floating-Point Multiply */ +static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s) +{ + return float32_mul(float16_to_float32(a, true, s), + float16_to_float32(b, true, s), s); +} + +static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s) +{ + return float64_mul(float32_to_float64(a, s), + float32_to_float64(b, s), s); + +} +RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16) +RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32) +GEN_VEXT_VV_ENV(vfwmul_vv_h, 2, 4, clearl) +GEN_VEXT_VV_ENV(vfwmul_vv_w, 4, 8, clearq) +RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16) +RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32) +GEN_VEXT_VF(vfwmul_vf_h, 2, 4, clearl) +GEN_VEXT_VF(vfwmul_vf_w, 4, 8, clearq) -- 2.27.0
From: LIU Zhiwei <zhiwei_liu@c-sky.com> Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Message-id: 20200623215920.2594-35-zhiwei_liu@c-sky.com Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/helper.h | 49 +++++ target/riscv/insn32.decode | 16 ++ target/riscv/insn_trans/trans_rvv.inc.c | 18 ++ target/riscv/vector_helper.c | 251 ++++++++++++++++++++++++ 4 files changed, 334 insertions(+) diff --git a/target/riscv/helper.h b/target/riscv/helper.h index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/helper.h +++ b/target/riscv/helper.h @@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vfwmul_vv_h, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vfwmul_vv_w, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vfwmul_vf_h, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vfwmul_vf_w, void, ptr, ptr, i64, ptr, env, i32) + +DEF_HELPER_6(vfmacc_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfmacc_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfmacc_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfnmacc_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfnmacc_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfnmacc_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfmsac_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfmsac_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfmsac_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfnmsac_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfnmsac_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfnmsac_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfmadd_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfmadd_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfmadd_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfnmadd_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfnmadd_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfnmadd_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfmsub_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfmsub_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfmsub_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfnmsub_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfnmsub_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfnmsub_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfmacc_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfmacc_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfmacc_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfnmacc_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfnmacc_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfnmacc_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfmsac_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfmsac_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfmsac_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfnmsac_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfnmsac_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfnmsac_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfmadd_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfmadd_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfmadd_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfnmadd_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfnmadd_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfnmadd_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfmsub_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfmsub_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfmsub_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfnmsub_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfnmsub_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfnmsub_vf_d, void, ptr, ptr, i64, ptr, env, i32) diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn32.decode +++ b/target/riscv/insn32.decode @@ -XXX,XX +XXX,XX @@ vfdiv_vf 100000 . ..... ..... 101 ..... 1010111 @r_vm vfrdiv_vf 100001 . ..... ..... 101 ..... 1010111 @r_vm vfwmul_vv 111000 . ..... ..... 001 ..... 1010111 @r_vm vfwmul_vf 111000 . ..... ..... 101 ..... 1010111 @r_vm +vfmacc_vv 101100 . ..... ..... 001 ..... 1010111 @r_vm +vfnmacc_vv 101101 . ..... ..... 001 ..... 1010111 @r_vm +vfnmacc_vf 101101 . ..... ..... 101 ..... 1010111 @r_vm +vfmacc_vf 101100 . ..... ..... 101 ..... 1010111 @r_vm +vfmsac_vv 101110 . ..... ..... 001 ..... 1010111 @r_vm +vfmsac_vf 101110 . ..... ..... 101 ..... 1010111 @r_vm +vfnmsac_vv 101111 . ..... ..... 001 ..... 1010111 @r_vm +vfnmsac_vf 101111 . ..... ..... 101 ..... 1010111 @r_vm +vfmadd_vv 101000 . ..... ..... 001 ..... 1010111 @r_vm +vfmadd_vf 101000 . ..... ..... 101 ..... 1010111 @r_vm +vfnmadd_vv 101001 . ..... ..... 001 ..... 1010111 @r_vm +vfnmadd_vf 101001 . ..... ..... 101 ..... 1010111 @r_vm +vfmsub_vv 101010 . ..... ..... 001 ..... 1010111 @r_vm +vfmsub_vf 101010 . ..... ..... 101 ..... 1010111 @r_vm +vfnmsub_vv 101011 . ..... ..... 001 ..... 1010111 @r_vm +vfnmsub_vf 101011 . ..... ..... 101 ..... 1010111 @r_vm vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm vsetvl 1000000 ..... ..... 111 ..... 1010111 @r diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn_trans/trans_rvv.inc.c +++ b/target/riscv/insn_trans/trans_rvv.inc.c @@ -XXX,XX +XXX,XX @@ GEN_OPFVF_TRANS(vfrdiv_vf, opfvf_check) /* Vector Widening Floating-Point Multiply */ GEN_OPFVV_WIDEN_TRANS(vfwmul_vv, opfvv_widen_check) GEN_OPFVF_WIDEN_TRANS(vfwmul_vf) + +/* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */ +GEN_OPFVV_TRANS(vfmacc_vv, opfvv_check) +GEN_OPFVV_TRANS(vfnmacc_vv, opfvv_check) +GEN_OPFVV_TRANS(vfmsac_vv, opfvv_check) +GEN_OPFVV_TRANS(vfnmsac_vv, opfvv_check) +GEN_OPFVV_TRANS(vfmadd_vv, opfvv_check) +GEN_OPFVV_TRANS(vfnmadd_vv, opfvv_check) +GEN_OPFVV_TRANS(vfmsub_vv, opfvv_check) +GEN_OPFVV_TRANS(vfnmsub_vv, opfvv_check) +GEN_OPFVF_TRANS(vfmacc_vf, opfvf_check) +GEN_OPFVF_TRANS(vfnmacc_vf, opfvf_check) +GEN_OPFVF_TRANS(vfmsac_vf, opfvf_check) +GEN_OPFVF_TRANS(vfnmsac_vf, opfvf_check) +GEN_OPFVF_TRANS(vfmadd_vf, opfvf_check) +GEN_OPFVF_TRANS(vfnmadd_vf, opfvf_check) +GEN_OPFVF_TRANS(vfmsub_vf, opfvf_check) +GEN_OPFVF_TRANS(vfnmsub_vf, opfvf_check) diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -XXX,XX +XXX,XX @@ RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16) RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32) GEN_VEXT_VF(vfwmul_vf_h, 2, 4, clearl) GEN_VEXT_VF(vfwmul_vf_w, 4, 8, clearq) + +/* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */ +#define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ +static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ + CPURISCVState *env) \ +{ \ + TX1 s1 = *((T1 *)vs1 + HS1(i)); \ + TX2 s2 = *((T2 *)vs2 + HS2(i)); \ + TD d = *((TD *)vd + HD(i)); \ + *((TD *)vd + HD(i)) = OP(s2, s1, d, &env->fp_status); \ +} + +static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) +{ + return float16_muladd(a, b, d, 0, s); +} + +static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) +{ + return float32_muladd(a, b, d, 0, s); +} + +static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) +{ + return float64_muladd(a, b, d, 0, s); +} + +RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16) +RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32) +RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64) +GEN_VEXT_VV_ENV(vfmacc_vv_h, 2, 2, clearh) +GEN_VEXT_VV_ENV(vfmacc_vv_w, 4, 4, clearl) +GEN_VEXT_VV_ENV(vfmacc_vv_d, 8, 8, clearq) + +#define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ +static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ + CPURISCVState *env) \ +{ \ + TX2 s2 = *((T2 *)vs2 + HS2(i)); \ + TD d = *((TD *)vd + HD(i)); \ + *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d, &env->fp_status);\ +} + +RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16) +RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32) +RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64) +GEN_VEXT_VF(vfmacc_vf_h, 2, 2, clearh) +GEN_VEXT_VF(vfmacc_vf_w, 4, 4, clearl) +GEN_VEXT_VF(vfmacc_vf_d, 8, 8, clearq) + +static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) +{ + return float16_muladd(a, b, d, + float_muladd_negate_c | float_muladd_negate_product, s); +} + +static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) +{ + return float32_muladd(a, b, d, + float_muladd_negate_c | float_muladd_negate_product, s); +} + +static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) +{ + return float64_muladd(a, b, d, + float_muladd_negate_c | float_muladd_negate_product, s); +} + +RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16) +RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32) +RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64) +GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2, 2, clearh) +GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4, 4, clearl) +GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8, 8, clearq) +RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16) +RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32) +RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64) +GEN_VEXT_VF(vfnmacc_vf_h, 2, 2, clearh) +GEN_VEXT_VF(vfnmacc_vf_w, 4, 4, clearl) +GEN_VEXT_VF(vfnmacc_vf_d, 8, 8, clearq) + +static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) +{ + return float16_muladd(a, b, d, float_muladd_negate_c, s); +} + +static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) +{ + return float32_muladd(a, b, d, float_muladd_negate_c, s); +} + +static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) +{ + return float64_muladd(a, b, d, float_muladd_negate_c, s); +} + +RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16) +RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32) +RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64) +GEN_VEXT_VV_ENV(vfmsac_vv_h, 2, 2, clearh) +GEN_VEXT_VV_ENV(vfmsac_vv_w, 4, 4, clearl) +GEN_VEXT_VV_ENV(vfmsac_vv_d, 8, 8, clearq) +RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16) +RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32) +RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64) +GEN_VEXT_VF(vfmsac_vf_h, 2, 2, clearh) +GEN_VEXT_VF(vfmsac_vf_w, 4, 4, clearl) +GEN_VEXT_VF(vfmsac_vf_d, 8, 8, clearq) + +static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) +{ + return float16_muladd(a, b, d, float_muladd_negate_product, s); +} + +static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) +{ + return float32_muladd(a, b, d, float_muladd_negate_product, s); +} + +static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) +{ + return float64_muladd(a, b, d, float_muladd_negate_product, s); +} + +RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16) +RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32) +RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64) +GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2, 2, clearh) +GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4, 4, clearl) +GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8, 8, clearq) +RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16) +RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32) +RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64) +GEN_VEXT_VF(vfnmsac_vf_h, 2, 2, clearh) +GEN_VEXT_VF(vfnmsac_vf_w, 4, 4, clearl) +GEN_VEXT_VF(vfnmsac_vf_d, 8, 8, clearq) + +static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) +{ + return float16_muladd(d, b, a, 0, s); +} + +static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s) +{ + return float32_muladd(d, b, a, 0, s); +} + +static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) +{ + return float64_muladd(d, b, a, 0, s); +} + +RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16) +RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32) +RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64) +GEN_VEXT_VV_ENV(vfmadd_vv_h, 2, 2, clearh) +GEN_VEXT_VV_ENV(vfmadd_vv_w, 4, 4, clearl) +GEN_VEXT_VV_ENV(vfmadd_vv_d, 8, 8, clearq) +RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16) +RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32) +RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64) +GEN_VEXT_VF(vfmadd_vf_h, 2, 2, clearh) +GEN_VEXT_VF(vfmadd_vf_w, 4, 4, clearl) +GEN_VEXT_VF(vfmadd_vf_d, 8, 8, clearq) + +static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) +{ + return float16_muladd(d, b, a, + float_muladd_negate_c | float_muladd_negate_product, s); +} + +static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s) +{ + return float32_muladd(d, b, a, + float_muladd_negate_c | float_muladd_negate_product, s); +} + +static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) +{ + return float64_muladd(d, b, a, + float_muladd_negate_c | float_muladd_negate_product, s); +} + +RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16) +RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32) +RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64) +GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2, 2, clearh) +GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4, 4, clearl) +GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8, 8, clearq) +RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16) +RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32) +RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64) +GEN_VEXT_VF(vfnmadd_vf_h, 2, 2, clearh) +GEN_VEXT_VF(vfnmadd_vf_w, 4, 4, clearl) +GEN_VEXT_VF(vfnmadd_vf_d, 8, 8, clearq) + +static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) +{ + return float16_muladd(d, b, a, float_muladd_negate_c, s); +} + +static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s) +{ + return float32_muladd(d, b, a, float_muladd_negate_c, s); +} + +static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) +{ + return float64_muladd(d, b, a, float_muladd_negate_c, s); +} + +RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16) +RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32) +RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64) +GEN_VEXT_VV_ENV(vfmsub_vv_h, 2, 2, clearh) +GEN_VEXT_VV_ENV(vfmsub_vv_w, 4, 4, clearl) +GEN_VEXT_VV_ENV(vfmsub_vv_d, 8, 8, clearq) +RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16) +RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32) +RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64) +GEN_VEXT_VF(vfmsub_vf_h, 2, 2, clearh) +GEN_VEXT_VF(vfmsub_vf_w, 4, 4, clearl) +GEN_VEXT_VF(vfmsub_vf_d, 8, 8, clearq) + +static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) +{ + return float16_muladd(d, b, a, float_muladd_negate_product, s); +} + +static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s) +{ + return float32_muladd(d, b, a, float_muladd_negate_product, s); +} + +static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) +{ + return float64_muladd(d, b, a, float_muladd_negate_product, s); +} + +RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16) +RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32) +RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64) +GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2, 2, clearh) +GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4, 4, clearl) +GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8, 8, clearq) +RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16) +RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32) +RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64) +GEN_VEXT_VF(vfnmsub_vf_h, 2, 2, clearh) +GEN_VEXT_VF(vfnmsub_vf_w, 4, 4, clearl) +GEN_VEXT_VF(vfnmsub_vf_d, 8, 8, clearq) -- 2.27.0
From: LIU Zhiwei <zhiwei_liu@c-sky.com> Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Message-id: 20200623215920.2594-36-zhiwei_liu@c-sky.com Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/helper.h | 17 +++++ target/riscv/insn32.decode | 8 +++ target/riscv/insn_trans/trans_rvv.inc.c | 10 +++ target/riscv/vector_helper.c | 91 +++++++++++++++++++++++++ 4 files changed, 126 insertions(+) diff --git a/target/riscv/helper.h b/target/riscv/helper.h index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/helper.h +++ b/target/riscv/helper.h @@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vfmsub_vf_d, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vfnmsub_vf_h, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vfnmsub_vf_w, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vfnmsub_vf_d, void, ptr, ptr, i64, ptr, env, i32) + +DEF_HELPER_6(vfwmacc_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfwmacc_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfwnmacc_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfwnmacc_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfwmsac_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfwmsac_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfwnmsac_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfwnmsac_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfwmacc_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfwmacc_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfwnmacc_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfwnmacc_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfwmsac_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfwmsac_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfwnmsac_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfwnmsac_vf_w, void, ptr, ptr, i64, ptr, env, i32) diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn32.decode +++ b/target/riscv/insn32.decode @@ -XXX,XX +XXX,XX @@ vfmsub_vv 101010 . ..... ..... 001 ..... 1010111 @r_vm vfmsub_vf 101010 . ..... ..... 101 ..... 1010111 @r_vm vfnmsub_vv 101011 . ..... ..... 001 ..... 1010111 @r_vm vfnmsub_vf 101011 . ..... ..... 101 ..... 1010111 @r_vm +vfwmacc_vv 111100 . ..... ..... 001 ..... 1010111 @r_vm +vfwmacc_vf 111100 . ..... ..... 101 ..... 1010111 @r_vm +vfwnmacc_vv 111101 . ..... ..... 001 ..... 1010111 @r_vm +vfwnmacc_vf 111101 . ..... ..... 101 ..... 1010111 @r_vm +vfwmsac_vv 111110 . ..... ..... 001 ..... 1010111 @r_vm +vfwmsac_vf 111110 . ..... ..... 101 ..... 1010111 @r_vm +vfwnmsac_vv 111111 . ..... ..... 001 ..... 1010111 @r_vm +vfwnmsac_vf 111111 . ..... ..... 101 ..... 1010111 @r_vm vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm vsetvl 1000000 ..... ..... 111 ..... 1010111 @r diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn_trans/trans_rvv.inc.c +++ b/target/riscv/insn_trans/trans_rvv.inc.c @@ -XXX,XX +XXX,XX @@ GEN_OPFVF_TRANS(vfmadd_vf, opfvf_check) GEN_OPFVF_TRANS(vfnmadd_vf, opfvf_check) GEN_OPFVF_TRANS(vfmsub_vf, opfvf_check) GEN_OPFVF_TRANS(vfnmsub_vf, opfvf_check) + +/* Vector Widening Floating-Point Fused Multiply-Add Instructions */ +GEN_OPFVV_WIDEN_TRANS(vfwmacc_vv, opfvv_widen_check) +GEN_OPFVV_WIDEN_TRANS(vfwnmacc_vv, opfvv_widen_check) +GEN_OPFVV_WIDEN_TRANS(vfwmsac_vv, opfvv_widen_check) +GEN_OPFVV_WIDEN_TRANS(vfwnmsac_vv, opfvv_widen_check) +GEN_OPFVF_WIDEN_TRANS(vfwmacc_vf) +GEN_OPFVF_WIDEN_TRANS(vfwnmacc_vf) +GEN_OPFVF_WIDEN_TRANS(vfwmsac_vf) +GEN_OPFVF_WIDEN_TRANS(vfwnmsac_vf) diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -XXX,XX +XXX,XX @@ RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64) GEN_VEXT_VF(vfnmsub_vf_h, 2, 2, clearh) GEN_VEXT_VF(vfnmsub_vf_w, 4, 4, clearl) GEN_VEXT_VF(vfnmsub_vf_d, 8, 8, clearq) + +/* Vector Widening Floating-Point Fused Multiply-Add Instructions */ +static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) +{ + return float32_muladd(float16_to_float32(a, true, s), + float16_to_float32(b, true, s), d, 0, s); +} + +static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) +{ + return float64_muladd(float32_to_float64(a, s), + float32_to_float64(b, s), d, 0, s); +} + +RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16) +RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32) +GEN_VEXT_VV_ENV(vfwmacc_vv_h, 2, 4, clearl) +GEN_VEXT_VV_ENV(vfwmacc_vv_w, 4, 8, clearq) +RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16) +RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32) +GEN_VEXT_VF(vfwmacc_vf_h, 2, 4, clearl) +GEN_VEXT_VF(vfwmacc_vf_w, 4, 8, clearq) + +static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) +{ + return float32_muladd(float16_to_float32(a, true, s), + float16_to_float32(b, true, s), d, + float_muladd_negate_c | float_muladd_negate_product, s); +} + +static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) +{ + return float64_muladd(float32_to_float64(a, s), + float32_to_float64(b, s), d, + float_muladd_negate_c | float_muladd_negate_product, s); +} + +RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16) +RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32) +GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 2, 4, clearl) +GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 4, 8, clearq) +RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16) +RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32) +GEN_VEXT_VF(vfwnmacc_vf_h, 2, 4, clearl) +GEN_VEXT_VF(vfwnmacc_vf_w, 4, 8, clearq) + +static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) +{ + return float32_muladd(float16_to_float32(a, true, s), + float16_to_float32(b, true, s), d, + float_muladd_negate_c, s); +} + +static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) +{ + return float64_muladd(float32_to_float64(a, s), + float32_to_float64(b, s), d, + float_muladd_negate_c, s); +} + +RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16) +RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32) +GEN_VEXT_VV_ENV(vfwmsac_vv_h, 2, 4, clearl) +GEN_VEXT_VV_ENV(vfwmsac_vv_w, 4, 8, clearq) +RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16) +RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32) +GEN_VEXT_VF(vfwmsac_vf_h, 2, 4, clearl) +GEN_VEXT_VF(vfwmsac_vf_w, 4, 8, clearq) + +static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) +{ + return float32_muladd(float16_to_float32(a, true, s), + float16_to_float32(b, true, s), d, + float_muladd_negate_product, s); +} + +static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) +{ + return float64_muladd(float32_to_float64(a, s), + float32_to_float64(b, s), d, + float_muladd_negate_product, s); +} + +RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16) +RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32) +GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 2, 4, clearl) +GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 4, 8, clearq) +RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16) +RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32) +GEN_VEXT_VF(vfwnmsac_vf_h, 2, 4, clearl) +GEN_VEXT_VF(vfwnmsac_vf_w, 4, 8, clearq) -- 2.27.0
From: LIU Zhiwei <zhiwei_liu@c-sky.com> Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Message-id: 20200623215920.2594-37-zhiwei_liu@c-sky.com Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/helper.h | 4 +++ target/riscv/insn32.decode | 3 ++ target/riscv/insn_trans/trans_rvv.inc.c | 43 +++++++++++++++++++++++++ target/riscv/vector_helper.c | 43 +++++++++++++++++++++++++ 4 files changed, 93 insertions(+) diff --git a/target/riscv/helper.h b/target/riscv/helper.h index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/helper.h +++ b/target/riscv/helper.h @@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vfwmsac_vf_h, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vfwmsac_vf_w, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vfwnmsac_vf_h, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vfwnmsac_vf_w, void, ptr, ptr, i64, ptr, env, i32) + +DEF_HELPER_5(vfsqrt_v_h, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfsqrt_v_w, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfsqrt_v_d, void, ptr, ptr, ptr, env, i32) diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn32.decode +++ b/target/riscv/insn32.decode @@ -XXX,XX +XXX,XX @@ &shift shamt rs1 rd &atomic aq rl rs2 rs1 rd &rmrr vm rd rs1 rs2 +&rmr vm rd rs2 &rwdvm vm wd rd rs1 rs2 &r2nfvm vm rd rs1 nf &rnfvm vm rd rs1 rs2 nf @@ -XXX,XX +XXX,XX @@ @r2_rm ....... ..... ..... ... ..... ....... %rs1 %rm %rd @r2 ....... ..... ..... ... ..... ....... %rs1 %rd @r2_nfvm ... ... vm:1 ..... ..... ... ..... ....... &r2nfvm %nf %rs1 %rd +@r2_vm ...... vm:1 ..... ..... ... ..... ....... &rmr %rs2 %rd @r_nfvm ... ... vm:1 ..... ..... ... ..... ....... &rnfvm %nf %rs2 %rs1 %rd @r_vm ...... vm:1 ..... ..... ... ..... ....... &rmrr %rs2 %rs1 %rd @r_vm_1 ...... . ..... ..... ... ..... ....... &rmrr vm=1 %rs2 %rs1 %rd @@ -XXX,XX +XXX,XX @@ vfwmsac_vv 111110 . ..... ..... 001 ..... 1010111 @r_vm vfwmsac_vf 111110 . ..... ..... 101 ..... 1010111 @r_vm vfwnmsac_vv 111111 . ..... ..... 001 ..... 1010111 @r_vm vfwnmsac_vf 111111 . ..... ..... 101 ..... 1010111 @r_vm +vfsqrt_v 100011 . ..... 00000 001 ..... 1010111 @r2_vm vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm vsetvl 1000000 ..... ..... 111 ..... 1010111 @r diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn_trans/trans_rvv.inc.c +++ b/target/riscv/insn_trans/trans_rvv.inc.c @@ -XXX,XX +XXX,XX @@ GEN_OPFVF_WIDEN_TRANS(vfwmacc_vf) GEN_OPFVF_WIDEN_TRANS(vfwnmacc_vf) GEN_OPFVF_WIDEN_TRANS(vfwmsac_vf) GEN_OPFVF_WIDEN_TRANS(vfwnmsac_vf) + +/* Vector Floating-Point Square-Root Instruction */ + +/* + * If the current SEW does not correspond to a supported IEEE floating-point + * type, an illegal instruction exception is raised + */ +static bool opfv_check(DisasContext *s, arg_rmr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, false) && + vext_check_reg(s, a->rd, false) && + vext_check_reg(s, a->rs2, false) && + (s->sew != 0)); +} + +#define GEN_OPFV_TRANS(NAME, CHECK) \ +static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ +{ \ + if (CHECK(s, a)) { \ + uint32_t data = 0; \ + static gen_helper_gvec_3_ptr * const fns[3] = { \ + gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w, \ + gen_helper_##NAME##_d, \ + }; \ + TCGLabel *over = gen_new_label(); \ + gen_set_rm(s, 7); \ + tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ + \ + data = FIELD_DP32(data, VDATA, MLEN, s->mlen); \ + data = FIELD_DP32(data, VDATA, VM, a->vm); \ + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ + tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ + vreg_ofs(s, a->rs2), cpu_env, 0, \ + s->vlen / 8, data, fns[s->sew - 1]); \ + gen_set_label(over); \ + return true; \ + } \ + return false; \ +} + +GEN_OPFV_TRANS(vfsqrt_v, opfv_check) diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -XXX,XX +XXX,XX @@ RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16) RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32) GEN_VEXT_VF(vfwnmsac_vf_h, 2, 4, clearl) GEN_VEXT_VF(vfwnmsac_vf_w, 4, 8, clearq) + +/* Vector Floating-Point Square-Root Instruction */ +/* (TD, T2, TX2) */ +#define OP_UU_H uint16_t, uint16_t, uint16_t +#define OP_UU_W uint32_t, uint32_t, uint32_t +#define OP_UU_D uint64_t, uint64_t, uint64_t + +#define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ +static void do_##NAME(void *vd, void *vs2, int i, \ + CPURISCVState *env) \ +{ \ + TX2 s2 = *((T2 *)vs2 + HS2(i)); \ + *((TD *)vd + HD(i)) = OP(s2, &env->fp_status); \ +} + +#define GEN_VEXT_V_ENV(NAME, ESZ, DSZ, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, void *vs2, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t vlmax = vext_maxsz(desc) / ESZ; \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vm = vext_vm(desc); \ + uint32_t vl = env->vl; \ + uint32_t i; \ + \ + if (vl == 0) { \ + return; \ + } \ + for (i = 0; i < vl; i++) { \ + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ + continue; \ + } \ + do_##NAME(vd, vs2, i, env); \ + } \ + CLEAR_FN(vd, vl, vl * DSZ, vlmax * DSZ); \ +} + +RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt) +RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt) +RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt) +GEN_VEXT_V_ENV(vfsqrt_v_h, 2, 2, clearh) +GEN_VEXT_V_ENV(vfsqrt_v_w, 4, 4, clearl) +GEN_VEXT_V_ENV(vfsqrt_v_d, 8, 8, clearq) -- 2.27.0
From: LIU Zhiwei <zhiwei_liu@c-sky.com> Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Message-id: 20200623215920.2594-38-zhiwei_liu@c-sky.com Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/helper.h | 13 ++++++++++++ target/riscv/insn32.decode | 4 ++++ target/riscv/insn_trans/trans_rvv.inc.c | 6 ++++++ target/riscv/vector_helper.c | 27 +++++++++++++++++++++++++ 4 files changed, 50 insertions(+) diff --git a/target/riscv/helper.h b/target/riscv/helper.h index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/helper.h +++ b/target/riscv/helper.h @@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vfwnmsac_vf_w, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_5(vfsqrt_v_h, void, ptr, ptr, ptr, env, i32) DEF_HELPER_5(vfsqrt_v_w, void, ptr, ptr, ptr, env, i32) DEF_HELPER_5(vfsqrt_v_d, void, ptr, ptr, ptr, env, i32) + +DEF_HELPER_6(vfmin_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfmin_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfmin_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfmax_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfmax_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfmax_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfmin_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfmin_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfmin_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfmax_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfmax_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfmax_vf_d, void, ptr, ptr, i64, ptr, env, i32) diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn32.decode +++ b/target/riscv/insn32.decode @@ -XXX,XX +XXX,XX @@ vfwmsac_vf 111110 . ..... ..... 101 ..... 1010111 @r_vm vfwnmsac_vv 111111 . ..... ..... 001 ..... 1010111 @r_vm vfwnmsac_vf 111111 . ..... ..... 101 ..... 1010111 @r_vm vfsqrt_v 100011 . ..... 00000 001 ..... 1010111 @r2_vm +vfmin_vv 000100 . ..... ..... 001 ..... 1010111 @r_vm +vfmin_vf 000100 . ..... ..... 101 ..... 1010111 @r_vm +vfmax_vv 000110 . ..... ..... 001 ..... 1010111 @r_vm +vfmax_vf 000110 . ..... ..... 101 ..... 1010111 @r_vm vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm vsetvl 1000000 ..... ..... 111 ..... 1010111 @r diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn_trans/trans_rvv.inc.c +++ b/target/riscv/insn_trans/trans_rvv.inc.c @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ } GEN_OPFV_TRANS(vfsqrt_v, opfv_check) + +/* Vector Floating-Point MIN/MAX Instructions */ +GEN_OPFVV_TRANS(vfmin_vv, opfvv_check) +GEN_OPFVV_TRANS(vfmax_vv, opfvv_check) +GEN_OPFVF_TRANS(vfmin_vf, opfvf_check) +GEN_OPFVF_TRANS(vfmax_vf, opfvf_check) diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -XXX,XX +XXX,XX @@ RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt) GEN_VEXT_V_ENV(vfsqrt_v_h, 2, 2, clearh) GEN_VEXT_V_ENV(vfsqrt_v_w, 4, 4, clearl) GEN_VEXT_V_ENV(vfsqrt_v_d, 8, 8, clearq) + +/* Vector Floating-Point MIN/MAX Instructions */ +RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minnum) +RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, float32_minnum) +RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minnum) +GEN_VEXT_VV_ENV(vfmin_vv_h, 2, 2, clearh) +GEN_VEXT_VV_ENV(vfmin_vv_w, 4, 4, clearl) +GEN_VEXT_VV_ENV(vfmin_vv_d, 8, 8, clearq) +RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minnum) +RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minnum) +RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minnum) +GEN_VEXT_VF(vfmin_vf_h, 2, 2, clearh) +GEN_VEXT_VF(vfmin_vf_w, 4, 4, clearl) +GEN_VEXT_VF(vfmin_vf_d, 8, 8, clearq) + +RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maxnum) +RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maxnum) +RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maxnum) +GEN_VEXT_VV_ENV(vfmax_vv_h, 2, 2, clearh) +GEN_VEXT_VV_ENV(vfmax_vv_w, 4, 4, clearl) +GEN_VEXT_VV_ENV(vfmax_vv_d, 8, 8, clearq) +RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maxnum) +RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maxnum) +RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maxnum) +GEN_VEXT_VF(vfmax_vf_h, 2, 2, clearh) +GEN_VEXT_VF(vfmax_vf_w, 4, 4, clearl) +GEN_VEXT_VF(vfmax_vf_d, 8, 8, clearq) -- 2.27.0
From: LIU Zhiwei <zhiwei_liu@c-sky.com> Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Message-id: 20200623215920.2594-39-zhiwei_liu@c-sky.com Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/helper.h | 19 ++++++ target/riscv/insn32.decode | 6 ++ target/riscv/insn_trans/trans_rvv.inc.c | 8 +++ target/riscv/vector_helper.c | 85 +++++++++++++++++++++++++ 4 files changed, 118 insertions(+) diff --git a/target/riscv/helper.h b/target/riscv/helper.h index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/helper.h +++ b/target/riscv/helper.h @@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vfmin_vf_d, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vfmax_vf_h, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vfmax_vf_w, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vfmax_vf_d, void, ptr, ptr, i64, ptr, env, i32) + +DEF_HELPER_6(vfsgnj_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfsgnj_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfsgnj_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfsgnjn_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfsgnjn_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfsgnjn_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfsgnjx_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfsgnjx_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfsgnjx_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfsgnj_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfsgnj_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfsgnj_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfsgnjn_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfsgnjn_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfsgnjn_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfsgnjx_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfsgnjx_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfsgnjx_vf_d, void, ptr, ptr, i64, ptr, env, i32) diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn32.decode +++ b/target/riscv/insn32.decode @@ -XXX,XX +XXX,XX @@ vfmin_vv 000100 . ..... ..... 001 ..... 1010111 @r_vm vfmin_vf 000100 . ..... ..... 101 ..... 1010111 @r_vm vfmax_vv 000110 . ..... ..... 001 ..... 1010111 @r_vm vfmax_vf 000110 . ..... ..... 101 ..... 1010111 @r_vm +vfsgnj_vv 001000 . ..... ..... 001 ..... 1010111 @r_vm +vfsgnj_vf 001000 . ..... ..... 101 ..... 1010111 @r_vm +vfsgnjn_vv 001001 . ..... ..... 001 ..... 1010111 @r_vm +vfsgnjn_vf 001001 . ..... ..... 101 ..... 1010111 @r_vm +vfsgnjx_vv 001010 . ..... ..... 001 ..... 1010111 @r_vm +vfsgnjx_vf 001010 . ..... ..... 101 ..... 1010111 @r_vm vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm vsetvl 1000000 ..... ..... 111 ..... 1010111 @r diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn_trans/trans_rvv.inc.c +++ b/target/riscv/insn_trans/trans_rvv.inc.c @@ -XXX,XX +XXX,XX @@ GEN_OPFVV_TRANS(vfmin_vv, opfvv_check) GEN_OPFVV_TRANS(vfmax_vv, opfvv_check) GEN_OPFVF_TRANS(vfmin_vf, opfvf_check) GEN_OPFVF_TRANS(vfmax_vf, opfvf_check) + +/* Vector Floating-Point Sign-Injection Instructions */ +GEN_OPFVV_TRANS(vfsgnj_vv, opfvv_check) +GEN_OPFVV_TRANS(vfsgnjn_vv, opfvv_check) +GEN_OPFVV_TRANS(vfsgnjx_vv, opfvv_check) +GEN_OPFVF_TRANS(vfsgnj_vf, opfvf_check) +GEN_OPFVF_TRANS(vfsgnjn_vf, opfvf_check) +GEN_OPFVF_TRANS(vfsgnjx_vf, opfvf_check) diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -XXX,XX +XXX,XX @@ RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maxnum) GEN_VEXT_VF(vfmax_vf_h, 2, 2, clearh) GEN_VEXT_VF(vfmax_vf_w, 4, 4, clearl) GEN_VEXT_VF(vfmax_vf_d, 8, 8, clearq) + +/* Vector Floating-Point Sign-Injection Instructions */ +static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s) +{ + return deposit64(b, 0, 15, a); +} + +static uint32_t fsgnj32(uint32_t a, uint32_t b, float_status *s) +{ + return deposit64(b, 0, 31, a); +} + +static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s) +{ + return deposit64(b, 0, 63, a); +} + +RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16) +RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32) +RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64) +GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2, 2, clearh) +GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4, 4, clearl) +GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8, 8, clearq) +RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16) +RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32) +RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64) +GEN_VEXT_VF(vfsgnj_vf_h, 2, 2, clearh) +GEN_VEXT_VF(vfsgnj_vf_w, 4, 4, clearl) +GEN_VEXT_VF(vfsgnj_vf_d, 8, 8, clearq) + +static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s) +{ + return deposit64(~b, 0, 15, a); +} + +static uint32_t fsgnjn32(uint32_t a, uint32_t b, float_status *s) +{ + return deposit64(~b, 0, 31, a); +} + +static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s) +{ + return deposit64(~b, 0, 63, a); +} + +RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16) +RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32) +RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64) +GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2, 2, clearh) +GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4, 4, clearl) +GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8, 8, clearq) +RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16) +RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32) +RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64) +GEN_VEXT_VF(vfsgnjn_vf_h, 2, 2, clearh) +GEN_VEXT_VF(vfsgnjn_vf_w, 4, 4, clearl) +GEN_VEXT_VF(vfsgnjn_vf_d, 8, 8, clearq) + +static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s) +{ + return deposit64(b ^ a, 0, 15, a); +} + +static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s) +{ + return deposit64(b ^ a, 0, 31, a); +} + +static uint64_t fsgnjx64(uint64_t a, uint64_t b, float_status *s) +{ + return deposit64(b ^ a, 0, 63, a); +} + +RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16) +RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32) +RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64) +GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2, 2, clearh) +GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4, 4, clearl) +GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8, 8, clearq) +RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16) +RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32) +RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64) +GEN_VEXT_VF(vfsgnjx_vf_h, 2, 2, clearh) +GEN_VEXT_VF(vfsgnjx_vf_w, 4, 4, clearl) +GEN_VEXT_VF(vfsgnjx_vf_d, 8, 8, clearq) -- 2.27.0
From: LIU Zhiwei <zhiwei_liu@c-sky.com> Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Message-id: 20200623215920.2594-40-zhiwei_liu@c-sky.com Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/helper.h | 37 +++++ target/riscv/insn32.decode | 12 ++ target/riscv/insn_trans/trans_rvv.inc.c | 35 +++++ target/riscv/vector_helper.c | 174 ++++++++++++++++++++++++ 4 files changed, 258 insertions(+) diff --git a/target/riscv/helper.h b/target/riscv/helper.h index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/helper.h +++ b/target/riscv/helper.h @@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vfsgnjn_vf_d, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vfsgnjx_vf_h, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vfsgnjx_vf_w, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vfsgnjx_vf_d, void, ptr, ptr, i64, ptr, env, i32) + +DEF_HELPER_6(vmfeq_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmfeq_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmfeq_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmfne_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmfne_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmfne_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmflt_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmflt_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmflt_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmfle_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmfle_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmfle_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmfeq_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vmfeq_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vmfeq_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vmfne_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vmfne_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vmfne_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vmflt_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vmflt_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vmflt_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vmfle_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vmfle_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vmfle_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vmfgt_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vmfgt_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vmfgt_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vmfge_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vmfge_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vmfge_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vmford_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmford_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmford_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmford_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vmford_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vmford_vf_d, void, ptr, ptr, i64, ptr, env, i32) diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn32.decode +++ b/target/riscv/insn32.decode @@ -XXX,XX +XXX,XX @@ vfsgnjn_vv 001001 . ..... ..... 001 ..... 1010111 @r_vm vfsgnjn_vf 001001 . ..... ..... 101 ..... 1010111 @r_vm vfsgnjx_vv 001010 . ..... ..... 001 ..... 1010111 @r_vm vfsgnjx_vf 001010 . ..... ..... 101 ..... 1010111 @r_vm +vmfeq_vv 011000 . ..... ..... 001 ..... 1010111 @r_vm +vmfeq_vf 011000 . ..... ..... 101 ..... 1010111 @r_vm +vmfne_vv 011100 . ..... ..... 001 ..... 1010111 @r_vm +vmfne_vf 011100 . ..... ..... 101 ..... 1010111 @r_vm +vmflt_vv 011011 . ..... ..... 001 ..... 1010111 @r_vm +vmflt_vf 011011 . ..... ..... 101 ..... 1010111 @r_vm +vmfle_vv 011001 . ..... ..... 001 ..... 1010111 @r_vm +vmfle_vf 011001 . ..... ..... 101 ..... 1010111 @r_vm +vmfgt_vf 011101 . ..... ..... 101 ..... 1010111 @r_vm +vmfge_vf 011111 . ..... ..... 101 ..... 1010111 @r_vm +vmford_vv 011010 . ..... ..... 001 ..... 1010111 @r_vm +vmford_vf 011010 . ..... ..... 101 ..... 1010111 @r_vm vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm vsetvl 1000000 ..... ..... 111 ..... 1010111 @r diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn_trans/trans_rvv.inc.c +++ b/target/riscv/insn_trans/trans_rvv.inc.c @@ -XXX,XX +XXX,XX @@ GEN_OPFVV_TRANS(vfsgnjx_vv, opfvv_check) GEN_OPFVF_TRANS(vfsgnj_vf, opfvf_check) GEN_OPFVF_TRANS(vfsgnjn_vf, opfvf_check) GEN_OPFVF_TRANS(vfsgnjx_vf, opfvf_check) + +/* Vector Floating-Point Compare Instructions */ +static bool opfvv_cmp_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_reg(s, a->rs2, false) && + vext_check_reg(s, a->rs1, false) && + (s->sew != 0) && + ((vext_check_overlap_group(a->rd, 1, a->rs1, 1 << s->lmul) && + vext_check_overlap_group(a->rd, 1, a->rs2, 1 << s->lmul)) || + (s->lmul == 0))); +} + +GEN_OPFVV_TRANS(vmfeq_vv, opfvv_cmp_check) +GEN_OPFVV_TRANS(vmfne_vv, opfvv_cmp_check) +GEN_OPFVV_TRANS(vmflt_vv, opfvv_cmp_check) +GEN_OPFVV_TRANS(vmfle_vv, opfvv_cmp_check) +GEN_OPFVV_TRANS(vmford_vv, opfvv_cmp_check) + +static bool opfvf_cmp_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_reg(s, a->rs2, false) && + (s->sew != 0) && + (vext_check_overlap_group(a->rd, 1, a->rs2, 1 << s->lmul) || + (s->lmul == 0))); +} + +GEN_OPFVF_TRANS(vmfeq_vf, opfvf_cmp_check) +GEN_OPFVF_TRANS(vmfne_vf, opfvf_cmp_check) +GEN_OPFVF_TRANS(vmflt_vf, opfvf_cmp_check) +GEN_OPFVF_TRANS(vmfle_vf, opfvf_cmp_check) +GEN_OPFVF_TRANS(vmfgt_vf, opfvf_cmp_check) +GEN_OPFVF_TRANS(vmfge_vf, opfvf_cmp_check) +GEN_OPFVF_TRANS(vmford_vf, opfvf_cmp_check) diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -XXX,XX +XXX,XX @@ RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64) GEN_VEXT_VF(vfsgnjx_vf_h, 2, 2, clearh) GEN_VEXT_VF(vfsgnjx_vf_w, 4, 4, clearl) GEN_VEXT_VF(vfsgnjx_vf_d, 8, 8, clearq) + +/* Vector Floating-Point Compare Instructions */ +#define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP) \ +void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vm = vext_vm(desc); \ + uint32_t vl = env->vl; \ + uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE); \ + uint32_t i; \ + \ + for (i = 0; i < vl; i++) { \ + ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ + ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ + continue; \ + } \ + vext_set_elem_mask(vd, mlen, i, \ + DO_OP(s2, s1, &env->fp_status)); \ + } \ + for (; i < vlmax; i++) { \ + vext_set_elem_mask(vd, mlen, i, 0); \ + } \ +} + +static bool float16_eq_quiet(uint16_t a, uint16_t b, float_status *s) +{ + FloatRelation compare = float16_compare_quiet(a, b, s); + return compare == float_relation_equal; +} + +GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet) +GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet) +GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet) + +#define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP) \ +void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vm = vext_vm(desc); \ + uint32_t vl = env->vl; \ + uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE); \ + uint32_t i; \ + \ + for (i = 0; i < vl; i++) { \ + ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ + continue; \ + } \ + vext_set_elem_mask(vd, mlen, i, \ + DO_OP(s2, (ETYPE)s1, &env->fp_status)); \ + } \ + for (; i < vlmax; i++) { \ + vext_set_elem_mask(vd, mlen, i, 0); \ + } \ +} + +GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet) +GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet) +GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet) + +static bool vmfne16(uint16_t a, uint16_t b, float_status *s) +{ + FloatRelation compare = float16_compare_quiet(a, b, s); + return compare != float_relation_equal; +} + +static bool vmfne32(uint32_t a, uint32_t b, float_status *s) +{ + FloatRelation compare = float32_compare_quiet(a, b, s); + return compare != float_relation_equal; +} + +static bool vmfne64(uint64_t a, uint64_t b, float_status *s) +{ + FloatRelation compare = float64_compare_quiet(a, b, s); + return compare != float_relation_equal; +} + +GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, uint16_t, H2, vmfne16) +GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32) +GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64) +GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16) +GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32) +GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64) + +static bool float16_lt(uint16_t a, uint16_t b, float_status *s) +{ + FloatRelation compare = float16_compare(a, b, s); + return compare == float_relation_less; +} + +GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt) +GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt) +GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt) +GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt) +GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt) +GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt) + +static bool float16_le(uint16_t a, uint16_t b, float_status *s) +{ + FloatRelation compare = float16_compare(a, b, s); + return compare == float_relation_less || + compare == float_relation_equal; +} + +GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le) +GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le) +GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le) +GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le) +GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le) +GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le) + +static bool vmfgt16(uint16_t a, uint16_t b, float_status *s) +{ + FloatRelation compare = float16_compare(a, b, s); + return compare == float_relation_greater; +} + +static bool vmfgt32(uint32_t a, uint32_t b, float_status *s) +{ + FloatRelation compare = float32_compare(a, b, s); + return compare == float_relation_greater; +} + +static bool vmfgt64(uint64_t a, uint64_t b, float_status *s) +{ + FloatRelation compare = float64_compare(a, b, s); + return compare == float_relation_greater; +} + +GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16) +GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32) +GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64) + +static bool vmfge16(uint16_t a, uint16_t b, float_status *s) +{ + FloatRelation compare = float16_compare(a, b, s); + return compare == float_relation_greater || + compare == float_relation_equal; +} + +static bool vmfge32(uint32_t a, uint32_t b, float_status *s) +{ + FloatRelation compare = float32_compare(a, b, s); + return compare == float_relation_greater || + compare == float_relation_equal; +} + +static bool vmfge64(uint64_t a, uint64_t b, float_status *s) +{ + FloatRelation compare = float64_compare(a, b, s); + return compare == float_relation_greater || + compare == float_relation_equal; +} + +GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16) +GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32) +GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64) + +static bool float16_unordered_quiet(uint16_t a, uint16_t b, float_status *s) +{ + FloatRelation compare = float16_compare_quiet(a, b, s); + return compare == float_relation_unordered; +} + +GEN_VEXT_CMP_VV_ENV(vmford_vv_h, uint16_t, H2, !float16_unordered_quiet) +GEN_VEXT_CMP_VV_ENV(vmford_vv_w, uint32_t, H4, !float32_unordered_quiet) +GEN_VEXT_CMP_VV_ENV(vmford_vv_d, uint64_t, H8, !float64_unordered_quiet) +GEN_VEXT_CMP_VF(vmford_vf_h, uint16_t, H2, !float16_unordered_quiet) +GEN_VEXT_CMP_VF(vmford_vf_w, uint32_t, H4, !float32_unordered_quiet) +GEN_VEXT_CMP_VF(vmford_vf_d, uint64_t, H8, !float64_unordered_quiet) -- 2.27.0
From: LIU Zhiwei <zhiwei_liu@c-sky.com> Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Message-id: 20200623215920.2594-41-zhiwei_liu@c-sky.com Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/helper.h | 4 ++ target/riscv/internals.h | 5 ++ target/riscv/insn32.decode | 1 + target/riscv/fpu_helper.c | 33 +-------- target/riscv/insn_trans/trans_rvv.inc.c | 3 + target/riscv/vector_helper.c | 91 +++++++++++++++++++++++++ 6 files changed, 107 insertions(+), 30 deletions(-) diff --git a/target/riscv/helper.h b/target/riscv/helper.h index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/helper.h +++ b/target/riscv/helper.h @@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vmford_vv_d, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vmford_vf_h, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vmford_vf_w, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vmford_vf_d, void, ptr, ptr, i64, ptr, env, i32) + +DEF_HELPER_5(vfclass_v_h, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfclass_v_w, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfclass_v_d, void, ptr, ptr, ptr, env, i32) diff --git a/target/riscv/internals.h b/target/riscv/internals.h index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/internals.h +++ b/target/riscv/internals.h @@ -XXX,XX +XXX,XX @@ FIELD(VDATA, VM, 8, 1) FIELD(VDATA, LMUL, 9, 2) FIELD(VDATA, NF, 11, 4) FIELD(VDATA, WD, 11, 1) + +/* float point classify helpers */ +target_ulong fclass_h(uint64_t frs1); +target_ulong fclass_s(uint64_t frs1); +target_ulong fclass_d(uint64_t frs1); #endif diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn32.decode +++ b/target/riscv/insn32.decode @@ -XXX,XX +XXX,XX @@ vmfgt_vf 011101 . ..... ..... 101 ..... 1010111 @r_vm vmfge_vf 011111 . ..... ..... 101 ..... 1010111 @r_vm vmford_vv 011010 . ..... ..... 001 ..... 1010111 @r_vm vmford_vf 011010 . ..... ..... 101 ..... 1010111 @r_vm +vfclass_v 100011 . ..... 10000 001 ..... 1010111 @r2_vm vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm vsetvl 1000000 ..... ..... 111 ..... 1010111 @r diff --git a/target/riscv/fpu_helper.c b/target/riscv/fpu_helper.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/fpu_helper.c +++ b/target/riscv/fpu_helper.c @@ -XXX,XX +XXX,XX @@ #include "exec/exec-all.h" #include "exec/helper-proto.h" #include "fpu/softfloat.h" +#include "internals.h" target_ulong riscv_cpu_get_fflags(CPURISCVState *env) { @@ -XXX,XX +XXX,XX @@ uint64_t helper_fcvt_s_lu(CPURISCVState *env, uint64_t rs1) target_ulong helper_fclass_s(uint64_t frs1) { - float32 f = frs1; - bool sign = float32_is_neg(f); - - if (float32_is_infinity(f)) { - return sign ? 1 << 0 : 1 << 7; - } else if (float32_is_zero(f)) { - return sign ? 1 << 3 : 1 << 4; - } else if (float32_is_zero_or_denormal(f)) { - return sign ? 1 << 2 : 1 << 5; - } else if (float32_is_any_nan(f)) { - float_status s = { }; /* for snan_bit_is_one */ - return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; - } else { - return sign ? 1 << 1 : 1 << 6; - } + return fclass_s(frs1); } uint64_t helper_fadd_d(CPURISCVState *env, uint64_t frs1, uint64_t frs2) @@ -XXX,XX +XXX,XX @@ uint64_t helper_fcvt_d_lu(CPURISCVState *env, uint64_t rs1) target_ulong helper_fclass_d(uint64_t frs1) { - float64 f = frs1; - bool sign = float64_is_neg(f); - - if (float64_is_infinity(f)) { - return sign ? 1 << 0 : 1 << 7; - } else if (float64_is_zero(f)) { - return sign ? 1 << 3 : 1 << 4; - } else if (float64_is_zero_or_denormal(f)) { - return sign ? 1 << 2 : 1 << 5; - } else if (float64_is_any_nan(f)) { - float_status s = { }; /* for snan_bit_is_one */ - return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; - } else { - return sign ? 1 << 1 : 1 << 6; - } + return fclass_d(frs1); } diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn_trans/trans_rvv.inc.c +++ b/target/riscv/insn_trans/trans_rvv.inc.c @@ -XXX,XX +XXX,XX @@ GEN_OPFVF_TRANS(vmfle_vf, opfvf_cmp_check) GEN_OPFVF_TRANS(vmfgt_vf, opfvf_cmp_check) GEN_OPFVF_TRANS(vmfge_vf, opfvf_cmp_check) GEN_OPFVF_TRANS(vmford_vf, opfvf_cmp_check) + +/* Vector Floating-Point Classify Instruction */ +GEN_OPFV_TRANS(vfclass_v, opfv_check) diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -XXX,XX +XXX,XX @@ GEN_VEXT_CMP_VV_ENV(vmford_vv_d, uint64_t, H8, !float64_unordered_quiet) GEN_VEXT_CMP_VF(vmford_vf_h, uint16_t, H2, !float16_unordered_quiet) GEN_VEXT_CMP_VF(vmford_vf_w, uint32_t, H4, !float32_unordered_quiet) GEN_VEXT_CMP_VF(vmford_vf_d, uint64_t, H8, !float64_unordered_quiet) + +/* Vector Floating-Point Classify Instruction */ +#define OPIVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ +static void do_##NAME(void *vd, void *vs2, int i) \ +{ \ + TX2 s2 = *((T2 *)vs2 + HS2(i)); \ + *((TD *)vd + HD(i)) = OP(s2); \ +} + +#define GEN_VEXT_V(NAME, ESZ, DSZ, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, void *vs2, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t vlmax = vext_maxsz(desc) / ESZ; \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vm = vext_vm(desc); \ + uint32_t vl = env->vl; \ + uint32_t i; \ + \ + for (i = 0; i < vl; i++) { \ + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ + continue; \ + } \ + do_##NAME(vd, vs2, i); \ + } \ + CLEAR_FN(vd, vl, vl * DSZ, vlmax * DSZ); \ +} + +target_ulong fclass_h(uint64_t frs1) +{ + float16 f = frs1; + bool sign = float16_is_neg(f); + + if (float16_is_infinity(f)) { + return sign ? 1 << 0 : 1 << 7; + } else if (float16_is_zero(f)) { + return sign ? 1 << 3 : 1 << 4; + } else if (float16_is_zero_or_denormal(f)) { + return sign ? 1 << 2 : 1 << 5; + } else if (float16_is_any_nan(f)) { + float_status s = { }; /* for snan_bit_is_one */ + return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; + } else { + return sign ? 1 << 1 : 1 << 6; + } +} + +target_ulong fclass_s(uint64_t frs1) +{ + float32 f = frs1; + bool sign = float32_is_neg(f); + + if (float32_is_infinity(f)) { + return sign ? 1 << 0 : 1 << 7; + } else if (float32_is_zero(f)) { + return sign ? 1 << 3 : 1 << 4; + } else if (float32_is_zero_or_denormal(f)) { + return sign ? 1 << 2 : 1 << 5; + } else if (float32_is_any_nan(f)) { + float_status s = { }; /* for snan_bit_is_one */ + return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; + } else { + return sign ? 1 << 1 : 1 << 6; + } +} + +target_ulong fclass_d(uint64_t frs1) +{ + float64 f = frs1; + bool sign = float64_is_neg(f); + + if (float64_is_infinity(f)) { + return sign ? 1 << 0 : 1 << 7; + } else if (float64_is_zero(f)) { + return sign ? 1 << 3 : 1 << 4; + } else if (float64_is_zero_or_denormal(f)) { + return sign ? 1 << 2 : 1 << 5; + } else if (float64_is_any_nan(f)) { + float_status s = { }; /* for snan_bit_is_one */ + return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; + } else { + return sign ? 1 << 1 : 1 << 6; + } +} + +RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h) +RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s) +RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d) +GEN_VEXT_V(vfclass_v_h, 2, 2, clearh) +GEN_VEXT_V(vfclass_v_w, 4, 4, clearl) +GEN_VEXT_V(vfclass_v_d, 8, 8, clearq) -- 2.27.0
From: LIU Zhiwei <zhiwei_liu@c-sky.com> Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Message-id: 20200623215920.2594-42-zhiwei_liu@c-sky.com Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/helper.h | 4 +++ target/riscv/insn32.decode | 2 ++ target/riscv/insn_trans/trans_rvv.inc.c | 38 +++++++++++++++++++++++++ target/riscv/vector_helper.c | 24 ++++++++++++++++ 4 files changed, 68 insertions(+) diff --git a/target/riscv/helper.h b/target/riscv/helper.h index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/helper.h +++ b/target/riscv/helper.h @@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vmford_vf_d, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_5(vfclass_v_h, void, ptr, ptr, ptr, env, i32) DEF_HELPER_5(vfclass_v_w, void, ptr, ptr, ptr, env, i32) DEF_HELPER_5(vfclass_v_d, void, ptr, ptr, ptr, env, i32) + +DEF_HELPER_6(vfmerge_vfm_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfmerge_vfm_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfmerge_vfm_d, void, ptr, ptr, i64, ptr, env, i32) diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn32.decode +++ b/target/riscv/insn32.decode @@ -XXX,XX +XXX,XX @@ vmfge_vf 011111 . ..... ..... 101 ..... 1010111 @r_vm vmford_vv 011010 . ..... ..... 001 ..... 1010111 @r_vm vmford_vf 011010 . ..... ..... 101 ..... 1010111 @r_vm vfclass_v 100011 . ..... 10000 001 ..... 1010111 @r2_vm +vfmerge_vfm 010111 0 ..... ..... 101 ..... 1010111 @r_vm_0 +vfmv_v_f 010111 1 00000 ..... 101 ..... 1010111 @r2 vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm vsetvl 1000000 ..... ..... 111 ..... 1010111 @r diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn_trans/trans_rvv.inc.c +++ b/target/riscv/insn_trans/trans_rvv.inc.c @@ -XXX,XX +XXX,XX @@ GEN_OPFVF_TRANS(vmford_vf, opfvf_cmp_check) /* Vector Floating-Point Classify Instruction */ GEN_OPFV_TRANS(vfclass_v, opfv_check) + +/* Vector Floating-Point Merge Instruction */ +GEN_OPFVF_TRANS(vfmerge_vfm, opfvf_check) + +static bool trans_vfmv_v_f(DisasContext *s, arg_vfmv_v_f *a) +{ + if (vext_check_isa_ill(s) && + vext_check_reg(s, a->rd, false) && + (s->sew != 0)) { + + if (s->vl_eq_vlmax) { + tcg_gen_gvec_dup_i64(s->sew, vreg_ofs(s, a->rd), + MAXSZ(s), MAXSZ(s), cpu_fpr[a->rs1]); + } else { + TCGv_ptr dest; + TCGv_i32 desc; + uint32_t data = FIELD_DP32(0, VDATA, LMUL, s->lmul); + static gen_helper_vmv_vx * const fns[3] = { + gen_helper_vmv_v_x_h, + gen_helper_vmv_v_x_w, + gen_helper_vmv_v_x_d, + }; + TCGLabel *over = gen_new_label(); + tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + + dest = tcg_temp_new_ptr(); + desc = tcg_const_i32(simd_desc(0, s->vlen / 8, data)); + tcg_gen_addi_ptr(dest, cpu_env, vreg_ofs(s, a->rd)); + fns[s->sew - 1](dest, cpu_fpr[a->rs1], cpu_env, desc); + + tcg_temp_free_ptr(dest); + tcg_temp_free_i32(desc); + gen_set_label(over); + } + return true; + } + return false; +} diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -XXX,XX +XXX,XX @@ RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d) GEN_VEXT_V(vfclass_v_h, 2, 2, clearh) GEN_VEXT_V(vfclass_v_w, 4, 4, clearl) GEN_VEXT_V(vfclass_v_d, 8, 8, clearq) + +/* Vector Floating-Point Merge Instruction */ +#define GEN_VFMERGE_VF(NAME, ETYPE, H, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vm = vext_vm(desc); \ + uint32_t vl = env->vl; \ + uint32_t esz = sizeof(ETYPE); \ + uint32_t vlmax = vext_maxsz(desc) / esz; \ + uint32_t i; \ + \ + for (i = 0; i < vl; i++) { \ + ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ + *((ETYPE *)vd + H(i)) \ + = (!vm && !vext_elem_mask(v0, mlen, i) ? s2 : s1); \ + } \ + CLEAR_FN(vd, vl, vl * esz, vlmax * esz); \ +} + +GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2, clearh) +GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4, clearl) +GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8, clearq) -- 2.27.0
From: LIU Zhiwei <zhiwei_liu@c-sky.com> Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Message-id: 20200623215920.2594-43-zhiwei_liu@c-sky.com Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/helper.h | 13 ++++++++++ target/riscv/insn32.decode | 4 +++ target/riscv/insn_trans/trans_rvv.inc.c | 6 +++++ target/riscv/vector_helper.c | 33 +++++++++++++++++++++++++ 4 files changed, 56 insertions(+) diff --git a/target/riscv/helper.h b/target/riscv/helper.h index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/helper.h +++ b/target/riscv/helper.h @@ -XXX,XX +XXX,XX @@ DEF_HELPER_5(vfclass_v_d, void, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vfmerge_vfm_h, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vfmerge_vfm_w, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vfmerge_vfm_d, void, ptr, ptr, i64, ptr, env, i32) + +DEF_HELPER_5(vfcvt_xu_f_v_h, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfcvt_xu_f_v_w, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfcvt_xu_f_v_d, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfcvt_x_f_v_h, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfcvt_x_f_v_w, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfcvt_x_f_v_d, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfcvt_f_xu_v_h, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfcvt_f_xu_v_w, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfcvt_f_xu_v_d, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfcvt_f_x_v_h, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfcvt_f_x_v_w, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfcvt_f_x_v_d, void, ptr, ptr, ptr, env, i32) diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn32.decode +++ b/target/riscv/insn32.decode @@ -XXX,XX +XXX,XX @@ vmford_vf 011010 . ..... ..... 101 ..... 1010111 @r_vm vfclass_v 100011 . ..... 10000 001 ..... 1010111 @r2_vm vfmerge_vfm 010111 0 ..... ..... 101 ..... 1010111 @r_vm_0 vfmv_v_f 010111 1 00000 ..... 101 ..... 1010111 @r2 +vfcvt_xu_f_v 100010 . ..... 00000 001 ..... 1010111 @r2_vm +vfcvt_x_f_v 100010 . ..... 00001 001 ..... 1010111 @r2_vm +vfcvt_f_xu_v 100010 . ..... 00010 001 ..... 1010111 @r2_vm +vfcvt_f_x_v 100010 . ..... 00011 001 ..... 1010111 @r2_vm vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm vsetvl 1000000 ..... ..... 111 ..... 1010111 @r diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn_trans/trans_rvv.inc.c +++ b/target/riscv/insn_trans/trans_rvv.inc.c @@ -XXX,XX +XXX,XX @@ static bool trans_vfmv_v_f(DisasContext *s, arg_vfmv_v_f *a) } return false; } + +/* Single-Width Floating-Point/Integer Type-Convert Instructions */ +GEN_OPFV_TRANS(vfcvt_xu_f_v, opfv_check) +GEN_OPFV_TRANS(vfcvt_x_f_v, opfv_check) +GEN_OPFV_TRANS(vfcvt_f_xu_v, opfv_check) +GEN_OPFV_TRANS(vfcvt_f_x_v, opfv_check) diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2, clearh) GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4, clearl) GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8, clearq) + +/* Single-Width Floating-Point/Integer Type-Convert Instructions */ +/* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */ +RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16) +RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32) +RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64) +GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2, 2, clearh) +GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4, 4, clearl) +GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8, 8, clearq) + +/* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */ +RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16) +RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32) +RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64) +GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2, 2, clearh) +GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4, 4, clearl) +GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8, 8, clearq) + +/* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */ +RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16) +RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32) +RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64) +GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2, 2, clearh) +GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4, 4, clearl) +GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8, 8, clearq) + +/* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */ +RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16) +RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32) +RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64) +GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2, 2, clearh) +GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4, 4, clearl) +GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8, 8, clearq) -- 2.27.0
From: LIU Zhiwei <zhiwei_liu@c-sky.com> Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Message-id: 20200623215920.2594-44-zhiwei_liu@c-sky.com Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/helper.h | 11 ++++++ target/riscv/insn32.decode | 5 +++ target/riscv/insn_trans/trans_rvv.inc.c | 48 +++++++++++++++++++++++++ target/riscv/vector_helper.c | 42 ++++++++++++++++++++++ 4 files changed, 106 insertions(+) diff --git a/target/riscv/helper.h b/target/riscv/helper.h index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/helper.h +++ b/target/riscv/helper.h @@ -XXX,XX +XXX,XX @@ DEF_HELPER_5(vfcvt_f_xu_v_d, void, ptr, ptr, ptr, env, i32) DEF_HELPER_5(vfcvt_f_x_v_h, void, ptr, ptr, ptr, env, i32) DEF_HELPER_5(vfcvt_f_x_v_w, void, ptr, ptr, ptr, env, i32) DEF_HELPER_5(vfcvt_f_x_v_d, void, ptr, ptr, ptr, env, i32) + +DEF_HELPER_5(vfwcvt_xu_f_v_h, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfwcvt_xu_f_v_w, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfwcvt_x_f_v_h, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfwcvt_x_f_v_w, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfwcvt_f_xu_v_h, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfwcvt_f_xu_v_w, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfwcvt_f_x_v_h, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfwcvt_f_x_v_w, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfwcvt_f_f_v_h, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfwcvt_f_f_v_w, void, ptr, ptr, ptr, env, i32) diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn32.decode +++ b/target/riscv/insn32.decode @@ -XXX,XX +XXX,XX @@ vfcvt_xu_f_v 100010 . ..... 00000 001 ..... 1010111 @r2_vm vfcvt_x_f_v 100010 . ..... 00001 001 ..... 1010111 @r2_vm vfcvt_f_xu_v 100010 . ..... 00010 001 ..... 1010111 @r2_vm vfcvt_f_x_v 100010 . ..... 00011 001 ..... 1010111 @r2_vm +vfwcvt_xu_f_v 100010 . ..... 01000 001 ..... 1010111 @r2_vm +vfwcvt_x_f_v 100010 . ..... 01001 001 ..... 1010111 @r2_vm +vfwcvt_f_xu_v 100010 . ..... 01010 001 ..... 1010111 @r2_vm +vfwcvt_f_x_v 100010 . ..... 01011 001 ..... 1010111 @r2_vm +vfwcvt_f_f_v 100010 . ..... 01100 001 ..... 1010111 @r2_vm vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm vsetvl 1000000 ..... ..... 111 ..... 1010111 @r diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn_trans/trans_rvv.inc.c +++ b/target/riscv/insn_trans/trans_rvv.inc.c @@ -XXX,XX +XXX,XX @@ GEN_OPFV_TRANS(vfcvt_xu_f_v, opfv_check) GEN_OPFV_TRANS(vfcvt_x_f_v, opfv_check) GEN_OPFV_TRANS(vfcvt_f_xu_v, opfv_check) GEN_OPFV_TRANS(vfcvt_f_x_v, opfv_check) + +/* Widening Floating-Point/Integer Type-Convert Instructions */ + +/* + * If the current SEW does not correspond to a supported IEEE floating-point + * type, an illegal instruction exception is raised + */ +static bool opfv_widen_check(DisasContext *s, arg_rmr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, true) && + vext_check_reg(s, a->rd, true) && + vext_check_reg(s, a->rs2, false) && + vext_check_overlap_group(a->rd, 2 << s->lmul, a->rs2, + 1 << s->lmul) && + (s->lmul < 0x3) && (s->sew < 0x3) && (s->sew != 0)); +} + +#define GEN_OPFV_WIDEN_TRANS(NAME) \ +static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ +{ \ + if (opfv_widen_check(s, a)) { \ + uint32_t data = 0; \ + static gen_helper_gvec_3_ptr * const fns[2] = { \ + gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w, \ + }; \ + TCGLabel *over = gen_new_label(); \ + gen_set_rm(s, 7); \ + tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ + \ + data = FIELD_DP32(data, VDATA, MLEN, s->mlen); \ + data = FIELD_DP32(data, VDATA, VM, a->vm); \ + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ + tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ + vreg_ofs(s, a->rs2), cpu_env, 0, \ + s->vlen / 8, data, fns[s->sew - 1]); \ + gen_set_label(over); \ + return true; \ + } \ + return false; \ +} + +GEN_OPFV_WIDEN_TRANS(vfwcvt_xu_f_v) +GEN_OPFV_WIDEN_TRANS(vfwcvt_x_f_v) +GEN_OPFV_WIDEN_TRANS(vfwcvt_f_xu_v) +GEN_OPFV_WIDEN_TRANS(vfwcvt_f_x_v) +GEN_OPFV_WIDEN_TRANS(vfwcvt_f_f_v) diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -XXX,XX +XXX,XX @@ RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64) GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2, 2, clearh) GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4, 4, clearl) GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8, 8, clearq) + +/* Widening Floating-Point/Integer Type-Convert Instructions */ +/* (TD, T2, TX2) */ +#define WOP_UU_H uint32_t, uint16_t, uint16_t +#define WOP_UU_W uint64_t, uint32_t, uint32_t +/* vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer.*/ +RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32) +RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64) +GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 2, 4, clearl) +GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 4, 8, clearq) + +/* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */ +RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32) +RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64) +GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 2, 4, clearl) +GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 4, 8, clearq) + +/* vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float */ +RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32) +RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64) +GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 2, 4, clearl) +GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 4, 8, clearq) + +/* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */ +RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32) +RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64) +GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 2, 4, clearl) +GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 4, 8, clearq) + +/* + * vfwcvt.f.f.v vd, vs2, vm # + * Convert single-width float to double-width float. + */ +static uint32_t vfwcvtffv16(uint16_t a, float_status *s) +{ + return float16_to_float32(a, true, s); +} + +RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16) +RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64) +GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 2, 4, clearl) +GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 4, 8, clearq) -- 2.27.0
From: LIU Zhiwei <zhiwei_liu@c-sky.com> Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Message-id: 20200623215920.2594-45-zhiwei_liu@c-sky.com Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/helper.h | 11 ++++++ target/riscv/insn32.decode | 5 +++ target/riscv/insn_trans/trans_rvv.inc.c | 48 +++++++++++++++++++++++++ target/riscv/vector_helper.c | 39 ++++++++++++++++++++ 4 files changed, 103 insertions(+) diff --git a/target/riscv/helper.h b/target/riscv/helper.h index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/helper.h +++ b/target/riscv/helper.h @@ -XXX,XX +XXX,XX @@ DEF_HELPER_5(vfwcvt_f_x_v_h, void, ptr, ptr, ptr, env, i32) DEF_HELPER_5(vfwcvt_f_x_v_w, void, ptr, ptr, ptr, env, i32) DEF_HELPER_5(vfwcvt_f_f_v_h, void, ptr, ptr, ptr, env, i32) DEF_HELPER_5(vfwcvt_f_f_v_w, void, ptr, ptr, ptr, env, i32) + +DEF_HELPER_5(vfncvt_xu_f_v_h, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfncvt_xu_f_v_w, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfncvt_x_f_v_h, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfncvt_x_f_v_w, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfncvt_f_xu_v_h, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfncvt_f_xu_v_w, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfncvt_f_x_v_h, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfncvt_f_x_v_w, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfncvt_f_f_v_h, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfncvt_f_f_v_w, void, ptr, ptr, ptr, env, i32) diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn32.decode +++ b/target/riscv/insn32.decode @@ -XXX,XX +XXX,XX @@ vfwcvt_x_f_v 100010 . ..... 01001 001 ..... 1010111 @r2_vm vfwcvt_f_xu_v 100010 . ..... 01010 001 ..... 1010111 @r2_vm vfwcvt_f_x_v 100010 . ..... 01011 001 ..... 1010111 @r2_vm vfwcvt_f_f_v 100010 . ..... 01100 001 ..... 1010111 @r2_vm +vfncvt_xu_f_v 100010 . ..... 10000 001 ..... 1010111 @r2_vm +vfncvt_x_f_v 100010 . ..... 10001 001 ..... 1010111 @r2_vm +vfncvt_f_xu_v 100010 . ..... 10010 001 ..... 1010111 @r2_vm +vfncvt_f_x_v 100010 . ..... 10011 001 ..... 1010111 @r2_vm +vfncvt_f_f_v 100010 . ..... 10100 001 ..... 1010111 @r2_vm vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm vsetvl 1000000 ..... ..... 111 ..... 1010111 @r diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn_trans/trans_rvv.inc.c +++ b/target/riscv/insn_trans/trans_rvv.inc.c @@ -XXX,XX +XXX,XX @@ GEN_OPFV_WIDEN_TRANS(vfwcvt_x_f_v) GEN_OPFV_WIDEN_TRANS(vfwcvt_f_xu_v) GEN_OPFV_WIDEN_TRANS(vfwcvt_f_x_v) GEN_OPFV_WIDEN_TRANS(vfwcvt_f_f_v) + +/* Narrowing Floating-Point/Integer Type-Convert Instructions */ + +/* + * If the current SEW does not correspond to a supported IEEE floating-point + * type, an illegal instruction exception is raised + */ +static bool opfv_narrow_check(DisasContext *s, arg_rmr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, false) && + vext_check_reg(s, a->rd, false) && + vext_check_reg(s, a->rs2, true) && + vext_check_overlap_group(a->rd, 1 << s->lmul, a->rs2, + 2 << s->lmul) && + (s->lmul < 0x3) && (s->sew < 0x3) && (s->sew != 0)); +} + +#define GEN_OPFV_NARROW_TRANS(NAME) \ +static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ +{ \ + if (opfv_narrow_check(s, a)) { \ + uint32_t data = 0; \ + static gen_helper_gvec_3_ptr * const fns[2] = { \ + gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w, \ + }; \ + TCGLabel *over = gen_new_label(); \ + gen_set_rm(s, 7); \ + tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ + \ + data = FIELD_DP32(data, VDATA, MLEN, s->mlen); \ + data = FIELD_DP32(data, VDATA, VM, a->vm); \ + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ + tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ + vreg_ofs(s, a->rs2), cpu_env, 0, \ + s->vlen / 8, data, fns[s->sew - 1]); \ + gen_set_label(over); \ + return true; \ + } \ + return false; \ +} + +GEN_OPFV_NARROW_TRANS(vfncvt_xu_f_v) +GEN_OPFV_NARROW_TRANS(vfncvt_x_f_v) +GEN_OPFV_NARROW_TRANS(vfncvt_f_xu_v) +GEN_OPFV_NARROW_TRANS(vfncvt_f_x_v) +GEN_OPFV_NARROW_TRANS(vfncvt_f_f_v) diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -XXX,XX +XXX,XX @@ RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16) RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64) GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 2, 4, clearl) GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 4, 8, clearq) + +/* Narrowing Floating-Point/Integer Type-Convert Instructions */ +/* (TD, T2, TX2) */ +#define NOP_UU_H uint16_t, uint32_t, uint32_t +#define NOP_UU_W uint32_t, uint64_t, uint64_t +/* vfncvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */ +RVVCALL(OPFVV1, vfncvt_xu_f_v_h, NOP_UU_H, H2, H4, float32_to_uint16) +RVVCALL(OPFVV1, vfncvt_xu_f_v_w, NOP_UU_W, H4, H8, float64_to_uint32) +GEN_VEXT_V_ENV(vfncvt_xu_f_v_h, 2, 2, clearh) +GEN_VEXT_V_ENV(vfncvt_xu_f_v_w, 4, 4, clearl) + +/* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */ +RVVCALL(OPFVV1, vfncvt_x_f_v_h, NOP_UU_H, H2, H4, float32_to_int16) +RVVCALL(OPFVV1, vfncvt_x_f_v_w, NOP_UU_W, H4, H8, float64_to_int32) +GEN_VEXT_V_ENV(vfncvt_x_f_v_h, 2, 2, clearh) +GEN_VEXT_V_ENV(vfncvt_x_f_v_w, 4, 4, clearl) + +/* vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float */ +RVVCALL(OPFVV1, vfncvt_f_xu_v_h, NOP_UU_H, H2, H4, uint32_to_float16) +RVVCALL(OPFVV1, vfncvt_f_xu_v_w, NOP_UU_W, H4, H8, uint64_to_float32) +GEN_VEXT_V_ENV(vfncvt_f_xu_v_h, 2, 2, clearh) +GEN_VEXT_V_ENV(vfncvt_f_xu_v_w, 4, 4, clearl) + +/* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */ +RVVCALL(OPFVV1, vfncvt_f_x_v_h, NOP_UU_H, H2, H4, int32_to_float16) +RVVCALL(OPFVV1, vfncvt_f_x_v_w, NOP_UU_W, H4, H8, int64_to_float32) +GEN_VEXT_V_ENV(vfncvt_f_x_v_h, 2, 2, clearh) +GEN_VEXT_V_ENV(vfncvt_f_x_v_w, 4, 4, clearl) + +/* vfncvt.f.f.v vd, vs2, vm # Convert double float to single-width float. */ +static uint16_t vfncvtffv16(uint32_t a, float_status *s) +{ + return float32_to_float16(a, true, s); +} + +RVVCALL(OPFVV1, vfncvt_f_f_v_h, NOP_UU_H, H2, H4, vfncvtffv16) +RVVCALL(OPFVV1, vfncvt_f_f_v_w, NOP_UU_W, H4, H8, float64_to_float32) +GEN_VEXT_V_ENV(vfncvt_f_f_v_h, 2, 2, clearh) +GEN_VEXT_V_ENV(vfncvt_f_f_v_w, 4, 4, clearl) -- 2.27.0
From: LIU Zhiwei <zhiwei_liu@c-sky.com> Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Message-id: 20200623215920.2594-46-zhiwei_liu@c-sky.com Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/helper.h | 33 +++++++++++ target/riscv/insn32.decode | 8 +++ target/riscv/insn_trans/trans_rvv.inc.c | 18 ++++++ target/riscv/vector_helper.c | 74 +++++++++++++++++++++++++ 4 files changed, 133 insertions(+) diff --git a/target/riscv/helper.h b/target/riscv/helper.h index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/helper.h +++ b/target/riscv/helper.h @@ -XXX,XX +XXX,XX @@ DEF_HELPER_5(vfncvt_f_x_v_h, void, ptr, ptr, ptr, env, i32) DEF_HELPER_5(vfncvt_f_x_v_w, void, ptr, ptr, ptr, env, i32) DEF_HELPER_5(vfncvt_f_f_v_h, void, ptr, ptr, ptr, env, i32) DEF_HELPER_5(vfncvt_f_f_v_w, void, ptr, ptr, ptr, env, i32) + +DEF_HELPER_6(vredsum_vs_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredsum_vs_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredsum_vs_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredsum_vs_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredmaxu_vs_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredmaxu_vs_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredmaxu_vs_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredmaxu_vs_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredmax_vs_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredmax_vs_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredmax_vs_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredmax_vs_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredminu_vs_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredminu_vs_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredminu_vs_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredminu_vs_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredmin_vs_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredmin_vs_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredmin_vs_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredmin_vs_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredand_vs_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredand_vs_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredand_vs_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredand_vs_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredor_vs_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredor_vs_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredor_vs_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredor_vs_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredxor_vs_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredxor_vs_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredxor_vs_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredxor_vs_d, void, ptr, ptr, ptr, ptr, env, i32) diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn32.decode +++ b/target/riscv/insn32.decode @@ -XXX,XX +XXX,XX @@ vfncvt_x_f_v 100010 . ..... 10001 001 ..... 1010111 @r2_vm vfncvt_f_xu_v 100010 . ..... 10010 001 ..... 1010111 @r2_vm vfncvt_f_x_v 100010 . ..... 10011 001 ..... 1010111 @r2_vm vfncvt_f_f_v 100010 . ..... 10100 001 ..... 1010111 @r2_vm +vredsum_vs 000000 . ..... ..... 010 ..... 1010111 @r_vm +vredand_vs 000001 . ..... ..... 010 ..... 1010111 @r_vm +vredor_vs 000010 . ..... ..... 010 ..... 1010111 @r_vm +vredxor_vs 000011 . ..... ..... 010 ..... 1010111 @r_vm +vredminu_vs 000100 . ..... ..... 010 ..... 1010111 @r_vm +vredmin_vs 000101 . ..... ..... 010 ..... 1010111 @r_vm +vredmaxu_vs 000110 . ..... ..... 010 ..... 1010111 @r_vm +vredmax_vs 000111 . ..... ..... 010 ..... 1010111 @r_vm vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm vsetvl 1000000 ..... ..... 111 ..... 1010111 @r diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn_trans/trans_rvv.inc.c +++ b/target/riscv/insn_trans/trans_rvv.inc.c @@ -XXX,XX +XXX,XX @@ GEN_OPFV_NARROW_TRANS(vfncvt_x_f_v) GEN_OPFV_NARROW_TRANS(vfncvt_f_xu_v) GEN_OPFV_NARROW_TRANS(vfncvt_f_x_v) GEN_OPFV_NARROW_TRANS(vfncvt_f_f_v) + +/* + *** Vector Reduction Operations + */ +/* Vector Single-Width Integer Reduction Instructions */ +static bool reduction_check(DisasContext *s, arg_rmrr *a) +{ + return vext_check_isa_ill(s) && vext_check_reg(s, a->rs2, false); +} + +GEN_OPIVV_TRANS(vredsum_vs, reduction_check) +GEN_OPIVV_TRANS(vredmaxu_vs, reduction_check) +GEN_OPIVV_TRANS(vredmax_vs, reduction_check) +GEN_OPIVV_TRANS(vredminu_vs, reduction_check) +GEN_OPIVV_TRANS(vredmin_vs, reduction_check) +GEN_OPIVV_TRANS(vredand_vs, reduction_check) +GEN_OPIVV_TRANS(vredor_vs, reduction_check) +GEN_OPIVV_TRANS(vredxor_vs, reduction_check) diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -XXX,XX +XXX,XX @@ RVVCALL(OPFVV1, vfncvt_f_f_v_h, NOP_UU_H, H2, H4, vfncvtffv16) RVVCALL(OPFVV1, vfncvt_f_f_v_w, NOP_UU_W, H4, H8, float64_to_float32) GEN_VEXT_V_ENV(vfncvt_f_f_v_h, 2, 2, clearh) GEN_VEXT_V_ENV(vfncvt_f_f_v_w, 4, 4, clearl) + +/* + *** Vector Reduction Operations + */ +/* Vector Single-Width Integer Reduction Instructions */ +#define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP, CLEAR_FN)\ +void HELPER(NAME)(void *vd, void *v0, void *vs1, \ + void *vs2, CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vm = vext_vm(desc); \ + uint32_t vl = env->vl; \ + uint32_t i; \ + uint32_t tot = env_archcpu(env)->cfg.vlen / 8; \ + TD s1 = *((TD *)vs1 + HD(0)); \ + \ + for (i = 0; i < vl; i++) { \ + TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ + continue; \ + } \ + s1 = OP(s1, (TD)s2); \ + } \ + *((TD *)vd + HD(0)) = s1; \ + CLEAR_FN(vd, 1, sizeof(TD), tot); \ +} + +/* vd[0] = sum(vs1[0], vs2[*]) */ +GEN_VEXT_RED(vredsum_vs_b, int8_t, int8_t, H1, H1, DO_ADD, clearb) +GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD, clearh) +GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD, clearl) +GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD, clearq) + +/* vd[0] = maxu(vs1[0], vs2[*]) */ +GEN_VEXT_RED(vredmaxu_vs_b, uint8_t, uint8_t, H1, H1, DO_MAX, clearb) +GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX, clearh) +GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX, clearl) +GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX, clearq) + +/* vd[0] = max(vs1[0], vs2[*]) */ +GEN_VEXT_RED(vredmax_vs_b, int8_t, int8_t, H1, H1, DO_MAX, clearb) +GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX, clearh) +GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX, clearl) +GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX, clearq) + +/* vd[0] = minu(vs1[0], vs2[*]) */ +GEN_VEXT_RED(vredminu_vs_b, uint8_t, uint8_t, H1, H1, DO_MIN, clearb) +GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN, clearh) +GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN, clearl) +GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN, clearq) + +/* vd[0] = min(vs1[0], vs2[*]) */ +GEN_VEXT_RED(vredmin_vs_b, int8_t, int8_t, H1, H1, DO_MIN, clearb) +GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN, clearh) +GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN, clearl) +GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN, clearq) + +/* vd[0] = and(vs1[0], vs2[*]) */ +GEN_VEXT_RED(vredand_vs_b, int8_t, int8_t, H1, H1, DO_AND, clearb) +GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND, clearh) +GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND, clearl) +GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND, clearq) + +/* vd[0] = or(vs1[0], vs2[*]) */ +GEN_VEXT_RED(vredor_vs_b, int8_t, int8_t, H1, H1, DO_OR, clearb) +GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR, clearh) +GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR, clearl) +GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR, clearq) + +/* vd[0] = xor(vs1[0], vs2[*]) */ +GEN_VEXT_RED(vredxor_vs_b, int8_t, int8_t, H1, H1, DO_XOR, clearb) +GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR, clearh) +GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR, clearl) +GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR, clearq) -- 2.27.0
From: LIU Zhiwei <zhiwei_liu@c-sky.com> Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Message-id: 20200623215920.2594-47-zhiwei_liu@c-sky.com Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/helper.h | 7 +++++++ target/riscv/insn32.decode | 2 ++ target/riscv/insn_trans/trans_rvv.inc.c | 4 ++++ target/riscv/vector_helper.c | 11 +++++++++++ 4 files changed, 24 insertions(+) diff --git a/target/riscv/helper.h b/target/riscv/helper.h index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/helper.h +++ b/target/riscv/helper.h @@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vredxor_vs_b, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vredxor_vs_h, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vredxor_vs_w, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vredxor_vs_d, void, ptr, ptr, ptr, ptr, env, i32) + +DEF_HELPER_6(vwredsumu_vs_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwredsumu_vs_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwredsumu_vs_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwredsum_vs_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwredsum_vs_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwredsum_vs_w, void, ptr, ptr, ptr, ptr, env, i32) diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn32.decode +++ b/target/riscv/insn32.decode @@ -XXX,XX +XXX,XX @@ vredminu_vs 000100 . ..... ..... 010 ..... 1010111 @r_vm vredmin_vs 000101 . ..... ..... 010 ..... 1010111 @r_vm vredmaxu_vs 000110 . ..... ..... 010 ..... 1010111 @r_vm vredmax_vs 000111 . ..... ..... 010 ..... 1010111 @r_vm +vwredsumu_vs 110000 . ..... ..... 000 ..... 1010111 @r_vm +vwredsum_vs 110001 . ..... ..... 000 ..... 1010111 @r_vm vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm vsetvl 1000000 ..... ..... 111 ..... 1010111 @r diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn_trans/trans_rvv.inc.c +++ b/target/riscv/insn_trans/trans_rvv.inc.c @@ -XXX,XX +XXX,XX @@ GEN_OPIVV_TRANS(vredmin_vs, reduction_check) GEN_OPIVV_TRANS(vredand_vs, reduction_check) GEN_OPIVV_TRANS(vredor_vs, reduction_check) GEN_OPIVV_TRANS(vredxor_vs, reduction_check) + +/* Vector Widening Integer Reduction Instructions */ +GEN_OPIVV_WIDEN_TRANS(vwredsum_vs, reduction_check) +GEN_OPIVV_WIDEN_TRANS(vwredsumu_vs, reduction_check) diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -XXX,XX +XXX,XX @@ GEN_VEXT_RED(vredxor_vs_b, int8_t, int8_t, H1, H1, DO_XOR, clearb) GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR, clearh) GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR, clearl) GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR, clearq) + +/* Vector Widening Integer Reduction Instructions */ +/* signed sum reduction into double-width accumulator */ +GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t, H2, H1, DO_ADD, clearh) +GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD, clearl) +GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD, clearq) + +/* Unsigned sum reduction into double-width accumulator */ +GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t, H2, H1, DO_ADD, clearh) +GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD, clearl) +GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD, clearq) -- 2.27.0
From: LIU Zhiwei <zhiwei_liu@c-sky.com> Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Message-id: 20200623215920.2594-48-zhiwei_liu@c-sky.com Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/helper.h | 10 +++++++ target/riscv/insn32.decode | 4 +++ target/riscv/insn_trans/trans_rvv.inc.c | 5 ++++ target/riscv/vector_helper.c | 39 +++++++++++++++++++++++++ 4 files changed, 58 insertions(+) diff --git a/target/riscv/helper.h b/target/riscv/helper.h index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/helper.h +++ b/target/riscv/helper.h @@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vwredsumu_vs_w, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vwredsum_vs_b, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vwredsum_vs_h, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vwredsum_vs_w, void, ptr, ptr, ptr, ptr, env, i32) + +DEF_HELPER_6(vfredsum_vs_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfredsum_vs_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfredsum_vs_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfredmax_vs_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfredmax_vs_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfredmax_vs_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfredmin_vs_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfredmin_vs_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfredmin_vs_d, void, ptr, ptr, ptr, ptr, env, i32) diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn32.decode +++ b/target/riscv/insn32.decode @@ -XXX,XX +XXX,XX @@ vredmaxu_vs 000110 . ..... ..... 010 ..... 1010111 @r_vm vredmax_vs 000111 . ..... ..... 010 ..... 1010111 @r_vm vwredsumu_vs 110000 . ..... ..... 000 ..... 1010111 @r_vm vwredsum_vs 110001 . ..... ..... 000 ..... 1010111 @r_vm +# Vector ordered and unordered reduction sum +vfredsum_vs 0000-1 . ..... ..... 001 ..... 1010111 @r_vm +vfredmin_vs 000101 . ..... ..... 001 ..... 1010111 @r_vm +vfredmax_vs 000111 . ..... ..... 001 ..... 1010111 @r_vm vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm vsetvl 1000000 ..... ..... 111 ..... 1010111 @r diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn_trans/trans_rvv.inc.c +++ b/target/riscv/insn_trans/trans_rvv.inc.c @@ -XXX,XX +XXX,XX @@ GEN_OPIVV_TRANS(vredxor_vs, reduction_check) /* Vector Widening Integer Reduction Instructions */ GEN_OPIVV_WIDEN_TRANS(vwredsum_vs, reduction_check) GEN_OPIVV_WIDEN_TRANS(vwredsumu_vs, reduction_check) + +/* Vector Single-Width Floating-Point Reduction Instructions */ +GEN_OPFVV_TRANS(vfredsum_vs, reduction_check) +GEN_OPFVV_TRANS(vfredmax_vs, reduction_check) +GEN_OPFVV_TRANS(vfredmin_vs, reduction_check) diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -XXX,XX +XXX,XX @@ GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD, clearq) GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t, H2, H1, DO_ADD, clearh) GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD, clearl) GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD, clearq) + +/* Vector Single-Width Floating-Point Reduction Instructions */ +#define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP, CLEAR_FN)\ +void HELPER(NAME)(void *vd, void *v0, void *vs1, \ + void *vs2, CPURISCVState *env, \ + uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vm = vext_vm(desc); \ + uint32_t vl = env->vl; \ + uint32_t i; \ + uint32_t tot = env_archcpu(env)->cfg.vlen / 8; \ + TD s1 = *((TD *)vs1 + HD(0)); \ + \ + for (i = 0; i < vl; i++) { \ + TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ + continue; \ + } \ + s1 = OP(s1, (TD)s2, &env->fp_status); \ + } \ + *((TD *)vd + HD(0)) = s1; \ + CLEAR_FN(vd, 1, sizeof(TD), tot); \ +} + +/* Unordered sum */ +GEN_VEXT_FRED(vfredsum_vs_h, uint16_t, uint16_t, H2, H2, float16_add, clearh) +GEN_VEXT_FRED(vfredsum_vs_w, uint32_t, uint32_t, H4, H4, float32_add, clearl) +GEN_VEXT_FRED(vfredsum_vs_d, uint64_t, uint64_t, H8, H8, float64_add, clearq) + +/* Maximum value */ +GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2, float16_maxnum, clearh) +GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4, float32_maxnum, clearl) +GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8, float64_maxnum, clearq) + +/* Minimum value */ +GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2, float16_minnum, clearh) +GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4, float32_minnum, clearl) +GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8, float64_minnum, clearq) -- 2.27.0
From: LIU Zhiwei <zhiwei_liu@c-sky.com> Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Message-id: 20200623215920.2594-49-zhiwei_liu@c-sky.com Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/helper.h | 3 ++ target/riscv/insn32.decode | 2 ++ target/riscv/insn_trans/trans_rvv.inc.c | 3 ++ target/riscv/vector_helper.c | 46 +++++++++++++++++++++++++ 4 files changed, 54 insertions(+) diff --git a/target/riscv/helper.h b/target/riscv/helper.h index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/helper.h +++ b/target/riscv/helper.h @@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vfredmax_vs_d, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vfredmin_vs_h, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vfredmin_vs_w, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vfredmin_vs_d, void, ptr, ptr, ptr, ptr, env, i32) + +DEF_HELPER_6(vfwredsum_vs_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfwredsum_vs_w, void, ptr, ptr, ptr, ptr, env, i32) diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn32.decode +++ b/target/riscv/insn32.decode @@ -XXX,XX +XXX,XX @@ vwredsum_vs 110001 . ..... ..... 000 ..... 1010111 @r_vm vfredsum_vs 0000-1 . ..... ..... 001 ..... 1010111 @r_vm vfredmin_vs 000101 . ..... ..... 001 ..... 1010111 @r_vm vfredmax_vs 000111 . ..... ..... 001 ..... 1010111 @r_vm +# Vector widening ordered and unordered float reduction sum +vfwredsum_vs 1100-1 . ..... ..... 001 ..... 1010111 @r_vm vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm vsetvl 1000000 ..... ..... 111 ..... 1010111 @r diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn_trans/trans_rvv.inc.c +++ b/target/riscv/insn_trans/trans_rvv.inc.c @@ -XXX,XX +XXX,XX @@ GEN_OPIVV_WIDEN_TRANS(vwredsumu_vs, reduction_check) GEN_OPFVV_TRANS(vfredsum_vs, reduction_check) GEN_OPFVV_TRANS(vfredmax_vs, reduction_check) GEN_OPFVV_TRANS(vfredmin_vs, reduction_check) + +/* Vector Widening Floating-Point Reduction Instructions */ +GEN_OPFVV_WIDEN_TRANS(vfwredsum_vs, reduction_check) diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -XXX,XX +XXX,XX @@ GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8, float64_maxnum, clearq) GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2, float16_minnum, clearh) GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4, float32_minnum, clearl) GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8, float64_minnum, clearq) + +/* Vector Widening Floating-Point Reduction Instructions */ +/* Unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */ +void HELPER(vfwredsum_vs_h)(void *vd, void *v0, void *vs1, + void *vs2, CPURISCVState *env, uint32_t desc) +{ + uint32_t mlen = vext_mlen(desc); + uint32_t vm = vext_vm(desc); + uint32_t vl = env->vl; + uint32_t i; + uint32_t tot = env_archcpu(env)->cfg.vlen / 8; + uint32_t s1 = *((uint32_t *)vs1 + H4(0)); + + for (i = 0; i < vl; i++) { + uint16_t s2 = *((uint16_t *)vs2 + H2(i)); + if (!vm && !vext_elem_mask(v0, mlen, i)) { + continue; + } + s1 = float32_add(s1, float16_to_float32(s2, true, &env->fp_status), + &env->fp_status); + } + *((uint32_t *)vd + H4(0)) = s1; + clearl(vd, 1, sizeof(uint32_t), tot); +} + +void HELPER(vfwredsum_vs_w)(void *vd, void *v0, void *vs1, + void *vs2, CPURISCVState *env, uint32_t desc) +{ + uint32_t mlen = vext_mlen(desc); + uint32_t vm = vext_vm(desc); + uint32_t vl = env->vl; + uint32_t i; + uint32_t tot = env_archcpu(env)->cfg.vlen / 8; + uint64_t s1 = *((uint64_t *)vs1); + + for (i = 0; i < vl; i++) { + uint32_t s2 = *((uint32_t *)vs2 + H4(i)); + if (!vm && !vext_elem_mask(v0, mlen, i)) { + continue; + } + s1 = float64_add(s1, float32_to_float64(s2, &env->fp_status), + &env->fp_status); + } + *((uint64_t *)vd) = s1; + clearq(vd, 1, sizeof(uint64_t), tot); +} -- 2.27.0
From: LIU Zhiwei <zhiwei_liu@c-sky.com> Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Message-id: 20200623215920.2594-50-zhiwei_liu@c-sky.com Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/helper.h | 9 ++++++ target/riscv/insn32.decode | 8 +++++ target/riscv/insn_trans/trans_rvv.inc.c | 35 ++++++++++++++++++++++ target/riscv/vector_helper.c | 40 +++++++++++++++++++++++++ 4 files changed, 92 insertions(+) diff --git a/target/riscv/helper.h b/target/riscv/helper.h index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/helper.h +++ b/target/riscv/helper.h @@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vfredmin_vs_d, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vfwredsum_vs_h, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vfwredsum_vs_w, void, ptr, ptr, ptr, ptr, env, i32) + +DEF_HELPER_6(vmand_mm, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmnand_mm, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmandnot_mm, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmxor_mm, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmor_mm, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmnor_mm, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmornot_mm, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmxnor_mm, void, ptr, ptr, ptr, ptr, env, i32) diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn32.decode +++ b/target/riscv/insn32.decode @@ -XXX,XX +XXX,XX @@ vfredmin_vs 000101 . ..... ..... 001 ..... 1010111 @r_vm vfredmax_vs 000111 . ..... ..... 001 ..... 1010111 @r_vm # Vector widening ordered and unordered float reduction sum vfwredsum_vs 1100-1 . ..... ..... 001 ..... 1010111 @r_vm +vmand_mm 011001 - ..... ..... 010 ..... 1010111 @r +vmnand_mm 011101 - ..... ..... 010 ..... 1010111 @r +vmandnot_mm 011000 - ..... ..... 010 ..... 1010111 @r +vmxor_mm 011011 - ..... ..... 010 ..... 1010111 @r +vmor_mm 011010 - ..... ..... 010 ..... 1010111 @r +vmnor_mm 011110 - ..... ..... 010 ..... 1010111 @r +vmornot_mm 011100 - ..... ..... 010 ..... 1010111 @r +vmxnor_mm 011111 - ..... ..... 010 ..... 1010111 @r vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm vsetvl 1000000 ..... ..... 111 ..... 1010111 @r diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn_trans/trans_rvv.inc.c +++ b/target/riscv/insn_trans/trans_rvv.inc.c @@ -XXX,XX +XXX,XX @@ GEN_OPFVV_TRANS(vfredmin_vs, reduction_check) /* Vector Widening Floating-Point Reduction Instructions */ GEN_OPFVV_WIDEN_TRANS(vfwredsum_vs, reduction_check) + +/* + *** Vector Mask Operations + */ + +/* Vector Mask-Register Logical Instructions */ +#define GEN_MM_TRANS(NAME) \ +static bool trans_##NAME(DisasContext *s, arg_r *a) \ +{ \ + if (vext_check_isa_ill(s)) { \ + uint32_t data = 0; \ + gen_helper_gvec_4_ptr *fn = gen_helper_##NAME; \ + TCGLabel *over = gen_new_label(); \ + tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ + \ + data = FIELD_DP32(data, VDATA, MLEN, s->mlen); \ + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ + tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ + vreg_ofs(s, a->rs1), \ + vreg_ofs(s, a->rs2), cpu_env, 0, \ + s->vlen / 8, data, fn); \ + gen_set_label(over); \ + return true; \ + } \ + return false; \ +} + +GEN_MM_TRANS(vmand_mm) +GEN_MM_TRANS(vmnand_mm) +GEN_MM_TRANS(vmandnot_mm) +GEN_MM_TRANS(vmxor_mm) +GEN_MM_TRANS(vmor_mm) +GEN_MM_TRANS(vmnor_mm) +GEN_MM_TRANS(vmornot_mm) +GEN_MM_TRANS(vmxnor_mm) diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -XXX,XX +XXX,XX @@ void HELPER(vfwredsum_vs_w)(void *vd, void *v0, void *vs1, *((uint64_t *)vd) = s1; clearq(vd, 1, sizeof(uint64_t), tot); } + +/* + *** Vector Mask Operations + */ +/* Vector Mask-Register Logical Instructions */ +#define GEN_VEXT_MASK_VV(NAME, OP) \ +void HELPER(NAME)(void *vd, void *v0, void *vs1, \ + void *vs2, CPURISCVState *env, \ + uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen; \ + uint32_t vl = env->vl; \ + uint32_t i; \ + int a, b; \ + \ + for (i = 0; i < vl; i++) { \ + a = vext_elem_mask(vs1, mlen, i); \ + b = vext_elem_mask(vs2, mlen, i); \ + vext_set_elem_mask(vd, mlen, i, OP(b, a)); \ + } \ + for (; i < vlmax; i++) { \ + vext_set_elem_mask(vd, mlen, i, 0); \ + } \ +} + +#define DO_NAND(N, M) (!(N & M)) +#define DO_ANDNOT(N, M) (N & !M) +#define DO_NOR(N, M) (!(N | M)) +#define DO_ORNOT(N, M) (N | !M) +#define DO_XNOR(N, M) (!(N ^ M)) + +GEN_VEXT_MASK_VV(vmand_mm, DO_AND) +GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND) +GEN_VEXT_MASK_VV(vmandnot_mm, DO_ANDNOT) +GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR) +GEN_VEXT_MASK_VV(vmor_mm, DO_OR) +GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR) +GEN_VEXT_MASK_VV(vmornot_mm, DO_ORNOT) +GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR) -- 2.27.0
From: LIU Zhiwei <zhiwei_liu@c-sky.com> Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Message-id: 20200623215920.2594-51-zhiwei_liu@c-sky.com Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/helper.h | 2 ++ target/riscv/insn32.decode | 1 + target/riscv/insn_trans/trans_rvv.inc.c | 32 +++++++++++++++++++++++++ target/riscv/vector_helper.c | 20 ++++++++++++++++ 4 files changed, 55 insertions(+) diff --git a/target/riscv/helper.h b/target/riscv/helper.h index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/helper.h +++ b/target/riscv/helper.h @@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vmor_mm, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vmnor_mm, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vmornot_mm, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vmxnor_mm, void, ptr, ptr, ptr, ptr, env, i32) + +DEF_HELPER_4(vmpopc_m, tl, ptr, ptr, env, i32) diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn32.decode +++ b/target/riscv/insn32.decode @@ -XXX,XX +XXX,XX @@ vmor_mm 011010 - ..... ..... 010 ..... 1010111 @r vmnor_mm 011110 - ..... ..... 010 ..... 1010111 @r vmornot_mm 011100 - ..... ..... 010 ..... 1010111 @r vmxnor_mm 011111 - ..... ..... 010 ..... 1010111 @r +vmpopc_m 010100 . ..... ----- 010 ..... 1010111 @r2_vm vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm vsetvl 1000000 ..... ..... 111 ..... 1010111 @r diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn_trans/trans_rvv.inc.c +++ b/target/riscv/insn_trans/trans_rvv.inc.c @@ -XXX,XX +XXX,XX @@ GEN_MM_TRANS(vmor_mm) GEN_MM_TRANS(vmnor_mm) GEN_MM_TRANS(vmornot_mm) GEN_MM_TRANS(vmxnor_mm) + +/* Vector mask population count vmpopc */ +static bool trans_vmpopc_m(DisasContext *s, arg_rmr *a) +{ + if (vext_check_isa_ill(s)) { + TCGv_ptr src2, mask; + TCGv dst; + TCGv_i32 desc; + uint32_t data = 0; + data = FIELD_DP32(data, VDATA, MLEN, s->mlen); + data = FIELD_DP32(data, VDATA, VM, a->vm); + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); + + mask = tcg_temp_new_ptr(); + src2 = tcg_temp_new_ptr(); + dst = tcg_temp_new(); + desc = tcg_const_i32(simd_desc(0, s->vlen / 8, data)); + + tcg_gen_addi_ptr(src2, cpu_env, vreg_ofs(s, a->rs2)); + tcg_gen_addi_ptr(mask, cpu_env, vreg_ofs(s, 0)); + + gen_helper_vmpopc_m(dst, mask, src2, cpu_env, desc); + gen_set_gpr(a->rd, dst); + + tcg_temp_free_ptr(mask); + tcg_temp_free_ptr(src2); + tcg_temp_free(dst); + tcg_temp_free_i32(desc); + return true; + } + return false; +} diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -XXX,XX +XXX,XX @@ GEN_VEXT_MASK_VV(vmor_mm, DO_OR) GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR) GEN_VEXT_MASK_VV(vmornot_mm, DO_ORNOT) GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR) + +/* Vector mask population count vmpopc */ +target_ulong HELPER(vmpopc_m)(void *v0, void *vs2, CPURISCVState *env, + uint32_t desc) +{ + target_ulong cnt = 0; + uint32_t mlen = vext_mlen(desc); + uint32_t vm = vext_vm(desc); + uint32_t vl = env->vl; + int i; + + for (i = 0; i < vl; i++) { + if (vm || vext_elem_mask(v0, mlen, i)) { + if (vext_elem_mask(vs2, mlen, i)) { + cnt++; + } + } + } + return cnt; +} -- 2.27.0
From: LIU Zhiwei <zhiwei_liu@c-sky.com> Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Message-id: 20200623215920.2594-52-zhiwei_liu@c-sky.com Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/helper.h | 2 ++ target/riscv/insn32.decode | 1 + target/riscv/insn_trans/trans_rvv.inc.c | 32 +++++++++++++++++++++++++ target/riscv/vector_helper.c | 19 +++++++++++++++ 4 files changed, 54 insertions(+) diff --git a/target/riscv/helper.h b/target/riscv/helper.h index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/helper.h +++ b/target/riscv/helper.h @@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vmornot_mm, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vmxnor_mm, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_4(vmpopc_m, tl, ptr, ptr, env, i32) + +DEF_HELPER_4(vmfirst_m, tl, ptr, ptr, env, i32) diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn32.decode +++ b/target/riscv/insn32.decode @@ -XXX,XX +XXX,XX @@ vmnor_mm 011110 - ..... ..... 010 ..... 1010111 @r vmornot_mm 011100 - ..... ..... 010 ..... 1010111 @r vmxnor_mm 011111 - ..... ..... 010 ..... 1010111 @r vmpopc_m 010100 . ..... ----- 010 ..... 1010111 @r2_vm +vmfirst_m 010101 . ..... ----- 010 ..... 1010111 @r2_vm vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm vsetvl 1000000 ..... ..... 111 ..... 1010111 @r diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn_trans/trans_rvv.inc.c +++ b/target/riscv/insn_trans/trans_rvv.inc.c @@ -XXX,XX +XXX,XX @@ static bool trans_vmpopc_m(DisasContext *s, arg_rmr *a) } return false; } + +/* vmfirst find-first-set mask bit */ +static bool trans_vmfirst_m(DisasContext *s, arg_rmr *a) +{ + if (vext_check_isa_ill(s)) { + TCGv_ptr src2, mask; + TCGv dst; + TCGv_i32 desc; + uint32_t data = 0; + data = FIELD_DP32(data, VDATA, MLEN, s->mlen); + data = FIELD_DP32(data, VDATA, VM, a->vm); + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); + + mask = tcg_temp_new_ptr(); + src2 = tcg_temp_new_ptr(); + dst = tcg_temp_new(); + desc = tcg_const_i32(simd_desc(0, s->vlen / 8, data)); + + tcg_gen_addi_ptr(src2, cpu_env, vreg_ofs(s, a->rs2)); + tcg_gen_addi_ptr(mask, cpu_env, vreg_ofs(s, 0)); + + gen_helper_vmfirst_m(dst, mask, src2, cpu_env, desc); + gen_set_gpr(a->rd, dst); + + tcg_temp_free_ptr(mask); + tcg_temp_free_ptr(src2); + tcg_temp_free(dst); + tcg_temp_free_i32(desc); + return true; + } + return false; +} diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -XXX,XX +XXX,XX @@ target_ulong HELPER(vmpopc_m)(void *v0, void *vs2, CPURISCVState *env, } return cnt; } + +/* vmfirst find-first-set mask bit*/ +target_ulong HELPER(vmfirst_m)(void *v0, void *vs2, CPURISCVState *env, + uint32_t desc) +{ + uint32_t mlen = vext_mlen(desc); + uint32_t vm = vext_vm(desc); + uint32_t vl = env->vl; + int i; + + for (i = 0; i < vl; i++) { + if (vm || vext_elem_mask(v0, mlen, i)) { + if (vext_elem_mask(vs2, mlen, i)) { + return i; + } + } + } + return -1LL; +} -- 2.27.0
From: LIU Zhiwei <zhiwei_liu@c-sky.com> Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Message-id: 20200623215920.2594-53-zhiwei_liu@c-sky.com Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/helper.h | 4 ++ target/riscv/insn32.decode | 3 ++ target/riscv/insn_trans/trans_rvv.inc.c | 28 +++++++++++ target/riscv/vector_helper.c | 63 +++++++++++++++++++++++++ 4 files changed, 98 insertions(+) diff --git a/target/riscv/helper.h b/target/riscv/helper.h index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/helper.h +++ b/target/riscv/helper.h @@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vmxnor_mm, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_4(vmpopc_m, tl, ptr, ptr, env, i32) DEF_HELPER_4(vmfirst_m, tl, ptr, ptr, env, i32) + +DEF_HELPER_5(vmsbf_m, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vmsif_m, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vmsof_m, void, ptr, ptr, ptr, env, i32) diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn32.decode +++ b/target/riscv/insn32.decode @@ -XXX,XX +XXX,XX @@ vmornot_mm 011100 - ..... ..... 010 ..... 1010111 @r vmxnor_mm 011111 - ..... ..... 010 ..... 1010111 @r vmpopc_m 010100 . ..... ----- 010 ..... 1010111 @r2_vm vmfirst_m 010101 . ..... ----- 010 ..... 1010111 @r2_vm +vmsbf_m 010110 . ..... 00001 010 ..... 1010111 @r2_vm +vmsif_m 010110 . ..... 00011 010 ..... 1010111 @r2_vm +vmsof_m 010110 . ..... 00010 010 ..... 1010111 @r2_vm vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm vsetvl 1000000 ..... ..... 111 ..... 1010111 @r diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn_trans/trans_rvv.inc.c +++ b/target/riscv/insn_trans/trans_rvv.inc.c @@ -XXX,XX +XXX,XX @@ static bool trans_vmfirst_m(DisasContext *s, arg_rmr *a) } return false; } + +/* vmsbf.m set-before-first mask bit */ +/* vmsif.m set-includ-first mask bit */ +/* vmsof.m set-only-first mask bit */ +#define GEN_M_TRANS(NAME) \ +static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ +{ \ + if (vext_check_isa_ill(s)) { \ + uint32_t data = 0; \ + gen_helper_gvec_3_ptr *fn = gen_helper_##NAME; \ + TCGLabel *over = gen_new_label(); \ + tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ + \ + data = FIELD_DP32(data, VDATA, MLEN, s->mlen); \ + data = FIELD_DP32(data, VDATA, VM, a->vm); \ + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ + tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), \ + vreg_ofs(s, 0), vreg_ofs(s, a->rs2), \ + cpu_env, 0, s->vlen / 8, data, fn); \ + gen_set_label(over); \ + return true; \ + } \ + return false; \ +} + +GEN_M_TRANS(vmsbf_m) +GEN_M_TRANS(vmsif_m) +GEN_M_TRANS(vmsof_m) diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -XXX,XX +XXX,XX @@ target_ulong HELPER(vmfirst_m)(void *v0, void *vs2, CPURISCVState *env, } return -1LL; } + +enum set_mask_type { + ONLY_FIRST = 1, + INCLUDE_FIRST, + BEFORE_FIRST, +}; + +static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env, + uint32_t desc, enum set_mask_type type) +{ + uint32_t mlen = vext_mlen(desc); + uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen; + uint32_t vm = vext_vm(desc); + uint32_t vl = env->vl; + int i; + bool first_mask_bit = false; + + for (i = 0; i < vl; i++) { + if (!vm && !vext_elem_mask(v0, mlen, i)) { + continue; + } + /* write a zero to all following active elements */ + if (first_mask_bit) { + vext_set_elem_mask(vd, mlen, i, 0); + continue; + } + if (vext_elem_mask(vs2, mlen, i)) { + first_mask_bit = true; + if (type == BEFORE_FIRST) { + vext_set_elem_mask(vd, mlen, i, 0); + } else { + vext_set_elem_mask(vd, mlen, i, 1); + } + } else { + if (type == ONLY_FIRST) { + vext_set_elem_mask(vd, mlen, i, 0); + } else { + vext_set_elem_mask(vd, mlen, i, 1); + } + } + } + for (; i < vlmax; i++) { + vext_set_elem_mask(vd, mlen, i, 0); + } +} + +void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, + uint32_t desc) +{ + vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST); +} + +void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, + uint32_t desc) +{ + vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST); +} + +void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, + uint32_t desc) +{ + vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST); +} -- 2.27.0
From: LIU Zhiwei <zhiwei_liu@c-sky.com> Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Message-id: 20200623215920.2594-54-zhiwei_liu@c-sky.com Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/helper.h | 5 +++++ target/riscv/insn32.decode | 1 + target/riscv/insn_trans/trans_rvv.inc.c | 27 +++++++++++++++++++++++ target/riscv/vector_helper.c | 29 +++++++++++++++++++++++++ 4 files changed, 62 insertions(+) diff --git a/target/riscv/helper.h b/target/riscv/helper.h index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/helper.h +++ b/target/riscv/helper.h @@ -XXX,XX +XXX,XX @@ DEF_HELPER_4(vmfirst_m, tl, ptr, ptr, env, i32) DEF_HELPER_5(vmsbf_m, void, ptr, ptr, ptr, env, i32) DEF_HELPER_5(vmsif_m, void, ptr, ptr, ptr, env, i32) DEF_HELPER_5(vmsof_m, void, ptr, ptr, ptr, env, i32) + +DEF_HELPER_5(viota_m_b, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(viota_m_h, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(viota_m_w, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(viota_m_d, void, ptr, ptr, ptr, env, i32) diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn32.decode +++ b/target/riscv/insn32.decode @@ -XXX,XX +XXX,XX @@ vmfirst_m 010101 . ..... ----- 010 ..... 1010111 @r2_vm vmsbf_m 010110 . ..... 00001 010 ..... 1010111 @r2_vm vmsif_m 010110 . ..... 00011 010 ..... 1010111 @r2_vm vmsof_m 010110 . ..... 00010 010 ..... 1010111 @r2_vm +viota_m 010110 . ..... 10000 010 ..... 1010111 @r2_vm vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm vsetvl 1000000 ..... ..... 111 ..... 1010111 @r diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn_trans/trans_rvv.inc.c +++ b/target/riscv/insn_trans/trans_rvv.inc.c @@ -XXX,XX +XXX,XX @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ GEN_M_TRANS(vmsbf_m) GEN_M_TRANS(vmsif_m) GEN_M_TRANS(vmsof_m) + +/* Vector Iota Instruction */ +static bool trans_viota_m(DisasContext *s, arg_viota_m *a) +{ + if (vext_check_isa_ill(s) && + vext_check_reg(s, a->rd, false) && + vext_check_overlap_group(a->rd, 1 << s->lmul, a->rs2, 1) && + (a->vm != 0 || a->rd != 0)) { + uint32_t data = 0; + TCGLabel *over = gen_new_label(); + tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + + data = FIELD_DP32(data, VDATA, MLEN, s->mlen); + data = FIELD_DP32(data, VDATA, VM, a->vm); + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); + static gen_helper_gvec_3_ptr * const fns[4] = { + gen_helper_viota_m_b, gen_helper_viota_m_h, + gen_helper_viota_m_w, gen_helper_viota_m_d, + }; + tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), + vreg_ofs(s, a->rs2), cpu_env, 0, + s->vlen / 8, data, fns[s->sew]); + gen_set_label(over); + return true; + } + return false; +} diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -XXX,XX +XXX,XX @@ void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, { vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST); } + +/* Vector Iota Instruction */ +#define GEN_VEXT_VIOTA_M(NAME, ETYPE, H, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env, \ + uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen; \ + uint32_t vm = vext_vm(desc); \ + uint32_t vl = env->vl; \ + uint32_t sum = 0; \ + int i; \ + \ + for (i = 0; i < vl; i++) { \ + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ + continue; \ + } \ + *((ETYPE *)vd + H(i)) = sum; \ + if (vext_elem_mask(vs2, mlen, i)) { \ + sum++; \ + } \ + } \ + CLEAR_FN(vd, vl, vl * sizeof(ETYPE), vlmax * sizeof(ETYPE)); \ +} + +GEN_VEXT_VIOTA_M(viota_m_b, uint8_t, H1, clearb) +GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2, clearh) +GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4, clearl) +GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8, clearq) -- 2.27.0
From: LIU Zhiwei <zhiwei_liu@c-sky.com> Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Message-id: 20200623215920.2594-55-zhiwei_liu@c-sky.com Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/helper.h | 5 +++++ target/riscv/insn32.decode | 2 ++ target/riscv/insn_trans/trans_rvv.inc.c | 25 +++++++++++++++++++++++++ target/riscv/vector_helper.c | 24 ++++++++++++++++++++++++ 4 files changed, 56 insertions(+) diff --git a/target/riscv/helper.h b/target/riscv/helper.h index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/helper.h +++ b/target/riscv/helper.h @@ -XXX,XX +XXX,XX @@ DEF_HELPER_5(viota_m_b, void, ptr, ptr, ptr, env, i32) DEF_HELPER_5(viota_m_h, void, ptr, ptr, ptr, env, i32) DEF_HELPER_5(viota_m_w, void, ptr, ptr, ptr, env, i32) DEF_HELPER_5(viota_m_d, void, ptr, ptr, ptr, env, i32) + +DEF_HELPER_4(vid_v_b, void, ptr, ptr, env, i32) +DEF_HELPER_4(vid_v_h, void, ptr, ptr, env, i32) +DEF_HELPER_4(vid_v_w, void, ptr, ptr, env, i32) +DEF_HELPER_4(vid_v_d, void, ptr, ptr, env, i32) diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn32.decode +++ b/target/riscv/insn32.decode @@ -XXX,XX +XXX,XX @@ @r2 ....... ..... ..... ... ..... ....... %rs1 %rd @r2_nfvm ... ... vm:1 ..... ..... ... ..... ....... &r2nfvm %nf %rs1 %rd @r2_vm ...... vm:1 ..... ..... ... ..... ....... &rmr %rs2 %rd +@r1_vm ...... vm:1 ..... ..... ... ..... ....... %rd @r_nfvm ... ... vm:1 ..... ..... ... ..... ....... &rnfvm %nf %rs2 %rs1 %rd @r_vm ...... vm:1 ..... ..... ... ..... ....... &rmrr %rs2 %rs1 %rd @r_vm_1 ...... . ..... ..... ... ..... ....... &rmrr vm=1 %rs2 %rs1 %rd @@ -XXX,XX +XXX,XX @@ vmsbf_m 010110 . ..... 00001 010 ..... 1010111 @r2_vm vmsif_m 010110 . ..... 00011 010 ..... 1010111 @r2_vm vmsof_m 010110 . ..... 00010 010 ..... 1010111 @r2_vm viota_m 010110 . ..... 10000 010 ..... 1010111 @r2_vm +vid_v 010110 . 00000 10001 010 ..... 1010111 @r1_vm vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm vsetvl 1000000 ..... ..... 111 ..... 1010111 @r diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn_trans/trans_rvv.inc.c +++ b/target/riscv/insn_trans/trans_rvv.inc.c @@ -XXX,XX +XXX,XX @@ static bool trans_viota_m(DisasContext *s, arg_viota_m *a) } return false; } + +/* Vector Element Index Instruction */ +static bool trans_vid_v(DisasContext *s, arg_vid_v *a) +{ + if (vext_check_isa_ill(s) && + vext_check_reg(s, a->rd, false) && + vext_check_overlap_mask(s, a->rd, a->vm, false)) { + uint32_t data = 0; + TCGLabel *over = gen_new_label(); + tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + + data = FIELD_DP32(data, VDATA, MLEN, s->mlen); + data = FIELD_DP32(data, VDATA, VM, a->vm); + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); + static gen_helper_gvec_2_ptr * const fns[4] = { + gen_helper_vid_v_b, gen_helper_vid_v_h, + gen_helper_vid_v_w, gen_helper_vid_v_d, + }; + tcg_gen_gvec_2_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), + cpu_env, 0, s->vlen / 8, data, fns[s->sew]); + gen_set_label(over); + return true; + } + return false; +} diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -XXX,XX +XXX,XX @@ GEN_VEXT_VIOTA_M(viota_m_b, uint8_t, H1, clearb) GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2, clearh) GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4, clearl) GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8, clearq) + +/* Vector Element Index Instruction */ +#define GEN_VEXT_VID_V(NAME, ETYPE, H, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen; \ + uint32_t vm = vext_vm(desc); \ + uint32_t vl = env->vl; \ + int i; \ + \ + for (i = 0; i < vl; i++) { \ + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ + continue; \ + } \ + *((ETYPE *)vd + H(i)) = i; \ + } \ + CLEAR_FN(vd, vl, vl * sizeof(ETYPE), vlmax * sizeof(ETYPE)); \ +} + +GEN_VEXT_VID_V(vid_v_b, uint8_t, H1, clearb) +GEN_VEXT_VID_V(vid_v_h, uint16_t, H2, clearh) +GEN_VEXT_VID_V(vid_v_w, uint32_t, H4, clearl) +GEN_VEXT_VID_V(vid_v_d, uint64_t, H8, clearq) -- 2.27.0
From: LIU Zhiwei <zhiwei_liu@c-sky.com> Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Message-id: 20200623215920.2594-56-zhiwei_liu@c-sky.com Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/insn32.decode | 1 + target/riscv/insn_trans/trans_rvv.inc.c | 116 ++++++++++++++++++++++++ 2 files changed, 117 insertions(+) diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn32.decode +++ b/target/riscv/insn32.decode @@ -XXX,XX +XXX,XX @@ vmsif_m 010110 . ..... 00011 010 ..... 1010111 @r2_vm vmsof_m 010110 . ..... 00010 010 ..... 1010111 @r2_vm viota_m 010110 . ..... 10000 010 ..... 1010111 @r2_vm vid_v 010110 . 00000 10001 010 ..... 1010111 @r1_vm +vext_x_v 001100 1 ..... ..... 010 ..... 1010111 @r vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm vsetvl 1000000 ..... ..... 111 ..... 1010111 @r diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn_trans/trans_rvv.inc.c +++ b/target/riscv/insn_trans/trans_rvv.inc.c @@ -XXX,XX +XXX,XX @@ static bool trans_vid_v(DisasContext *s, arg_vid_v *a) } return false; } + +/* + *** Vector Permutation Instructions + */ + +/* Integer Extract Instruction */ + +static void load_element(TCGv_i64 dest, TCGv_ptr base, + int ofs, int sew) +{ + switch (sew) { + case MO_8: + tcg_gen_ld8u_i64(dest, base, ofs); + break; + case MO_16: + tcg_gen_ld16u_i64(dest, base, ofs); + break; + case MO_32: + tcg_gen_ld32u_i64(dest, base, ofs); + break; + case MO_64: + tcg_gen_ld_i64(dest, base, ofs); + break; + default: + g_assert_not_reached(); + break; + } +} + +/* offset of the idx element with base regsiter r */ +static uint32_t endian_ofs(DisasContext *s, int r, int idx) +{ +#ifdef HOST_WORDS_BIGENDIAN + return vreg_ofs(s, r) + ((idx ^ (7 >> s->sew)) << s->sew); +#else + return vreg_ofs(s, r) + (idx << s->sew); +#endif +} + +/* adjust the index according to the endian */ +static void endian_adjust(TCGv_i32 ofs, int sew) +{ +#ifdef HOST_WORDS_BIGENDIAN + tcg_gen_xori_i32(ofs, ofs, 7 >> sew); +#endif +} + +/* Load idx >= VLMAX ? 0 : vreg[idx] */ +static void vec_element_loadx(DisasContext *s, TCGv_i64 dest, + int vreg, TCGv idx, int vlmax) +{ + TCGv_i32 ofs = tcg_temp_new_i32(); + TCGv_ptr base = tcg_temp_new_ptr(); + TCGv_i64 t_idx = tcg_temp_new_i64(); + TCGv_i64 t_vlmax, t_zero; + + /* + * Mask the index to the length so that we do + * not produce an out-of-range load. + */ + tcg_gen_trunc_tl_i32(ofs, idx); + tcg_gen_andi_i32(ofs, ofs, vlmax - 1); + + /* Convert the index to an offset. */ + endian_adjust(ofs, s->sew); + tcg_gen_shli_i32(ofs, ofs, s->sew); + + /* Convert the index to a pointer. */ + tcg_gen_ext_i32_ptr(base, ofs); + tcg_gen_add_ptr(base, base, cpu_env); + + /* Perform the load. */ + load_element(dest, base, + vreg_ofs(s, vreg), s->sew); + tcg_temp_free_ptr(base); + tcg_temp_free_i32(ofs); + + /* Flush out-of-range indexing to zero. */ + t_vlmax = tcg_const_i64(vlmax); + t_zero = tcg_const_i64(0); + tcg_gen_extu_tl_i64(t_idx, idx); + + tcg_gen_movcond_i64(TCG_COND_LTU, dest, t_idx, + t_vlmax, dest, t_zero); + + tcg_temp_free_i64(t_vlmax); + tcg_temp_free_i64(t_zero); + tcg_temp_free_i64(t_idx); +} + +static void vec_element_loadi(DisasContext *s, TCGv_i64 dest, + int vreg, int idx) +{ + load_element(dest, cpu_env, endian_ofs(s, vreg, idx), s->sew); +} + +static bool trans_vext_x_v(DisasContext *s, arg_r *a) +{ + TCGv_i64 tmp = tcg_temp_new_i64(); + TCGv dest = tcg_temp_new(); + + if (a->rs1 == 0) { + /* Special case vmv.x.s rd, vs2. */ + vec_element_loadi(s, tmp, a->rs2, 0); + } else { + /* This instruction ignores LMUL and vector register groups */ + int vlmax = s->vlen >> (3 + s->sew); + vec_element_loadx(s, tmp, a->rs2, cpu_gpr[a->rs1], vlmax); + } + tcg_gen_trunc_i64_tl(dest, tmp); + gen_set_gpr(a->rd, dest); + + tcg_temp_free(dest); + tcg_temp_free_i64(tmp); + return true; +} -- 2.27.0
From: LIU Zhiwei <zhiwei_liu@c-sky.com> Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Message-id: 20200623215920.2594-57-zhiwei_liu@c-sky.com Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/internals.h | 6 +++ target/riscv/insn32.decode | 1 + target/riscv/insn_trans/trans_rvv.inc.c | 60 +++++++++++++++++++++++++ 3 files changed, 67 insertions(+) diff --git a/target/riscv/internals.h b/target/riscv/internals.h index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/internals.h +++ b/target/riscv/internals.h @@ -XXX,XX +XXX,XX @@ FIELD(VDATA, WD, 11, 1) target_ulong fclass_h(uint64_t frs1); target_ulong fclass_s(uint64_t frs1); target_ulong fclass_d(uint64_t frs1); + +#define SEW8 0 +#define SEW16 1 +#define SEW32 2 +#define SEW64 3 + #endif diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn32.decode +++ b/target/riscv/insn32.decode @@ -XXX,XX +XXX,XX @@ vmsof_m 010110 . ..... 00010 010 ..... 1010111 @r2_vm viota_m 010110 . ..... 10000 010 ..... 1010111 @r2_vm vid_v 010110 . 00000 10001 010 ..... 1010111 @r1_vm vext_x_v 001100 1 ..... ..... 010 ..... 1010111 @r +vmv_s_x 001101 1 00000 ..... 110 ..... 1010111 @r2 vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm vsetvl 1000000 ..... ..... 111 ..... 1010111 @r diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn_trans/trans_rvv.inc.c +++ b/target/riscv/insn_trans/trans_rvv.inc.c @@ -XXX,XX +XXX,XX @@ static bool trans_vext_x_v(DisasContext *s, arg_r *a) tcg_temp_free_i64(tmp); return true; } + +/* Integer Scalar Move Instruction */ + +static void store_element(TCGv_i64 val, TCGv_ptr base, + int ofs, int sew) +{ + switch (sew) { + case MO_8: + tcg_gen_st8_i64(val, base, ofs); + break; + case MO_16: + tcg_gen_st16_i64(val, base, ofs); + break; + case MO_32: + tcg_gen_st32_i64(val, base, ofs); + break; + case MO_64: + tcg_gen_st_i64(val, base, ofs); + break; + default: + g_assert_not_reached(); + break; + } +} + +/* + * Store vreg[idx] = val. + * The index must be in range of VLMAX. + */ +static void vec_element_storei(DisasContext *s, int vreg, + int idx, TCGv_i64 val) +{ + store_element(val, cpu_env, endian_ofs(s, vreg, idx), s->sew); +} + +/* vmv.s.x vd, rs1 # vd[0] = rs1 */ +static bool trans_vmv_s_x(DisasContext *s, arg_vmv_s_x *a) +{ + if (vext_check_isa_ill(s)) { + /* This instruction ignores LMUL and vector register groups */ + int maxsz = s->vlen >> 3; + TCGv_i64 t1; + TCGLabel *over = gen_new_label(); + + tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + tcg_gen_gvec_dup_imm(SEW64, vreg_ofs(s, a->rd), maxsz, maxsz, 0); + if (a->rs1 == 0) { + goto done; + } + + t1 = tcg_temp_new_i64(); + tcg_gen_extu_tl_i64(t1, cpu_gpr[a->rs1]); + vec_element_storei(s, a->rd, 0, t1); + tcg_temp_free_i64(t1); + done: + gen_set_label(over); + return true; + } + return false; +} -- 2.27.0
From: LIU Zhiwei <zhiwei_liu@c-sky.com> Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Message-id: 20200623215920.2594-58-zhiwei_liu@c-sky.com Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/insn32.decode | 3 ++ target/riscv/insn_trans/trans_rvv.inc.c | 49 +++++++++++++++++++++++++ 2 files changed, 52 insertions(+) diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn32.decode +++ b/target/riscv/insn32.decode @@ -XXX,XX +XXX,XX @@ @r2_vm ...... vm:1 ..... ..... ... ..... ....... &rmr %rs2 %rd @r1_vm ...... vm:1 ..... ..... ... ..... ....... %rd @r_nfvm ... ... vm:1 ..... ..... ... ..... ....... &rnfvm %nf %rs2 %rs1 %rd +@r2rd ....... ..... ..... ... ..... ....... %rs2 %rd @r_vm ...... vm:1 ..... ..... ... ..... ....... &rmrr %rs2 %rs1 %rd @r_vm_1 ...... . ..... ..... ... ..... ....... &rmrr vm=1 %rs2 %rs1 %rd @r_vm_0 ...... . ..... ..... ... ..... ....... &rmrr vm=0 %rs2 %rs1 %rd @@ -XXX,XX +XXX,XX @@ viota_m 010110 . ..... 10000 010 ..... 1010111 @r2_vm vid_v 010110 . 00000 10001 010 ..... 1010111 @r1_vm vext_x_v 001100 1 ..... ..... 010 ..... 1010111 @r vmv_s_x 001101 1 00000 ..... 110 ..... 1010111 @r2 +vfmv_f_s 001100 1 ..... 00000 001 ..... 1010111 @r2rd +vfmv_s_f 001101 1 00000 ..... 101 ..... 1010111 @r2 vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm vsetvl 1000000 ..... ..... 111 ..... 1010111 @r diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn_trans/trans_rvv.inc.c +++ b/target/riscv/insn_trans/trans_rvv.inc.c @@ -XXX,XX +XXX,XX @@ static bool trans_vmv_s_x(DisasContext *s, arg_vmv_s_x *a) } return false; } + +/* Floating-Point Scalar Move Instructions */ +static bool trans_vfmv_f_s(DisasContext *s, arg_vfmv_f_s *a) +{ + if (!s->vill && has_ext(s, RVF) && + (s->mstatus_fs != 0) && (s->sew != 0)) { + unsigned int len = 8 << s->sew; + + vec_element_loadi(s, cpu_fpr[a->rd], a->rs2, 0); + if (len < 64) { + tcg_gen_ori_i64(cpu_fpr[a->rd], cpu_fpr[a->rd], + MAKE_64BIT_MASK(len, 64 - len)); + } + + mark_fs_dirty(s); + return true; + } + return false; +} + +/* vfmv.s.f vd, rs1 # vd[0] = rs1 (vs2=0) */ +static bool trans_vfmv_s_f(DisasContext *s, arg_vfmv_s_f *a) +{ + if (!s->vill && has_ext(s, RVF) && (s->sew != 0)) { + TCGv_i64 t1; + /* The instructions ignore LMUL and vector register group. */ + uint32_t vlmax = s->vlen >> 3; + + /* if vl == 0, skip vector register write back */ + TCGLabel *over = gen_new_label(); + tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + + /* zeroed all elements */ + tcg_gen_gvec_dup_imm(SEW64, vreg_ofs(s, a->rd), vlmax, vlmax, 0); + + /* NaN-box f[rs1] as necessary for SEW */ + t1 = tcg_temp_new_i64(); + if (s->sew == MO_64 && !has_ext(s, RVD)) { + tcg_gen_ori_i64(t1, cpu_fpr[a->rs1], MAKE_64BIT_MASK(32, 32)); + } else { + tcg_gen_mov_i64(t1, cpu_fpr[a->rs1]); + } + vec_element_storei(s, a->rd, 0, t1); + tcg_temp_free_i64(t1); + gen_set_label(over); + return true; + } + return false; +} -- 2.27.0
From: LIU Zhiwei <zhiwei_liu@c-sky.com> Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Message-id: 20200623215920.2594-59-zhiwei_liu@c-sky.com Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/helper.h | 17 ++++ target/riscv/insn32.decode | 6 ++ target/riscv/insn_trans/trans_rvv.inc.c | 18 ++++ target/riscv/vector_helper.c | 114 ++++++++++++++++++++++++ 4 files changed, 155 insertions(+) diff --git a/target/riscv/helper.h b/target/riscv/helper.h index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/helper.h +++ b/target/riscv/helper.h @@ -XXX,XX +XXX,XX @@ DEF_HELPER_4(vid_v_b, void, ptr, ptr, env, i32) DEF_HELPER_4(vid_v_h, void, ptr, ptr, env, i32) DEF_HELPER_4(vid_v_w, void, ptr, ptr, env, i32) DEF_HELPER_4(vid_v_d, void, ptr, ptr, env, i32) + +DEF_HELPER_6(vslideup_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vslideup_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vslideup_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vslideup_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vslidedown_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vslidedown_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vslidedown_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vslidedown_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vslide1up_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vslide1up_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vslide1up_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vslide1up_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vslide1down_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vslide1down_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vslide1down_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vslide1down_vx_d, void, ptr, ptr, tl, ptr, env, i32) diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn32.decode +++ b/target/riscv/insn32.decode @@ -XXX,XX +XXX,XX @@ vext_x_v 001100 1 ..... ..... 010 ..... 1010111 @r vmv_s_x 001101 1 00000 ..... 110 ..... 1010111 @r2 vfmv_f_s 001100 1 ..... 00000 001 ..... 1010111 @r2rd vfmv_s_f 001101 1 00000 ..... 101 ..... 1010111 @r2 +vslideup_vx 001110 . ..... ..... 100 ..... 1010111 @r_vm +vslideup_vi 001110 . ..... ..... 011 ..... 1010111 @r_vm +vslide1up_vx 001110 . ..... ..... 110 ..... 1010111 @r_vm +vslidedown_vx 001111 . ..... ..... 100 ..... 1010111 @r_vm +vslidedown_vi 001111 . ..... ..... 011 ..... 1010111 @r_vm +vslide1down_vx 001111 . ..... ..... 110 ..... 1010111 @r_vm vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm vsetvl 1000000 ..... ..... 111 ..... 1010111 @r diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn_trans/trans_rvv.inc.c +++ b/target/riscv/insn_trans/trans_rvv.inc.c @@ -XXX,XX +XXX,XX @@ static bool trans_vfmv_s_f(DisasContext *s, arg_vfmv_s_f *a) } return false; } + +/* Vector Slide Instructions */ +static bool slideup_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, true) && + vext_check_reg(s, a->rd, false) && + vext_check_reg(s, a->rs2, false) && + (a->rd != a->rs2)); +} + +GEN_OPIVX_TRANS(vslideup_vx, slideup_check) +GEN_OPIVX_TRANS(vslide1up_vx, slideup_check) +GEN_OPIVI_TRANS(vslideup_vi, 1, vslideup_vx, slideup_check) + +GEN_OPIVX_TRANS(vslidedown_vx, opivx_check) +GEN_OPIVX_TRANS(vslide1down_vx, opivx_check) +GEN_OPIVI_TRANS(vslidedown_vi, 1, vslidedown_vx, opivx_check) diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -XXX,XX +XXX,XX @@ GEN_VEXT_VID_V(vid_v_b, uint8_t, H1, clearb) GEN_VEXT_VID_V(vid_v_h, uint16_t, H2, clearh) GEN_VEXT_VID_V(vid_v_w, uint32_t, H4, clearl) GEN_VEXT_VID_V(vid_v_d, uint64_t, H8, clearq) + +/* + *** Vector Permutation Instructions + */ + +/* Vector Slide Instructions */ +#define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen; \ + uint32_t vm = vext_vm(desc); \ + uint32_t vl = env->vl; \ + target_ulong offset = s1, i; \ + \ + for (i = offset; i < vl; i++) { \ + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ + continue; \ + } \ + *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset)); \ + } \ + CLEAR_FN(vd, vl, vl * sizeof(ETYPE), vlmax * sizeof(ETYPE)); \ +} + +/* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */ +GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t, H1, clearb) +GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2, clearh) +GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4, clearl) +GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8, clearq) + +#define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen; \ + uint32_t vm = vext_vm(desc); \ + uint32_t vl = env->vl; \ + target_ulong offset = s1, i; \ + \ + for (i = 0; i < vl; ++i) { \ + target_ulong j = i + offset; \ + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ + continue; \ + } \ + *((ETYPE *)vd + H(i)) = j >= vlmax ? 0 : *((ETYPE *)vs2 + H(j)); \ + } \ + CLEAR_FN(vd, vl, vl * sizeof(ETYPE), vlmax * sizeof(ETYPE)); \ +} + +/* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */ +GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t, H1, clearb) +GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2, clearh) +GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4, clearl) +GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8, clearq) + +#define GEN_VEXT_VSLIDE1UP_VX(NAME, ETYPE, H, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen; \ + uint32_t vm = vext_vm(desc); \ + uint32_t vl = env->vl; \ + uint32_t i; \ + \ + for (i = 0; i < vl; i++) { \ + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ + continue; \ + } \ + if (i == 0) { \ + *((ETYPE *)vd + H(i)) = s1; \ + } else { \ + *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1)); \ + } \ + } \ + CLEAR_FN(vd, vl, vl * sizeof(ETYPE), vlmax * sizeof(ETYPE)); \ +} + +/* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */ +GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, uint8_t, H1, clearb) +GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, uint16_t, H2, clearh) +GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, uint32_t, H4, clearl) +GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, uint64_t, H8, clearq) + +#define GEN_VEXT_VSLIDE1DOWN_VX(NAME, ETYPE, H, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen; \ + uint32_t vm = vext_vm(desc); \ + uint32_t vl = env->vl; \ + uint32_t i; \ + \ + for (i = 0; i < vl; i++) { \ + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ + continue; \ + } \ + if (i == vl - 1) { \ + *((ETYPE *)vd + H(i)) = s1; \ + } else { \ + *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1)); \ + } \ + } \ + CLEAR_FN(vd, vl, vl * sizeof(ETYPE), vlmax * sizeof(ETYPE)); \ +} + +/* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */ +GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, uint8_t, H1, clearb) +GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, uint16_t, H2, clearh) +GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, uint32_t, H4, clearl) +GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, uint64_t, H8, clearq) -- 2.27.0
From: LIU Zhiwei <zhiwei_liu@c-sky.com> Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Message-id: 20200623215920.2594-60-zhiwei_liu@c-sky.com Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/helper.h | 9 +++ target/riscv/insn32.decode | 3 + target/riscv/insn_trans/trans_rvv.inc.c | 78 +++++++++++++++++++++++++ target/riscv/vector_helper.c | 60 +++++++++++++++++++ 4 files changed, 150 insertions(+) diff --git a/target/riscv/helper.h b/target/riscv/helper.h index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/helper.h +++ b/target/riscv/helper.h @@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vslide1down_vx_b, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vslide1down_vx_h, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vslide1down_vx_w, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vslide1down_vx_d, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vrgather_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vrgather_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vrgather_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vrgather_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vrgather_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vrgather_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vrgather_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vrgather_vx_d, void, ptr, ptr, tl, ptr, env, i32) diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn32.decode +++ b/target/riscv/insn32.decode @@ -XXX,XX +XXX,XX @@ vslide1up_vx 001110 . ..... ..... 110 ..... 1010111 @r_vm vslidedown_vx 001111 . ..... ..... 100 ..... 1010111 @r_vm vslidedown_vi 001111 . ..... ..... 011 ..... 1010111 @r_vm vslide1down_vx 001111 . ..... ..... 110 ..... 1010111 @r_vm +vrgather_vv 001100 . ..... ..... 000 ..... 1010111 @r_vm +vrgather_vx 001100 . ..... ..... 100 ..... 1010111 @r_vm +vrgather_vi 001100 . ..... ..... 011 ..... 1010111 @r_vm vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm vsetvl 1000000 ..... ..... 111 ..... 1010111 @r diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn_trans/trans_rvv.inc.c +++ b/target/riscv/insn_trans/trans_rvv.inc.c @@ -XXX,XX +XXX,XX @@ GEN_OPIVI_TRANS(vslideup_vi, 1, vslideup_vx, slideup_check) GEN_OPIVX_TRANS(vslidedown_vx, opivx_check) GEN_OPIVX_TRANS(vslide1down_vx, opivx_check) GEN_OPIVI_TRANS(vslidedown_vi, 1, vslidedown_vx, opivx_check) + +/* Vector Register Gather Instruction */ +static bool vrgather_vv_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, true) && + vext_check_reg(s, a->rd, false) && + vext_check_reg(s, a->rs1, false) && + vext_check_reg(s, a->rs2, false) && + (a->rd != a->rs2) && (a->rd != a->rs1)); +} + +GEN_OPIVV_TRANS(vrgather_vv, vrgather_vv_check) + +static bool vrgather_vx_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, true) && + vext_check_reg(s, a->rd, false) && + vext_check_reg(s, a->rs2, false) && + (a->rd != a->rs2)); +} + +/* vrgather.vx vd, vs2, rs1, vm # vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[rs1] */ +static bool trans_vrgather_vx(DisasContext *s, arg_rmrr *a) +{ + if (!vrgather_vx_check(s, a)) { + return false; + } + + if (a->vm && s->vl_eq_vlmax) { + int vlmax = s->vlen / s->mlen; + TCGv_i64 dest = tcg_temp_new_i64(); + + if (a->rs1 == 0) { + vec_element_loadi(s, dest, a->rs2, 0); + } else { + vec_element_loadx(s, dest, a->rs2, cpu_gpr[a->rs1], vlmax); + } + + tcg_gen_gvec_dup_i64(s->sew, vreg_ofs(s, a->rd), + MAXSZ(s), MAXSZ(s), dest); + tcg_temp_free_i64(dest); + } else { + static gen_helper_opivx * const fns[4] = { + gen_helper_vrgather_vx_b, gen_helper_vrgather_vx_h, + gen_helper_vrgather_vx_w, gen_helper_vrgather_vx_d + }; + return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fns[s->sew], s); + } + return true; +} + +/* vrgather.vi vd, vs2, imm, vm # vd[i] = (imm >= VLMAX) ? 0 : vs2[imm] */ +static bool trans_vrgather_vi(DisasContext *s, arg_rmrr *a) +{ + if (!vrgather_vx_check(s, a)) { + return false; + } + + if (a->vm && s->vl_eq_vlmax) { + if (a->rs1 >= s->vlen / s->mlen) { + tcg_gen_gvec_dup_imm(SEW64, vreg_ofs(s, a->rd), + MAXSZ(s), MAXSZ(s), 0); + } else { + tcg_gen_gvec_dup_mem(s->sew, vreg_ofs(s, a->rd), + endian_ofs(s, a->rs2, a->rs1), + MAXSZ(s), MAXSZ(s)); + } + } else { + static gen_helper_opivx * const fns[4] = { + gen_helper_vrgather_vx_b, gen_helper_vrgather_vx_h, + gen_helper_vrgather_vx_w, gen_helper_vrgather_vx_d + }; + return opivi_trans(a->rd, a->rs1, a->rs2, a->vm, fns[s->sew], s, 1); + } + return true; +} diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -XXX,XX +XXX,XX @@ GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, uint8_t, H1, clearb) GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, uint16_t, H2, clearh) GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, uint32_t, H4, clearl) GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, uint64_t, H8, clearq) + +/* Vector Register Gather Instruction */ +#define GEN_VEXT_VRGATHER_VV(NAME, ETYPE, H, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen; \ + uint32_t vm = vext_vm(desc); \ + uint32_t vl = env->vl; \ + uint32_t index, i; \ + \ + for (i = 0; i < vl; i++) { \ + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ + continue; \ + } \ + index = *((ETYPE *)vs1 + H(i)); \ + if (index >= vlmax) { \ + *((ETYPE *)vd + H(i)) = 0; \ + } else { \ + *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index)); \ + } \ + } \ + CLEAR_FN(vd, vl, vl * sizeof(ETYPE), vlmax * sizeof(ETYPE)); \ +} + +/* vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; */ +GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t, H1, clearb) +GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, H2, clearh) +GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, H4, clearl) +GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, H8, clearq) + +#define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen; \ + uint32_t vm = vext_vm(desc); \ + uint32_t vl = env->vl; \ + uint32_t index = s1, i; \ + \ + for (i = 0; i < vl; i++) { \ + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ + continue; \ + } \ + if (index >= vlmax) { \ + *((ETYPE *)vd + H(i)) = 0; \ + } else { \ + *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index)); \ + } \ + } \ + CLEAR_FN(vd, vl, vl * sizeof(ETYPE), vlmax * sizeof(ETYPE)); \ +} + +/* vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[rs1] */ +GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t, H1, clearb) +GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2, clearh) +GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4, clearl) +GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8, clearq) -- 2.27.0
From: LIU Zhiwei <zhiwei_liu@c-sky.com> Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Message-id: 20200623215920.2594-61-zhiwei_liu@c-sky.com Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/helper.h | 5 ++++ target/riscv/insn32.decode | 1 + target/riscv/insn_trans/trans_rvv.inc.c | 32 +++++++++++++++++++++++++ target/riscv/vector_helper.c | 26 ++++++++++++++++++++ 4 files changed, 64 insertions(+) diff --git a/target/riscv/helper.h b/target/riscv/helper.h index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/helper.h +++ b/target/riscv/helper.h @@ -XXX,XX +XXX,XX @@ DEF_HELPER_6(vrgather_vx_b, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vrgather_vx_h, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vrgather_vx_w, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vrgather_vx_d, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vcompress_vm_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vcompress_vm_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vcompress_vm_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vcompress_vm_d, void, ptr, ptr, ptr, ptr, env, i32) diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn32.decode +++ b/target/riscv/insn32.decode @@ -XXX,XX +XXX,XX @@ vslide1down_vx 001111 . ..... ..... 110 ..... 1010111 @r_vm vrgather_vv 001100 . ..... ..... 000 ..... 1010111 @r_vm vrgather_vx 001100 . ..... ..... 100 ..... 1010111 @r_vm vrgather_vi 001100 . ..... ..... 011 ..... 1010111 @r_vm +vcompress_vm 010111 - ..... ..... 010 ..... 1010111 @r vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm vsetvl 1000000 ..... ..... 111 ..... 1010111 @r diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn_trans/trans_rvv.inc.c +++ b/target/riscv/insn_trans/trans_rvv.inc.c @@ -XXX,XX +XXX,XX @@ static bool trans_vrgather_vi(DisasContext *s, arg_rmrr *a) } return true; } + +/* Vector Compress Instruction */ +static bool vcompress_vm_check(DisasContext *s, arg_r *a) +{ + return (vext_check_isa_ill(s) && + vext_check_reg(s, a->rd, false) && + vext_check_reg(s, a->rs2, false) && + vext_check_overlap_group(a->rd, 1 << s->lmul, a->rs1, 1) && + (a->rd != a->rs2)); +} + +static bool trans_vcompress_vm(DisasContext *s, arg_r *a) +{ + if (vcompress_vm_check(s, a)) { + uint32_t data = 0; + static gen_helper_gvec_4_ptr * const fns[4] = { + gen_helper_vcompress_vm_b, gen_helper_vcompress_vm_h, + gen_helper_vcompress_vm_w, gen_helper_vcompress_vm_d, + }; + TCGLabel *over = gen_new_label(); + tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + + data = FIELD_DP32(data, VDATA, MLEN, s->mlen); + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); + tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), + vreg_ofs(s, a->rs1), vreg_ofs(s, a->rs2), + cpu_env, 0, s->vlen / 8, data, fns[s->sew]); + gen_set_label(over); + return true; + } + return false; +} diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -XXX,XX +XXX,XX @@ GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t, H1, clearb) GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2, clearh) GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4, clearl) GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8, clearq) + +/* Vector Compress Instruction */ +#define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen; \ + uint32_t vl = env->vl; \ + uint32_t num = 0, i; \ + \ + for (i = 0; i < vl; i++) { \ + if (!vext_elem_mask(vs1, mlen, i)) { \ + continue; \ + } \ + *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i)); \ + num++; \ + } \ + CLEAR_FN(vd, num, num * sizeof(ETYPE), vlmax * sizeof(ETYPE)); \ +} + +/* Compress into vd elements of vs2 where vs1 is enabled */ +GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t, H1, clearb) +GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2, clearh) +GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4, clearl) +GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8, clearq) -- 2.27.0
From: LIU Zhiwei <zhiwei_liu@c-sky.com> Vector extension is default off. The only way to use vector extension is 1. use cpu rv32 or rv64 2. turn on it by command line "-cpu rv64,x-v=true,vlen=128,elen=64,vext_spec=v0.7.1". vlen is the vector register length, default value is 128 bit. elen is the max operator size in bits, default value is 64 bit. vext_spec is the vector specification version, default value is v0.7.1. These properties can be specified with other values. Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Message-id: 20200623215920.2594-62-zhiwei_liu@c-sky.com Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/cpu.h | 4 +++- target/riscv/cpu.c | 43 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+), 1 deletion(-) diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/cpu.h +++ b/target/riscv/cpu.h @@ -XXX,XX +XXX,XX @@ typedef struct CPURISCVState CPURISCVState; #include "pmp.h" -#define RV_VLEN_MAX 512 +#define RV_VLEN_MAX 256 FIELD(VTYPE, VLMUL, 0, 2) FIELD(VTYPE, VSEW, 2, 3) @@ -XXX,XX +XXX,XX @@ typedef struct RISCVCPU { bool ext_s; bool ext_u; bool ext_h; + bool ext_v; bool ext_counters; bool ext_ifencei; bool ext_icsr; char *priv_spec; char *user_spec; + char *vext_spec; uint16_t vlen; uint16_t elen; bool mmu; diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/cpu.c +++ b/target/riscv/cpu.c @@ -XXX,XX +XXX,XX @@ static void riscv_cpu_realize(DeviceState *dev, Error **errp) if (cpu->cfg.ext_h) { target_misa |= RVH; } + if (cpu->cfg.ext_v) { + target_misa |= RVV; + if (!is_power_of_2(cpu->cfg.vlen)) { + error_setg(errp, + "Vector extension VLEN must be power of 2"); + return; + } + if (cpu->cfg.vlen > RV_VLEN_MAX || cpu->cfg.vlen < 128) { + error_setg(errp, + "Vector extension implementation only supports VLEN " + "in the range [128, %d]", RV_VLEN_MAX); + return; + } + if (!is_power_of_2(cpu->cfg.elen)) { + error_setg(errp, + "Vector extension ELEN must be power of 2"); + return; + } + if (cpu->cfg.elen > 64 || cpu->cfg.vlen < 8) { + error_setg(errp, + "Vector extension implementation only supports ELEN " + "in the range [8, 64]"); + return; + } + if (cpu->cfg.vext_spec) { + if (!g_strcmp0(cpu->cfg.vext_spec, "v0.7.1")) { + vext_version = VEXT_VERSION_0_07_1; + } else { + error_setg(errp, + "Unsupported vector spec version '%s'", + cpu->cfg.vext_spec); + return; + } + } else { + qemu_log("vector verison is not specified, " + "use the default value v0.7.1\n"); + } + set_vext_version(env, vext_version); + } set_misa(env, RVXLEN | target_misa); } @@ -XXX,XX +XXX,XX @@ static Property riscv_cpu_properties[] = { DEFINE_PROP_BOOL("u", RISCVCPU, cfg.ext_u, true), /* This is experimental so mark with 'x-' */ DEFINE_PROP_BOOL("x-h", RISCVCPU, cfg.ext_h, false), + DEFINE_PROP_BOOL("x-v", RISCVCPU, cfg.ext_v, false), DEFINE_PROP_BOOL("Counters", RISCVCPU, cfg.ext_counters, true), DEFINE_PROP_BOOL("Zifencei", RISCVCPU, cfg.ext_ifencei, true), DEFINE_PROP_BOOL("Zicsr", RISCVCPU, cfg.ext_icsr, true), DEFINE_PROP_STRING("priv_spec", RISCVCPU, cfg.priv_spec), + DEFINE_PROP_STRING("vext_spec", RISCVCPU, cfg.vext_spec), + DEFINE_PROP_UINT16("vlen", RISCVCPU, cfg.vlen, 128), + DEFINE_PROP_UINT16("elen", RISCVCPU, cfg.elen, 64), DEFINE_PROP_BOOL("mmu", RISCVCPU, cfg.mmu, true), DEFINE_PROP_BOOL("pmp", RISCVCPU, cfg.pmp, true), DEFINE_PROP_END_OF_LIST(), -- 2.27.0
From: Alistair Francis <alistair.francis@wdc.com> The following changes since commit c77283dd5d79149f4e7e9edd00f65416c648ee59: Merge tag 'pull-request-2025-07-02' of https://gitlab.com/thuth/qemu into staging (2025-07-03 06:01:41 -0400) are available in the Git repository at: https://github.com/alistair23/qemu.git tags/pull-riscv-to-apply-20250704 for you to fetch changes up to dc8bffc4eb0a93d3266cea1b17f8848dea5b915c: target: riscv: Add Svrsw60t59b extension support (2025-07-04 21:09:49 +1000) ---------------------------------------------------------------- Second RISC-V PR for 10.1 * sstc extension fixes * Fix zama16b order in isa_edata_arr * Profile handling fixes * Extend PMP region up to 64 * Remove capital 'Z' CPU properties * Add missing named features * Support atomic instruction fetch (Ziccif) * Add max_satp_mode from host cpu * Extend and configure PMP region count * Fix PPN field of Translation-reponse register * Use qemu_chr_fe_write_all() in DBCN_CONSOLE_WRITE_BYTE * Fix fcvt.s.bf16 NaN box checking * Avoid infinite delay of async xmit function * Device tree reg cleanups * Add Kunminghu CPU and platform * Fix missing exit TB flow for ldff_trans * Fix migration failure when aia is configured as aplic-imsic * Fix MEPC/SEPC bit masking for IALIGN * Add a property to set vill bit on reserved usage of vsetvli instruction * Add Svrsw60t59b extension support ---------------------------------------------------------------- Alexandre Ghiti (1): target: riscv: Add Svrsw60t59b extension support Anton Blanchard (1): target/riscv: Fix fcvt.s.bf16 NaN box checking Charalampos Mitrodimas (2): target/riscv: Fix MEPC/SEPC bit masking for IALIGN tests/tcg/riscv64: Add test for MEPC bit masking Daniel Henrique Barboza (9): target/riscv/cpu.c: fix zama16b order in isa_edata_arr[] target/riscv/tcg: restrict satp_mode changes in cpu_set_profile target/riscv/tcg: decouple profile enablement from user prop target/riscv: add profile->present flag target/riscv: remove capital 'Z' CPU properties target/riscv/cpu.c: add 'sdtrig' in riscv,isa target/riscv/cpu.c: add 'ssstrict' to riscv, isa target/riscv/cpu.c: do better with 'named features' doc target/riscv: use qemu_chr_fe_write_all() in DBCN_CONSOLE_WRITE_BYTE Florian Lugou (1): hw/char: sifive_uart: Avoid infinite delay of async xmit function Huang Borong (2): target/riscv: Add BOSC's Xiangshan Kunminghu CPU hw/riscv: Initial support for BOSC's Xiangshan Kunminghu FPGA prototype Jay Chang (2): target/riscv: Extend PMP region up to 64 target/riscv: Make PMP region count configurable Jim Shu (5): target/riscv: Add the checking into stimecmp write function. hw/intc: riscv_aclint: Fix mtime write for sstc extension target/riscv: Fix VSTIP bit in sstc extension. target/riscv: Enable/Disable S/VS-mode Timer when STCE bit is changed target/riscv: support atomic instruction fetch (Ziccif) Joel Stanley (12): hw/riscv/virt: Fix clint base address type hw/riscv/virt: Use setprop_sized_cells for clint hw/riscv/virt: Use setprop_sized_cells for memory hw/riscv/virt: Use setprop_sized_cells for aplic hw/riscv/virt: Use setprop_sized_cells for aclint hw/riscv/virt: Use setprop_sized_cells for plic hw/riscv/virt: Use setprop_sized_cells for virtio hw/riscv/virt: Use setprop_sized_cells for reset hw/riscv/virt: Use setprop_sized_cells for uart hw/riscv/virt: Use setprop_sized_cells for rtc hw/riscv/virt: Use setprop_sized_cells for iommu hw/riscv/virt: Use setprop_sized_cells for pcie Max Chou (1): target/riscv: rvv: Fix missing exit TB flow for ldff_trans Meng Zhuo (1): target/riscv/kvm: add max_satp_mode from host cpu Nutty Liu (1): hw/riscv/riscv-iommu: Fix PPN field of Translation-reponse register Vasilis Liaskovitis (1): target/riscv: Add a property to set vill bit on reserved usage of vsetvli instruction liu.xuemei1@zte.com.cn (1): migration: Fix migration failure when aia is configured as aplic-imsic MAINTAINERS | 7 + docs/system/riscv/xiangshan-kunminghu.rst | 39 +++++ docs/system/target-riscv.rst | 1 + configs/devices/riscv64-softmmu/default.mak | 1 + hw/riscv/riscv-iommu-bits.h | 1 + include/hw/riscv/xiangshan_kmh.h | 68 +++++++++ target/riscv/cpu-qom.h | 1 + target/riscv/cpu.h | 19 ++- target/riscv/cpu_bits.h | 63 +++++++- target/riscv/helper.h | 2 +- target/riscv/internals.h | 27 ++++ target/riscv/time_helper.h | 1 + target/riscv/cpu_cfg_fields.h.inc | 3 + hw/char/sifive_uart.c | 6 +- hw/intc/riscv_aclint.c | 5 + hw/intc/riscv_aplic.c | 12 +- hw/intc/riscv_imsic.c | 10 +- hw/riscv/riscv-iommu.c | 9 +- hw/riscv/virt.c | 66 ++++----- hw/riscv/xiangshan_kmh.c | 220 ++++++++++++++++++++++++++++ target/riscv/cpu.c | 144 +++++++++++++++--- target/riscv/cpu_helper.c | 3 +- target/riscv/csr.c | 192 +++++++++++++++++++++++- target/riscv/fpu_helper.c | 2 +- target/riscv/kvm/kvm-cpu.c | 18 ++- target/riscv/machine.c | 3 +- target/riscv/op_helper.c | 4 +- target/riscv/pmp.c | 28 ++-- target/riscv/riscv-qmp-cmds.c | 2 +- target/riscv/tcg/tcg-cpu.c | 186 +++++++++++------------ target/riscv/time_helper.c | 65 +++++++- target/riscv/translate.c | 46 ++++-- target/riscv/vector_helper.c | 12 +- target/riscv/insn_trans/trans_rvv.c.inc | 10 +- hw/riscv/Kconfig | 9 ++ hw/riscv/meson.build | 1 + tests/data/acpi/riscv64/virt/RHCT | Bin 400 -> 416 bytes tests/tcg/riscv64/Makefile.softmmu-target | 4 + tests/tcg/riscv64/test-mepc-masking.S | 73 +++++++++ 39 files changed, 1151 insertions(+), 212 deletions(-) create mode 100644 docs/system/riscv/xiangshan-kunminghu.rst create mode 100644 include/hw/riscv/xiangshan_kmh.h create mode 100644 hw/riscv/xiangshan_kmh.c create mode 100644 tests/tcg/riscv64/test-mepc-masking.S
From: Jim Shu <jim.shu@sifive.com> Preparation commit to let aclint timer to use stimecmp write function. Aclint timer doesn't call sstc() predicate so we need to check inside the stimecmp write function. Signed-off-by: Jim Shu <jim.shu@sifive.com> Acked-by: Alistair Francis <alistair.francis@wdc.com> Message-ID: <20250519143518.11086-2-jim.shu@sifive.com> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/time_helper.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/target/riscv/time_helper.c b/target/riscv/time_helper.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/time_helper.c +++ b/target/riscv/time_helper.c @@ -XXX,XX +XXX,XX @@ void riscv_timer_write_timecmp(CPURISCVState *env, QEMUTimer *timer, { uint64_t diff, ns_diff, next; RISCVAclintMTimerState *mtimer = env->rdtime_fn_arg; - uint32_t timebase_freq = mtimer->timebase_freq; - uint64_t rtc_r = env->rdtime_fn(env->rdtime_fn_arg) + delta; + uint32_t timebase_freq; + uint64_t rtc_r; + + if (!riscv_cpu_cfg(env)->ext_sstc || !env->rdtime_fn || + !env->rdtime_fn_arg || !get_field(env->menvcfg, MENVCFG_STCE)) { + /* S/VS Timer IRQ depends on sstc extension, rdtime_fn(), and STCE. */ + return; + } + + if (timer_irq == MIP_VSTIP && + (!riscv_has_ext(env, RVH) || !get_field(env->henvcfg, HENVCFG_STCE))) { + /* VS Timer IRQ also depends on RVH and henvcfg.STCE. */ + return; + } + + timebase_freq = mtimer->timebase_freq; + rtc_r = env->rdtime_fn(env->rdtime_fn_arg) + delta; if (timecmp <= rtc_r) { /* -- 2.50.0
From: Jim Shu <jim.shu@sifive.com> When changing the mtime value, the period of [s|vs]timecmp timers should also be updated, similar to the period of mtimecmp timer. The period of the stimecmp timer is the time until the next S-mode timer IRQ. The value is calculated as "stimecmp - time". [1] It is equal to "stimecmp - mtime" since the time CSR is a read-only shadow of the memory-mapped mtime register. Thus, changing mtime value will update the period of stimecmp timer. Similarly, the period of vstimecmp timer is calculated as "vstimecmp - (mtime + htimedelta)" [2], so changing mtime value will update the period of vstimecmp timer. [1] RISC-V Priv spec ch 9.1.1. Supervisor Timer (stimecmp) Register A supervisor timer interrupt becomes pending, as reflected in the STIP bit in the mip and sip registers whenever time contains a value greater than or equal to stimecmp. [2] RISC-V Priv spec ch19.2.1. Virtual Supervisor Timer (vstimecmp) Register A virtual supervisor timer interrupt becomes pending, as reflected in the VSTIP bit in the hip register, whenever (time + htimedelta), truncated to 64 bits, contains a value greater than or equal to vstimecmp Signed-off-by: Jim Shu <jim.shu@sifive.com> Acked-by: Alistair Francis <alistair.francis@wdc.com> Message-ID: <20250519143518.11086-3-jim.shu@sifive.com> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- hw/intc/riscv_aclint.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/hw/intc/riscv_aclint.c b/hw/intc/riscv_aclint.c index XXXXXXX..XXXXXXX 100644 --- a/hw/intc/riscv_aclint.c +++ b/hw/intc/riscv_aclint.c @@ -XXX,XX +XXX,XX @@ #include "qemu/module.h" #include "hw/sysbus.h" #include "target/riscv/cpu.h" +#include "target/riscv/time_helper.h" #include "hw/qdev-properties.h" #include "hw/intc/riscv_aclint.h" #include "qemu/timer.h" @@ -XXX,XX +XXX,XX @@ static void riscv_aclint_mtimer_write(void *opaque, hwaddr addr, riscv_aclint_mtimer_write_timecmp(mtimer, RISCV_CPU(cpu), mtimer->hartid_base + i, mtimer->timecmp[i]); + riscv_timer_write_timecmp(env, env->stimer, env->stimecmp, 0, MIP_STIP); + riscv_timer_write_timecmp(env, env->vstimer, env->vstimecmp, + env->htimedelta, MIP_VSTIP); + } return; } -- 2.50.0
From: Jim Shu <jim.shu@sifive.com> VSTIP is only writable when both [mh]envcfg.STCE is enabled, or it will revert it's defined behavior as if sstc extension is not implemented. Signed-off-by: Jim Shu <jim.shu@sifive.com> Acked-by: Alistair Francis <alistair.francis@wdc.com> Message-ID: <20250519143518.11086-4-jim.shu@sifive.com> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/csr.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/target/riscv/csr.c b/target/riscv/csr.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/csr.c +++ b/target/riscv/csr.c @@ -XXX,XX +XXX,XX @@ static RISCVException rmw_mip64(CPURISCVState *env, int csrno, if (riscv_cpu_cfg(env)->ext_sstc && (env->priv == PRV_M) && get_field(env->menvcfg, MENVCFG_STCE)) { /* sstc extension forbids STIP & VSTIP to be writeable in mip */ - mask = mask & ~(MIP_STIP | MIP_VSTIP); + + /* STIP is not writable when menvcfg.STCE is enabled. */ + mask = mask & ~MIP_STIP; + + /* VSTIP is not writable when both [mh]envcfg.STCE are enabled. */ + if (get_field(env->henvcfg, HENVCFG_STCE)) { + mask = mask & ~MIP_VSTIP; + } } if (mask) { -- 2.50.0
From: Jim Shu <jim.shu@sifive.com> Updating STCE will enable/disable SSTC in S-mode or/and VS-mode, so we also need to update S/VS-mode Timer and S/VSTIP bits in $mip CSR. Signed-off-by: Jim Shu <jim.shu@sifive.com> Acked-by: Alistair Francis <alistair.francis@wdc.com> Message-ID: <20250519143518.11086-5-jim.shu@sifive.com> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/time_helper.h | 1 + target/riscv/csr.c | 46 ++++++++++++++++++++++++++++++++++++++ target/riscv/time_helper.c | 46 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 93 insertions(+) diff --git a/target/riscv/time_helper.h b/target/riscv/time_helper.h index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/time_helper.h +++ b/target/riscv/time_helper.h @@ -XXX,XX +XXX,XX @@ void riscv_timer_write_timecmp(CPURISCVState *env, QEMUTimer *timer, uint64_t timecmp, uint64_t delta, uint32_t timer_irq); +void riscv_timer_stce_changed(CPURISCVState *env, bool is_m_mode, bool enable); void riscv_timer_init(RISCVCPU *cpu); #endif diff --git a/target/riscv/csr.c b/target/riscv/csr.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/csr.c +++ b/target/riscv/csr.c @@ -XXX,XX +XXX,XX @@ static RISCVException write_menvcfg(CPURISCVState *env, int csrno, const RISCVCPUConfig *cfg = riscv_cpu_cfg(env); uint64_t mask = MENVCFG_FIOM | MENVCFG_CBIE | MENVCFG_CBCFE | MENVCFG_CBZE | MENVCFG_CDE; + bool stce_changed = false; if (riscv_cpu_mxl(env) == MXL_RV64) { mask |= (cfg->ext_svpbmt ? MENVCFG_PBMTE : 0) | @@ -XXX,XX +XXX,XX @@ static RISCVException write_menvcfg(CPURISCVState *env, int csrno, if ((val & MENVCFG_DTE) == 0) { env->mstatus &= ~MSTATUS_SDT; } + + if (cfg->ext_sstc && + ((env->menvcfg & MENVCFG_STCE) != (val & MENVCFG_STCE))) { + stce_changed = true; + } } env->menvcfg = (env->menvcfg & ~mask) | (val & mask); + + if (stce_changed) { + riscv_timer_stce_changed(env, true, !!(val & MENVCFG_STCE)); + } + return write_henvcfg(env, CSR_HENVCFG, env->henvcfg, ra); } @@ -XXX,XX +XXX,XX @@ static RISCVException write_menvcfgh(CPURISCVState *env, int csrno, (cfg->ext_smcdeleg ? MENVCFG_CDE : 0) | (cfg->ext_ssdbltrp ? MENVCFG_DTE : 0); uint64_t valh = (uint64_t)val << 32; + bool stce_changed = false; + + if (cfg->ext_sstc && + ((env->menvcfg & MENVCFG_STCE) != (valh & MENVCFG_STCE))) { + stce_changed = true; + } if ((valh & MENVCFG_DTE) == 0) { env->mstatus &= ~MSTATUS_SDT; } env->menvcfg = (env->menvcfg & ~mask) | (valh & mask); + + if (stce_changed) { + riscv_timer_stce_changed(env, true, !!(valh & MENVCFG_STCE)); + } + return write_henvcfgh(env, CSR_HENVCFGH, env->henvcfg >> 32, ra); } @@ -XXX,XX +XXX,XX @@ static RISCVException read_henvcfg(CPURISCVState *env, int csrno, static RISCVException write_henvcfg(CPURISCVState *env, int csrno, target_ulong val, uintptr_t ra) { + const RISCVCPUConfig *cfg = riscv_cpu_cfg(env); uint64_t mask = HENVCFG_FIOM | HENVCFG_CBIE | HENVCFG_CBCFE | HENVCFG_CBZE; RISCVException ret; + bool stce_changed = false; ret = smstateen_acc_ok(env, 0, SMSTATEEN0_HSENVCFG); if (ret != RISCV_EXCP_NONE) { @@ -XXX,XX +XXX,XX @@ static RISCVException write_henvcfg(CPURISCVState *env, int csrno, get_field(val, HENVCFG_PMM) != PMM_FIELD_RESERVED) { mask |= HENVCFG_PMM; } + + if (cfg->ext_sstc && + ((env->henvcfg & HENVCFG_STCE) != (val & HENVCFG_STCE))) { + stce_changed = true; + } } env->henvcfg = val & mask; @@ -XXX,XX +XXX,XX @@ static RISCVException write_henvcfg(CPURISCVState *env, int csrno, env->vsstatus &= ~MSTATUS_SDT; } + if (stce_changed) { + riscv_timer_stce_changed(env, false, !!(val & HENVCFG_STCE)); + } + return RISCV_EXCP_NONE; } @@ -XXX,XX +XXX,XX @@ static RISCVException read_henvcfgh(CPURISCVState *env, int csrno, static RISCVException write_henvcfgh(CPURISCVState *env, int csrno, target_ulong val, uintptr_t ra) { + const RISCVCPUConfig *cfg = riscv_cpu_cfg(env); uint64_t mask = env->menvcfg & (HENVCFG_PBMTE | HENVCFG_STCE | HENVCFG_ADUE | HENVCFG_DTE); uint64_t valh = (uint64_t)val << 32; RISCVException ret; + bool stce_changed = false; ret = smstateen_acc_ok(env, 0, SMSTATEEN0_HSENVCFG); if (ret != RISCV_EXCP_NONE) { return ret; } + + if (cfg->ext_sstc && + ((env->henvcfg & HENVCFG_STCE) != (valh & HENVCFG_STCE))) { + stce_changed = true; + } + env->henvcfg = (env->henvcfg & 0xFFFFFFFF) | (valh & mask); if ((env->henvcfg & HENVCFG_DTE) == 0) { env->vsstatus &= ~MSTATUS_SDT; } + + if (stce_changed) { + riscv_timer_stce_changed(env, false, !!(val & HENVCFG_STCE)); + } + return RISCV_EXCP_NONE; } diff --git a/target/riscv/time_helper.c b/target/riscv/time_helper.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/time_helper.c +++ b/target/riscv/time_helper.c @@ -XXX,XX +XXX,XX @@ void riscv_timer_write_timecmp(CPURISCVState *env, QEMUTimer *timer, timer_mod(timer, next); } +/* + * When disabling xenvcfg.STCE, the S/VS Timer may be disabled at the same time. + * It is safe to call this function regardless of whether the timer has been + * deleted or not. timer_del() will do nothing if the timer has already + * been deleted. + */ +static void riscv_timer_disable_timecmp(CPURISCVState *env, QEMUTimer *timer, + uint32_t timer_irq) +{ + /* Disable S-mode Timer IRQ and HW-based STIP */ + if ((timer_irq == MIP_STIP) && !get_field(env->menvcfg, MENVCFG_STCE)) { + riscv_cpu_update_mip(env, timer_irq, BOOL_TO_MASK(0)); + timer_del(timer); + return; + } + + /* Disable VS-mode Timer IRQ and HW-based VSTIP */ + if ((timer_irq == MIP_VSTIP) && + (!get_field(env->menvcfg, MENVCFG_STCE) || + !get_field(env->henvcfg, HENVCFG_STCE))) { + env->vstime_irq = 0; + riscv_cpu_update_mip(env, 0, BOOL_TO_MASK(0)); + timer_del(timer); + return; + } +} + +/* Enable or disable S/VS-mode Timer when xenvcfg.STCE is changed */ +void riscv_timer_stce_changed(CPURISCVState *env, bool is_m_mode, bool enable) +{ + if (enable) { + riscv_timer_write_timecmp(env, env->vstimer, env->vstimecmp, + env->htimedelta, MIP_VSTIP); + } else { + riscv_timer_disable_timecmp(env, env->vstimer, MIP_VSTIP); + } + + if (is_m_mode) { + if (enable) { + riscv_timer_write_timecmp(env, env->stimer, env->stimecmp, 0, MIP_STIP); + } else { + riscv_timer_disable_timecmp(env, env->stimer, MIP_STIP); + } + } +} + void riscv_timer_init(RISCVCPU *cpu) { CPURISCVState *env; -- 2.50.0
From: Daniel Henrique Barboza <dbarboza@ventanamicro.com> Put it after zalrsc and before zawrs. Cc: qemu-trivial@nongnu.org Fixes: a60ce58fd9 ("target/riscv: Support Zama16b extension") Signed-off-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Message-ID: <20250522113344.823294-1-dbarboza@ventanamicro.com> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/cpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/cpu.c +++ b/target/riscv/cpu.c @@ -XXX,XX +XXX,XX @@ const RISCVIsaExtData isa_edata_arr[] = { ISA_EXT_DATA_ENTRY(zaamo, PRIV_VERSION_1_12_0, ext_zaamo), ISA_EXT_DATA_ENTRY(zabha, PRIV_VERSION_1_13_0, ext_zabha), ISA_EXT_DATA_ENTRY(zacas, PRIV_VERSION_1_12_0, ext_zacas), - ISA_EXT_DATA_ENTRY(zama16b, PRIV_VERSION_1_13_0, ext_zama16b), ISA_EXT_DATA_ENTRY(zalrsc, PRIV_VERSION_1_12_0, ext_zalrsc), + ISA_EXT_DATA_ENTRY(zama16b, PRIV_VERSION_1_13_0, ext_zama16b), ISA_EXT_DATA_ENTRY(zawrs, PRIV_VERSION_1_12_0, ext_zawrs), ISA_EXT_DATA_ENTRY(zfa, PRIV_VERSION_1_12_0, ext_zfa), ISA_EXT_DATA_ENTRY(zfbfmin, PRIV_VERSION_1_12_0, ext_zfbfmin), -- 2.50.0
From: Daniel Henrique Barboza <dbarboza@ventanamicro.com> We're changing 'mmu' to true regardless of whether the profile is being enabled or not, and at the same time we're changing satp_mode to profile->enabled. This will promote a situation where we'll set mmu=on without a virtual memory mode, which is a mistake. Only touch 'mmu' and satp_mode if the profile is being enabled. Suggested-by: Andrew Jones <ajones@ventanamicro.com> Fixes: 55398025e7 ("target/riscv: add satp_mode profile support") Signed-off-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> Reviewed-by: Andrew Jones <ajones@ventanamicro.com> Reviewed-by: Björn Töpel <bjorn@rivosinc.com> Tested-by: Björn Töpel <bjorn@rivosinc.com> Message-ID: <20250528184407.1451983-2-dbarboza@ventanamicro.com> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/tcg/tcg-cpu.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/target/riscv/tcg/tcg-cpu.c b/target/riscv/tcg/tcg-cpu.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/tcg/tcg-cpu.c +++ b/target/riscv/tcg/tcg-cpu.c @@ -XXX,XX +XXX,XX @@ static void cpu_set_profile(Object *obj, Visitor *v, const char *name, if (profile->enabled) { cpu->env.priv_ver = profile->priv_spec; - } #ifndef CONFIG_USER_ONLY - if (profile->satp_mode != RISCV_PROFILE_ATTR_UNUSED) { - object_property_set_bool(obj, "mmu", true, NULL); - const char *satp_prop = satp_mode_str(profile->satp_mode, - riscv_cpu_is_32bit(cpu)); - object_property_set_bool(obj, satp_prop, profile->enabled, NULL); - } + if (profile->satp_mode != RISCV_PROFILE_ATTR_UNUSED) { + object_property_set_bool(obj, "mmu", true, NULL); + const char *satp_prop = satp_mode_str(profile->satp_mode, + riscv_cpu_is_32bit(cpu)); + object_property_set_bool(obj, satp_prop, true, NULL); + } #endif + } for (i = 0; misa_bits[i] != 0; i++) { uint32_t bit = misa_bits[i]; -- 2.50.0
From: Daniel Henrique Barboza <dbarboza@ventanamicro.com> We have code in riscv_cpu_add_profiles() to enable a profile right away in case a CPU chose the profile during its cpu_init(). But we're using the user callback option to do so, setting profile->user_set. Create a new helper that does all the grunt work to enable/disable a given profile. Use this new helper in the cases where we want a CPU to be compatible to a certain profile, leaving the user callback to be used exclusively by users. Fixes: fba92a92e3 ("target/riscv: add 'rva22u64' CPU") Signed-off-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> Reviewed-by: Andrew Jones <ajones@ventanamicro.com> Reviewed-by: Björn Töpel <bjorn@rivosinc.com> Tested-by: Björn Töpel <bjorn@rivosinc.com> Message-ID: <20250528184407.1451983-3-dbarboza@ventanamicro.com> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/tcg/tcg-cpu.c | 127 +++++++++++++++++++------------------ 1 file changed, 67 insertions(+), 60 deletions(-) diff --git a/target/riscv/tcg/tcg-cpu.c b/target/riscv/tcg/tcg-cpu.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/tcg/tcg-cpu.c +++ b/target/riscv/tcg/tcg-cpu.c @@ -XXX,XX +XXX,XX @@ static bool riscv_cpu_is_generic(Object *cpu_obj) return object_dynamic_cast(cpu_obj, TYPE_RISCV_DYNAMIC_CPU) != NULL; } +static void riscv_cpu_set_profile(RISCVCPU *cpu, + RISCVCPUProfile *profile, + bool enabled) +{ + int i, ext_offset; + + if (profile->u_parent != NULL) { + riscv_cpu_set_profile(cpu, profile->u_parent, enabled); + } + + if (profile->s_parent != NULL) { + riscv_cpu_set_profile(cpu, profile->s_parent, enabled); + } + + profile->enabled = enabled; + + if (profile->enabled) { + cpu->env.priv_ver = profile->priv_spec; + +#ifndef CONFIG_USER_ONLY + if (profile->satp_mode != RISCV_PROFILE_ATTR_UNUSED) { + object_property_set_bool(OBJECT(cpu), "mmu", true, NULL); + const char *satp_prop = satp_mode_str(profile->satp_mode, + riscv_cpu_is_32bit(cpu)); + object_property_set_bool(OBJECT(cpu), satp_prop, true, NULL); + } +#endif + } + + for (i = 0; misa_bits[i] != 0; i++) { + uint32_t bit = misa_bits[i]; + + if (!(profile->misa_ext & bit)) { + continue; + } + + if (bit == RVI && !profile->enabled) { + /* + * Disabling profiles will not disable the base + * ISA RV64I. + */ + continue; + } + + cpu_misa_ext_add_user_opt(bit, profile->enabled); + riscv_cpu_write_misa_bit(cpu, bit, profile->enabled); + } + + for (i = 0; profile->ext_offsets[i] != RISCV_PROFILE_EXT_LIST_END; i++) { + ext_offset = profile->ext_offsets[i]; + + if (profile->enabled) { + if (cpu_cfg_offset_is_named_feat(ext_offset)) { + riscv_cpu_enable_named_feat(cpu, ext_offset); + } + + cpu_bump_multi_ext_priv_ver(&cpu->env, ext_offset); + } + + cpu_cfg_ext_add_user_opt(ext_offset, profile->enabled); + isa_ext_update_enabled(cpu, ext_offset, profile->enabled); + } +} + /* * We'll get here via the following path: * @@ -XXX,XX +XXX,XX @@ static void cpu_set_profile(Object *obj, Visitor *v, const char *name, RISCVCPUProfile *profile = opaque; RISCVCPU *cpu = RISCV_CPU(obj); bool value; - int i, ext_offset; if (riscv_cpu_is_vendor(obj)) { error_setg(errp, "Profile %s is not available for vendor CPUs", @@ -XXX,XX +XXX,XX @@ static void cpu_set_profile(Object *obj, Visitor *v, const char *name, } profile->user_set = true; - profile->enabled = value; - - if (profile->u_parent != NULL) { - object_property_set_bool(obj, profile->u_parent->name, - profile->enabled, NULL); - } - - if (profile->s_parent != NULL) { - object_property_set_bool(obj, profile->s_parent->name, - profile->enabled, NULL); - } - - if (profile->enabled) { - cpu->env.priv_ver = profile->priv_spec; - -#ifndef CONFIG_USER_ONLY - if (profile->satp_mode != RISCV_PROFILE_ATTR_UNUSED) { - object_property_set_bool(obj, "mmu", true, NULL); - const char *satp_prop = satp_mode_str(profile->satp_mode, - riscv_cpu_is_32bit(cpu)); - object_property_set_bool(obj, satp_prop, true, NULL); - } -#endif - } - - for (i = 0; misa_bits[i] != 0; i++) { - uint32_t bit = misa_bits[i]; - - if (!(profile->misa_ext & bit)) { - continue; - } - if (bit == RVI && !profile->enabled) { - /* - * Disabling profiles will not disable the base - * ISA RV64I. - */ - continue; - } - - cpu_misa_ext_add_user_opt(bit, profile->enabled); - riscv_cpu_write_misa_bit(cpu, bit, profile->enabled); - } - - for (i = 0; profile->ext_offsets[i] != RISCV_PROFILE_EXT_LIST_END; i++) { - ext_offset = profile->ext_offsets[i]; - - if (profile->enabled) { - if (cpu_cfg_offset_is_named_feat(ext_offset)) { - riscv_cpu_enable_named_feat(cpu, ext_offset); - } - - cpu_bump_multi_ext_priv_ver(&cpu->env, ext_offset); - } - - cpu_cfg_ext_add_user_opt(ext_offset, profile->enabled); - isa_ext_update_enabled(cpu, ext_offset, profile->enabled); - } + riscv_cpu_set_profile(cpu, profile, value); } static void cpu_get_profile(Object *obj, Visitor *v, const char *name, @@ -XXX,XX +XXX,XX @@ static void cpu_get_profile(Object *obj, Visitor *v, const char *name, static void riscv_cpu_add_profiles(Object *cpu_obj) { for (int i = 0; riscv_profiles[i] != NULL; i++) { - const RISCVCPUProfile *profile = riscv_profiles[i]; + RISCVCPUProfile *profile = riscv_profiles[i]; object_property_add(cpu_obj, profile->name, "bool", cpu_get_profile, cpu_set_profile, @@ -XXX,XX +XXX,XX @@ static void riscv_cpu_add_profiles(Object *cpu_obj) * case. */ if (profile->enabled) { - object_property_set_bool(cpu_obj, profile->name, true, NULL); + riscv_cpu_set_profile(RISCV_CPU(cpu_obj), profile, true); } } } -- 2.50.0
From: Daniel Henrique Barboza <dbarboza@ventanamicro.com> Björn reported in [1] a case where a rv64 CPU is going through the profile code path to enable satp mode. In this case,the amount of extensions on top of the rv64 CPU made it compliant with the RVA22S64 profile during the validation of CPU 0. When the subsequent CPUs were initialized the static profile object has the 'enable' flag set, enabling the profile code path for those CPUs. This happens because we are initializing and realizing each CPU before going to the next, i.e. init and realize CPU0, then init and realize CPU1 and so on. If we change any persistent state during the validation of CPU N it will interfere with the init/realization of CPU N+1. We're using the 'enabled' profile flag to do two distinct things: inform cpu_init() that we want profile extensions to be enabled, and telling QMP that a profile is currently enabled in the CPU. We want to be flexible enough to recognize profile support for all CPUs that has the extension prerequisites, but we do not want to force the profile code path if a profile wasn't set too. Add a new 'present' flag for profiles that will coexist with the 'enabled' flag. Enabling a profile means "we want to switch on all its mandatory extensions". A profile is 'present' if we asserted during validation that the CPU has the needed prerequisites. This means that the case reported by Björn now results in RVA22S64.enabled=false and RVA22S64.present=true. QMP will recognize it as a RVA22 compliant CPU and we won't force the CPU into the profile path. [1] https://lore.kernel.org/qemu-riscv/87y0usiz22.fsf@all.your.base.are.belong.to.us/ Reported-by: Björn Töpel <bjorn@kernel.org> Fixes: 2af005d610 ("target/riscv/tcg: validate profiles during finalize") Signed-off-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> Reviewed-by: Andrew Jones <ajones@ventanamicro.com> Reviewed-by: Björn Töpel <bjorn@rivosinc.com> Tested-by: Björn Töpel <bjorn@rivosinc.com> Message-ID: <20250528184407.1451983-4-dbarboza@ventanamicro.com> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/cpu.h | 15 +++++++++++++++ target/riscv/riscv-qmp-cmds.c | 2 +- target/riscv/tcg/tcg-cpu.c | 11 +++-------- 3 files changed, 19 insertions(+), 9 deletions(-) diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/cpu.h +++ b/target/riscv/cpu.h @@ -XXX,XX +XXX,XX @@ typedef struct riscv_cpu_profile { struct riscv_cpu_profile *s_parent; const char *name; uint32_t misa_ext; + /* + * The profile is enabled/disabled via command line or + * via cpu_init(). Enabling a profile will add all its + * mandatory extensions in the CPU during init(). + */ bool enabled; + /* + * The profile is present in the CPU, i.e. the current set of + * CPU extensions complies with it. A profile can be enabled + * and not present (e.g. the user disabled a mandatory extension) + * and the other way around (e.g. all mandatory extensions are + * present in a non-profile CPU). + * + * QMP uses this flag. + */ + bool present; bool user_set; int priv_spec; int satp_mode; diff --git a/target/riscv/riscv-qmp-cmds.c b/target/riscv/riscv-qmp-cmds.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/riscv-qmp-cmds.c +++ b/target/riscv/riscv-qmp-cmds.c @@ -XXX,XX +XXX,XX @@ static void riscv_obj_add_profiles_qdict(Object *obj, QDict *qdict_out) for (int i = 0; riscv_profiles[i] != NULL; i++) { profile = riscv_profiles[i]; - value = QOBJECT(qbool_from_bool(profile->enabled)); + value = QOBJECT(qbool_from_bool(profile->present)); qdict_put_obj(qdict_out, profile->name, value); } diff --git a/target/riscv/tcg/tcg-cpu.c b/target/riscv/tcg/tcg-cpu.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/tcg/tcg-cpu.c +++ b/target/riscv/tcg/tcg-cpu.c @@ -XXX,XX +XXX,XX @@ static void riscv_cpu_check_parent_profile(RISCVCPU *cpu, RISCVCPUProfile *profile, RISCVCPUProfile *parent) { - const char *parent_name; - bool parent_enabled; - - if (!profile->enabled || !parent) { + if (!profile->present || !parent) { return; } - parent_name = parent->name; - parent_enabled = object_property_get_bool(OBJECT(cpu), parent_name, NULL); - profile->enabled = parent_enabled; + profile->present = parent->present; } static void riscv_cpu_validate_profile(RISCVCPU *cpu, @@ -XXX,XX +XXX,XX @@ static void riscv_cpu_validate_profile(RISCVCPU *cpu, } } - profile->enabled = profile_impl; + profile->present = profile_impl; riscv_cpu_check_parent_profile(cpu, profile, profile->u_parent); riscv_cpu_check_parent_profile(cpu, profile, profile->s_parent); -- 2.50.0
From: Jay Chang <jay.chang@sifive.com> According to the RISC-V Privileged Specification (version >1.12), RV32 supports 16 CSRs (pmpcfg0–pmpcfg15) to configure 64 PMP regions (pmpaddr0–pmpaddr63). Signed-off-by: Jay Chang <jay.chang@sifive.com> Reviewed-by: Frank Chang <frank.chang@sifive.com> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> Message-ID: <20250522081236.4050-2-jay.chang@sifive.com> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/cpu_bits.h | 60 +++++++++++++++++++ target/riscv/csr.c | 124 +++++++++++++++++++++++++++++++++++++++- 2 files changed, 182 insertions(+), 2 deletions(-) diff --git a/target/riscv/cpu_bits.h b/target/riscv/cpu_bits.h index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/cpu_bits.h +++ b/target/riscv/cpu_bits.h @@ -XXX,XX +XXX,XX @@ #define CSR_PMPCFG1 0x3a1 #define CSR_PMPCFG2 0x3a2 #define CSR_PMPCFG3 0x3a3 +#define CSR_PMPCFG4 0x3a4 +#define CSR_PMPCFG5 0x3a5 +#define CSR_PMPCFG6 0x3a6 +#define CSR_PMPCFG7 0x3a7 +#define CSR_PMPCFG8 0x3a8 +#define CSR_PMPCFG9 0x3a9 +#define CSR_PMPCFG10 0x3aa +#define CSR_PMPCFG11 0x3ab +#define CSR_PMPCFG12 0x3ac +#define CSR_PMPCFG13 0x3ad +#define CSR_PMPCFG14 0x3ae +#define CSR_PMPCFG15 0x3af #define CSR_PMPADDR0 0x3b0 #define CSR_PMPADDR1 0x3b1 #define CSR_PMPADDR2 0x3b2 @@ -XXX,XX +XXX,XX @@ #define CSR_PMPADDR13 0x3bd #define CSR_PMPADDR14 0x3be #define CSR_PMPADDR15 0x3bf +#define CSR_PMPADDR16 0x3c0 +#define CSR_PMPADDR17 0x3c1 +#define CSR_PMPADDR18 0x3c2 +#define CSR_PMPADDR19 0x3c3 +#define CSR_PMPADDR20 0x3c4 +#define CSR_PMPADDR21 0x3c5 +#define CSR_PMPADDR22 0x3c6 +#define CSR_PMPADDR23 0x3c7 +#define CSR_PMPADDR24 0x3c8 +#define CSR_PMPADDR25 0x3c9 +#define CSR_PMPADDR26 0x3ca +#define CSR_PMPADDR27 0x3cb +#define CSR_PMPADDR28 0x3cc +#define CSR_PMPADDR29 0x3cd +#define CSR_PMPADDR30 0x3ce +#define CSR_PMPADDR31 0x3cf +#define CSR_PMPADDR32 0x3d0 +#define CSR_PMPADDR33 0x3d1 +#define CSR_PMPADDR34 0x3d2 +#define CSR_PMPADDR35 0x3d3 +#define CSR_PMPADDR36 0x3d4 +#define CSR_PMPADDR37 0x3d5 +#define CSR_PMPADDR38 0x3d6 +#define CSR_PMPADDR39 0x3d7 +#define CSR_PMPADDR40 0x3d8 +#define CSR_PMPADDR41 0x3d9 +#define CSR_PMPADDR42 0x3da +#define CSR_PMPADDR43 0x3db +#define CSR_PMPADDR44 0x3dc +#define CSR_PMPADDR45 0x3dd +#define CSR_PMPADDR46 0x3de +#define CSR_PMPADDR47 0x3df +#define CSR_PMPADDR48 0x3e0 +#define CSR_PMPADDR49 0x3e1 +#define CSR_PMPADDR50 0x3e2 +#define CSR_PMPADDR51 0x3e3 +#define CSR_PMPADDR52 0x3e4 +#define CSR_PMPADDR53 0x3e5 +#define CSR_PMPADDR54 0x3e6 +#define CSR_PMPADDR55 0x3e7 +#define CSR_PMPADDR56 0x3e8 +#define CSR_PMPADDR57 0x3e9 +#define CSR_PMPADDR58 0x3ea +#define CSR_PMPADDR59 0x3eb +#define CSR_PMPADDR60 0x3ec +#define CSR_PMPADDR61 0x3ed +#define CSR_PMPADDR62 0x3ee +#define CSR_PMPADDR63 0x3ef /* RNMI */ #define CSR_MNSCRATCH 0x740 diff --git a/target/riscv/csr.c b/target/riscv/csr.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/csr.c +++ b/target/riscv/csr.c @@ -XXX,XX +XXX,XX @@ riscv_csr_operations csr_ops[CSR_TABLE_SIZE] = { [CSR_PMPCFG1] = { "pmpcfg1", pmp, read_pmpcfg, write_pmpcfg }, [CSR_PMPCFG2] = { "pmpcfg2", pmp, read_pmpcfg, write_pmpcfg }, [CSR_PMPCFG3] = { "pmpcfg3", pmp, read_pmpcfg, write_pmpcfg }, + [CSR_PMPCFG4] = { "pmpcfg4", pmp, read_pmpcfg, write_pmpcfg, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPCFG5] = { "pmpcfg5", pmp, read_pmpcfg, write_pmpcfg, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPCFG6] = { "pmpcfg6", pmp, read_pmpcfg, write_pmpcfg, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPCFG7] = { "pmpcfg7", pmp, read_pmpcfg, write_pmpcfg, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPCFG8] = { "pmpcfg8", pmp, read_pmpcfg, write_pmpcfg, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPCFG9] = { "pmpcfg9", pmp, read_pmpcfg, write_pmpcfg, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPCFG10] = { "pmpcfg10", pmp, read_pmpcfg, write_pmpcfg, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPCFG11] = { "pmpcfg11", pmp, read_pmpcfg, write_pmpcfg, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPCFG12] = { "pmpcfg12", pmp, read_pmpcfg, write_pmpcfg, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPCFG13] = { "pmpcfg13", pmp, read_pmpcfg, write_pmpcfg, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPCFG14] = { "pmpcfg14", pmp, read_pmpcfg, write_pmpcfg, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPCFG15] = { "pmpcfg15", pmp, read_pmpcfg, write_pmpcfg, + .min_priv_ver = PRIV_VERSION_1_12_0 }, [CSR_PMPADDR0] = { "pmpaddr0", pmp, read_pmpaddr, write_pmpaddr }, [CSR_PMPADDR1] = { "pmpaddr1", pmp, read_pmpaddr, write_pmpaddr }, [CSR_PMPADDR2] = { "pmpaddr2", pmp, read_pmpaddr, write_pmpaddr }, @@ -XXX,XX +XXX,XX @@ riscv_csr_operations csr_ops[CSR_TABLE_SIZE] = { [CSR_PMPADDR11] = { "pmpaddr11", pmp, read_pmpaddr, write_pmpaddr }, [CSR_PMPADDR12] = { "pmpaddr12", pmp, read_pmpaddr, write_pmpaddr }, [CSR_PMPADDR13] = { "pmpaddr13", pmp, read_pmpaddr, write_pmpaddr }, - [CSR_PMPADDR14] = { "pmpaddr14", pmp, read_pmpaddr, write_pmpaddr }, - [CSR_PMPADDR15] = { "pmpaddr15", pmp, read_pmpaddr, write_pmpaddr }, + [CSR_PMPADDR14] = { "pmpaddr14", pmp, read_pmpaddr, write_pmpaddr }, + [CSR_PMPADDR15] = { "pmpaddr15", pmp, read_pmpaddr, write_pmpaddr }, + [CSR_PMPADDR16] = { "pmpaddr16", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR17] = { "pmpaddr17", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR18] = { "pmpaddr18", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR19] = { "pmpaddr19", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR20] = { "pmpaddr20", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR21] = { "pmpaddr21", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR22] = { "pmpaddr22", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR23] = { "pmpaddr23", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR24] = { "pmpaddr24", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR25] = { "pmpaddr25", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR26] = { "pmpaddr26", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR27] = { "pmpaddr27", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR28] = { "pmpaddr28", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR29] = { "pmpaddr29", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR30] = { "pmpaddr30", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR31] = { "pmpaddr31", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR32] = { "pmpaddr32", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR33] = { "pmpaddr33", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR34] = { "pmpaddr34", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR35] = { "pmpaddr35", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR36] = { "pmpaddr36", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR37] = { "pmpaddr37", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR38] = { "pmpaddr38", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR39] = { "pmpaddr39", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR40] = { "pmpaddr40", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR41] = { "pmpaddr41", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR42] = { "pmpaddr42", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR43] = { "pmpaddr43", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR44] = { "pmpaddr44", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR45] = { "pmpaddr45", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR46] = { "pmpaddr46", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR47] = { "pmpaddr47", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR48] = { "pmpaddr48", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR49] = { "pmpaddr49", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR50] = { "pmpaddr50", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR51] = { "pmpaddr51", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR52] = { "pmpaddr52", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR53] = { "pmpaddr53", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR54] = { "pmpaddr54", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR55] = { "pmpaddr55", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR56] = { "pmpaddr56", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR57] = { "pmpaddr57", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR58] = { "pmpaddr58", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR59] = { "pmpaddr59", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR60] = { "pmpaddr60", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR61] = { "pmpaddr61", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR62] = { "pmpaddr62", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR63] = { "pmpaddr63", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, /* Debug CSRs */ [CSR_TSELECT] = { "tselect", debug, read_tselect, write_tselect }, -- 2.50.0
From: Daniel Henrique Barboza <dbarboza@ventanamicro.com> These properties were deprecated in QEMU 8.2, commit 8043effd9b. Signed-off-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Message-ID: <20250530134608.1806922-1-dbarboza@ventanamicro.com> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/cpu.h | 1 - target/riscv/cpu.c | 17 ----------------- target/riscv/tcg/tcg-cpu.c | 31 +------------------------------ 3 files changed, 1 insertion(+), 48 deletions(-) diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/cpu.h +++ b/target/riscv/cpu.h @@ -XXX,XX +XXX,XX @@ extern const RISCVCPUMultiExtConfig riscv_cpu_extensions[]; extern const RISCVCPUMultiExtConfig riscv_cpu_vendor_exts[]; extern const RISCVCPUMultiExtConfig riscv_cpu_experimental_exts[]; extern const RISCVCPUMultiExtConfig riscv_cpu_named_features[]; -extern const RISCVCPUMultiExtConfig riscv_cpu_deprecated_exts[]; typedef struct isa_ext_data { const char *name; diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/cpu.c +++ b/target/riscv/cpu.c @@ -XXX,XX +XXX,XX @@ const RISCVCPUMultiExtConfig riscv_cpu_named_features[] = { { }, }; -/* Deprecated entries marked for future removal */ -const RISCVCPUMultiExtConfig riscv_cpu_deprecated_exts[] = { - MULTI_EXT_CFG_BOOL("Zifencei", ext_zifencei, true), - MULTI_EXT_CFG_BOOL("Zicsr", ext_zicsr, true), - MULTI_EXT_CFG_BOOL("Zihintntl", ext_zihintntl, true), - MULTI_EXT_CFG_BOOL("Zihintpause", ext_zihintpause, true), - MULTI_EXT_CFG_BOOL("Zawrs", ext_zawrs, true), - MULTI_EXT_CFG_BOOL("Zfa", ext_zfa, true), - MULTI_EXT_CFG_BOOL("Zfh", ext_zfh, false), - MULTI_EXT_CFG_BOOL("Zfhmin", ext_zfhmin, false), - MULTI_EXT_CFG_BOOL("Zve32f", ext_zve32f, false), - MULTI_EXT_CFG_BOOL("Zve64f", ext_zve64f, false), - MULTI_EXT_CFG_BOOL("Zve64d", ext_zve64d, false), - - { }, -}; - static void cpu_set_prop_err(RISCVCPU *cpu, const char *propname, Error **errp) { diff --git a/target/riscv/tcg/tcg-cpu.c b/target/riscv/tcg/tcg-cpu.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/tcg/tcg-cpu.c +++ b/target/riscv/tcg/tcg-cpu.c @@ -XXX,XX +XXX,XX @@ static void riscv_cpu_add_profiles(Object *cpu_obj) } } -static bool cpu_ext_is_deprecated(const char *ext_name) -{ - return isupper(ext_name[0]); -} - -/* - * String will be allocated in the heap. Caller is responsible - * for freeing it. - */ -static char *cpu_ext_to_lower(const char *ext_name) -{ - char *ret = g_malloc0(strlen(ext_name) + 1); - - strcpy(ret, ext_name); - ret[0] = tolower(ret[0]); - - return ret; -} - static void cpu_set_multi_ext_cfg(Object *obj, Visitor *v, const char *name, void *opaque, Error **errp) { @@ -XXX,XX +XXX,XX @@ static void cpu_set_multi_ext_cfg(Object *obj, Visitor *v, const char *name, return; } - if (cpu_ext_is_deprecated(multi_ext_cfg->name)) { - g_autofree char *lower = cpu_ext_to_lower(multi_ext_cfg->name); - - warn_report("CPU property '%s' is deprecated. Please use '%s' instead", - multi_ext_cfg->name, lower); - } - cpu_cfg_ext_add_user_opt(multi_ext_cfg->offset, value); prev_val = isa_ext_is_enabled(cpu, multi_ext_cfg->offset); @@ -XXX,XX +XXX,XX @@ static void cpu_add_multi_ext_prop(Object *cpu_obj, const RISCVCPUMultiExtConfig *multi_cfg) { bool generic_cpu = riscv_cpu_is_generic(cpu_obj); - bool deprecated_ext = cpu_ext_is_deprecated(multi_cfg->name); object_property_add(cpu_obj, multi_cfg->name, "bool", cpu_get_multi_ext_cfg, cpu_set_multi_ext_cfg, NULL, (void *)multi_cfg); - if (!generic_cpu || deprecated_ext) { + if (!generic_cpu) { return; } @@ -XXX,XX +XXX,XX @@ static void riscv_cpu_add_user_properties(Object *obj) riscv_cpu_add_multiext_prop_array(obj, riscv_cpu_vendor_exts); riscv_cpu_add_multiext_prop_array(obj, riscv_cpu_experimental_exts); - riscv_cpu_add_multiext_prop_array(obj, riscv_cpu_deprecated_exts); - riscv_cpu_add_profiles(obj); } -- 2.50.0
From: Daniel Henrique Barboza <dbarboza@ventanamicro.com> We have support for sdtrig for awhile but we are not advertising it. It is enabled by default via the 'debug' flag. Use the same flag to also advertise sdtrig. Add an exception in disable_priv_spec_isa_exts() to avoid spamming warnings for 'sdtrig' for vendor CPUs like sifive_u. Signed-off-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Message-ID: <20250604174329.1147549-2-dbarboza@ventanamicro.com> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/cpu.c | 1 + target/riscv/tcg/tcg-cpu.c | 9 +++++++++ tests/data/acpi/riscv64/virt/RHCT | Bin 400 -> 406 bytes 3 files changed, 10 insertions(+) diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/cpu.c +++ b/target/riscv/cpu.c @@ -XXX,XX +XXX,XX @@ const RISCVIsaExtData isa_edata_arr[] = { ISA_EXT_DATA_ENTRY(zvkt, PRIV_VERSION_1_12_0, ext_zvkt), ISA_EXT_DATA_ENTRY(zhinx, PRIV_VERSION_1_12_0, ext_zhinx), ISA_EXT_DATA_ENTRY(zhinxmin, PRIV_VERSION_1_12_0, ext_zhinxmin), + ISA_EXT_DATA_ENTRY(sdtrig, PRIV_VERSION_1_12_0, debug), ISA_EXT_DATA_ENTRY(shcounterenw, PRIV_VERSION_1_12_0, has_priv_1_12), ISA_EXT_DATA_ENTRY(sha, PRIV_VERSION_1_12_0, ext_sha), ISA_EXT_DATA_ENTRY(shgatpa, PRIV_VERSION_1_12_0, has_priv_1_12), diff --git a/target/riscv/tcg/tcg-cpu.c b/target/riscv/tcg/tcg-cpu.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/tcg/tcg-cpu.c +++ b/target/riscv/tcg/tcg-cpu.c @@ -XXX,XX +XXX,XX @@ static void riscv_cpu_disable_priv_spec_isa_exts(RISCVCPU *cpu) continue; } + /* + * cpu.debug = true is marked as 'sdtrig', priv spec 1.12. + * Skip this warning since existing CPUs with older priv + * spec and debug = true will be impacted. + */ + if (!strcmp(edata->name, "sdtrig")) { + continue; + } + isa_ext_update_enabled(cpu, edata->ext_enable_offset, false); /* diff --git a/tests/data/acpi/riscv64/virt/RHCT b/tests/data/acpi/riscv64/virt/RHCT index XXXXXXX..XXXXXXX 100644 Binary files a/tests/data/acpi/riscv64/virt/RHCT and b/tests/data/acpi/riscv64/virt/RHCT differ -- 2.50.0
From: Daniel Henrique Barboza <dbarboza@ventanamicro.com> 'ssstrict' is a RVA23 profile-defined extension defined as follows: "No non-conforming extensions are present. Attempts to execute unimplemented opcodes or access unimplemented CSRs in the standard or reserved encoding spaces raises an illegal instruction exception that results in a contained trap to the supervisor-mode trap handler." In short, we need to throw an exception when accessing unimplemented CSRs or opcodes. We do that, so let's advertise it. Signed-off-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> Reviewed-by: Andrew Jones <ajones@ventanamicro.com> Message-ID: <20250529202315.1684198-3-dbarboza@ventanamicro.com> Message-ID: <20250604174329.1147549-3-dbarboza@ventanamicro.com> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/cpu.c | 1 + tests/data/acpi/riscv64/virt/RHCT | Bin 406 -> 416 bytes 2 files changed, 1 insertion(+) diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/cpu.c +++ b/target/riscv/cpu.c @@ -XXX,XX +XXX,XX @@ const RISCVIsaExtData isa_edata_arr[] = { ISA_EXT_DATA_ENTRY(ssnpm, PRIV_VERSION_1_13_0, ext_ssnpm), ISA_EXT_DATA_ENTRY(sspm, PRIV_VERSION_1_13_0, ext_sspm), ISA_EXT_DATA_ENTRY(ssstateen, PRIV_VERSION_1_12_0, ext_ssstateen), + ISA_EXT_DATA_ENTRY(ssstrict, PRIV_VERSION_1_12_0, has_priv_1_12), ISA_EXT_DATA_ENTRY(sstc, PRIV_VERSION_1_12_0, ext_sstc), ISA_EXT_DATA_ENTRY(sstvala, PRIV_VERSION_1_12_0, has_priv_1_12), ISA_EXT_DATA_ENTRY(sstvecd, PRIV_VERSION_1_12_0, has_priv_1_12), diff --git a/tests/data/acpi/riscv64/virt/RHCT b/tests/data/acpi/riscv64/virt/RHCT index XXXXXXX..XXXXXXX 100644 Binary files a/tests/data/acpi/riscv64/virt/RHCT and b/tests/data/acpi/riscv64/virt/RHCT differ -- 2.50.0
From: Daniel Henrique Barboza <dbarboza@ventanamicro.com> Most of the named features are added directly in isa_edata_arr[], some of them are also added in riscv_cpu_named_features(). There is a reason for that, and the existing docs can do better explaining it. Signed-off-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> Message-ID: <20250529202315.1684198-4-dbarboza@ventanamicro.com> Acked-by: Alistair Francis <alistair.francis@wdc.com> Message-ID: <20250604174329.1147549-4-dbarboza@ventanamicro.com> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/cpu.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/cpu.c +++ b/target/riscv/cpu.c @@ -XXX,XX +XXX,XX @@ const RISCVCPUMultiExtConfig riscv_cpu_experimental_exts[] = { * 'Named features' is the name we give to extensions that we * don't want to expose to users. They are either immutable * (always enabled/disable) or they'll vary depending on - * the resulting CPU state. They have riscv,isa strings - * and priv_ver like regular extensions. + * the resulting CPU state. + * + * Some of them are always enabled depending on priv version + * of the CPU and are declared directly in isa_edata_arr[]. + * The ones listed here have special checks during finalize() + * time and require their own flags like regular extensions. + * See riscv_cpu_update_named_features() for more info. */ const RISCVCPUMultiExtConfig riscv_cpu_named_features[] = { MULTI_EXT_CFG_BOOL("zic64b", ext_zic64b, true), MULTI_EXT_CFG_BOOL("ssstateen", ext_ssstateen, true), MULTI_EXT_CFG_BOOL("sha", ext_sha, true), + + /* + * 'ziccrse' has its own flag because the KVM driver + * wants to enable/disable it on its own accord. + */ MULTI_EXT_CFG_BOOL("ziccrse", ext_ziccrse, true), { }, -- 2.50.0
From: Jim Shu <jim.shu@sifive.com> Support 4-byte atomic instruction fetch when instruction is natural aligned. Current implementation is not atomic because it loads instruction twice for first and last 2 bytes. We load 4 bytes at once to keep the atomicity. This instruction preload method only applys when instruction is 4-byte aligned. If instruction is unaligned, it could be across pages so that preload will trigger additional page fault. We encounter this issue when doing pressure test of enabling & disabling Linux kernel ftrace. Ftrace with kernel preemption requires concurrent modification and execution of instruction, so non-atomic instruction fetch will cause the race condition. We may fetch the wrong instruction which is the mixing of 2 instructions. Also, RISC-V Profile wants to provide this feature by HW. RVA20U64 Ziccif protects the atomicity of instruction fetch when it is natural aligned. This commit depends on the atomic read support of translator_ld in the commit 6a9dfe1984b0c593fb0ddb52d4e70832e6201dd6. Signed-off-by: Jim Shu <jim.shu@sifive.com> Reviewed-by: Frank Chang <frank.chang@sifive.com> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Message-ID: <20250508094838.19394-1-jim.shu@sifive.com> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/translate.c | 46 +++++++++++++++++++++++++++++----------- 1 file changed, 34 insertions(+), 12 deletions(-) diff --git a/target/riscv/translate.c b/target/riscv/translate.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/translate.c +++ b/target/riscv/translate.c @@ -XXX,XX +XXX,XX @@ const RISCVDecoder decoder_table[] = { const size_t decoder_table_size = ARRAY_SIZE(decoder_table); -static void decode_opc(CPURISCVState *env, DisasContext *ctx, uint16_t opcode) +static void decode_opc(CPURISCVState *env, DisasContext *ctx) { + uint32_t opcode; + bool pc_is_4byte_align = ((ctx->base.pc_next % 4) == 0); + ctx->virt_inst_excp = false; - ctx->cur_insn_len = insn_len(opcode); + if (pc_is_4byte_align) { + /* + * Load 4 bytes at once to make instruction fetch atomically. + * + * Note: When pc is 4-byte aligned, 4-byte instruction wouldn't be + * across pages. We could preload 4 bytes instruction no matter + * real one is 2 or 4 bytes. Instruction preload wouldn't trigger + * additional page fault. + */ + opcode = translator_ldl(env, &ctx->base, ctx->base.pc_next); + } else { + /* + * For unaligned pc, instruction preload may trigger additional + * page fault so we only load 2 bytes here. + */ + opcode = (uint32_t) translator_lduw(env, &ctx->base, ctx->base.pc_next); + } + ctx->ol = ctx->xl; + + ctx->cur_insn_len = insn_len((uint16_t)opcode); /* Check for compressed insn */ if (ctx->cur_insn_len == 2) { - ctx->opcode = opcode; + ctx->opcode = (uint16_t)opcode; /* * The Zca extension is added as way to refer to instructions in the C * extension that do not include the floating-point loads and stores @@ -XXX,XX +XXX,XX @@ static void decode_opc(CPURISCVState *env, DisasContext *ctx, uint16_t opcode) return; } } else { - uint32_t opcode32 = opcode; - opcode32 = deposit32(opcode32, 16, 16, - translator_lduw(env, &ctx->base, - ctx->base.pc_next + 2)); - ctx->opcode = opcode32; + if (!pc_is_4byte_align) { + /* Load last 2 bytes of instruction here */ + opcode = deposit32(opcode, 16, 16, + translator_lduw(env, &ctx->base, + ctx->base.pc_next + 2)); + } + ctx->opcode = opcode; for (guint i = 0; i < ctx->decoders->len; ++i) { riscv_cpu_decode_fn func = g_ptr_array_index(ctx->decoders, i); - if (func(ctx, opcode32)) { + if (func(ctx, opcode)) { return; } } @@ -XXX,XX +XXX,XX @@ static void riscv_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu) { DisasContext *ctx = container_of(dcbase, DisasContext, base); CPURISCVState *env = cpu_env(cpu); - uint16_t opcode16 = translator_lduw(env, &ctx->base, ctx->base.pc_next); - ctx->ol = ctx->xl; - decode_opc(env, ctx, opcode16); + decode_opc(env, ctx); ctx->base.pc_next += ctx->cur_insn_len; /* -- 2.50.0
From: Meng Zhuo <mengzhuo@iscas.ac.cn> This patch adds max_satp_mode from host kvm cpu setting. Tested on: Milkv Megrez (Eswin 7700x) Reviewed-by: Andrew Jones <ajones@ventanamicro.com> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2931 Signed-off-by: Meng Zhuo <mengzhuo@iscas.ac.cn> Message-ID: <20250606034250.181707-1-mengzhuo@iscas.ac.cn> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/kvm/kvm-cpu.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/target/riscv/kvm/kvm-cpu.c b/target/riscv/kvm/kvm-cpu.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/kvm/kvm-cpu.c +++ b/target/riscv/kvm/kvm-cpu.c @@ -XXX,XX +XXX,XX @@ static void kvm_riscv_destroy_scratch_vcpu(KVMScratchCPU *scratch) close(scratch->kvmfd); } +static void kvm_riscv_init_max_satp_mode(RISCVCPU *cpu, KVMScratchCPU *kvmcpu) +{ + struct kvm_one_reg reg; + int ret; + + reg.id = RISCV_CONFIG_REG(satp_mode); + reg.addr = (uint64_t)&cpu->cfg.max_satp_mode; + ret = ioctl(kvmcpu->cpufd, KVM_GET_ONE_REG, ®); + if (ret != 0) { + error_report("Unable to retrieve satp mode from host, error %d", ret); + } +} + static void kvm_riscv_init_machine_ids(RISCVCPU *cpu, KVMScratchCPU *kvmcpu) { struct kvm_one_reg reg; @@ -XXX,XX +XXX,XX @@ static void riscv_init_kvm_registers(Object *cpu_obj) kvm_riscv_init_machine_ids(cpu, &kvmcpu); kvm_riscv_init_misa_ext_mask(cpu, &kvmcpu); kvm_riscv_init_cfg(cpu, &kvmcpu); + kvm_riscv_init_max_satp_mode(cpu, &kvmcpu); kvm_riscv_destroy_scratch_vcpu(&kvmcpu); } @@ -XXX,XX +XXX,XX @@ static bool kvm_cpu_realize(CPUState *cs, Error **errp) } } - return true; + return true; } void riscv_kvm_cpu_finalize_features(RISCVCPU *cpu, Error **errp) -- 2.50.0
From: Jay Chang <jay.chang@sifive.com> Previously, the number of PMP regions was hardcoded to 16 in QEMU. This patch replaces the fixed value with a new `pmp_regions` field, allowing platforms to configure the number of PMP regions. If no specific value is provided, the default number of PMP regions remains 16 to preserve the existing behavior. A new CPU parameter num-pmp-regions has been introduced to the QEMU command line. For example: -cpu rv64, g=true, c=true, pmp=true, num-pmp-regions=8 Signed-off-by: Jay Chang <jay.chang@sifive.com> Reviewed-by: Frank Chang <frank.chang@sifive.com> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Message-ID: <20250606072525.17313-3-jay.chang@sifive.com> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/cpu.h | 3 +- target/riscv/cpu_cfg_fields.h.inc | 1 + target/riscv/cpu.c | 48 +++++++++++++++++++++++++++++-- target/riscv/csr.c | 5 +++- target/riscv/machine.c | 3 +- target/riscv/pmp.c | 28 ++++++++++++------ 6 files changed, 74 insertions(+), 14 deletions(-) diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/cpu.h +++ b/target/riscv/cpu.h @@ -XXX,XX +XXX,XX @@ extern RISCVCPUImpliedExtsRule *riscv_multi_ext_implied_rules[]; #define MMU_USER_IDX 3 -#define MAX_RISCV_PMPS (16) +#define MAX_RISCV_PMPS (64) +#define OLD_MAX_RISCV_PMPS (16) #if !defined(CONFIG_USER_ONLY) #include "pmp.h" diff --git a/target/riscv/cpu_cfg_fields.h.inc b/target/riscv/cpu_cfg_fields.h.inc index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/cpu_cfg_fields.h.inc +++ b/target/riscv/cpu_cfg_fields.h.inc @@ -XXX,XX +XXX,XX @@ TYPED_FIELD(uint16_t, elen, 0) TYPED_FIELD(uint16_t, cbom_blocksize, 0) TYPED_FIELD(uint16_t, cbop_blocksize, 0) TYPED_FIELD(uint16_t, cboz_blocksize, 0) +TYPED_FIELD(uint8_t, pmp_regions, 0) TYPED_FIELD(int8_t, max_satp_mode, -1) diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/cpu.c +++ b/target/riscv/cpu.c @@ -XXX,XX +XXX,XX @@ static void riscv_cpu_init(Object *obj) cpu->cfg.cbom_blocksize = 64; cpu->cfg.cbop_blocksize = 64; cpu->cfg.cboz_blocksize = 64; + cpu->cfg.pmp_regions = 16; cpu->env.vext_ver = VEXT_VERSION_1_00_0; cpu->cfg.max_satp_mode = -1; @@ -XXX,XX +XXX,XX @@ static const PropertyInfo prop_pmp = { .set = prop_pmp_set, }; +static void prop_num_pmp_regions_set(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + RISCVCPU *cpu = RISCV_CPU(obj); + uint8_t value; + + visit_type_uint8(v, name, &value, errp); + + if (cpu->cfg.pmp_regions != value && riscv_cpu_is_vendor(obj)) { + cpu_set_prop_err(cpu, name, errp); + return; + } + + if (cpu->env.priv_ver < PRIV_VERSION_1_12_0 && value > OLD_MAX_RISCV_PMPS) { + error_setg(errp, "Number of PMP regions exceeds maximum available"); + return; + } else if (value > MAX_RISCV_PMPS) { + error_setg(errp, "Number of PMP regions exceeds maximum available"); + return; + } + + cpu_option_add_user_setting(name, value); + cpu->cfg.pmp_regions = value; +} + +static void prop_num_pmp_regions_get(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + uint8_t value = RISCV_CPU(obj)->cfg.pmp_regions; + + visit_type_uint8(v, name, &value, errp); +} + +static const PropertyInfo prop_num_pmp_regions = { + .type = "uint8", + .description = "num-pmp-regions", + .get = prop_num_pmp_regions_get, + .set = prop_num_pmp_regions_set, +}; + static int priv_spec_from_str(const char *priv_spec_str) { int priv_version = -1; @@ -XXX,XX +XXX,XX @@ static const Property riscv_cpu_properties[] = { {.name = "mmu", .info = &prop_mmu}, {.name = "pmp", .info = &prop_pmp}, + {.name = "num-pmp-regions", .info = &prop_num_pmp_regions}, {.name = "priv_spec", .info = &prop_priv_spec}, {.name = "vext_spec", .info = &prop_vext_spec}, @@ -XXX,XX +XXX,XX @@ static const TypeInfo riscv_cpu_type_infos[] = { .cfg.max_satp_mode = VM_1_10_MBARE, .cfg.ext_zifencei = true, .cfg.ext_zicsr = true, - .cfg.pmp = true + .cfg.pmp = true, + .cfg.pmp_regions = 8 ), DEFINE_ABSTRACT_RISCV_CPU(TYPE_RISCV_CPU_SIFIVE_U, TYPE_RISCV_VENDOR_CPU, @@ -XXX,XX +XXX,XX @@ static const TypeInfo riscv_cpu_type_infos[] = { .cfg.ext_zifencei = true, .cfg.ext_zicsr = true, .cfg.mmu = true, - .cfg.pmp = true + .cfg.pmp = true, + .cfg.pmp_regions = 8 ), #if defined(TARGET_RISCV32) || \ diff --git a/target/riscv/csr.c b/target/riscv/csr.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/csr.c +++ b/target/riscv/csr.c @@ -XXX,XX +XXX,XX @@ static RISCVException dbltrp_hmode(CPURISCVState *env, int csrno) static RISCVException pmp(CPURISCVState *env, int csrno) { if (riscv_cpu_cfg(env)->pmp) { - if (csrno <= CSR_PMPCFG3) { + int max_pmpcfg = (env->priv_ver >= PRIV_VERSION_1_12_0) ? ++ CSR_PMPCFG15 : CSR_PMPCFG3; + + if (csrno <= max_pmpcfg) { uint32_t reg_index = csrno - CSR_PMPCFG0; /* TODO: RV128 restriction check */ diff --git a/target/riscv/machine.c b/target/riscv/machine.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/machine.c +++ b/target/riscv/machine.c @@ -XXX,XX +XXX,XX @@ static int pmp_post_load(void *opaque, int version_id) RISCVCPU *cpu = opaque; CPURISCVState *env = &cpu->env; int i; + uint8_t pmp_regions = riscv_cpu_cfg(env)->pmp_regions; - for (i = 0; i < MAX_RISCV_PMPS; i++) { + for (i = 0; i < pmp_regions; i++) { pmp_update_rule_addr(env, i); } pmp_update_rule_nums(env); diff --git a/target/riscv/pmp.c b/target/riscv/pmp.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/pmp.c +++ b/target/riscv/pmp.c @@ -XXX,XX +XXX,XX @@ uint32_t pmp_get_num_rules(CPURISCVState *env) */ static inline uint8_t pmp_read_cfg(CPURISCVState *env, uint32_t pmp_index) { - if (pmp_index < MAX_RISCV_PMPS) { + uint8_t pmp_regions = riscv_cpu_cfg(env)->pmp_regions; + + if (pmp_index < pmp_regions) { return env->pmp_state.pmp[pmp_index].cfg_reg; } @@ -XXX,XX +XXX,XX @@ static inline uint8_t pmp_read_cfg(CPURISCVState *env, uint32_t pmp_index) */ static bool pmp_write_cfg(CPURISCVState *env, uint32_t pmp_index, uint8_t val) { - if (pmp_index < MAX_RISCV_PMPS) { + uint8_t pmp_regions = riscv_cpu_cfg(env)->pmp_regions; + + if (pmp_index < pmp_regions) { if (env->pmp_state.pmp[pmp_index].cfg_reg == val) { /* no change */ return false; @@ -XXX,XX +XXX,XX @@ void pmp_update_rule_addr(CPURISCVState *env, uint32_t pmp_index) void pmp_update_rule_nums(CPURISCVState *env) { int i; + uint8_t pmp_regions = riscv_cpu_cfg(env)->pmp_regions; env->pmp_state.num_rules = 0; - for (i = 0; i < MAX_RISCV_PMPS; i++) { + for (i = 0; i < pmp_regions; i++) { const uint8_t a_field = pmp_get_a_field(env->pmp_state.pmp[i].cfg_reg); if (PMP_AMATCH_OFF != a_field) { @@ -XXX,XX +XXX,XX @@ bool pmp_hart_has_privs(CPURISCVState *env, hwaddr addr, int pmp_size = 0; hwaddr s = 0; hwaddr e = 0; + uint8_t pmp_regions = riscv_cpu_cfg(env)->pmp_regions; /* Short cut if no rules */ if (0 == pmp_get_num_rules(env)) { @@ -XXX,XX +XXX,XX @@ bool pmp_hart_has_privs(CPURISCVState *env, hwaddr addr, * 1.10 draft priv spec states there is an implicit order * from low to high */ - for (i = 0; i < MAX_RISCV_PMPS; i++) { + for (i = 0; i < pmp_regions; i++) { s = pmp_is_in_range(env, i, addr); e = pmp_is_in_range(env, i, addr + pmp_size - 1); @@ -XXX,XX +XXX,XX @@ void pmpaddr_csr_write(CPURISCVState *env, uint32_t addr_index, { trace_pmpaddr_csr_write(env->mhartid, addr_index, val); bool is_next_cfg_tor = false; + uint8_t pmp_regions = riscv_cpu_cfg(env)->pmp_regions; - if (addr_index < MAX_RISCV_PMPS) { + if (addr_index < pmp_regions) { if (env->pmp_state.pmp[addr_index].addr_reg == val) { /* no change */ return; @@ -XXX,XX +XXX,XX @@ void pmpaddr_csr_write(CPURISCVState *env, uint32_t addr_index, * In TOR mode, need to check the lock bit of the next pmp * (if there is a next). */ - if (addr_index + 1 < MAX_RISCV_PMPS) { + if (addr_index + 1 < pmp_regions) { uint8_t pmp_cfg = env->pmp_state.pmp[addr_index + 1].cfg_reg; is_next_cfg_tor = PMP_AMATCH_TOR == pmp_get_a_field(pmp_cfg); @@ -XXX,XX +XXX,XX @@ void pmpaddr_csr_write(CPURISCVState *env, uint32_t addr_index, target_ulong pmpaddr_csr_read(CPURISCVState *env, uint32_t addr_index) { target_ulong val = 0; + uint8_t pmp_regions = riscv_cpu_cfg(env)->pmp_regions; - if (addr_index < MAX_RISCV_PMPS) { + if (addr_index < pmp_regions) { val = env->pmp_state.pmp[addr_index].addr_reg; trace_pmpaddr_csr_read(env->mhartid, addr_index, val); } else { @@ -XXX,XX +XXX,XX @@ void mseccfg_csr_write(CPURISCVState *env, target_ulong val) { int i; uint64_t mask = MSECCFG_MMWP | MSECCFG_MML; + uint8_t pmp_regions = riscv_cpu_cfg(env)->pmp_regions; /* Update PMM field only if the value is valid according to Zjpm v1.0 */ if (riscv_cpu_cfg(env)->ext_smmpm && riscv_cpu_mxl(env) == MXL_RV64 && @@ -XXX,XX +XXX,XX @@ void mseccfg_csr_write(CPURISCVState *env, target_ulong val) /* RLB cannot be enabled if it's already 0 and if any regions are locked */ if (!MSECCFG_RLB_ISSET(env)) { - for (i = 0; i < MAX_RISCV_PMPS; i++) { + for (i = 0; i < pmp_regions; i++) { if (pmp_is_locked(env, i)) { val &= ~MSECCFG_RLB; break; @@ -XXX,XX +XXX,XX @@ target_ulong pmp_get_tlb_size(CPURISCVState *env, hwaddr addr) hwaddr tlb_sa = addr & ~(TARGET_PAGE_SIZE - 1); hwaddr tlb_ea = tlb_sa + TARGET_PAGE_SIZE - 1; int i; + uint8_t pmp_regions = riscv_cpu_cfg(env)->pmp_regions; /* * If PMP is not supported or there are no PMP rules, the TLB page will not @@ -XXX,XX +XXX,XX @@ target_ulong pmp_get_tlb_size(CPURISCVState *env, hwaddr addr) return TARGET_PAGE_SIZE; } - for (i = 0; i < MAX_RISCV_PMPS; i++) { + for (i = 0; i < pmp_regions; i++) { if (pmp_get_a_field(env->pmp_state.pmp[i].cfg_reg) == PMP_AMATCH_OFF) { continue; } -- 2.50.0
From: Nutty Liu <liujingqi@lanxincomputing.com> The original implementation incorrectly performed a bitwise AND operation between the PPN of iova and PPN Mask, leading to an incorrect PPN field in Translation-reponse register. The PPN of iova should be set entirely in the PPN field of Translation-reponse register. Also remove the code that was used to clear S field since this field is already zero. Signed-off-by: Nutty Liu <liujingqi@lanxincomputing.com> Reviewed-by: Tomasz Jeznach <tjeznach@rivosinc.com> Message-ID: <20250605124848.1248-1-liujingqi@lanxincomputing.com> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- hw/riscv/riscv-iommu.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/hw/riscv/riscv-iommu.c b/hw/riscv/riscv-iommu.c index XXXXXXX..XXXXXXX 100644 --- a/hw/riscv/riscv-iommu.c +++ b/hw/riscv/riscv-iommu.c @@ -XXX,XX +XXX,XX @@ static void riscv_iommu_process_dbg(RISCVIOMMUState *s) iova = RISCV_IOMMU_TR_RESPONSE_FAULT | (((uint64_t) fault) << 10); } else { iova = iotlb.translated_addr & ~iotlb.addr_mask; - iova >>= TARGET_PAGE_BITS; - iova &= RISCV_IOMMU_TR_RESPONSE_PPN; - - /* We do not support superpages (> 4kbs) for now */ - iova &= ~RISCV_IOMMU_TR_RESPONSE_S; + iova = set_field(0, RISCV_IOMMU_TR_RESPONSE_PPN, PPN_DOWN(iova)); } riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_TR_RESPONSE, iova); } -- 2.50.0
From: Daniel Henrique Barboza <dbarboza@ventanamicro.com> The SBI spec states, for console write byte: "This is a blocking SBI call and it will only return after writing the specified byte to the debug console. It will also return, with SBI_ERR_FAILED, if there are I/O errors." Being a blocker call will either succeed writing the byte or error out, it's feasible to use the blocking qemu_chr_fe_write_all() instead of qemu_chr_fe_write(). Last but not the least, we will duck possible changes in qemu_chr_fe_write() where ret = 0 will have a 'zero byte written' semantic [1] - something that we're not ready to deal in this current state. [1] https://lore.kernel.org/qemu-devel/CAFEAcA_kEndvNtw4EHySXWwQPoGs029yAzZGGBcV=zGHaj7KUQ@mail.gmail.com/ Signed-off-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> Message-ID: <20250605094456.1385105-2-dbarboza@ventanamicro.com> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/kvm/kvm-cpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/target/riscv/kvm/kvm-cpu.c b/target/riscv/kvm/kvm-cpu.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/kvm/kvm-cpu.c +++ b/target/riscv/kvm/kvm-cpu.c @@ -XXX,XX +XXX,XX @@ static void kvm_riscv_handle_sbi_dbcn(CPUState *cs, struct kvm_run *run) break; case SBI_EXT_DBCN_CONSOLE_WRITE_BYTE: ch = run->riscv_sbi.args[0]; - ret = qemu_chr_fe_write(serial_hd(0)->be, &ch, sizeof(ch)); + ret = qemu_chr_fe_write_all(serial_hd(0)->be, &ch, sizeof(ch)); if (ret < 0) { error_report("SBI_EXT_DBCN_CONSOLE_WRITE_BYTE: error when " -- 2.50.0
From: Anton Blanchard <antonb@tenstorrent.com> fcvt.s.bf16 uses the FP16 check_nanbox_h() which returns an FP16 quiet NaN. Add check_nanbox_bf16() which returns a BF16 quiet NaN. Signed-off-by: Anton Blanchard <antonb@tenstorrent.com> Acked-by: Alistair Francis <alistair.francis@wdc.com> Message-ID: <20250501114253.594887-1-antonb@tenstorrent.com> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/internals.h | 16 ++++++++++++++++ target/riscv/fpu_helper.c | 2 +- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/target/riscv/internals.h b/target/riscv/internals.h index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/internals.h +++ b/target/riscv/internals.h @@ -XXX,XX +XXX,XX @@ static inline float16 check_nanbox_h(CPURISCVState *env, uint64_t f) } } +static inline float16 check_nanbox_bf16(CPURISCVState *env, uint64_t f) +{ + /* Disable nanbox check when enable zfinx */ + if (env_archcpu(env)->cfg.ext_zfinx) { + return (uint16_t)f; + } + + uint64_t mask = MAKE_64BIT_MASK(16, 48); + + if (likely((f & mask) == mask)) { + return (uint16_t)f; + } else { + return 0x7FC0u; /* default qnan */ + } +} + #ifndef CONFIG_USER_ONLY /* Our implementation of SysemuCPUOps::has_work */ bool riscv_cpu_has_work(CPUState *cs); diff --git a/target/riscv/fpu_helper.c b/target/riscv/fpu_helper.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/fpu_helper.c +++ b/target/riscv/fpu_helper.c @@ -XXX,XX +XXX,XX @@ uint64_t helper_fcvt_bf16_s(CPURISCVState *env, uint64_t rs1) uint64_t helper_fcvt_s_bf16(CPURISCVState *env, uint64_t rs1) { - float16 frs1 = check_nanbox_h(env, rs1); + float16 frs1 = check_nanbox_bf16(env, rs1); return nanbox_s(env, bfloat16_to_float32(frs1, &env->fp_status)); } -- 2.50.0
From: Florian Lugou <florian.lugou@provenrun.com> The current handler for TXFIFO writes schedules an async callback to pop characters from the queue. When software writes to TXFIFO faster than the async callback delay (100ns), the timer may be pushed back while the previous character has not be dequeued yet. This happens in particular when using -icount with small shift values. This is especially worrysome when software repetitively issues amoor.w instructions (as suggested by SiFive specification) and the FIFO is full, leading to the callback being infinitly pushed back. This commit fixes the issue by never pushing back the timer, only updating it if it is not already active. Signed-off-by: Florian Lugou <florian.lugou@provenrun.com> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Message-ID: <20250605101255.797162-1-florian.lugou@provenrun.com> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- hw/char/sifive_uart.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/hw/char/sifive_uart.c b/hw/char/sifive_uart.c index XXXXXXX..XXXXXXX 100644 --- a/hw/char/sifive_uart.c +++ b/hw/char/sifive_uart.c @@ -XXX,XX +XXX,XX @@ static void sifive_uart_write_tx_fifo(SiFiveUARTState *s, const uint8_t *buf, s->txfifo |= SIFIVE_UART_TXFIFO_FULL; } - timer_mod(s->fifo_trigger_handle, current_time + - TX_INTERRUPT_TRIGGER_DELAY_NS); + if (!timer_pending(s->fifo_trigger_handle)) { + timer_mod(s->fifo_trigger_handle, current_time + + TX_INTERRUPT_TRIGGER_DELAY_NS); + } } static uint64_t -- 2.50.0
From: Joel Stanley <joel@jms.id.au> The address is a hardware address, so use hwaddr for consistency with the rest of the machine. Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Signed-off-by: Joel Stanley <joel@jms.id.au> Message-ID: <20250604025450.85327-2-joel@jms.id.au> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- hw/riscv/virt.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c index XXXXXXX..XXXXXXX 100644 --- a/hw/riscv/virt.c +++ b/hw/riscv/virt.c @@ -XXX,XX +XXX,XX @@ static void create_fdt_socket_clint(RISCVVirtState *s, int cpu; g_autofree char *clint_name = NULL; g_autofree uint32_t *clint_cells = NULL; - unsigned long clint_addr; + hwaddr clint_addr; MachineState *ms = MACHINE(s); static const char * const clint_compat[2] = { "sifive,clint0", "riscv,clint0" @@ -XXX,XX +XXX,XX @@ static void create_fdt_socket_clint(RISCVVirtState *s, } clint_addr = s->memmap[VIRT_CLINT].base + - (s->memmap[VIRT_CLINT].size * socket); - clint_name = g_strdup_printf("/soc/clint@%lx", clint_addr); + s->memmap[VIRT_CLINT].size * socket; + clint_name = g_strdup_printf("/soc/clint@%"HWADDR_PRIx, clint_addr); qemu_fdt_add_subnode(ms->fdt, clint_name); qemu_fdt_setprop_string_array(ms->fdt, clint_name, "compatible", (char **)&clint_compat, -- 2.50.0
From: Joel Stanley <joel@jms.id.au> The current device tree property uses two cells for the address (and for the size), but assumes the they are less than 32 bits by hard coding the high cell to zero. Use qemu_fdt_setprop_sized_cells to do the job of splitting the upper and lower 32 bits across cells. Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Signed-off-by: Joel Stanley <joel@jms.id.au> Message-ID: <20250604025450.85327-3-joel@jms.id.au> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- hw/riscv/virt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c index XXXXXXX..XXXXXXX 100644 --- a/hw/riscv/virt.c +++ b/hw/riscv/virt.c @@ -XXX,XX +XXX,XX @@ static void create_fdt_socket_clint(RISCVVirtState *s, qemu_fdt_setprop_string_array(ms->fdt, clint_name, "compatible", (char **)&clint_compat, ARRAY_SIZE(clint_compat)); - qemu_fdt_setprop_cells(ms->fdt, clint_name, "reg", - 0x0, clint_addr, 0x0, s->memmap[VIRT_CLINT].size); + qemu_fdt_setprop_sized_cells(ms->fdt, clint_name, "reg", + 2, clint_addr, 2, s->memmap[VIRT_CLINT].size); qemu_fdt_setprop(ms->fdt, clint_name, "interrupts-extended", clint_cells, s->soc[socket].num_harts * sizeof(uint32_t) * 4); riscv_socket_fdt_write_id(ms, clint_name, socket); -- 2.50.0
From: Joel Stanley <joel@jms.id.au> Use qemu_fdt_setprop_sized_cells to do the job of splitting the upper and lower 32 bits across cells. Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Signed-off-by: Joel Stanley <joel@jms.id.au> Message-ID: <20250604025450.85327-4-joel@jms.id.au> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- hw/riscv/virt.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c index XXXXXXX..XXXXXXX 100644 --- a/hw/riscv/virt.c +++ b/hw/riscv/virt.c @@ -XXX,XX +XXX,XX @@ static void create_fdt_socket_memory(RISCVVirtState *s, int socket) size = riscv_socket_mem_size(ms, socket); mem_name = g_strdup_printf("/memory@%"HWADDR_PRIx, addr); qemu_fdt_add_subnode(ms->fdt, mem_name); - qemu_fdt_setprop_cells(ms->fdt, mem_name, "reg", - addr >> 32, addr, size >> 32, size); + qemu_fdt_setprop_sized_cells(ms->fdt, mem_name, "reg", 2, addr, 2, size); qemu_fdt_setprop_string(ms->fdt, mem_name, "device_type", "memory"); riscv_socket_fdt_write_id(ms, mem_name, socket); } -- 2.50.0
From: Joel Stanley <joel@jms.id.au> The current device tree property uses two cells for the address (and for the size), but assumes the they are less than 32 bits by hard coding the high cell to zero. Use qemu_fdt_setprop_sized_cells to do the job of splitting the upper and lower 32 bits across cells. Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Signed-off-by: Joel Stanley <joel@jms.id.au> Message-ID: <20250604025450.85327-5-joel@jms.id.au> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- hw/riscv/virt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c index XXXXXXX..XXXXXXX 100644 --- a/hw/riscv/virt.c +++ b/hw/riscv/virt.c @@ -XXX,XX +XXX,XX @@ static void create_fdt_one_aplic(RISCVVirtState *s, int socket, qemu_fdt_setprop_cell(ms->fdt, aplic_name, "msi-parent", msi_phandle); } - qemu_fdt_setprop_cells(ms->fdt, aplic_name, "reg", - 0x0, aplic_addr, 0x0, aplic_size); + qemu_fdt_setprop_sized_cells(ms->fdt, aplic_name, "reg", + 2, aplic_addr, 2, aplic_size); qemu_fdt_setprop_cell(ms->fdt, aplic_name, "riscv,num-sources", VIRT_IRQCHIP_NUM_SOURCES); -- 2.50.0
From: Joel Stanley <joel@jms.id.au> The current device tree property uses two cells for the address (and for the size), but assumes the they are less than 32 bits by hard coding the high cell to zero. Use qemu_fdt_setprop_sized_cells to do the job of splitting the upper and lower 32 bits across cells. Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Signed-off-by: Joel Stanley <joel@jms.id.au> Message-ID: <20250604025450.85327-6-joel@jms.id.au> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- hw/riscv/virt.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c index XXXXXXX..XXXXXXX 100644 --- a/hw/riscv/virt.c +++ b/hw/riscv/virt.c @@ -XXX,XX +XXX,XX @@ static void create_fdt_socket_aclint(RISCVVirtState *s, qemu_fdt_add_subnode(ms->fdt, name); qemu_fdt_setprop_string(ms->fdt, name, "compatible", "riscv,aclint-mswi"); - qemu_fdt_setprop_cells(ms->fdt, name, "reg", - 0x0, addr, 0x0, RISCV_ACLINT_SWI_SIZE); + qemu_fdt_setprop_sized_cells(ms->fdt, name, "reg", + 2, addr, 2, RISCV_ACLINT_SWI_SIZE); qemu_fdt_setprop(ms->fdt, name, "interrupts-extended", aclint_mswi_cells, aclint_cells_size); qemu_fdt_setprop(ms->fdt, name, "interrupt-controller", NULL, 0); @@ -XXX,XX +XXX,XX @@ static void create_fdt_socket_aclint(RISCVVirtState *s, qemu_fdt_add_subnode(ms->fdt, name); qemu_fdt_setprop_string(ms->fdt, name, "compatible", "riscv,aclint-mtimer"); - qemu_fdt_setprop_cells(ms->fdt, name, "reg", - 0x0, addr + RISCV_ACLINT_DEFAULT_MTIME, - 0x0, size - RISCV_ACLINT_DEFAULT_MTIME, - 0x0, addr + RISCV_ACLINT_DEFAULT_MTIMECMP, - 0x0, RISCV_ACLINT_DEFAULT_MTIME); + qemu_fdt_setprop_sized_cells(ms->fdt, name, "reg", + 2, addr + RISCV_ACLINT_DEFAULT_MTIME, + 2, size - RISCV_ACLINT_DEFAULT_MTIME, + 2, addr + RISCV_ACLINT_DEFAULT_MTIMECMP, + 2, RISCV_ACLINT_DEFAULT_MTIME); qemu_fdt_setprop(ms->fdt, name, "interrupts-extended", aclint_mtimer_cells, aclint_cells_size); riscv_socket_fdt_write_id(ms, name, socket); @@ -XXX,XX +XXX,XX @@ static void create_fdt_socket_aclint(RISCVVirtState *s, qemu_fdt_add_subnode(ms->fdt, name); qemu_fdt_setprop_string(ms->fdt, name, "compatible", "riscv,aclint-sswi"); - qemu_fdt_setprop_cells(ms->fdt, name, "reg", - 0x0, addr, 0x0, s->memmap[VIRT_ACLINT_SSWI].size); + qemu_fdt_setprop_sized_cells(ms->fdt, name, "reg", + 2, addr, 2, s->memmap[VIRT_ACLINT_SSWI].size); qemu_fdt_setprop(ms->fdt, name, "interrupts-extended", aclint_sswi_cells, aclint_cells_size); qemu_fdt_setprop(ms->fdt, name, "interrupt-controller", NULL, 0); -- 2.50.0
From: Joel Stanley <joel@jms.id.au> The current device tree property uses two cells for the address (and for the size), but assumes the they are less than 32 bits by hard coding the high cell to zero. Use qemu_fdt_setprop_sized_cells to do the job of splitting the upper and lower 32 bits across cells. Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Signed-off-by: Joel Stanley <joel@jms.id.au> Message-ID: <20250604025450.85327-7-joel@jms.id.au> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- hw/riscv/virt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c index XXXXXXX..XXXXXXX 100644 --- a/hw/riscv/virt.c +++ b/hw/riscv/virt.c @@ -XXX,XX +XXX,XX @@ static void create_fdt_socket_plic(RISCVVirtState *s, s->soc[socket].num_harts * sizeof(uint32_t) * 4); } - qemu_fdt_setprop_cells(ms->fdt, plic_name, "reg", - 0x0, plic_addr, 0x0, s->memmap[VIRT_PLIC].size); + qemu_fdt_setprop_sized_cells(ms->fdt, plic_name, "reg", + 2, plic_addr, 2, s->memmap[VIRT_PLIC].size); qemu_fdt_setprop_cell(ms->fdt, plic_name, "riscv,ndev", VIRT_IRQCHIP_NUM_SOURCES - 1); riscv_socket_fdt_write_id(ms, plic_name, socket); -- 2.50.0
From: Joel Stanley <joel@jms.id.au> The current device tree property uses two cells for the address (and for the size), but assumes the they are less than 32 bits by hard coding the high cell to zero. Use qemu_fdt_setprop_sized_cells to do the job of splitting the upper and lower 32 bits across cells. Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Signed-off-by: Joel Stanley <joel@jms.id.au> Message-ID: <20250604025450.85327-8-joel@jms.id.au> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- hw/riscv/virt.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c index XXXXXXX..XXXXXXX 100644 --- a/hw/riscv/virt.c +++ b/hw/riscv/virt.c @@ -XXX,XX +XXX,XX @@ static void create_fdt_virtio(RISCVVirtState *s, uint32_t irq_virtio_phandle) qemu_fdt_add_subnode(ms->fdt, name); qemu_fdt_setprop_string(ms->fdt, name, "compatible", "virtio,mmio"); - qemu_fdt_setprop_cells(ms->fdt, name, "reg", - 0x0, addr, - 0x0, size); + qemu_fdt_setprop_sized_cells(ms->fdt, name, "reg", 2, addr, 2, size); qemu_fdt_setprop_cell(ms->fdt, name, "interrupt-parent", irq_virtio_phandle); if (s->aia_type == VIRT_AIA_TYPE_NONE) { -- 2.50.0
From: Joel Stanley <joel@jms.id.au> The current device tree property uses two cells for the address (and for the size), but assumes the they are less than 32 bits by hard coding the high cell to zero. Use qemu_fdt_setprop_sized_cells to do the job of splitting the upper and lower 32 bits across cells. Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Signed-off-by: Joel Stanley <joel@jms.id.au> Message-ID: <20250604025450.85327-9-joel@jms.id.au> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- hw/riscv/virt.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c index XXXXXXX..XXXXXXX 100644 --- a/hw/riscv/virt.c +++ b/hw/riscv/virt.c @@ -XXX,XX +XXX,XX @@ static void create_fdt_reset(RISCVVirtState *s, uint32_t *phandle) qemu_fdt_setprop_string_array(ms->fdt, name, "compatible", (char **)&compat, ARRAY_SIZE(compat)); } - qemu_fdt_setprop_cells(ms->fdt, name, "reg", - 0x0, s->memmap[VIRT_TEST].base, 0x0, s->memmap[VIRT_TEST].size); + qemu_fdt_setprop_sized_cells(ms->fdt, name, "reg", + 2, s->memmap[VIRT_TEST].base, + 2, s->memmap[VIRT_TEST].size); qemu_fdt_setprop_cell(ms->fdt, name, "phandle", test_phandle); test_phandle = qemu_fdt_get_phandle(ms->fdt, name); g_free(name); -- 2.50.0
From: Joel Stanley <joel@jms.id.au> The current device tree property uses two cells for the address (and for the size), but assumes the they are less than 32 bits by hard coding the high cell to zero. Use qemu_fdt_setprop_sized_cells to do the job of splitting the upper and lower 32 bits across cells. Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Signed-off-by: Joel Stanley <joel@jms.id.au> Message-ID: <20250604025450.85327-10-joel@jms.id.au> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- hw/riscv/virt.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c index XXXXXXX..XXXXXXX 100644 --- a/hw/riscv/virt.c +++ b/hw/riscv/virt.c @@ -XXX,XX +XXX,XX @@ static void create_fdt_uart(RISCVVirtState *s, s->memmap[VIRT_UART0].base); qemu_fdt_add_subnode(ms->fdt, name); qemu_fdt_setprop_string(ms->fdt, name, "compatible", "ns16550a"); - qemu_fdt_setprop_cells(ms->fdt, name, "reg", - 0x0, s->memmap[VIRT_UART0].base, - 0x0, s->memmap[VIRT_UART0].size); + qemu_fdt_setprop_sized_cells(ms->fdt, name, "reg", + 2, s->memmap[VIRT_UART0].base, + 2, s->memmap[VIRT_UART0].size); qemu_fdt_setprop_cell(ms->fdt, name, "clock-frequency", 3686400); qemu_fdt_setprop_cell(ms->fdt, name, "interrupt-parent", irq_mmio_phandle); if (s->aia_type == VIRT_AIA_TYPE_NONE) { -- 2.50.0
From: Joel Stanley <joel@jms.id.au> The current device tree property uses two cells for the address (and for the size), but assumes the they are less than 32 bits by hard coding the high cell to zero. Use qemu_fdt_setprop_sized_cells to do the job of splitting the upper and lower 32 bits across cells. Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Signed-off-by: Joel Stanley <joel@jms.id.au> Message-ID: <20250604025450.85327-11-joel@jms.id.au> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- hw/riscv/virt.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c index XXXXXXX..XXXXXXX 100644 --- a/hw/riscv/virt.c +++ b/hw/riscv/virt.c @@ -XXX,XX +XXX,XX @@ static void create_fdt_rtc(RISCVVirtState *s, qemu_fdt_add_subnode(ms->fdt, name); qemu_fdt_setprop_string(ms->fdt, name, "compatible", "google,goldfish-rtc"); - qemu_fdt_setprop_cells(ms->fdt, name, "reg", - 0x0, s->memmap[VIRT_RTC].base, 0x0, s->memmap[VIRT_RTC].size); + qemu_fdt_setprop_sized_cells(ms->fdt, name, "reg", + 2, s->memmap[VIRT_RTC].base, + 2, s->memmap[VIRT_RTC].size); qemu_fdt_setprop_cell(ms->fdt, name, "interrupt-parent", irq_mmio_phandle); if (s->aia_type == VIRT_AIA_TYPE_NONE) { -- 2.50.0
From: Joel Stanley <joel@jms.id.au> The current device tree property uses two cells for the address (and for the size), but assumes the they are less than 32 bits by hard coding the high cell to zero. Use qemu_fdt_setprop_sized_cells to do the job of splitting the upper and lower 32 bits across cells. Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Signed-off-by: Joel Stanley <joel@jms.id.au> Message-ID: <20250604025450.85327-12-joel@jms.id.au> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- hw/riscv/virt.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c index XXXXXXX..XXXXXXX 100644 --- a/hw/riscv/virt.c +++ b/hw/riscv/virt.c @@ -XXX,XX +XXX,XX @@ static void create_fdt_iommu_sys(RISCVVirtState *s, uint32_t irq_chip, qemu_fdt_setprop_cell(fdt, iommu_node, "#iommu-cells", 1); qemu_fdt_setprop_cell(fdt, iommu_node, "phandle", iommu_phandle); - qemu_fdt_setprop_cells(fdt, iommu_node, "reg", - addr >> 32, addr, size >> 32, size); + qemu_fdt_setprop_sized_cells(fdt, iommu_node, "reg", 2, addr, 2, size); qemu_fdt_setprop_cell(fdt, iommu_node, "interrupt-parent", irq_chip); qemu_fdt_setprop_cells(fdt, iommu_node, "interrupts", -- 2.50.0
From: Joel Stanley <joel@jms.id.au> The current device tree property uses two cells for the address (and for the size), but assumes the they are less than 32 bits by hard coding the high cell to zero. Use qemu_fdt_setprop_sized_cells to do the job of splitting the upper and lower 32 bits across cells. Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Signed-off-by: Joel Stanley <joel@jms.id.au> Message-ID: <20250604025450.85327-13-joel@jms.id.au> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- hw/riscv/virt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c index XXXXXXX..XXXXXXX 100644 --- a/hw/riscv/virt.c +++ b/hw/riscv/virt.c @@ -XXX,XX +XXX,XX @@ static void create_fdt_pcie(RISCVVirtState *s, if (s->aia_type == VIRT_AIA_TYPE_APLIC_IMSIC) { qemu_fdt_setprop_cell(ms->fdt, name, "msi-parent", msi_pcie_phandle); } - qemu_fdt_setprop_cells(ms->fdt, name, "reg", 0, - s->memmap[VIRT_PCIE_ECAM].base, 0, s->memmap[VIRT_PCIE_ECAM].size); + qemu_fdt_setprop_sized_cells(ms->fdt, name, "reg", 2, + s->memmap[VIRT_PCIE_ECAM].base, 2, s->memmap[VIRT_PCIE_ECAM].size); qemu_fdt_setprop_sized_cells(ms->fdt, name, "ranges", 1, FDT_PCI_RANGE_IOPORT, 2, 0, 2, s->memmap[VIRT_PCIE_PIO].base, 2, s->memmap[VIRT_PCIE_PIO].size, -- 2.50.0
From: Huang Borong <3543977024@qq.com> Add a CPU entry for the Xiangshan Kunminghu CPU, an open-source, high-performance RISC-V processor. More details can be found at: https://github.com/OpenXiangShan/XiangShan Note: The ISA extensions supported by the Xiangshan Kunminghu CPU are categorized based on four RISC-V specifications: Volume I: Unprivileged Architecture, Volume II: Privileged Architecture, AIA, and RVA23. The extensions within each category are organized according to the chapter order in the specifications. Signed-off-by: Yu Hu <huyu@bosc.ac.cn> Signed-off-by: Ran Wang <wangran@bosc.ac.cn> Signed-off-by: Borong Huang <3543977024@qq.com> Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> Acked-by: Alistair Francis <alistair.francis@wdc.com> Message-ID: <20250425122212.364-1-wangran@bosc.ac.cn> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/cpu-qom.h | 1 + target/riscv/cpu.c | 58 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+) diff --git a/target/riscv/cpu-qom.h b/target/riscv/cpu-qom.h index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/cpu-qom.h +++ b/target/riscv/cpu-qom.h @@ -XXX,XX +XXX,XX @@ #define TYPE_RISCV_CPU_VEYRON_V1 RISCV_CPU_TYPE_NAME("veyron-v1") #define TYPE_RISCV_CPU_TT_ASCALON RISCV_CPU_TYPE_NAME("tt-ascalon") #define TYPE_RISCV_CPU_XIANGSHAN_NANHU RISCV_CPU_TYPE_NAME("xiangshan-nanhu") +#define TYPE_RISCV_CPU_XIANGSHAN_KMH RISCV_CPU_TYPE_NAME("xiangshan-kunminghu") #define TYPE_RISCV_CPU_HOST RISCV_CPU_TYPE_NAME("host") OBJECT_DECLARE_CPU_TYPE(RISCVCPU, RISCVCPUClass, RISCV_CPU) diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/cpu.c +++ b/target/riscv/cpu.c @@ -XXX,XX +XXX,XX @@ static const TypeInfo riscv_cpu_type_infos[] = { .cfg.max_satp_mode = VM_1_10_SV39, ), + DEFINE_RISCV_CPU(TYPE_RISCV_CPU_XIANGSHAN_KMH, TYPE_RISCV_VENDOR_CPU, + .misa_mxl_max = MXL_RV64, + .misa_ext = RVG | RVC | RVB | RVS | RVU | RVH | RVV, + .priv_spec = PRIV_VERSION_1_13_0, + /* + * The RISC-V Instruction Set Manual: Volume I + * Unprivileged Architecture + */ + .cfg.ext_zicntr = true, + .cfg.ext_zihpm = true, + .cfg.ext_zihintntl = true, + .cfg.ext_zihintpause = true, + .cfg.ext_zimop = true, + .cfg.ext_zcmop = true, + .cfg.ext_zicond = true, + .cfg.ext_zawrs = true, + .cfg.ext_zacas = true, + .cfg.ext_zfh = true, + .cfg.ext_zfa = true, + .cfg.ext_zcb = true, + .cfg.ext_zbc = true, + .cfg.ext_zvfh = true, + .cfg.ext_zkn = true, + .cfg.ext_zks = true, + .cfg.ext_zkt = true, + .cfg.ext_zvbb = true, + .cfg.ext_zvkt = true, + /* + * The RISC-V Instruction Set Manual: Volume II + * Privileged Architecture + */ + .cfg.ext_smstateen = true, + .cfg.ext_smcsrind = true, + .cfg.ext_sscsrind = true, + .cfg.ext_svnapot = true, + .cfg.ext_svpbmt = true, + .cfg.ext_svinval = true, + .cfg.ext_sstc = true, + .cfg.ext_sscofpmf = true, + .cfg.ext_ssdbltrp = true, + .cfg.ext_ssnpm = true, + .cfg.ext_smnpm = true, + .cfg.ext_smmpm = true, + .cfg.ext_sspm = true, + .cfg.ext_supm = true, + /* The RISC-V Advanced Interrupt Architecture */ + .cfg.ext_smaia = true, + .cfg.ext_ssaia = true, + /* RVA23 Profiles */ + .cfg.ext_zicbom = true, + .cfg.ext_zicbop = true, + .cfg.ext_zicboz = true, + .cfg.ext_svade = true, + .cfg.mmu = true, + .cfg.pmp = true, + .cfg.max_satp_mode = VM_1_10_SV48, + ), + #if defined(CONFIG_TCG) && !defined(CONFIG_USER_ONLY) DEFINE_RISCV_CPU(TYPE_RISCV_CPU_BASE128, TYPE_RISCV_DYNAMIC_CPU, .cfg.max_satp_mode = VM_1_10_SV57, -- 2.50.0
From: Huang Borong <3543977024@qq.com> This implementation provides emulation for the Xiangshan Kunminghu FPGA prototype platform, including support for UART, CLINT, IMSIC, and APLIC devices. More details can be found at https://github.com/OpenXiangShan/XiangShan Signed-off-by: qinshaoqing <qinshaoqing@bosc.ac.cn> Signed-off-by: Yang Wang <wangyang@bosc.ac.cn> Signed-off-by: Yu Hu <819258943@qq.com> Signed-off-by: Ran Wang <wangran@bosc.ac.cn> Signed-off-by: Borong Huang <3543977024@qq.com> Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> Message-ID: <20250617074222.17618-1-wangran@bosc.ac.cn> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- MAINTAINERS | 7 + docs/system/riscv/xiangshan-kunminghu.rst | 39 ++++ docs/system/target-riscv.rst | 1 + configs/devices/riscv64-softmmu/default.mak | 1 + include/hw/riscv/xiangshan_kmh.h | 68 ++++++ hw/riscv/xiangshan_kmh.c | 220 ++++++++++++++++++++ hw/riscv/Kconfig | 9 + hw/riscv/meson.build | 1 + 8 files changed, 346 insertions(+) create mode 100644 docs/system/riscv/xiangshan-kunminghu.rst create mode 100644 include/hw/riscv/xiangshan_kmh.h create mode 100644 hw/riscv/xiangshan_kmh.c diff --git a/MAINTAINERS b/MAINTAINERS index XXXXXXX..XXXXXXX 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -XXX,XX +XXX,XX @@ S: Maintained F: hw/riscv/microblaze-v-generic.c F: docs/system/riscv/microblaze-v-generic.rst +Xiangshan Kunminghu +M: Ran Wang <wangran@bosc.ac.cn> +S: Maintained +F: docs/system/riscv/xiangshan-kunminghu.rst +F: hw/riscv/xiangshan_kmh.c +F: include/hw/riscv/xiangshan_kmh.h + RX Machines ----------- rx-gdbsim diff --git a/docs/system/riscv/xiangshan-kunminghu.rst b/docs/system/riscv/xiangshan-kunminghu.rst new file mode 100644 index XXXXXXX..XXXXXXX --- /dev/null +++ b/docs/system/riscv/xiangshan-kunminghu.rst @@ -XXX,XX +XXX,XX @@ +BOSC Xiangshan Kunminghu FPGA prototype platform (``xiangshan-kunminghu``) +========================================================================== +The ``xiangshan-kunminghu`` machine is compatible with our FPGA prototype +platform. + +XiangShan is an open-source high-performance RISC-V processor project. +The third generation processor is called Kunminghu. Kunminghu is a 64-bit +RV64GCBSUHV processor core. More information can be found in our Github +repository: +https://github.com/OpenXiangShan/XiangShan + +Supported devices +----------------- +The ``xiangshan-kunminghu`` machine supports the following devices: + +* Up to 16 xiangshan-kunminghu cores +* Core Local Interruptor (CLINT) +* Incoming MSI Controller (IMSIC) +* Advanced Platform-Level Interrupt Controller (APLIC) +* 1 UART + +Boot options +------------ +The ``xiangshan-kunminghu`` machine can start using the standard ``-bios`` +functionality for loading the boot image. You need to compile and link +the firmware, kernel, and Device Tree (FDT) into a single binary file, +such as ``fw_payload.bin``. + +Running +------- +Below is an example command line for running the ``xiangshan-kunminghu`` +machine: + +.. code-block:: bash + + $ qemu-system-riscv64 -machine xiangshan-kunminghu \ + -smp 16 -m 16G \ + -bios path/to/opensbi/platform/generic/firmware/fw_payload.bin \ + -nographic diff --git a/docs/system/target-riscv.rst b/docs/system/target-riscv.rst index XXXXXXX..XXXXXXX 100644 --- a/docs/system/target-riscv.rst +++ b/docs/system/target-riscv.rst @@ -XXX,XX +XXX,XX @@ undocumented; you can get a complete list by running riscv/shakti-c riscv/sifive_u riscv/virt + riscv/xiangshan-kunminghu RISC-V CPU firmware ------------------- diff --git a/configs/devices/riscv64-softmmu/default.mak b/configs/devices/riscv64-softmmu/default.mak index XXXXXXX..XXXXXXX 100644 --- a/configs/devices/riscv64-softmmu/default.mak +++ b/configs/devices/riscv64-softmmu/default.mak @@ -XXX,XX +XXX,XX @@ # CONFIG_RISCV_VIRT=n # CONFIG_MICROCHIP_PFSOC=n # CONFIG_SHAKTI_C=n +# CONFIG_XIANGSHAN_KUNMINGHU=n diff --git a/include/hw/riscv/xiangshan_kmh.h b/include/hw/riscv/xiangshan_kmh.h new file mode 100644 index XXXXXXX..XXXXXXX --- /dev/null +++ b/include/hw/riscv/xiangshan_kmh.h @@ -XXX,XX +XXX,XX @@ +/* SPDX-License-Identifier: BSD-2-Clause */ +/* + * QEMU RISC-V Board Compatible with the Xiangshan Kunminghu + * FPGA prototype platform + * + * Copyright (c) 2025 Beijing Institute of Open Source Chip (BOSC) + * + */ + +#ifndef HW_XIANGSHAN_KMH_H +#define HW_XIANGSHAN_KMH_H + +#include "hw/boards.h" +#include "hw/riscv/riscv_hart.h" + +#define XIANGSHAN_KMH_MAX_CPUS 16 + +typedef struct XiangshanKmhSoCState { + /*< private >*/ + DeviceState parent_obj; + + /*< public >*/ + RISCVHartArrayState cpus; + DeviceState *irqchip; + MemoryRegion rom; +} XiangshanKmhSoCState; + +#define TYPE_XIANGSHAN_KMH_SOC "xiangshan.kunminghu.soc" +DECLARE_INSTANCE_CHECKER(XiangshanKmhSoCState, XIANGSHAN_KMH_SOC, + TYPE_XIANGSHAN_KMH_SOC) + +typedef struct XiangshanKmhState { + /*< private >*/ + MachineState parent_obj; + + /*< public >*/ + XiangshanKmhSoCState soc; +} XiangshanKmhState; + +#define TYPE_XIANGSHAN_KMH_MACHINE MACHINE_TYPE_NAME("xiangshan-kunminghu") +DECLARE_INSTANCE_CHECKER(XiangshanKmhState, XIANGSHAN_KMH_MACHINE, + TYPE_XIANGSHAN_KMH_MACHINE) + +enum { + XIANGSHAN_KMH_ROM, + XIANGSHAN_KMH_UART0, + XIANGSHAN_KMH_CLINT, + XIANGSHAN_KMH_APLIC_M, + XIANGSHAN_KMH_APLIC_S, + XIANGSHAN_KMH_IMSIC_M, + XIANGSHAN_KMH_IMSIC_S, + XIANGSHAN_KMH_DRAM, +}; + +enum { + XIANGSHAN_KMH_UART0_IRQ = 10, +}; + +/* Indicating Timebase-freq (1MHZ) */ +#define XIANGSHAN_KMH_CLINT_TIMEBASE_FREQ 1000000 + +#define XIANGSHAN_KMH_IMSIC_NUM_IDS 255 +#define XIANGSHAN_KMH_IMSIC_NUM_GUESTS 7 +#define XIANGSHAN_KMH_IMSIC_GUEST_BITS 3 + +#define XIANGSHAN_KMH_APLIC_NUM_SOURCES 96 + +#endif diff --git a/hw/riscv/xiangshan_kmh.c b/hw/riscv/xiangshan_kmh.c new file mode 100644 index XXXXXXX..XXXXXXX --- /dev/null +++ b/hw/riscv/xiangshan_kmh.c @@ -XXX,XX +XXX,XX @@ +/* + * QEMU RISC-V Board Compatible with the Xiangshan Kunminghu + * FPGA prototype platform + * + * Copyright (c) 2025 Beijing Institute of Open Source Chip (BOSC) + * SPDX-License-Identifier: GPL-2.0-or-later + * + * Provides a board compatible with the Xiangshan Kunminghu + * FPGA prototype platform: + * + * 0) UART (16550A) + * 1) CLINT (Core-Local Interruptor) + * 2) IMSIC (Incoming MSI Controller) + * 3) APLIC (Advanced Platform-Level Interrupt Controller) + * + * More information can be found in our Github repository: + * https://github.com/OpenXiangShan/XiangShan + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "system/address-spaces.h" +#include "hw/boards.h" +#include "hw/char/serial-mm.h" +#include "hw/intc/riscv_aclint.h" +#include "hw/intc/riscv_aplic.h" +#include "hw/intc/riscv_imsic.h" +#include "hw/qdev-properties.h" +#include "hw/riscv/boot.h" +#include "hw/riscv/xiangshan_kmh.h" +#include "hw/riscv/riscv_hart.h" +#include "system/system.h" + +static const MemMapEntry xiangshan_kmh_memmap[] = { + [XIANGSHAN_KMH_ROM] = { 0x1000, 0xF000 }, + [XIANGSHAN_KMH_UART0] = { 0x310B0000, 0x10000 }, + [XIANGSHAN_KMH_CLINT] = { 0x38000000, 0x10000 }, + [XIANGSHAN_KMH_APLIC_M] = { 0x31100000, 0x4000 }, + [XIANGSHAN_KMH_APLIC_S] = { 0x31120000, 0x4000 }, + [XIANGSHAN_KMH_IMSIC_M] = { 0x3A800000, 0x10000 }, + [XIANGSHAN_KMH_IMSIC_S] = { 0x3B000000, 0x80000 }, + [XIANGSHAN_KMH_DRAM] = { 0x80000000, 0x0 }, +}; + +static DeviceState *xiangshan_kmh_create_aia(uint32_t num_harts) +{ + int i; + const MemMapEntry *memmap = xiangshan_kmh_memmap; + hwaddr addr = 0; + DeviceState *aplic_m = NULL; + + /* M-level IMSICs */ + addr = memmap[XIANGSHAN_KMH_IMSIC_M].base; + for (i = 0; i < num_harts; i++) { + riscv_imsic_create(addr + i * IMSIC_HART_SIZE(0), i, true, + 1, XIANGSHAN_KMH_IMSIC_NUM_IDS); + } + + /* S-level IMSICs */ + addr = memmap[XIANGSHAN_KMH_IMSIC_S].base; + for (i = 0; i < num_harts; i++) { + riscv_imsic_create(addr + + i * IMSIC_HART_SIZE(XIANGSHAN_KMH_IMSIC_GUEST_BITS), + i, false, 1 + XIANGSHAN_KMH_IMSIC_GUEST_BITS, + XIANGSHAN_KMH_IMSIC_NUM_IDS); + } + + /* M-level APLIC */ + aplic_m = riscv_aplic_create(memmap[XIANGSHAN_KMH_APLIC_M].base, + memmap[XIANGSHAN_KMH_APLIC_M].size, + 0, 0, XIANGSHAN_KMH_APLIC_NUM_SOURCES, + 1, true, true, NULL); + + /* S-level APLIC */ + riscv_aplic_create(memmap[XIANGSHAN_KMH_APLIC_S].base, + memmap[XIANGSHAN_KMH_APLIC_S].size, + 0, 0, XIANGSHAN_KMH_APLIC_NUM_SOURCES, + 1, true, false, aplic_m); + + return aplic_m; +} + +static void xiangshan_kmh_soc_realize(DeviceState *dev, Error **errp) +{ + MachineState *ms = MACHINE(qdev_get_machine()); + XiangshanKmhSoCState *s = XIANGSHAN_KMH_SOC(dev); + const MemMapEntry *memmap = xiangshan_kmh_memmap; + MemoryRegion *system_memory = get_system_memory(); + uint32_t num_harts = ms->smp.cpus; + + qdev_prop_set_uint32(DEVICE(&s->cpus), "num-harts", num_harts); + qdev_prop_set_uint32(DEVICE(&s->cpus), "hartid-base", 0); + qdev_prop_set_string(DEVICE(&s->cpus), "cpu-type", + TYPE_RISCV_CPU_XIANGSHAN_KMH); + sysbus_realize(SYS_BUS_DEVICE(&s->cpus), &error_fatal); + + /* AIA */ + s->irqchip = xiangshan_kmh_create_aia(num_harts); + + /* UART */ + serial_mm_init(system_memory, memmap[XIANGSHAN_KMH_UART0].base, 2, + qdev_get_gpio_in(s->irqchip, XIANGSHAN_KMH_UART0_IRQ), + 115200, serial_hd(0), DEVICE_LITTLE_ENDIAN); + + /* CLINT */ + riscv_aclint_swi_create(memmap[XIANGSHAN_KMH_CLINT].base, + 0, num_harts, false); + riscv_aclint_mtimer_create(memmap[XIANGSHAN_KMH_CLINT].base + + RISCV_ACLINT_SWI_SIZE, + RISCV_ACLINT_DEFAULT_MTIMER_SIZE, + 0, num_harts, RISCV_ACLINT_DEFAULT_MTIMECMP, + RISCV_ACLINT_DEFAULT_MTIME, + XIANGSHAN_KMH_CLINT_TIMEBASE_FREQ, true); + + /* ROM */ + memory_region_init_rom(&s->rom, OBJECT(dev), "xiangshan.kunminghu.rom", + memmap[XIANGSHAN_KMH_ROM].size, &error_fatal); + memory_region_add_subregion(system_memory, + memmap[XIANGSHAN_KMH_ROM].base, &s->rom); +} + +static void xiangshan_kmh_soc_class_init(ObjectClass *klass, const void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->realize = xiangshan_kmh_soc_realize; + dc->user_creatable = false; +} + +static void xiangshan_kmh_soc_instance_init(Object *obj) +{ + XiangshanKmhSoCState *s = XIANGSHAN_KMH_SOC(obj); + + object_initialize_child(obj, "cpus", &s->cpus, TYPE_RISCV_HART_ARRAY); +} + +static const TypeInfo xiangshan_kmh_soc_info = { + .name = TYPE_XIANGSHAN_KMH_SOC, + .parent = TYPE_DEVICE, + .instance_size = sizeof(XiangshanKmhSoCState), + .instance_init = xiangshan_kmh_soc_instance_init, + .class_init = xiangshan_kmh_soc_class_init, +}; + +static void xiangshan_kmh_soc_register_types(void) +{ + type_register_static(&xiangshan_kmh_soc_info); +} +type_init(xiangshan_kmh_soc_register_types) + +static void xiangshan_kmh_machine_init(MachineState *machine) +{ + XiangshanKmhState *s = XIANGSHAN_KMH_MACHINE(machine); + const MemMapEntry *memmap = xiangshan_kmh_memmap; + MemoryRegion *system_memory = get_system_memory(); + hwaddr start_addr = memmap[XIANGSHAN_KMH_DRAM].base; + + /* Initialize SoC */ + object_initialize_child(OBJECT(machine), "soc", &s->soc, + TYPE_XIANGSHAN_KMH_SOC); + qdev_realize(DEVICE(&s->soc), NULL, &error_fatal); + + /* Register RAM */ + memory_region_add_subregion(system_memory, + memmap[XIANGSHAN_KMH_DRAM].base, + machine->ram); + + /* ROM reset vector */ + riscv_setup_rom_reset_vec(machine, &s->soc.cpus, + start_addr, + memmap[XIANGSHAN_KMH_ROM].base, + memmap[XIANGSHAN_KMH_ROM].size, 0, 0); + if (machine->firmware) { + riscv_load_firmware(machine->firmware, &start_addr, NULL); + } + + /* Note: dtb has been integrated into firmware(OpenSBI) when compiling */ +} + +static void xiangshan_kmh_machine_class_init(ObjectClass *klass, const void *data) +{ + MachineClass *mc = MACHINE_CLASS(klass); + static const char *const valid_cpu_types[] = { + TYPE_RISCV_CPU_XIANGSHAN_KMH, + NULL + }; + + mc->desc = "RISC-V Board compatible with the Xiangshan " \ + "Kunminghu FPGA prototype platform"; + mc->init = xiangshan_kmh_machine_init; + mc->max_cpus = XIANGSHAN_KMH_MAX_CPUS; + mc->default_cpu_type = TYPE_RISCV_CPU_XIANGSHAN_KMH; + mc->valid_cpu_types = valid_cpu_types; + mc->default_ram_id = "xiangshan.kunminghu.ram"; +} + +static const TypeInfo xiangshan_kmh_machine_info = { + .name = TYPE_XIANGSHAN_KMH_MACHINE, + .parent = TYPE_MACHINE, + .instance_size = sizeof(XiangshanKmhState), + .class_init = xiangshan_kmh_machine_class_init, +}; + +static void xiangshan_kmh_machine_register_types(void) +{ + type_register_static(&xiangshan_kmh_machine_info); +} +type_init(xiangshan_kmh_machine_register_types) diff --git a/hw/riscv/Kconfig b/hw/riscv/Kconfig index XXXXXXX..XXXXXXX 100644 --- a/hw/riscv/Kconfig +++ b/hw/riscv/Kconfig @@ -XXX,XX +XXX,XX @@ config SPIKE select HTIF select RISCV_ACLINT select SIFIVE_PLIC + +config XIANGSHAN_KUNMINGHU + bool + default y + depends on RISCV64 + select RISCV_ACLINT + select RISCV_APLIC + select RISCV_IMSIC + select SERIAL_MM diff --git a/hw/riscv/meson.build b/hw/riscv/meson.build index XXXXXXX..XXXXXXX 100644 --- a/hw/riscv/meson.build +++ b/hw/riscv/meson.build @@ -XXX,XX +XXX,XX @@ riscv_ss.add(when: 'CONFIG_ACPI', if_true: files('virt-acpi-build.c')) riscv_ss.add(when: 'CONFIG_RISCV_IOMMU', if_true: files( 'riscv-iommu.c', 'riscv-iommu-pci.c', 'riscv-iommu-sys.c', 'riscv-iommu-hpm.c')) riscv_ss.add(when: 'CONFIG_MICROBLAZE_V', if_true: files('microblaze-v-generic.c')) +riscv_ss.add(when: 'CONFIG_XIANGSHAN_KUNMINGHU', if_true: files('xiangshan_kmh.c')) hw_arch += {'riscv': riscv_ss} -- 2.50.0
From: Max Chou <max.chou@sifive.com> According to the V spec, the vector fault-only-first load instructions may change the VL CSR. So the ldff_trans TCG translation function should generate the lookup_and_goto_ptr flow as the vsetvl/vsetvli translation function to make sure the vl_eq_vlmax TB flag is correct. Signed-off-by: Max Chou <max.chou@sifive.com> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Message-ID: <20250627133013.443997-1-max.chou@sifive.com> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/insn_trans/trans_rvv.c.inc | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn_trans/trans_rvv.c.inc +++ b/target/riscv/insn_trans/trans_rvv.c.inc @@ -XXX,XX +XXX,XX @@ static bool ldff_trans(uint32_t vd, uint32_t rs1, uint32_t data, fn(dest, mask, base, tcg_env, desc); finalize_rvv_inst(s); + + /* vector unit-stride fault-only-first load may modify vl CSR */ + gen_update_pc(s, s->cur_insn_len); + lookup_and_goto_ptr(s); + s->base.is_jmp = DISAS_NORETURN; + return true; } -- 2.50.0
From: "liu.xuemei1@zte.com.cn" <liu.xuemei1@zte.com.cn> Address an error in migration when aia is configured as 'aplic-imsic' in riscv kvm vm by adding riscv_aplic_state_needed() and riscv_imsic_state_needed() to determine whether the corresponding sates are needed. Previously, the fields in the vmsds of 'riscv_aplic' and 'riscv_imsic' can only be initialized under certain special conditions in commit 95a97b3fd2. However, the corresponding ses of these vmsds are inserted into the savevm_state.handlers unconditionally. This led to migration failure characterized by uninitialized fields when save vm state: qemu-system-riscv64: ../migration/vmstate.c:433: vmstate_save_state_v: Assertion 'first_elem || !n_elems || !size' failed. Fixes: 95a97b3fd2 ("target/riscv: update APLIC and IMSIC to support KVM AIA") Signed-off-by: Xuemei Liu <liu.xuemei1@zte.com.cn> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Message-ID: <20250616150034827wuHs_ffe3Qm8cqFXT7HeW@zte.com.cn> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- hw/intc/riscv_aplic.c | 12 ++++++++++-- hw/intc/riscv_imsic.c | 10 ++++++++-- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/hw/intc/riscv_aplic.c b/hw/intc/riscv_aplic.c index XXXXXXX..XXXXXXX 100644 --- a/hw/intc/riscv_aplic.c +++ b/hw/intc/riscv_aplic.c @@ -XXX,XX +XXX,XX @@ static const Property riscv_aplic_properties[] = { DEFINE_PROP_BOOL("mmode", RISCVAPLICState, mmode, 0), }; +static bool riscv_aplic_state_needed(void *opaque) +{ + RISCVAPLICState *aplic = opaque; + + return riscv_use_emulated_aplic(aplic->msimode); +} + static const VMStateDescription vmstate_riscv_aplic = { .name = "riscv_aplic", - .version_id = 2, - .minimum_version_id = 2, + .version_id = 3, + .minimum_version_id = 3, + .needed = riscv_aplic_state_needed, .fields = (const VMStateField[]) { VMSTATE_UINT32(domaincfg, RISCVAPLICState), VMSTATE_UINT32(mmsicfgaddr, RISCVAPLICState), diff --git a/hw/intc/riscv_imsic.c b/hw/intc/riscv_imsic.c index XXXXXXX..XXXXXXX 100644 --- a/hw/intc/riscv_imsic.c +++ b/hw/intc/riscv_imsic.c @@ -XXX,XX +XXX,XX @@ static const Property riscv_imsic_properties[] = { DEFINE_PROP_UINT32("num-irqs", RISCVIMSICState, num_irqs, 0), }; +static bool riscv_imsic_state_needed(void *opaque) +{ + return !kvm_irqchip_in_kernel(); +} + static const VMStateDescription vmstate_riscv_imsic = { .name = "riscv_imsic", - .version_id = 1, - .minimum_version_id = 1, + .version_id = 2, + .minimum_version_id = 2, + .needed = riscv_imsic_state_needed, .fields = (const VMStateField[]) { VMSTATE_VARRAY_UINT32(eidelivery, RISCVIMSICState, num_pages, 0, -- 2.50.0
From: Charalampos Mitrodimas <charmitro@posteo.net> According to the RISC-V Privileged Architecture specification, the low bit of MEPC/SEPC must always be zero. When IALIGN=32, the two low bits must be zero. This commit fixes the behavior of MEPC/SEPC CSR reads and writes, and the implicit reads by MRET/SRET instructions to properly mask the lowest bit(s) based on whether the C extension is enabled: - When C extension is enabled (IALIGN=16): mask bit 0 - When C extension is disabled (IALIGN=32): mask bits [1:0] Previously, when vectored mode bits from STVEC (which sets bit 0 for vectored mode) were written to MEPC, the bits would not be cleared correctly, causing incorrect behavior on MRET. Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2855 Signed-off-by: Charalampos Mitrodimas <charmitro@posteo.net> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> Message-ID: <20250703182157.281320-2-charmitro@posteo.net> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/internals.h | 11 +++++++++++ target/riscv/csr.c | 8 ++++---- target/riscv/op_helper.c | 4 ++-- 3 files changed, 17 insertions(+), 6 deletions(-) diff --git a/target/riscv/internals.h b/target/riscv/internals.h index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/internals.h +++ b/target/riscv/internals.h @@ -XXX,XX +XXX,XX @@ static inline float16 check_nanbox_bf16(CPURISCVState *env, uint64_t f) } } +static inline target_ulong get_xepc_mask(CPURISCVState *env) +{ + /* When IALIGN=32, both low bits must be zero. + * When IALIGN=16 (has C extension), only bit 0 must be zero. */ + if (riscv_has_ext(env, RVC)) { + return ~(target_ulong)1; + } else { + return ~(target_ulong)3; + } +} + #ifndef CONFIG_USER_ONLY /* Our implementation of SysemuCPUOps::has_work */ bool riscv_cpu_has_work(CPUState *cs); diff --git a/target/riscv/csr.c b/target/riscv/csr.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/csr.c +++ b/target/riscv/csr.c @@ -XXX,XX +XXX,XX @@ static RISCVException write_mscratch(CPURISCVState *env, int csrno, static RISCVException read_mepc(CPURISCVState *env, int csrno, target_ulong *val) { - *val = env->mepc; + *val = env->mepc & get_xepc_mask(env); return RISCV_EXCP_NONE; } static RISCVException write_mepc(CPURISCVState *env, int csrno, target_ulong val, uintptr_t ra) { - env->mepc = val; + env->mepc = val & get_xepc_mask(env); return RISCV_EXCP_NONE; } @@ -XXX,XX +XXX,XX @@ static RISCVException write_sscratch(CPURISCVState *env, int csrno, static RISCVException read_sepc(CPURISCVState *env, int csrno, target_ulong *val) { - *val = env->sepc; + *val = env->sepc & get_xepc_mask(env); return RISCV_EXCP_NONE; } static RISCVException write_sepc(CPURISCVState *env, int csrno, target_ulong val, uintptr_t ra) { - env->sepc = val; + env->sepc = val & get_xepc_mask(env); return RISCV_EXCP_NONE; } diff --git a/target/riscv/op_helper.c b/target/riscv/op_helper.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/op_helper.c +++ b/target/riscv/op_helper.c @@ -XXX,XX +XXX,XX @@ target_ulong helper_sret(CPURISCVState *env) riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); } - target_ulong retpc = env->sepc; + target_ulong retpc = env->sepc & get_xepc_mask(env); if (!riscv_cpu_allow_16bit_insn(&env_archcpu(env)->cfg, env->priv_ver, env->misa_ext) && (retpc & 0x3)) { @@ -XXX,XX +XXX,XX @@ static target_ulong ssdbltrp_mxret(CPURISCVState *env, target_ulong mstatus, target_ulong helper_mret(CPURISCVState *env) { - target_ulong retpc = env->mepc; + target_ulong retpc = env->mepc & get_xepc_mask(env); uint64_t mstatus = env->mstatus; target_ulong prev_priv = get_field(mstatus, MSTATUS_MPP); -- 2.50.0
From: Charalampos Mitrodimas <charmitro@posteo.net> Add a regression test to verify that MEPC properly masks the lower bits when an address with mode bits is written to it, as required by the RISC-V Privileged Architecture specification. The test sets STVEC to an address with bit 0 set (vectored mode), triggers an illegal instruction exception, copies STVEC to MEPC in the trap handler, and verifies that MEPC masks bits [1:0] correctly for IALIGN=32. Without the fix, MEPC retains the mode bits (returns non-zero/FAIL). With the fix, MEPC clears bits [1:0] (returns 0/PASS). Signed-off-by: Charalampos Mitrodimas <charmitro@posteo.net> Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> Message-ID: <20250703182157.281320-3-charmitro@posteo.net> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- tests/tcg/riscv64/Makefile.softmmu-target | 4 ++ tests/tcg/riscv64/test-mepc-masking.S | 73 +++++++++++++++++++++++ 2 files changed, 77 insertions(+) create mode 100644 tests/tcg/riscv64/test-mepc-masking.S diff --git a/tests/tcg/riscv64/Makefile.softmmu-target b/tests/tcg/riscv64/Makefile.softmmu-target index XXXXXXX..XXXXXXX 100644 --- a/tests/tcg/riscv64/Makefile.softmmu-target +++ b/tests/tcg/riscv64/Makefile.softmmu-target @@ -XXX,XX +XXX,XX @@ EXTRA_RUNS += run-issue1060 run-issue1060: issue1060 $(call run-test, $<, $(QEMU) $(QEMU_OPTS)$<) +EXTRA_RUNS += run-test-mepc-masking +run-test-mepc-masking: test-mepc-masking + $(call run-test, $<, $(QEMU) $(QEMU_OPTS)$<) + # We don't currently support the multiarch system tests undefine MULTIARCH_TESTS diff --git a/tests/tcg/riscv64/test-mepc-masking.S b/tests/tcg/riscv64/test-mepc-masking.S new file mode 100644 index XXXXXXX..XXXXXXX --- /dev/null +++ b/tests/tcg/riscv64/test-mepc-masking.S @@ -XXX,XX +XXX,XX @@ +/* + * Test for MEPC masking bug fix + * + * This test verifies that MEPC properly masks the lower bits according + * to the RISC-V specification when vectored mode bits from STVEC are + * written to MEPC. + */ + + .option norvc + + .text + .global _start +_start: + /* Set up machine trap vector */ + lla t0, machine_trap_handler + csrw mtvec, t0 + + /* Set STVEC with vectored mode (mode bits = 01) */ + li t0, 0x80004001 + csrw stvec, t0 + + /* Clear medeleg to handle exceptions in M-mode */ + csrw medeleg, zero + + /* Trigger illegal instruction exception */ + .word 0xffffffff + +test_completed: + /* Exit with result in a0 */ + /* a0 = 0: success (bits [1:0] were masked) */ + /* a0 != 0: failure (some bits were not masked) */ + j _exit + +machine_trap_handler: + /* Check if illegal instruction (mcause = 2) */ + csrr t0, mcause + li t1, 2 + bne t0, t1, skip_test + + /* Test: Copy STVEC (with mode bits) to MEPC */ + csrr t0, stvec /* t0 = 0x80004001 */ + csrw mepc, t0 /* Write to MEPC */ + csrr t1, mepc /* Read back MEPC */ + + /* Check if bits [1:0] are masked (IALIGN=32 without RVC) */ + andi a0, t1, 3 /* a0 = 0 if both bits masked correctly */ + + /* Set correct return address */ + lla t0, test_completed + csrw mepc, t0 + +skip_test: + mret + +/* Exit with semihosting */ +_exit: + lla a1, semiargs + li t0, 0x20026 /* ADP_Stopped_ApplicationExit */ + sd t0, 0(a1) + sd a0, 8(a1) + li a0, 0x20 /* TARGET_SYS_EXIT_EXTENDED */ + + /* Semihosting call sequence */ + .balign 16 + slli zero, zero, 0x1f + ebreak + srai zero, zero, 0x7 + j . + + .data + .balign 8 +semiargs: + .space 16 -- 2.50.0
From: Vasilis Liaskovitis <vliaskovitis@suse.com> Usage of vsetvli instruction is reserved if VLMAX is changed when vsetvli rs1 and rd arguments are x0. In this case, if the new property is true, only the vill bit will be set. See https://github.com/riscv/riscv-isa-manual/blob/main/src/v-st-ext.adoc#avl-encoding According to the spec, the above use cases are reserved, and "Implementations may set vill in either case." Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2422 Signed-off-by: Vasilis Liaskovitis <vliaskovitis@suse.com> Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> Message-ID: <20250618213542.22873-1-vliaskovitis@suse.com> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- target/riscv/helper.h | 2 +- target/riscv/cpu_cfg_fields.h.inc | 1 + target/riscv/cpu.c | 1 + target/riscv/vector_helper.c | 12 +++++++++++- target/riscv/insn_trans/trans_rvv.c.inc | 4 ++-- 5 files changed, 16 insertions(+), 4 deletions(-) diff --git a/target/riscv/helper.h b/target/riscv/helper.h index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/helper.h +++ b/target/riscv/helper.h @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_3(hyp_hsv_d, TCG_CALL_NO_WG, void, env, tl, tl) #endif /* Vector functions */ -DEF_HELPER_3(vsetvl, tl, env, tl, tl) +DEF_HELPER_4(vsetvl, tl, env, tl, tl, tl) DEF_HELPER_5(vle8_v, void, ptr, ptr, tl, env, i32) DEF_HELPER_5(vle16_v, void, ptr, ptr, tl, env, i32) DEF_HELPER_5(vle32_v, void, ptr, ptr, tl, env, i32) diff --git a/target/riscv/cpu_cfg_fields.h.inc b/target/riscv/cpu_cfg_fields.h.inc index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/cpu_cfg_fields.h.inc +++ b/target/riscv/cpu_cfg_fields.h.inc @@ -XXX,XX +XXX,XX @@ BOOL_FIELD(ext_supm) BOOL_FIELD(rvv_ta_all_1s) BOOL_FIELD(rvv_ma_all_1s) BOOL_FIELD(rvv_vl_half_avl) +BOOL_FIELD(rvv_vsetvl_x0_vill) /* Named features */ BOOL_FIELD(ext_svade) BOOL_FIELD(ext_zic64b) diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/cpu.c +++ b/target/riscv/cpu.c @@ -XXX,XX +XXX,XX @@ static const Property riscv_cpu_properties[] = { DEFINE_PROP_BOOL("rvv_ta_all_1s", RISCVCPU, cfg.rvv_ta_all_1s, false), DEFINE_PROP_BOOL("rvv_ma_all_1s", RISCVCPU, cfg.rvv_ma_all_1s, false), DEFINE_PROP_BOOL("rvv_vl_half_avl", RISCVCPU, cfg.rvv_vl_half_avl, false), + DEFINE_PROP_BOOL("rvv_vsetvl_x0_vill", RISCVCPU, cfg.rvv_vsetvl_x0_vill, false), /* * write_misa() is marked as experimental for now so mark diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -XXX,XX +XXX,XX @@ #include <math.h> target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1, - target_ulong s2) + target_ulong s2, target_ulong x0) { int vlmax, vl; RISCVCPU *cpu = env_archcpu(env); @@ -XXX,XX +XXX,XX @@ target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1, } else { vl = vlmax; } + + if (cpu->cfg.rvv_vsetvl_x0_vill && x0 && (env->vl != vl)) { + /* only set vill bit. */ + env->vill = 1; + env->vtype = 0; + env->vl = 0; + env->vstart = 0; + return 0; + } + env->vl = vl; env->vtype = s2; env->vstart = 0; diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/insn_trans/trans_rvv.c.inc +++ b/target/riscv/insn_trans/trans_rvv.c.inc @@ -XXX,XX +XXX,XX @@ static bool do_vsetvl(DisasContext *s, int rd, int rs1, TCGv s2) s1 = get_gpr(s, rs1, EXT_ZERO); } - gen_helper_vsetvl(dst, tcg_env, s1, s2); + gen_helper_vsetvl(dst, tcg_env, s1, s2, tcg_constant_tl((int) (rd == 0 && rs1 == 0))); gen_set_gpr(s, rd, dst); finalize_rvv_inst(s); @@ -XXX,XX +XXX,XX @@ static bool do_vsetivli(DisasContext *s, int rd, TCGv s1, TCGv s2) dst = dest_gpr(s, rd); - gen_helper_vsetvl(dst, tcg_env, s1, s2); + gen_helper_vsetvl(dst, tcg_env, s1, s2, tcg_constant_tl(0)); gen_set_gpr(s, rd, dst); finalize_rvv_inst(s); gen_update_pc(s, s->cur_insn_len); -- 2.50.0
From: Alexandre Ghiti <alexghiti@rivosinc.com> The Svrsw60t59b extension allows to free the PTE reserved bits 60 and 59 for software to use. Reviewed-by: Deepak Gupta <debug@rivosinc.com> Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com> Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> Reviewed-by: Nutty Liu<liujingqi@lanxincomputing.com> Message-ID: <20250702-dev-alex-svrsw60b59b_v2-v2-1-504ddf0f8530@rivosinc.com> Signed-off-by: Alistair Francis <alistair.francis@wdc.com> --- hw/riscv/riscv-iommu-bits.h | 1 + target/riscv/cpu_bits.h | 3 ++- target/riscv/cpu_cfg_fields.h.inc | 1 + hw/riscv/riscv-iommu.c | 3 ++- target/riscv/cpu.c | 2 ++ target/riscv/cpu_helper.c | 3 ++- target/riscv/tcg/tcg-cpu.c | 8 ++++++++ 7 files changed, 18 insertions(+), 3 deletions(-) diff --git a/hw/riscv/riscv-iommu-bits.h b/hw/riscv/riscv-iommu-bits.h index XXXXXXX..XXXXXXX 100644 --- a/hw/riscv/riscv-iommu-bits.h +++ b/hw/riscv/riscv-iommu-bits.h @@ -XXX,XX +XXX,XX @@ struct riscv_iommu_pq_record { #define RISCV_IOMMU_CAP_SV39 BIT_ULL(9) #define RISCV_IOMMU_CAP_SV48 BIT_ULL(10) #define RISCV_IOMMU_CAP_SV57 BIT_ULL(11) +#define RISCV_IOMMU_CAP_SVRSW60T59B BIT_ULL(14) #define RISCV_IOMMU_CAP_SV32X4 BIT_ULL(16) #define RISCV_IOMMU_CAP_SV39X4 BIT_ULL(17) #define RISCV_IOMMU_CAP_SV48X4 BIT_ULL(18) diff --git a/target/riscv/cpu_bits.h b/target/riscv/cpu_bits.h index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/cpu_bits.h +++ b/target/riscv/cpu_bits.h @@ -XXX,XX +XXX,XX @@ typedef enum { #define PTE_SOFT 0x300 /* Reserved for Software */ #define PTE_PBMT 0x6000000000000000ULL /* Page-based memory types */ #define PTE_N 0x8000000000000000ULL /* NAPOT translation */ -#define PTE_RESERVED 0x1FC0000000000000ULL /* Reserved bits */ +#define PTE_RESERVED(svrsw60t59b) \ + (svrsw60t59b ? 0x07C0000000000000ULL : 0x1FC0000000000000ULL) /* Reserved bits */ #define PTE_ATTR (PTE_N | PTE_PBMT) /* All attributes bits */ /* Page table PPN shift amount */ diff --git a/target/riscv/cpu_cfg_fields.h.inc b/target/riscv/cpu_cfg_fields.h.inc index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/cpu_cfg_fields.h.inc +++ b/target/riscv/cpu_cfg_fields.h.inc @@ -XXX,XX +XXX,XX @@ BOOL_FIELD(ext_svadu) BOOL_FIELD(ext_svinval) BOOL_FIELD(ext_svnapot) BOOL_FIELD(ext_svpbmt) +BOOL_FIELD(ext_svrsw60t59b) BOOL_FIELD(ext_svvptc) BOOL_FIELD(ext_svukte) BOOL_FIELD(ext_zdinx) diff --git a/hw/riscv/riscv-iommu.c b/hw/riscv/riscv-iommu.c index XXXXXXX..XXXXXXX 100644 --- a/hw/riscv/riscv-iommu.c +++ b/hw/riscv/riscv-iommu.c @@ -XXX,XX +XXX,XX @@ static void riscv_iommu_realize(DeviceState *dev, Error **errp) } if (s->enable_g_stage) { s->cap |= RISCV_IOMMU_CAP_SV32X4 | RISCV_IOMMU_CAP_SV39X4 | - RISCV_IOMMU_CAP_SV48X4 | RISCV_IOMMU_CAP_SV57X4; + RISCV_IOMMU_CAP_SV48X4 | RISCV_IOMMU_CAP_SV57X4 | + RISCV_IOMMU_CAP_SVRSW60T59B; } if (s->hpm_cntrs > 0) { diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/cpu.c +++ b/target/riscv/cpu.c @@ -XXX,XX +XXX,XX @@ const RISCVIsaExtData isa_edata_arr[] = { ISA_EXT_DATA_ENTRY(svinval, PRIV_VERSION_1_12_0, ext_svinval), ISA_EXT_DATA_ENTRY(svnapot, PRIV_VERSION_1_12_0, ext_svnapot), ISA_EXT_DATA_ENTRY(svpbmt, PRIV_VERSION_1_12_0, ext_svpbmt), + ISA_EXT_DATA_ENTRY(svrsw60t59b, PRIV_VERSION_1_13_0, ext_svrsw60t59b), ISA_EXT_DATA_ENTRY(svukte, PRIV_VERSION_1_13_0, ext_svukte), ISA_EXT_DATA_ENTRY(svvptc, PRIV_VERSION_1_13_0, ext_svvptc), ISA_EXT_DATA_ENTRY(xtheadba, PRIV_VERSION_1_11_0, ext_xtheadba), @@ -XXX,XX +XXX,XX @@ const RISCVCPUMultiExtConfig riscv_cpu_extensions[] = { MULTI_EXT_CFG_BOOL("svinval", ext_svinval, false), MULTI_EXT_CFG_BOOL("svnapot", ext_svnapot, false), MULTI_EXT_CFG_BOOL("svpbmt", ext_svpbmt, false), + MULTI_EXT_CFG_BOOL("svrsw60t59b", ext_svrsw60t59b, false), MULTI_EXT_CFG_BOOL("svvptc", ext_svvptc, true), MULTI_EXT_CFG_BOOL("zicntr", ext_zicntr, true), diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/cpu_helper.c +++ b/target/riscv/cpu_helper.c @@ -XXX,XX +XXX,XX @@ static int get_physical_address(CPURISCVState *env, hwaddr *physical, bool svade = riscv_cpu_cfg(env)->ext_svade; bool svadu = riscv_cpu_cfg(env)->ext_svadu; bool adue = svadu ? env->menvcfg & MENVCFG_ADUE : !svade; + bool svrsw60t59b = riscv_cpu_cfg(env)->ext_svrsw60t59b; if (first_stage && two_stage && env->virt_enabled) { pbmte = pbmte && (env->henvcfg & HENVCFG_PBMTE); @@ -XXX,XX +XXX,XX @@ static int get_physical_address(CPURISCVState *env, hwaddr *physical, if (riscv_cpu_sxl(env) == MXL_RV32) { ppn = pte >> PTE_PPN_SHIFT; } else { - if (pte & PTE_RESERVED) { + if (pte & PTE_RESERVED(svrsw60t59b)) { qemu_log_mask(LOG_GUEST_ERROR, "%s: reserved bits set in PTE: " "addr: 0x%" HWADDR_PRIx " pte: 0x" TARGET_FMT_lx "\n", __func__, pte_addr, pte); diff --git a/target/riscv/tcg/tcg-cpu.c b/target/riscv/tcg/tcg-cpu.c index XXXXXXX..XXXXXXX 100644 --- a/target/riscv/tcg/tcg-cpu.c +++ b/target/riscv/tcg/tcg-cpu.c @@ -XXX,XX +XXX,XX @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp) cpu->cfg.ext_ssctr = false; } + if (cpu->cfg.ext_svrsw60t59b && + (!cpu->cfg.mmu || mcc->def->misa_mxl_max == MXL_RV32)) { + error_setg(errp, "svrsw60t59b is not supported on RV32 and MMU-less platforms"); + return; + } + /* * Disable isa extensions based on priv spec after we * validated and set everything we need. @@ -XXX,XX +XXX,XX @@ static void riscv_init_max_cpu_extensions(Object *obj) if (env->misa_mxl != MXL_RV32) { isa_ext_update_enabled(cpu, CPU_CFG_OFFSET(ext_zcf), false); + } else { + isa_ext_update_enabled(cpu, CPU_CFG_OFFSET(ext_svrsw60t59b), false); } /* -- 2.50.0