[PATCH qemu v6 06/10] target/riscv: rvv: Add mask agnostic for vector fixed-point arithmetic instructions

~eopxd posted 10 patches 3 years, 7 months ago
Maintainers: Palmer Dabbelt <palmer@dabbelt.com>, Alistair Francis <alistair.francis@wdc.com>, Bin Meng <bin.meng@windriver.com>
[PATCH qemu v6 06/10] target/riscv: rvv: Add mask agnostic for vector fixed-point arithmetic instructions
Posted by ~eopxd 3 years, 10 months ago
From: Yueh-Ting (eop) Chen <eop.chen@sifive.com>

Signed-off-by: eop Chen <eop.chen@sifive.com>
Reviewed-by: Frank Chang <frank.chang@sifive.com>
Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn>
---
 target/riscv/vector_helper.c | 26 ++++++++++++++++----------
 1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index 07ce671879..597fa9c752 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -2129,10 +2129,12 @@ static inline void
 vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2,
              CPURISCVState *env,
              uint32_t vl, uint32_t vm, int vxrm,
-             opivv2_rm_fn *fn)
+             opivv2_rm_fn *fn, uint32_t vma, uint32_t esz)
 {
     for (uint32_t i = env->vstart; i < vl; i++) {
         if (!vm && !vext_elem_mask(v0, i)) {
+            /* set masked-off elements to 1s */
+            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);
             continue;
         }
         fn(vd, vs1, vs2, i, env, vxrm);
@@ -2150,23 +2152,24 @@ vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2,
     uint32_t vl = env->vl;
     uint32_t total_elems = vext_get_total_elems(env, desc, esz);
     uint32_t vta = vext_vta(desc);
+    uint32_t vma = vext_vma(desc);
 
     switch (env->vxrm) {
     case 0: /* rnu */
         vext_vv_rm_1(vd, v0, vs1, vs2,
-                     env, vl, vm, 0, fn);
+                     env, vl, vm, 0, fn, vma, esz);
         break;
     case 1: /* rne */
         vext_vv_rm_1(vd, v0, vs1, vs2,
-                     env, vl, vm, 1, fn);
+                     env, vl, vm, 1, fn, vma, esz);
         break;
     case 2: /* rdn */
         vext_vv_rm_1(vd, v0, vs1, vs2,
-                     env, vl, vm, 2, fn);
+                     env, vl, vm, 2, fn, vma, esz);
         break;
     default: /* rod */
         vext_vv_rm_1(vd, v0, vs1, vs2,
-                     env, vl, vm, 3, fn);
+                     env, vl, vm, 3, fn, vma, esz);
         break;
     }
     /* set tail elements to 1s */
@@ -2250,10 +2253,12 @@ static inline void
 vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2,
              CPURISCVState *env,
              uint32_t vl, uint32_t vm, int vxrm,
-             opivx2_rm_fn *fn)
+             opivx2_rm_fn *fn, uint32_t vma, uint32_t esz)
 {
     for (uint32_t i = env->vstart; i < vl; i++) {
         if (!vm && !vext_elem_mask(v0, i)) {
+            /* set masked-off elements to 1s */
+            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);
             continue;
         }
         fn(vd, s1, vs2, i, env, vxrm);
@@ -2271,23 +2276,24 @@ vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2,
     uint32_t vl = env->vl;
     uint32_t total_elems = vext_get_total_elems(env, desc, esz);
     uint32_t vta = vext_vta(desc);
+    uint32_t vma = vext_vma(desc);
 
     switch (env->vxrm) {
     case 0: /* rnu */
         vext_vx_rm_1(vd, v0, s1, vs2,
-                     env, vl, vm, 0, fn);
+                     env, vl, vm, 0, fn, vma, esz);
         break;
     case 1: /* rne */
         vext_vx_rm_1(vd, v0, s1, vs2,
-                     env, vl, vm, 1, fn);
+                     env, vl, vm, 1, fn, vma, esz);
         break;
     case 2: /* rdn */
         vext_vx_rm_1(vd, v0, s1, vs2,
-                     env, vl, vm, 2, fn);
+                     env, vl, vm, 2, fn, vma, esz);
         break;
     default: /* rod */
         vext_vx_rm_1(vd, v0, s1, vs2,
-                     env, vl, vm, 3, fn);
+                     env, vl, vm, 3, fn, vma, esz);
         break;
     }
     /* set tail elements to 1s */
-- 
2.34.2
Re: [PATCH qemu v6 06/10] target/riscv: rvv: Add mask agnostic for vector fixed-point arithmetic instructions
Posted by Alistair Francis 3 years, 6 months ago
On Mon, Jun 20, 2022 at 4:56 PM ~eopxd <eopxd@git.sr.ht> wrote:
>
> From: Yueh-Ting (eop) Chen <eop.chen@sifive.com>
>
> Signed-off-by: eop Chen <eop.chen@sifive.com>
> Reviewed-by: Frank Chang <frank.chang@sifive.com>
> Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn>

Acked-by: Alistair Francis <alistair.francis@wdc.com>

Alistair

> ---
>  target/riscv/vector_helper.c | 26 ++++++++++++++++----------
>  1 file changed, 16 insertions(+), 10 deletions(-)
>
> diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
> index 07ce671879..597fa9c752 100644
> --- a/target/riscv/vector_helper.c
> +++ b/target/riscv/vector_helper.c
> @@ -2129,10 +2129,12 @@ static inline void
>  vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2,
>               CPURISCVState *env,
>               uint32_t vl, uint32_t vm, int vxrm,
> -             opivv2_rm_fn *fn)
> +             opivv2_rm_fn *fn, uint32_t vma, uint32_t esz)
>  {
>      for (uint32_t i = env->vstart; i < vl; i++) {
>          if (!vm && !vext_elem_mask(v0, i)) {
> +            /* set masked-off elements to 1s */
> +            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);
>              continue;
>          }
>          fn(vd, vs1, vs2, i, env, vxrm);
> @@ -2150,23 +2152,24 @@ vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2,
>      uint32_t vl = env->vl;
>      uint32_t total_elems = vext_get_total_elems(env, desc, esz);
>      uint32_t vta = vext_vta(desc);
> +    uint32_t vma = vext_vma(desc);
>
>      switch (env->vxrm) {
>      case 0: /* rnu */
>          vext_vv_rm_1(vd, v0, vs1, vs2,
> -                     env, vl, vm, 0, fn);
> +                     env, vl, vm, 0, fn, vma, esz);
>          break;
>      case 1: /* rne */
>          vext_vv_rm_1(vd, v0, vs1, vs2,
> -                     env, vl, vm, 1, fn);
> +                     env, vl, vm, 1, fn, vma, esz);
>          break;
>      case 2: /* rdn */
>          vext_vv_rm_1(vd, v0, vs1, vs2,
> -                     env, vl, vm, 2, fn);
> +                     env, vl, vm, 2, fn, vma, esz);
>          break;
>      default: /* rod */
>          vext_vv_rm_1(vd, v0, vs1, vs2,
> -                     env, vl, vm, 3, fn);
> +                     env, vl, vm, 3, fn, vma, esz);
>          break;
>      }
>      /* set tail elements to 1s */
> @@ -2250,10 +2253,12 @@ static inline void
>  vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2,
>               CPURISCVState *env,
>               uint32_t vl, uint32_t vm, int vxrm,
> -             opivx2_rm_fn *fn)
> +             opivx2_rm_fn *fn, uint32_t vma, uint32_t esz)
>  {
>      for (uint32_t i = env->vstart; i < vl; i++) {
>          if (!vm && !vext_elem_mask(v0, i)) {
> +            /* set masked-off elements to 1s */
> +            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);
>              continue;
>          }
>          fn(vd, s1, vs2, i, env, vxrm);
> @@ -2271,23 +2276,24 @@ vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2,
>      uint32_t vl = env->vl;
>      uint32_t total_elems = vext_get_total_elems(env, desc, esz);
>      uint32_t vta = vext_vta(desc);
> +    uint32_t vma = vext_vma(desc);
>
>      switch (env->vxrm) {
>      case 0: /* rnu */
>          vext_vx_rm_1(vd, v0, s1, vs2,
> -                     env, vl, vm, 0, fn);
> +                     env, vl, vm, 0, fn, vma, esz);
>          break;
>      case 1: /* rne */
>          vext_vx_rm_1(vd, v0, s1, vs2,
> -                     env, vl, vm, 1, fn);
> +                     env, vl, vm, 1, fn, vma, esz);
>          break;
>      case 2: /* rdn */
>          vext_vx_rm_1(vd, v0, s1, vs2,
> -                     env, vl, vm, 2, fn);
> +                     env, vl, vm, 2, fn, vma, esz);
>          break;
>      default: /* rod */
>          vext_vx_rm_1(vd, v0, s1, vs2,
> -                     env, vl, vm, 3, fn);
> +                     env, vl, vm, 3, fn, vma, esz);
>          break;
>      }
>      /* set tail elements to 1s */
> --
> 2.34.2
>
>