[PATCH] target/riscv/vector_helper.c: Remove the check for extra tail elements

Xiao Wang posted 1 patch 11 months ago
Failed in applying to current master (apply log)
There is a newer version of this series
target/riscv/vector_helper.c | 21 ++++++---------------
1 file changed, 6 insertions(+), 15 deletions(-)
[PATCH] target/riscv/vector_helper.c: Remove the check for extra tail elements
Posted by Xiao Wang 11 months ago
Commit 752614cab8e6 ("target/riscv: rvv: Add tail agnostic for vector
load / store instructions") added an extra check for LMUL fragmentation,
intended for setting the "rest tail elements" in the last register for a
segment load insn.

Actually, the max_elements derived in vext_ld*() won't be a fraction of
vector register size, since the lmul encoded in desc is emul, which has
already been adjusted to 1 for LMUL fragmentation case by vext_get_emul()
in trans_rvv.c.inc, for ld_stride(), ld_us(), ld_index() and ldff().

Besides, vext_get_emul() has also taken EEW/SEW into consideration, so no
need to call vext_get_total_elems() which would base on the emul to derive
another emul, the second emul would be incorrect when esz differs from sew.

Thus this patch removes the check for extra tail elements.

Fixes: 752614cab8e6 ("target/riscv: rvv: Add tail agnostic for vector load / store instructions")

Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>
---
 target/riscv/vector_helper.c | 21 ++++++---------------
 1 file changed, 6 insertions(+), 15 deletions(-)

diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index f4d0438988..56a79bb5fa 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -264,26 +264,17 @@ GEN_VEXT_ST_ELEM(ste_h, int16_t, H2, stw)
 GEN_VEXT_ST_ELEM(ste_w, int32_t, H4, stl)
 GEN_VEXT_ST_ELEM(ste_d, int64_t, H8, stq)
 
-static void vext_set_tail_elems_1s(CPURISCVState *env, target_ulong vl,
-                                   void *vd, uint32_t desc, uint32_t nf,
+static void vext_set_tail_elems_1s(target_ulong vl, void *vd,
+                                   uint32_t desc, uint32_t nf,
                                    uint32_t esz, uint32_t max_elems)
 {
-    uint32_t total_elems = vext_get_total_elems(env, desc, esz);
-    uint32_t vlenb = riscv_cpu_cfg(env)->vlen >> 3;
     uint32_t vta = vext_vta(desc);
-    uint32_t registers_used;
     int k;
 
     for (k = 0; k < nf; ++k) {
         vext_set_elems_1s(vd, vta, (k * max_elems + vl) * esz,
                           (k * max_elems + max_elems) * esz);
     }
-
-    if (nf * max_elems % total_elems != 0) {
-        registers_used = ((nf * max_elems) * esz + (vlenb - 1)) / vlenb;
-        vext_set_elems_1s(vd, vta, (nf * max_elems) * esz,
-                          registers_used * vlenb);
-    }
 }
 
 /*
@@ -319,7 +310,7 @@ vext_ldst_stride(void *vd, void *v0, target_ulong base,
     }
     env->vstart = 0;
 
-    vext_set_tail_elems_1s(env, env->vl, vd, desc, nf, esz, max_elems);
+    vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems);
 }
 
 #define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN)                        \
@@ -378,7 +369,7 @@ vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
     }
     env->vstart = 0;
 
-    vext_set_tail_elems_1s(env, evl, vd, desc, nf, esz, max_elems);
+    vext_set_tail_elems_1s(evl, vd, desc, nf, esz, max_elems);
 }
 
 /*
@@ -499,7 +490,7 @@ vext_ldst_index(void *vd, void *v0, target_ulong base,
     }
     env->vstart = 0;
 
-    vext_set_tail_elems_1s(env, env->vl, vd, desc, nf, esz, max_elems);
+    vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems);
 }
 
 #define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN)                  \
@@ -629,7 +620,7 @@ ProbeSuccess:
     }
     env->vstart = 0;
 
-    vext_set_tail_elems_1s(env, env->vl, vd, desc, nf, esz, max_elems);
+    vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems);
 }
 
 #define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN)               \
-- 
2.25.1
Re: [PATCH] target/riscv/vector_helper.c: Remove the check for extra tail elements
Posted by Daniel Henrique Barboza 11 months ago
Hi,


On 6/6/23 05:34, Xiao Wang wrote:
> Commit 752614cab8e6 ("target/riscv: rvv: Add tail agnostic for vector
> load / store instructions") added an extra check for LMUL fragmentation,
> intended for setting the "rest tail elements" in the last register for a
> segment load insn.
> 
> Actually, the max_elements derived in vext_ld*() won't be a fraction of
> vector register size, since the lmul encoded in desc is emul, which has
> already been adjusted to 1 for LMUL fragmentation case by vext_get_emul()
> in trans_rvv.c.inc, for ld_stride(), ld_us(), ld_index() and ldff().
> 
> Besides, vext_get_emul() has also taken EEW/SEW into consideration, so no
> need to call vext_get_total_elems() which would base on the emul to derive
> another emul, the second emul would be incorrect when esz differs from sew.
> 
> Thus this patch removes the check for extra tail elements.
> 
> Fixes: 752614cab8e6 ("target/riscv: rvv: Add tail agnostic for vector load / store instructions")
> 
> Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>
> ---
>   target/riscv/vector_helper.c | 21 ++++++---------------
>   1 file changed, 6 insertions(+), 15 deletions(-)
> 
> diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
> index f4d0438988..56a79bb5fa 100644
> --- a/target/riscv/vector_helper.c
> +++ b/target/riscv/vector_helper.c
> @@ -264,26 +264,17 @@ GEN_VEXT_ST_ELEM(ste_h, int16_t, H2, stw)
>   GEN_VEXT_ST_ELEM(ste_w, int32_t, H4, stl)
>   GEN_VEXT_ST_ELEM(ste_d, int64_t, H8, stq)
>   
> -static void vext_set_tail_elems_1s(CPURISCVState *env, target_ulong vl,
> -                                   void *vd, uint32_t desc, uint32_t nf,
> +static void vext_set_tail_elems_1s(target_ulong vl, void *vd,
> +                                   uint32_t desc, uint32_t nf,
>                                      uint32_t esz, uint32_t max_elems)
>   {
> -    uint32_t total_elems = vext_get_total_elems(env, desc, esz);
> -    uint32_t vlenb = riscv_cpu_cfg(env)->vlen >> 3;
>       uint32_t vta = vext_vta(desc);
> -    uint32_t registers_used;
>       int k;
>   
>       for (k = 0; k < nf; ++k) {
>           vext_set_elems_1s(vd, vta, (k * max_elems + vl) * esz,
>                             (k * max_elems + max_elems) * esz);
>       }
> -
> -    if (nf * max_elems % total_elems != 0) {
> -        registers_used = ((nf * max_elems) * esz + (vlenb - 1)) / vlenb;
> -        vext_set_elems_1s(vd, vta, (nf * max_elems) * esz,
> -                          registers_used * vlenb);
> -    }
>   }


Can you please rebase it on top of master? This function has at least one
change (a vta check right at the start) that isn't accounted for in this
patch.

Code LGTM otherwise. Thanks,


Daniel

>   
>   /*
> @@ -319,7 +310,7 @@ vext_ldst_stride(void *vd, void *v0, target_ulong base,
>       }
>       env->vstart = 0;
>   
> -    vext_set_tail_elems_1s(env, env->vl, vd, desc, nf, esz, max_elems);
> +    vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems);
>   }
>   
>   #define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN)                        \
> @@ -378,7 +369,7 @@ vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
>       }
>       env->vstart = 0;
>   
> -    vext_set_tail_elems_1s(env, evl, vd, desc, nf, esz, max_elems);
> +    vext_set_tail_elems_1s(evl, vd, desc, nf, esz, max_elems);
>   }
>   
>   /*
> @@ -499,7 +490,7 @@ vext_ldst_index(void *vd, void *v0, target_ulong base,
>       }
>       env->vstart = 0;
>   
> -    vext_set_tail_elems_1s(env, env->vl, vd, desc, nf, esz, max_elems);
> +    vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems);
>   }
>   
>   #define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN)                  \
> @@ -629,7 +620,7 @@ ProbeSuccess:
>       }
>       env->vstart = 0;
>   
> -    vext_set_tail_elems_1s(env, env->vl, vd, desc, nf, esz, max_elems);
> +    vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems);
>   }
>   
>   #define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN)               \
RE: [PATCH] target/riscv/vector_helper.c: Remove the check for extra tail elements
Posted by Wang, Xiao W 11 months ago
Hi,

> -----Original Message-----
> From: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
> Sent: Tuesday, June 6, 2023 6:31 PM
> To: Wang, Xiao W <xiao.w.wang@intel.com>; qemu-devel@nongnu.org
> Cc: Palmer Dabbelt <palmer@dabbelt.com>; Alistair Francis
> <alistair.francis@wdc.com>; Meng, Bin <bin.meng@windriver.com>; Weiwei
> Li <liweiwei@iscas.ac.cn>; Liu Zhiwei <zhiwei_liu@linux.alibaba.com>; open
> list:RISC-V TCG CPUs <qemu-riscv@nongnu.org>
> Subject: Re: [PATCH] target/riscv/vector_helper.c: Remove the check for
> extra tail elements
> 
> Hi,
> 
> 
> On 6/6/23 05:34, Xiao Wang wrote:
> > Commit 752614cab8e6 ("target/riscv: rvv: Add tail agnostic for vector
> > load / store instructions") added an extra check for LMUL fragmentation,
> > intended for setting the "rest tail elements" in the last register for a
> > segment load insn.
> >
> > Actually, the max_elements derived in vext_ld*() won't be a fraction of
> > vector register size, since the lmul encoded in desc is emul, which has
> > already been adjusted to 1 for LMUL fragmentation case by
> vext_get_emul()
> > in trans_rvv.c.inc, for ld_stride(), ld_us(), ld_index() and ldff().
> >
> > Besides, vext_get_emul() has also taken EEW/SEW into consideration, so
> no
> > need to call vext_get_total_elems() which would base on the emul to
> derive
> > another emul, the second emul would be incorrect when esz differs from
> sew.
> >
> > Thus this patch removes the check for extra tail elements.
> >
> > Fixes: 752614cab8e6 ("target/riscv: rvv: Add tail agnostic for vector load /
> store instructions")
> >
> > Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>
> > ---
> >   target/riscv/vector_helper.c | 21 ++++++---------------
> >   1 file changed, 6 insertions(+), 15 deletions(-)
> >
> > diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
> > index f4d0438988..56a79bb5fa 100644
> > --- a/target/riscv/vector_helper.c
> > +++ b/target/riscv/vector_helper.c
> > @@ -264,26 +264,17 @@ GEN_VEXT_ST_ELEM(ste_h, int16_t, H2, stw)
> >   GEN_VEXT_ST_ELEM(ste_w, int32_t, H4, stl)
> >   GEN_VEXT_ST_ELEM(ste_d, int64_t, H8, stq)
> >
> > -static void vext_set_tail_elems_1s(CPURISCVState *env, target_ulong vl,
> > -                                   void *vd, uint32_t desc, uint32_t nf,
> > +static void vext_set_tail_elems_1s(target_ulong vl, void *vd,
> > +                                   uint32_t desc, uint32_t nf,
> >                                      uint32_t esz, uint32_t max_elems)
> >   {
> > -    uint32_t total_elems = vext_get_total_elems(env, desc, esz);
> > -    uint32_t vlenb = riscv_cpu_cfg(env)->vlen >> 3;
> >       uint32_t vta = vext_vta(desc);
> > -    uint32_t registers_used;
> >       int k;
> >
> >       for (k = 0; k < nf; ++k) {
> >           vext_set_elems_1s(vd, vta, (k * max_elems + vl) * esz,
> >                             (k * max_elems + max_elems) * esz);
> >       }
> > -
> > -    if (nf * max_elems % total_elems != 0) {
> > -        registers_used = ((nf * max_elems) * esz + (vlenb - 1)) / vlenb;
> > -        vext_set_elems_1s(vd, vta, (nf * max_elems) * esz,
> > -                          registers_used * vlenb);
> > -    }
> >   }
> 
> 
> Can you please rebase it on top of master? This function has at least one
> change (a vta check right at the start) that isn't accounted for in this
> patch.
> 
> Code LGTM otherwise. Thanks,
> 
> 
> Daniel
> 

OK. I think you refer to the riscv-to-apply.next branch in https://github.com/alistair23/qemu.git, I see the vta check in that branch only.
Would do a rebase. Thanks.

-Xiao