[Qemu-devel] [PATCH 2/2] target/arm: Fix do_predset for large VL

Richard Henderson posted 2 patches 7 years, 4 months ago
[Qemu-devel] [PATCH 2/2] target/arm: Fix do_predset for large VL
Posted by Richard Henderson 7 years, 4 months ago
Use MAKE_64BIT_MASK instead of open-coding the mask.  Remove an
odd vector-size special case that is unlikely to be more profitable
than three 64-bit integer stores.  Correct the loop that stores WORD
so that it covers only the complete 64-bit words of the set region,
to avoid writing too much data.

Fixes RISU tests of PTRUE for VL 256.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/translate-sve.c | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
index d41f1155f9..374051cd20 100644
--- a/target/arm/translate-sve.c
+++ b/target/arm/translate-sve.c
@@ -1438,7 +1438,7 @@ static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
         setsz = numelem << esz;
         lastword = word = pred_esz_masks[esz];
         if (setsz % 64) {
-            lastword &= ~(-1ull << (setsz % 64));
+            lastword &= MAKE_64BIT_MASK(0, setsz % 64);
         }
     }
 
@@ -1457,19 +1457,13 @@ static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
             tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
             goto done;
         }
-        if (oprsz * 8 == setsz + 8) {
-            tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
-            tcg_gen_movi_i64(t, 0);
-            tcg_gen_st_i64(t, cpu_env, ofs + oprsz - 8);
-            goto done;
-        }
     }
 
     setsz /= 8;
     fullsz /= 8;
 
     tcg_gen_movi_i64(t, word);
-    for (i = 0; i < setsz; i += 8) {
+    for (i = 0; i < QEMU_ALIGN_DOWN(setsz, 8); i += 8) {
         tcg_gen_st_i64(t, cpu_env, ofs + i);
     }
     if (lastword != word) {
-- 
2.17.1


Re: [Qemu-devel] [PATCH 2/2] target/arm: Fix do_predset for large VL
Posted by Alex Bennée 7 years, 4 months ago
Richard Henderson <richard.henderson@linaro.org> writes:

> Use MAKE_64BIT_MASK instead of open-coding.  Remove an odd
> vector size check that is unlikely to be more profitable
> than 3 64-bit integer stores.  Correct the iteration for WORD
> to avoid writing too much data.
>
> Fixes RISU tests of PTRUE for VL 256.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Tested-by: Alex Bennée <alex.bennee@linaro.org>

> ---
>  target/arm/translate-sve.c | 10 ++--------
>  1 file changed, 2 insertions(+), 8 deletions(-)
>
> diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
> index d41f1155f9..374051cd20 100644
> --- a/target/arm/translate-sve.c
> +++ b/target/arm/translate-sve.c
> @@ -1438,7 +1438,7 @@ static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
>          setsz = numelem << esz;
>          lastword = word = pred_esz_masks[esz];
>          if (setsz % 64) {
> -            lastword &= ~(-1ull << (setsz % 64));
> +            lastword &= MAKE_64BIT_MASK(0, setsz % 64);
>          }
>      }
>
> @@ -1457,19 +1457,13 @@ static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
>              tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
>              goto done;
>          }
> -        if (oprsz * 8 == setsz + 8) {
> -            tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
> -            tcg_gen_movi_i64(t, 0);
> -            tcg_gen_st_i64(t, cpu_env, ofs + oprsz - 8);
> -            goto done;
> -        }
>      }
>
>      setsz /= 8;
>      fullsz /= 8;
>
>      tcg_gen_movi_i64(t, word);
> -    for (i = 0; i < setsz; i += 8) {
> +    for (i = 0; i < QEMU_ALIGN_DOWN(setsz, 8); i += 8) {
>          tcg_gen_st_i64(t, cpu_env, ofs + i);
>      }
>      if (lastword != word) {


--
Alex Bennée