[Qemu-devel] [PATCH] target/ppc: Optimise VSX_LOAD_SCALAR_DS and VSX_VECTOR_LOAD_STORE

Anton Blanchard posted 1 patch 6 years, 6 months ago
Test asan passed
Test docker-mingw@fedora passed
Test docker-clang@ubuntu passed
Test checkpatch passed
Patches applied successfully (tree, apply log)
git fetch https://github.com/patchew-project/qemu tags/patchew/20190509103545.4a7fa71a@kryten
Maintainers: David Gibson <david@gibson.dropbear.id.au>
target/ppc/translate/vsx-impl.inc.c | 68 ++++++++++++++++++++++++-----
1 file changed, 58 insertions(+), 10 deletions(-)
[Qemu-devel] [PATCH] target/ppc: Optimise VSX_LOAD_SCALAR_DS and VSX_VECTOR_LOAD_STORE
Posted by Anton Blanchard 6 years, 6 months ago
A few small optimisations:

In VSX_LOAD_SCALAR_DS() we don't need to read the VSR via
get_cpu_vsrh().

Split VSX_VECTOR_LOAD_STORE() into two functions. Loads only need to
write the VSRs (set_cpu_vsr*()) and stores only need to read the VSRs
(get_cpu_vsr*()).

Thanks to Mark Cave-Ayland for the suggestions.

Signed-off-by: Anton Blanchard <anton@ozlabs.org>
---
 target/ppc/translate/vsx-impl.inc.c | 68 ++++++++++++++++++++++++-----
 1 file changed, 58 insertions(+), 10 deletions(-)

diff --git a/target/ppc/translate/vsx-impl.inc.c b/target/ppc/translate/vsx-impl.inc.c
index 4b7627f53b..cdb44b8b70 100644
--- a/target/ppc/translate/vsx-impl.inc.c
+++ b/target/ppc/translate/vsx-impl.inc.c
@@ -228,7 +228,7 @@ static void gen_lxvb16x(DisasContext *ctx)
     tcg_temp_free_i64(xtl);
 }
 
-#define VSX_VECTOR_LOAD_STORE(name, op, indexed)            \
+#define VSX_VECTOR_LOAD(name, op, indexed)                  \
 static void gen_##name(DisasContext *ctx)                   \
 {                                                           \
     int xt;                                                 \
@@ -255,8 +255,6 @@ static void gen_##name(DisasContext *ctx)                   \
     }                                                       \
     xth = tcg_temp_new_i64();                               \
     xtl = tcg_temp_new_i64();                               \
-    get_cpu_vsrh(xth, xt);                                  \
-    get_cpu_vsrl(xtl, xt);                                  \
     gen_set_access_type(ctx, ACCESS_INT);                   \
     EA = tcg_temp_new();                                    \
     if (indexed) {                                          \
@@ -282,10 +280,61 @@ static void gen_##name(DisasContext *ctx)                   \
     tcg_temp_free_i64(xtl);                                 \
 }
 
-VSX_VECTOR_LOAD_STORE(lxv, ld_i64, 0)
-VSX_VECTOR_LOAD_STORE(stxv, st_i64, 0)
-VSX_VECTOR_LOAD_STORE(lxvx, ld_i64, 1)
-VSX_VECTOR_LOAD_STORE(stxvx, st_i64, 1)
+VSX_VECTOR_LOAD(lxv, ld_i64, 0)
+VSX_VECTOR_LOAD(lxvx, ld_i64, 1)
+
+#define VSX_VECTOR_STORE(name, op, indexed)                 \
+static void gen_##name(DisasContext *ctx)                   \
+{                                                           \
+    int xt;                                                 \
+    TCGv EA;                                                \
+    TCGv_i64 xth;                                           \
+    TCGv_i64 xtl;                                           \
+                                                            \
+    if (indexed) {                                          \
+        xt = xT(ctx->opcode);                               \
+    } else {                                                \
+        xt = DQxT(ctx->opcode);                             \
+    }                                                       \
+                                                            \
+    if (xt < 32) {                                          \
+        if (unlikely(!ctx->vsx_enabled)) {                  \
+            gen_exception(ctx, POWERPC_EXCP_VSXU);          \
+            return;                                         \
+        }                                                   \
+    } else {                                                \
+        if (unlikely(!ctx->altivec_enabled)) {              \
+            gen_exception(ctx, POWERPC_EXCP_VPU);           \
+            return;                                         \
+        }                                                   \
+    }                                                       \
+    xth = tcg_temp_new_i64();                               \
+    xtl = tcg_temp_new_i64();                               \
+    get_cpu_vsrh(xth, xt);                                  \
+    get_cpu_vsrl(xtl, xt);                                  \
+    gen_set_access_type(ctx, ACCESS_INT);                   \
+    EA = tcg_temp_new();                                    \
+    if (indexed) {                                          \
+        gen_addr_reg_index(ctx, EA);                        \
+    } else {                                                \
+        gen_addr_imm_index(ctx, EA, 0x0F);                  \
+    }                                                       \
+    if (ctx->le_mode) {                                     \
+        tcg_gen_qemu_##op(xtl, EA, ctx->mem_idx, MO_LEQ);   \
+        tcg_gen_addi_tl(EA, EA, 8);                         \
+        tcg_gen_qemu_##op(xth, EA, ctx->mem_idx, MO_LEQ);   \
+    } else {                                                \
+        tcg_gen_qemu_##op(xth, EA, ctx->mem_idx, MO_BEQ);   \
+        tcg_gen_addi_tl(EA, EA, 8);                         \
+        tcg_gen_qemu_##op(xtl, EA, ctx->mem_idx, MO_BEQ);   \
+    }                                                       \
+    tcg_temp_free(EA);                                      \
+    tcg_temp_free_i64(xth);                                 \
+    tcg_temp_free_i64(xtl);                                 \
+}
+
+VSX_VECTOR_STORE(stxv, st_i64, 0)
+VSX_VECTOR_STORE(stxvx, st_i64, 1)
 
 #ifdef TARGET_PPC64
 #define VSX_VECTOR_LOAD_STORE_LENGTH(name)                      \
@@ -330,7 +379,6 @@ static void gen_##name(DisasContext *ctx)                         \
         return;                                                   \
     }                                                             \
     xth = tcg_temp_new_i64();                                     \
-    get_cpu_vsrh(xth, rD(ctx->opcode) + 32);                      \
     gen_set_access_type(ctx, ACCESS_INT);                         \
     EA = tcg_temp_new();                                          \
     gen_addr_imm_index(ctx, EA, 0x03);                            \
@@ -514,8 +562,8 @@ static void gen_##name(DisasContext *ctx)                         \
     tcg_temp_free_i64(xth);                                       \
 }
 
-VSX_LOAD_SCALAR_DS(stxsd, st64_i64)
-VSX_LOAD_SCALAR_DS(stxssp, st32fs)
+VSX_STORE_SCALAR_DS(stxsd, st64_i64)
+VSX_STORE_SCALAR_DS(stxssp, st32fs)
 
 static void gen_mfvsrwz(DisasContext *ctx)
 {
-- 
2.20.1


Re: [Qemu-devel] [PATCH] target/ppc: Optimise VSX_LOAD_SCALAR_DS and VSX_VECTOR_LOAD_STORE
Posted by Mark Cave-Ayland 6 years, 6 months ago
On 09/05/2019 01:35, Anton Blanchard wrote:

> A few small optimisations:
> 
> In VSX_LOAD_SCALAR_DS() we don't need to read the VSR via
> get_cpu_vsrh().
> 
> Split VSX_VECTOR_LOAD_STORE() into two functions. Loads only need to
> write the VSRs (set_cpu_vsr*()) and stores only need to read the VSRs
> (get_cpu_vsr*())
> 
> Thanks to Mark Cave-Ayland for the suggestions.
> 
> Signed-off-by: Anton Blanchard <anton@ozlabs.org>
> ---
>  target/ppc/translate/vsx-impl.inc.c | 68 ++++++++++++++++++++++++-----
>  1 file changed, 58 insertions(+), 10 deletions(-)
> 
> diff --git a/target/ppc/translate/vsx-impl.inc.c b/target/ppc/translate/vsx-impl.inc.c
> index 4b7627f53b..cdb44b8b70 100644
> --- a/target/ppc/translate/vsx-impl.inc.c
> +++ b/target/ppc/translate/vsx-impl.inc.c
> @@ -228,7 +228,7 @@ static void gen_lxvb16x(DisasContext *ctx)
>      tcg_temp_free_i64(xtl);
>  }
>  
> -#define VSX_VECTOR_LOAD_STORE(name, op, indexed)            \
> +#define VSX_VECTOR_LOAD(name, op, indexed)                  \
>  static void gen_##name(DisasContext *ctx)                   \
>  {                                                           \
>      int xt;                                                 \
> @@ -255,8 +255,6 @@ static void gen_##name(DisasContext *ctx)                   \
>      }                                                       \
>      xth = tcg_temp_new_i64();                               \
>      xtl = tcg_temp_new_i64();                               \
> -    get_cpu_vsrh(xth, xt);                                  \
> -    get_cpu_vsrl(xtl, xt);                                  \
>      gen_set_access_type(ctx, ACCESS_INT);                   \
>      EA = tcg_temp_new();                                    \
>      if (indexed) {                                          \
> @@ -282,10 +280,61 @@ static void gen_##name(DisasContext *ctx)                   \
>      tcg_temp_free_i64(xtl);                                 \
>  }
>  
> -VSX_VECTOR_LOAD_STORE(lxv, ld_i64, 0)
> -VSX_VECTOR_LOAD_STORE(stxv, st_i64, 0)
> -VSX_VECTOR_LOAD_STORE(lxvx, ld_i64, 1)
> -VSX_VECTOR_LOAD_STORE(stxvx, st_i64, 1)
> +VSX_VECTOR_LOAD(lxv, ld_i64, 0)
> +VSX_VECTOR_LOAD(lxvx, ld_i64, 1)
> +
> +#define VSX_VECTOR_STORE(name, op, indexed)                 \
> +static void gen_##name(DisasContext *ctx)                   \
> +{                                                           \
> +    int xt;                                                 \
> +    TCGv EA;                                                \
> +    TCGv_i64 xth;                                           \
> +    TCGv_i64 xtl;                                           \
> +                                                            \
> +    if (indexed) {                                          \
> +        xt = xT(ctx->opcode);                               \
> +    } else {                                                \
> +        xt = DQxT(ctx->opcode);                             \
> +    }                                                       \
> +                                                            \
> +    if (xt < 32) {                                          \
> +        if (unlikely(!ctx->vsx_enabled)) {                  \
> +            gen_exception(ctx, POWERPC_EXCP_VSXU);          \
> +            return;                                         \
> +        }                                                   \
> +    } else {                                                \
> +        if (unlikely(!ctx->altivec_enabled)) {              \
> +            gen_exception(ctx, POWERPC_EXCP_VPU);           \
> +            return;                                         \
> +        }                                                   \
> +    }                                                       \
> +    xth = tcg_temp_new_i64();                               \
> +    xtl = tcg_temp_new_i64();                               \
> +    get_cpu_vsrh(xth, xt);                                  \
> +    get_cpu_vsrl(xtl, xt);                                  \
> +    gen_set_access_type(ctx, ACCESS_INT);                   \
> +    EA = tcg_temp_new();                                    \
> +    if (indexed) {                                          \
> +        gen_addr_reg_index(ctx, EA);                        \
> +    } else {                                                \
> +        gen_addr_imm_index(ctx, EA, 0x0F);                  \
> +    }                                                       \
> +    if (ctx->le_mode) {                                     \
> +        tcg_gen_qemu_##op(xtl, EA, ctx->mem_idx, MO_LEQ);   \
> +        tcg_gen_addi_tl(EA, EA, 8);                         \
> +        tcg_gen_qemu_##op(xth, EA, ctx->mem_idx, MO_LEQ);   \
> +    } else {                                                \
> +        tcg_gen_qemu_##op(xth, EA, ctx->mem_idx, MO_BEQ);   \
> +        tcg_gen_addi_tl(EA, EA, 8);                         \
> +        tcg_gen_qemu_##op(xtl, EA, ctx->mem_idx, MO_BEQ);   \
> +    }                                                       \
> +    tcg_temp_free(EA);                                      \
> +    tcg_temp_free_i64(xth);                                 \
> +    tcg_temp_free_i64(xtl);                                 \
> +}
> +
> +VSX_VECTOR_STORE(stxv, st_i64, 0)
> +VSX_VECTOR_STORE(stxvx, st_i64, 1)
>  
>  #ifdef TARGET_PPC64
>  #define VSX_VECTOR_LOAD_STORE_LENGTH(name)                      \
> @@ -330,7 +379,6 @@ static void gen_##name(DisasContext *ctx)                         \
>          return;                                                   \
>      }                                                             \
>      xth = tcg_temp_new_i64();                                     \
> -    get_cpu_vsrh(xth, rD(ctx->opcode) + 32);                      \
>      gen_set_access_type(ctx, ACCESS_INT);                         \
>      EA = tcg_temp_new();                                          \
>      gen_addr_imm_index(ctx, EA, 0x03);                            \
> @@ -514,8 +562,8 @@ static void gen_##name(DisasContext *ctx)                         \
>      tcg_temp_free_i64(xth);                                       \
>  }
>  
> -VSX_LOAD_SCALAR_DS(stxsd, st64_i64)
> -VSX_LOAD_SCALAR_DS(stxssp, st32fs)
> +VSX_STORE_SCALAR_DS(stxsd, st64_i64)
> +VSX_STORE_SCALAR_DS(stxssp, st32fs)
>  
>  static void gen_mfvsrwz(DisasContext *ctx)
>  {

Reviewed-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>


ATB,

Mark.