All TCG vector operations require pointers to the base address of the vector
rather than separate access to the top and bottom 64 bits. Convert the VMX TCG
instructions to use a new avr_full_offset() function instead of avr64_offset();
avr_full_offset() can then itself be written as a simple wrapper around
vsr_full_offset().

This same function can also be reused in cpu_avr_ptr() to avoid having more
than one copy of the offset calculation logic.

Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
---
 target/ppc/cpu.h                    | 12 +++++++++++-
 target/ppc/translate/vmx-impl.inc.c | 22 +++++++++++-----------
 target/ppc/translate/vsx-impl.inc.c |  5 -----
 3 files changed, 22 insertions(+), 17 deletions(-)
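For context, the offset relationship introduced here can be modelled
standalone. The following is a minimal sketch, not QEMU code (CPUPPCState
and ppc_vsr_t are pared down to just what the offset math touches): it
shows that avr_full_offset(i) is simply the byte offset of vsr[i + 32]
inside the state structure, and that the base-plus-offset pointer
arithmetic used by cpu_avr_ptr() recovers the aliased register.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Simplified stand-ins: the real CPUPPCState carries far more state. */
typedef union {
    uint64_t u64[2];
    uint8_t u8[16];
} ppc_vsr_t;

typedef struct {
    ppc_vsr_t vsr[64];   /* VSRs 0-31; AVRs aliased onto VSRs 32-63 */
} CPUPPCState;

static inline int vsr_full_offset(int i)
{
    return offsetof(CPUPPCState, vsr[i].u64[0]);
}

static inline int avr_full_offset(int i)
{
    return vsr_full_offset(i + 32);
}

int main(void)
{
    CPUPPCState env;

    /* Same base-plus-offset arithmetic as cpu_avr_ptr(). */
    ppc_vsr_t *avr0 = (ppc_vsr_t *)((uintptr_t)&env + avr_full_offset(0));

    printf("avr_full_offset(0) = %d\n", avr_full_offset(0));
    printf("avr0 aliases vsr[32]: %s\n",
           avr0 == &env.vsr[32] ? "yes" : "no");
    return 0;
}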
diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index d0580c6b6d..2a2792306f 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -2598,14 +2598,24 @@ static inline int vsrl_offset(int i)
return offsetof(CPUPPCState, vsr[i].u64[1]);
}
+static inline int vsr_full_offset(int i)
+{
+ return offsetof(CPUPPCState, vsr[i].u64[0]);
+}
+
static inline uint64_t *cpu_vsrl_ptr(CPUPPCState *env, int i)
{
return (uint64_t *)((uintptr_t)env + vsrl_offset(i));
}
+static inline int avr_full_offset(int i)
+{
+ return vsr_full_offset(i + 32);
+}
+
static inline ppc_avr_t *cpu_avr_ptr(CPUPPCState *env, int i)
{
- return &env->vsr[32 + i];
+ return (ppc_avr_t *)((uintptr_t)env + avr_full_offset(i));
}
void dump_mmu(FILE *f, fprintf_function cpu_fprintf, CPUPPCState *env);
diff --git a/target/ppc/translate/vmx-impl.inc.c b/target/ppc/translate/vmx-impl.inc.c
index f1b15ae2cb..4e5d0bc0e0 100644
--- a/target/ppc/translate/vmx-impl.inc.c
+++ b/target/ppc/translate/vmx-impl.inc.c
@@ -10,7 +10,7 @@
static inline TCGv_ptr gen_avr_ptr(int reg)
{
TCGv_ptr r = tcg_temp_new_ptr();
- tcg_gen_addi_ptr(r, cpu_env, offsetof(CPUPPCState, vsr[32 + reg].u64[0]));
+ tcg_gen_addi_ptr(r, cpu_env, avr_full_offset(reg));
return r;
}
@@ -205,7 +205,7 @@ static void gen_mtvscr(DisasContext *ctx)
}
val = tcg_temp_new_i32();
- bofs = avr64_offset(rB(ctx->opcode), true);
+ bofs = avr_full_offset(rB(ctx->opcode));
#ifdef HOST_WORDS_BIGENDIAN
bofs += 3 * 4;
#endif
@@ -284,9 +284,9 @@ static void glue(gen_, name)(DisasContext *ctx) \
} \
\
tcg_op(vece, \
- avr64_offset(rD(ctx->opcode), true), \
- avr64_offset(rA(ctx->opcode), true), \
- avr64_offset(rB(ctx->opcode), true), \
+ avr_full_offset(rD(ctx->opcode)), \
+ avr_full_offset(rA(ctx->opcode)), \
+ avr_full_offset(rB(ctx->opcode)), \
16, 16); \
}
@@ -578,10 +578,10 @@ static void glue(gen_, NAME)(DisasContext *ctx) \
gen_exception(ctx, POWERPC_EXCP_VPU); \
return; \
} \
- tcg_gen_gvec_4(avr64_offset(rD(ctx->opcode), true), \
+ tcg_gen_gvec_4(avr_full_offset(rD(ctx->opcode)), \
offsetof(CPUPPCState, vscr_sat), \
- avr64_offset(rA(ctx->opcode), true), \
- avr64_offset(rB(ctx->opcode), true), \
+ avr_full_offset(rA(ctx->opcode)), \
+ avr_full_offset(rB(ctx->opcode)), \
16, 16, &g); \
}
@@ -755,7 +755,7 @@ static void glue(gen_, name)(DisasContext *ctx) \
return; \
} \
simm = SIMM5(ctx->opcode); \
- tcg_op(avr64_offset(rD(ctx->opcode), true), 16, 16, simm); \
+ tcg_op(avr_full_offset(rD(ctx->opcode)), 16, 16, simm); \
}
GEN_VXFORM_DUPI(vspltisb, tcg_gen_gvec_dup8i, 6, 12);
@@ -850,8 +850,8 @@ static void gen_vsplt(DisasContext *ctx, int vece)
}
uimm = UIMM5(ctx->opcode);
- bofs = avr64_offset(rB(ctx->opcode), true);
- dofs = avr64_offset(rD(ctx->opcode), true);
+ bofs = avr_full_offset(rB(ctx->opcode));
+ dofs = avr_full_offset(rD(ctx->opcode));
/* Experimental testing shows that hardware masks the immediate. */
bofs += (uimm << vece) & 15;
diff --git a/target/ppc/translate/vsx-impl.inc.c b/target/ppc/translate/vsx-impl.inc.c
index 381ae0f2e9..7d02a235e7 100644
--- a/target/ppc/translate/vsx-impl.inc.c
+++ b/target/ppc/translate/vsx-impl.inc.c
@@ -10,11 +10,6 @@ static inline void set_vsrl(int n, TCGv_i64 src)
tcg_gen_st_i64(src, cpu_env, vsrl_offset(n));
}
-static inline int vsr_full_offset(int n)
-{
- return offsetof(CPUPPCState, vsr[n].u64[0]);
-}
-
static inline void get_cpu_vsrh(TCGv_i64 dst, int n)
{
if (n < 32) {
--
2.11.0
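One subtlety in the gen_mtvscr hunk above deserves a note: avr_full_offset()
always points at byte 0 of the 16-byte vector as laid out in host memory, so
the extra bofs += 3 * 4 under HOST_WORDS_BIGENDIAN is what selects the 32-bit
word holding the architecturally-lowest bits of the register. A minimal
sketch of the same selection, done with a runtime endianness probe instead
of the preprocessor (illustrative only, not QEMU code):

#include <stdint.h>
#include <stdio.h>

/* Byte offset, within a 16-byte vector stored in host order, of the 32-bit
 * word containing the architecturally-lowest bits: word 0 on little-endian
 * hosts, word 3 (byte offset 3 * 4) on big-endian hosts. */
static int low_word_byte_offset(void)
{
    const union { uint32_t w; uint8_t b[4]; } probe = { .w = 1 };
    int host_is_big_endian = (probe.b[0] == 0);
    return host_is_big_endian ? 3 * 4 : 0;
}

int main(void)
{
    printf("low 32-bit word at byte offset %d\n", low_word_byte_offset());
    return 0;
}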
On 3/7/19 10:05 AM, Mark Cave-Ayland wrote:
> All TCG vector operations require pointers to the base address of the vector
> rather than separate access to the top and bottom 64 bits. Convert the VMX TCG
> instructions to use a new avr_full_offset() function instead of avr64_offset();
> avr_full_offset() can then itself be written as a simple wrapper around
> vsr_full_offset().
>
> This same function can also be reused in cpu_avr_ptr() to avoid having more
> than one copy of the offset calculation logic.
>
> Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
> ---
>  target/ppc/cpu.h                    | 12 +++++++++++-
>  target/ppc/translate/vmx-impl.inc.c | 22 +++++++++++-----------
>  target/ppc/translate/vsx-impl.inc.c |  5 -----
>  3 files changed, 22 insertions(+), 17 deletions(-)

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>

r~
On Thu, Mar 07, 2019 at 06:05:17PM +0000, Mark Cave-Ayland wrote:
> All TCG vector operations require pointers to the base address of the vector
> rather than separate access to the top and bottom 64 bits. Convert the VMX TCG
> instructions to use a new avr_full_offset() function instead of avr64_offset();
> avr_full_offset() can then itself be written as a simple wrapper around
> vsr_full_offset().
>
> This same function can also be reused in cpu_avr_ptr() to avoid having more
> than one copy of the offset calculation logic.
>
> Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Applied, thanks.
--
David Gibson | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson