target/arm: Pass arguments by value for sve FMLA/FCMLA

[PATCH 1/2] tcg: Add tcg_gen_gvec_5_ptr

Posted by Richard Henderson 5 years, 9 months ago

Extend the vector generator infrastructure to handle
5 vector arguments.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/tcg/tcg-op-gvec.h |  7 +++++++
 tcg/tcg-op-gvec.c         | 32 ++++++++++++++++++++++++++++++++
 2 files changed, 39 insertions(+)

diff --git a/include/tcg/tcg-op-gvec.h b/include/tcg/tcg-op-gvec.h
index 830d68f697..74534e2480 100644
--- a/include/tcg/tcg-op-gvec.h
+++ b/include/tcg/tcg-op-gvec.h
@@ -83,6 +83,13 @@ void tcg_gen_gvec_4_ptr(uint32_t dofs, uint32_t aofs, uint32_t bofs,
                         uint32_t maxsz, int32_t data,
                         gen_helper_gvec_4_ptr *fn);
 
+typedef void gen_helper_gvec_5_ptr(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr,
+                                   TCGv_ptr, TCGv_ptr, TCGv_i32);
+void tcg_gen_gvec_5_ptr(uint32_t dofs, uint32_t aofs, uint32_t bofs,
+                        uint32_t cofs, uint32_t eofs, TCGv_ptr ptr,
+                        uint32_t oprsz, uint32_t maxsz, int32_t data,
+                        gen_helper_gvec_5_ptr *fn);
+
 /* Expand a gvec operation.  Either inline or out-of-line depending on
    the actual vector size and the operations supported by the host.  */
 typedef struct {
diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c
index 41b4a3c661..327d9588e0 100644
--- a/tcg/tcg-op-gvec.c
+++ b/tcg/tcg-op-gvec.c
@@ -290,6 +290,38 @@ void tcg_gen_gvec_4_ptr(uint32_t dofs, uint32_t aofs, uint32_t bofs,
     tcg_temp_free_i32(desc);
 }
 
+/* Generate a call to a gvec-style helper with five vector operands
+   and an extra pointer operand.  */
+void tcg_gen_gvec_5_ptr(uint32_t dofs, uint32_t aofs, uint32_t bofs,
+                        uint32_t cofs, uint32_t eofs, TCGv_ptr ptr,
+                        uint32_t oprsz, uint32_t maxsz, int32_t data,
+                        gen_helper_gvec_5_ptr *fn)
+{
+    TCGv_ptr a0, a1, a2, a3, a4;
+    TCGv_i32 desc = tcg_const_i32(simd_desc(oprsz, maxsz, data));
+
+    a0 = tcg_temp_new_ptr();
+    a1 = tcg_temp_new_ptr();
+    a2 = tcg_temp_new_ptr();
+    a3 = tcg_temp_new_ptr();
+    a4 = tcg_temp_new_ptr();
+
+    tcg_gen_addi_ptr(a0, cpu_env, dofs);
+    tcg_gen_addi_ptr(a1, cpu_env, aofs);
+    tcg_gen_addi_ptr(a2, cpu_env, bofs);
+    tcg_gen_addi_ptr(a3, cpu_env, cofs);
+    tcg_gen_addi_ptr(a4, cpu_env, eofs);
+
+    fn(a0, a1, a2, a3, a4, ptr, desc);
+
+    tcg_temp_free_ptr(a0);
+    tcg_temp_free_ptr(a1);
+    tcg_temp_free_ptr(a2);
+    tcg_temp_free_ptr(a3);
+    tcg_temp_free_ptr(a4);
+    tcg_temp_free_i32(desc);
+}
+
 /* Return true if we want to implement something of OPRSZ bytes
    in units of LNSZ.  This limits the expansion of inline code.  */
 static inline bool check_size_impl(uint32_t oprsz, uint32_t lnsz)
-- 
2.20.1

RE: [PATCH 1/2] tcg: Add tcg_gen_gvec_5_ptr

Posted by Taylor Simpson 5 years, 9 months ago

Reviewed-by: Taylor Simpson <tsimpson@quicinc.com>


> -----Original Message-----
> From: Richard Henderson <richard.henderson@linaro.org>
> Sent: Tuesday, February 11, 2020 8:52 PM
> To: qemu-devel@nongnu.org
> Cc: peter.maydell@linaro.org; alex.bennee@linaro.org; Taylor Simpson
> <tsimpson@quicinc.com>
> Subject: [PATCH 1/2] tcg: Add tcg_gen_gvec_5_ptr
>
> Extend the vector generator infrastructure to handle
> 5 vector arguments.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  include/tcg/tcg-op-gvec.h |  7 +++++++
>  tcg/tcg-op-gvec.c         | 32 ++++++++++++++++++++++++++++++++
>  2 files changed, 39 insertions(+)
>
> diff --git a/include/tcg/tcg-op-gvec.h b/include/tcg/tcg-op-gvec.h
> index 830d68f697..74534e2480 100644
> --- a/include/tcg/tcg-op-gvec.h
> +++ b/include/tcg/tcg-op-gvec.h
> @@ -83,6 +83,13 @@ void tcg_gen_gvec_4_ptr(uint32_t dofs, uint32_t aofs,
> uint32_t bofs,
>                          uint32_t maxsz, int32_t data,
>                          gen_helper_gvec_4_ptr *fn);
>
> +typedef void gen_helper_gvec_5_ptr(TCGv_ptr, TCGv_ptr, TCGv_ptr,
> TCGv_ptr,
> +                                   TCGv_ptr, TCGv_ptr, TCGv_i32);
> +void tcg_gen_gvec_5_ptr(uint32_t dofs, uint32_t aofs, uint32_t bofs,
> +                        uint32_t cofs, uint32_t eofs, TCGv_ptr ptr,
> +                        uint32_t oprsz, uint32_t maxsz, int32_t data,
> +                        gen_helper_gvec_5_ptr *fn);
> +
>  /* Expand a gvec operation.  Either inline or out-of-line depending on
>     the actual vector size and the operations supported by the host.  */
>  typedef struct {
> diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c
> index 41b4a3c661..327d9588e0 100644
> --- a/tcg/tcg-op-gvec.c
> +++ b/tcg/tcg-op-gvec.c
> @@ -290,6 +290,38 @@ void tcg_gen_gvec_4_ptr(uint32_t dofs, uint32_t
> aofs, uint32_t bofs,
>      tcg_temp_free_i32(desc);
>  }
>
> +/* Generate a call to a gvec-style helper with five vector operands
> +   and an extra pointer operand.  */
> +void tcg_gen_gvec_5_ptr(uint32_t dofs, uint32_t aofs, uint32_t bofs,
> +                        uint32_t cofs, uint32_t eofs, TCGv_ptr ptr,
> +                        uint32_t oprsz, uint32_t maxsz, int32_t data,
> +                        gen_helper_gvec_5_ptr *fn)
> +{
> +    TCGv_ptr a0, a1, a2, a3, a4;
> +    TCGv_i32 desc = tcg_const_i32(simd_desc(oprsz, maxsz, data));
> +
> +    a0 = tcg_temp_new_ptr();
> +    a1 = tcg_temp_new_ptr();
> +    a2 = tcg_temp_new_ptr();
> +    a3 = tcg_temp_new_ptr();
> +    a4 = tcg_temp_new_ptr();
> +
> +    tcg_gen_addi_ptr(a0, cpu_env, dofs);
> +    tcg_gen_addi_ptr(a1, cpu_env, aofs);
> +    tcg_gen_addi_ptr(a2, cpu_env, bofs);
> +    tcg_gen_addi_ptr(a3, cpu_env, cofs);
> +    tcg_gen_addi_ptr(a4, cpu_env, eofs);
> +
> +    fn(a0, a1, a2, a3, a4, ptr, desc);
> +
> +    tcg_temp_free_ptr(a0);
> +    tcg_temp_free_ptr(a1);
> +    tcg_temp_free_ptr(a2);
> +    tcg_temp_free_ptr(a3);
> +    tcg_temp_free_ptr(a4);
> +    tcg_temp_free_i32(desc);
> +}
> +
>  /* Return true if we want to implement something of OPRSZ bytes
>     in units of LNSZ.  This limits the expansion of inline code.  */
>  static inline bool check_size_impl(uint32_t oprsz, uint32_t lnsz)
> --
> 2.20.1
>

Re: [PATCH 1/2] tcg: Add tcg_gen_gvec_5_ptr

Posted by Alex Bennée 5 years, 9 months ago

Richard Henderson <richard.henderson@linaro.org> writes:

> Extend the vector generator infrastructure to handle
> 5 vector arguments.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>

> ---
>  include/tcg/tcg-op-gvec.h |  7 +++++++
>  tcg/tcg-op-gvec.c         | 32 ++++++++++++++++++++++++++++++++
>  2 files changed, 39 insertions(+)
>
> diff --git a/include/tcg/tcg-op-gvec.h b/include/tcg/tcg-op-gvec.h
> index 830d68f697..74534e2480 100644
> --- a/include/tcg/tcg-op-gvec.h
> +++ b/include/tcg/tcg-op-gvec.h
> @@ -83,6 +83,13 @@ void tcg_gen_gvec_4_ptr(uint32_t dofs, uint32_t aofs, uint32_t bofs,
>                          uint32_t maxsz, int32_t data,
>                          gen_helper_gvec_4_ptr *fn);
>  
> +typedef void gen_helper_gvec_5_ptr(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr,
> +                                   TCGv_ptr, TCGv_ptr, TCGv_i32);
> +void tcg_gen_gvec_5_ptr(uint32_t dofs, uint32_t aofs, uint32_t bofs,
> +                        uint32_t cofs, uint32_t eofs, TCGv_ptr ptr,
> +                        uint32_t oprsz, uint32_t maxsz, int32_t data,
> +                        gen_helper_gvec_5_ptr *fn);
> +
>  /* Expand a gvec operation.  Either inline or out-of-line depending on
>     the actual vector size and the operations supported by the host.  */
>  typedef struct {
> diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c
> index 41b4a3c661..327d9588e0 100644
> --- a/tcg/tcg-op-gvec.c
> +++ b/tcg/tcg-op-gvec.c
> @@ -290,6 +290,38 @@ void tcg_gen_gvec_4_ptr(uint32_t dofs, uint32_t aofs, uint32_t bofs,
>      tcg_temp_free_i32(desc);
>  }
>  
> +/* Generate a call to a gvec-style helper with five vector operands
> +   and an extra pointer operand.  */
> +void tcg_gen_gvec_5_ptr(uint32_t dofs, uint32_t aofs, uint32_t bofs,
> +                        uint32_t cofs, uint32_t eofs, TCGv_ptr ptr,
> +                        uint32_t oprsz, uint32_t maxsz, int32_t data,
> +                        gen_helper_gvec_5_ptr *fn)
> +{
> +    TCGv_ptr a0, a1, a2, a3, a4;
> +    TCGv_i32 desc = tcg_const_i32(simd_desc(oprsz, maxsz, data));
> +
> +    a0 = tcg_temp_new_ptr();
> +    a1 = tcg_temp_new_ptr();
> +    a2 = tcg_temp_new_ptr();
> +    a3 = tcg_temp_new_ptr();
> +    a4 = tcg_temp_new_ptr();
> +
> +    tcg_gen_addi_ptr(a0, cpu_env, dofs);
> +    tcg_gen_addi_ptr(a1, cpu_env, aofs);
> +    tcg_gen_addi_ptr(a2, cpu_env, bofs);
> +    tcg_gen_addi_ptr(a3, cpu_env, cofs);
> +    tcg_gen_addi_ptr(a4, cpu_env, eofs);
> +
> +    fn(a0, a1, a2, a3, a4, ptr, desc);
> +
> +    tcg_temp_free_ptr(a0);
> +    tcg_temp_free_ptr(a1);
> +    tcg_temp_free_ptr(a2);
> +    tcg_temp_free_ptr(a3);
> +    tcg_temp_free_ptr(a4);
> +    tcg_temp_free_i32(desc);
> +}
> +
>  /* Return true if we want to implement something of OPRSZ bytes
>     in units of LNSZ.  This limits the expansion of inline code.  */
>  static inline bool check_size_impl(uint32_t oprsz, uint32_t lnsz)


-- 
Alex Bennée

Re: [PATCH 1/2] tcg: Add tcg_gen_gvec_5_ptr

Posted by Philippe Mathieu-Daudé 5 years, 9 months ago

On 2/12/20 3:51 AM, Richard Henderson wrote:
> Extend the vector generator infrastructure to handle
> 5 vector arguments.
> 
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>   include/tcg/tcg-op-gvec.h |  7 +++++++
>   tcg/tcg-op-gvec.c         | 32 ++++++++++++++++++++++++++++++++
>   2 files changed, 39 insertions(+)
> 
> diff --git a/include/tcg/tcg-op-gvec.h b/include/tcg/tcg-op-gvec.h
> index 830d68f697..74534e2480 100644
> --- a/include/tcg/tcg-op-gvec.h
> +++ b/include/tcg/tcg-op-gvec.h
> @@ -83,6 +83,13 @@ void tcg_gen_gvec_4_ptr(uint32_t dofs, uint32_t aofs, uint32_t bofs,
>                           uint32_t maxsz, int32_t data,
>                           gen_helper_gvec_4_ptr *fn);
>   
> +typedef void gen_helper_gvec_5_ptr(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr,
> +                                   TCGv_ptr, TCGv_ptr, TCGv_i32);
> +void tcg_gen_gvec_5_ptr(uint32_t dofs, uint32_t aofs, uint32_t bofs,
> +                        uint32_t cofs, uint32_t eofs, TCGv_ptr ptr,
> +                        uint32_t oprsz, uint32_t maxsz, int32_t data,
> +                        gen_helper_gvec_5_ptr *fn);
> +
>   /* Expand a gvec operation.  Either inline or out-of-line depending on
>      the actual vector size and the operations supported by the host.  */
>   typedef struct {
> diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c
> index 41b4a3c661..327d9588e0 100644
> --- a/tcg/tcg-op-gvec.c
> +++ b/tcg/tcg-op-gvec.c
> @@ -290,6 +290,38 @@ void tcg_gen_gvec_4_ptr(uint32_t dofs, uint32_t aofs, uint32_t bofs,
>       tcg_temp_free_i32(desc);
>   }
>   
> +/* Generate a call to a gvec-style helper with five vector operands
> +   and an extra pointer operand.  */
> +void tcg_gen_gvec_5_ptr(uint32_t dofs, uint32_t aofs, uint32_t bofs,
> +                        uint32_t cofs, uint32_t eofs, TCGv_ptr ptr,
> +                        uint32_t oprsz, uint32_t maxsz, int32_t data,
> +                        gen_helper_gvec_5_ptr *fn)
> +{
> +    TCGv_ptr a0, a1, a2, a3, a4;
> +    TCGv_i32 desc = tcg_const_i32(simd_desc(oprsz, maxsz, data));
> +
> +    a0 = tcg_temp_new_ptr();
> +    a1 = tcg_temp_new_ptr();
> +    a2 = tcg_temp_new_ptr();
> +    a3 = tcg_temp_new_ptr();
> +    a4 = tcg_temp_new_ptr();
> +
> +    tcg_gen_addi_ptr(a0, cpu_env, dofs);
> +    tcg_gen_addi_ptr(a1, cpu_env, aofs);
> +    tcg_gen_addi_ptr(a2, cpu_env, bofs);
> +    tcg_gen_addi_ptr(a3, cpu_env, cofs);
> +    tcg_gen_addi_ptr(a4, cpu_env, eofs);
> +
> +    fn(a0, a1, a2, a3, a4, ptr, desc);
> +
> +    tcg_temp_free_ptr(a0);
> +    tcg_temp_free_ptr(a1);
> +    tcg_temp_free_ptr(a2);
> +    tcg_temp_free_ptr(a3);
> +    tcg_temp_free_ptr(a4);
> +    tcg_temp_free_i32(desc);
> +}
> +
>   /* Return true if we want to implement something of OPRSZ bytes
>      in units of LNSZ.  This limits the expansion of inline code.  */
>   static inline bool check_size_impl(uint32_t oprsz, uint32_t lnsz)
> 

Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>