[v2] tcg vector improvements

[Qemu-devel] [PATCH v2 12/29] tcg/aarch64: Implement tcg_out_dupm_vec

Posted by Richard Henderson 6 years, 9 months ago

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/aarch64/tcg-target.inc.c | 38 ++++++++++++++++++++++++++++++++++--
 1 file changed, 36 insertions(+), 2 deletions(-)

diff --git a/tcg/aarch64/tcg-target.inc.c b/tcg/aarch64/tcg-target.inc.c
index 4a3cfa778a..411fb463ac 100644
--- a/tcg/aarch64/tcg-target.inc.c
+++ b/tcg/aarch64/tcg-target.inc.c
@@ -381,6 +381,9 @@ typedef enum {
     I3207_BLR       = 0xd63f0000,
     I3207_RET       = 0xd65f0000,
 
+    /* AdvSIMD load/store single structure.  */
+    I3303_LD1R      = 0x0d40c000,
+
     /* Load literal for loading the address at pc-relative offset */
     I3305_LDR       = 0x58000000,
     I3305_LDR_v64   = 0x5c000000,
@@ -414,6 +417,8 @@ typedef enum {
     I3312_LDRVQ     = 0x3c000000 | 3 << 22 | 0 << 30,
     I3312_STRVQ     = 0x3c000000 | 2 << 22 | 0 << 30,
 
+
+
     I3312_TO_I3310  = 0x00200800,
     I3312_TO_I3313  = 0x01000000,
 
@@ -566,7 +571,14 @@ static inline uint32_t tcg_in32(TCGContext *s)
 #define tcg_out_insn(S, FMT, OP, ...) \
     glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
 
-static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn, int imm19, TCGReg rt)
+static void tcg_out_insn_3303(TCGContext *s, AArch64Insn insn, bool q,
+                              TCGReg rt, TCGReg rn, unsigned size)
+{
+    tcg_out32(s, insn | (rt & 0x1f) | (rn << 5) | (size << 10) | (q << 30));
+}
+
+static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn,
+                              int imm19, TCGReg rt)
 {
     tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);
 }
@@ -825,7 +837,29 @@ static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
 static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                              TCGReg r, TCGReg base, intptr_t offset)
 {
-    return false;
+    if (offset != 0) {
+        AArch64Insn add_insn = I3401_ADDI;
+        TCGReg temp = TCG_REG_TMP;
+
+        if (offset < 0) {
+            add_insn = I3401_SUBI;
+            offset = -offset;
+        }
+        if (offset <= 0xfff) {
+            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset);
+        } else if (offset <= 0xffffff) {
+            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff000);
+            if (offset & 0xfff) {
+                tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff);
+            }
+        } else {
+            tcg_out_movi(s, TCG_TYPE_PTR, temp, offset);
+            tcg_out_insn(s, 3502, ADD, 1, temp, temp, base);
+        }
+        base = temp;
+    }
+    tcg_out_insn(s, 3303, LD1R, type == TCG_TYPE_V128, r, base, vece);
+    return true;
 }
 
 static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
-- 
2.17.1

Re: [Qemu-devel] [PATCH v2 12/29] tcg/aarch64: Implement tcg_out_dupm_vec

Posted by Alex Bennée 6 years, 9 months ago

Richard Henderson <richard.henderson@linaro.org> writes:

> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  tcg/aarch64/tcg-target.inc.c | 38 ++++++++++++++++++++++++++++++++++--
>  1 file changed, 36 insertions(+), 2 deletions(-)
>
> diff --git a/tcg/aarch64/tcg-target.inc.c b/tcg/aarch64/tcg-target.inc.c
> index 4a3cfa778a..411fb463ac 100644
> --- a/tcg/aarch64/tcg-target.inc.c
> +++ b/tcg/aarch64/tcg-target.inc.c
> @@ -381,6 +381,9 @@ typedef enum {
>      I3207_BLR       = 0xd63f0000,
>      I3207_RET       = 0xd65f0000,
>
> +    /* AdvSIMD load/store single structure.  */
> +    I3303_LD1R      = 0x0d40c000,
> +

I can't recall where these magic numbers come from again? The (moving)
section numbers of the ARM ARM?

I was hoping the XML had a bit more guidance on the encoding names but
we get:

    <iclass name="No offset" oneof="2" id="LD1R_asisdlso_R1" no_encodings="1" isa="A64">
and
    <iclass name="Post-index" oneof="2" id="as_post_index" no_encodings="2" isa="A64">

Although the instruction does have:

   <instructionsection id="LD1R_advsimd" title="LD1R -- A64" type="instruction">


>      /* Load literal for loading the address at pc-relative offset */
>      I3305_LDR       = 0x58000000,
>      I3305_LDR_v64   = 0x5c000000,
> @@ -414,6 +417,8 @@ typedef enum {
>      I3312_LDRVQ     = 0x3c000000 | 3 << 22 | 0 << 30,
>      I3312_STRVQ     = 0x3c000000 | 2 << 22 | 0 << 30,
>
> +
> +
>      I3312_TO_I3310  = 0x00200800,
>      I3312_TO_I3313  = 0x01000000,
>
> @@ -566,7 +571,14 @@ static inline uint32_t tcg_in32(TCGContext *s)
>  #define tcg_out_insn(S, FMT, OP, ...) \
>      glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ##
> __VA_ARGS__)

The above is basically a winge as to what do we really get out of this
"type checking"?

>
> -static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn, int imm19, TCGReg rt)
> +static void tcg_out_insn_3303(TCGContext *s, AArch64Insn insn, bool q,
> +                              TCGReg rt, TCGReg rn, unsigned size)
> +{
> +    tcg_out32(s, insn | (rt & 0x1f) | (rn << 5) | (size << 10) | (q << 30));
> +}
> +
> +static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn,
> +                              int imm19, TCGReg rt)
>  {
>      tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);
>  }
> @@ -825,7 +837,29 @@ static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
>  static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
>                               TCGReg r, TCGReg base, intptr_t offset)
>  {
> -    return false;
> +    if (offset != 0) {
> +        AArch64Insn add_insn = I3401_ADDI;
> +        TCGReg temp = TCG_REG_TMP;
> +
> +        if (offset < 0) {
> +            add_insn = I3401_SUBI;
> +            offset = -offset;
> +        }
> +        if (offset <= 0xfff) {
> +            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset);
> +        } else if (offset <= 0xffffff) {
> +            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff000);
> +            if (offset & 0xfff) {
> +                tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff);
> +            }
> +        } else {
> +            tcg_out_movi(s, TCG_TYPE_PTR, temp, offset);
> +            tcg_out_insn(s, 3502, ADD, 1, temp, temp, base);
> +        }
> +        base = temp;
> +    }
> +    tcg_out_insn(s, 3303, LD1R, type == TCG_TYPE_V128, r, base, vece);
> +    return true;
>  }
>
>  static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,


--
Alex Bennée

Re: [Qemu-devel] [PATCH v2 12/29] tcg/aarch64: Implement tcg_out_dupm_vec

Posted by Richard Henderson 6 years, 9 months ago

On 5/2/19 6:26 AM, Alex Bennée wrote:
>> +    /* AdvSIMD load/store single structure.  */
>> +    I3303_LD1R      = 0x0d40c000,
>> +
> 
> I can't recall where these magic numbers come from again? The (moving)
> section numbers of the ARM ARM?

They come from the A_a version of the ARM ARM.

The current D_a version has now even removed the section numbers, instead of
slowly modifying them as they did through B and C revisions.

> I was hoping the XML had a bit more guidance on the encoding names but
> we get:

Yeah, ARM doesn't name these at all.

I have wondered if they are adverse to naming encodings, because if they had to
name them all they would feel constrained to not invent so many strange
encodings.  ;-)

> The above is basically a winge as to what do we really get out of this
> "type checking"?

Well, ignore the "type checking" for a moment.

How would you distinguish all of the different encoding functions?
Or would you just open-code every single instruction like we do in
tcg/arm/ and tcg/ppc/?

Let me know if you come up with a scheme that works better than this.

r~