[PATCH v2] Hexagon (target/hexagon) Fix assignment to tmp registers

Marco Liebel posted 1 patch 11 months, 2 weeks ago
Patches applied successfully (tree, apply log)
git fetch https://github.com/patchew-project/qemu tags/patchew/20230522174708.464197-1-quic._5Fmliebel@quicinc.com
Maintainers: Taylor Simpson <tsimpson@quicinc.com>
target/hexagon/mmvec/decode_ext_mmvec.c |  8 +++----
tests/tcg/hexagon/hvx_misc.c            | 31 +++++++++++++++++++++++++
2 files changed, 35 insertions(+), 4 deletions(-)
[PATCH v2] Hexagon (target/hexagon) Fix assignment to tmp registers
Posted by Marco Liebel 11 months, 2 weeks ago
The order in which instructions are generated by gen_insn() influences
assignment to tmp registers. During generation, tmp instructions (e.g.
generate_V6_vassign_tmp) use vreg_src_off() to determine what kind of
register to use as source. If some instruction (e.g.
generate_V6_vmpyowh_64_acc) uses a tmp register but is generated prior
to the corresponding tmp instruction, the vregs_updated_tmp bit map
isn't updated in time.

Example:
    { v14.tmp = v16; v25 = v14 } This works properly because
    generate_V6_vassign_tmp is generated before generate_V6_vassign
    and the bit map is updated.

    { v15:14.tmp = vcombine(v21, v16); v25:24 += vmpyo(v18.w,v14.h) }
    This does not work properly because vmpyo is generated before
    vcombine and therefore the bit map does not yet know that there's
    a tmp register.

The parentheses in the decoding function were in the wrong place.
Moving them to the correct location makes shuffling of .tmp vector
registers work as expected.

Signed-off-by: Marco Liebel <quic_mliebel@quicinc.com>
---
 target/hexagon/mmvec/decode_ext_mmvec.c |  8 +++----
 tests/tcg/hexagon/hvx_misc.c            | 31 +++++++++++++++++++++++++
 2 files changed, 35 insertions(+), 4 deletions(-)

diff --git a/target/hexagon/mmvec/decode_ext_mmvec.c b/target/hexagon/mmvec/decode_ext_mmvec.c
index 061a65ab88..174eb3b78b 100644
--- a/target/hexagon/mmvec/decode_ext_mmvec.c
+++ b/target/hexagon/mmvec/decode_ext_mmvec.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *  Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -148,9 +148,9 @@ decode_shuffle_for_execution_vops(Packet *pkt)
     int i;
     for (i = 0; i < pkt->num_insns; i++) {
         uint16_t opcode = pkt->insn[i].opcode;
-        if (GET_ATTRIB(opcode, A_LOAD) &&
-            (GET_ATTRIB(opcode, A_CVI_NEW) ||
-             GET_ATTRIB(opcode, A_CVI_TMP))) {
+        if ((GET_ATTRIB(opcode, A_LOAD) &&
+             GET_ATTRIB(opcode, A_CVI_NEW)) ||
+            GET_ATTRIB(opcode, A_CVI_TMP)) {
             /*
              * Find prior consuming vector instructions
              * Move to end of packet
diff --git a/tests/tcg/hexagon/hvx_misc.c b/tests/tcg/hexagon/hvx_misc.c
index 09dec8d7a1..b45170acd1 100644
--- a/tests/tcg/hexagon/hvx_misc.c
+++ b/tests/tcg/hexagon/hvx_misc.c
@@ -60,6 +60,36 @@ static void test_load_tmp(void)
     check_output_w(__LINE__, BUFSIZE);
 }
 
+static void test_load_tmp2(void)
+{
+    void *pout0 = &output[0];
+    void *pout1 = &output[1];
+
+    asm volatile(
+        "r0 = #0x03030303\n\t"
+        "v16 = vsplat(r0)\n\t"
+        "r0 = #0x04040404\n\t"
+        "v18 = vsplat(r0)\n\t"
+        "r0 = #0x05050505\n\t"
+        "v21 = vsplat(r0)\n\t"
+        "{\n\t"
+        "   v25:24 += vmpyo(v18.w, v14.h)\n\t"
+        "   v15:14.tmp = vcombine(v21, v16)\n\t"
+        "}\n\t"
+        "vmem(%0 + #0) = v24\n\t"
+        "vmem(%1 + #0) = v25\n\t"
+        : : "r"(pout0), "r"(pout1)
+        : "r0", "v16", "v18", "v21", "v24", "v25", "memory"
+    );
+
+    for (int i = 0; i < MAX_VEC_SIZE_BYTES / 4; ++i) {
+        expect[0].w[i] = 0x180c0000;
+        expect[1].w[i] = 0x000c1818;
+    }
+
+    check_output_w(__LINE__, 2);
+}
+
 static void test_load_cur(void)
 {
     void *p0 = buffer0;
@@ -435,6 +465,7 @@ int main()
     init_buffers();
 
     test_load_tmp();
+    test_load_tmp2();
     test_load_cur();
     test_load_aligned();
     test_load_unaligned();
-- 
2.25.1
RE: [PATCH v2] Hexagon (target/hexagon) Fix assignment to tmp registers
Posted by Brian Cain 11 months, 2 weeks ago

> -----Original Message-----
> From: Marco Liebel (QUIC) <quic_mliebel@quicinc.com>
> Sent: Monday, May 22, 2023 12:47 PM
> To: qemu-devel@nongnu.org
> Cc: Taylor Simpson <tsimpson@quicinc.com>; Brian Cain <bcain@quicinc.com>;
> Marco Liebel (QUIC) <quic_mliebel@quicinc.com>
> Subject: [PATCH v2] Hexagon (target/hexagon) Fix assignment to tmp registers
> 
> The order in which instructions are generated by gen_insn() influences
> assignment to tmp registers. During generation, tmp instructions (e.g.
> generate_V6_vassign_tmp) use vreg_src_off() to determine what kind of
> register to use as source. If some instruction (e.g.
> generate_V6_vmpyowh_64_acc) uses a tmp register but is generated prior
> to the corresponding tmp instruction, the vregs_updated_tmp bit map
> isn't updated in time.
> 
> Example:
>     { v14.tmp = v16; v25 = v14 } This works properly because
>     generate_V6_vassign_tmp is generated before generate_V6_vassign
>     and the bit map is updated.
> 
>     { v15:14.tmp = vcombine(v21, v16); v25:24 += vmpyo(v18.w,v14.h) }
>     This does not work properly because vmpyo is generated before
>     vcombine and therefore the bit map does not yet know that there's
>     a tmp register.
> 
> The parentheses in the decoding function were in the wrong place.
> Moving them to the correct location makes shuffling of .tmp vector
> registers work as expected.
> 
> Signed-off-by: Marco Liebel <quic_mliebel@quicinc.com>
> ---
>  target/hexagon/mmvec/decode_ext_mmvec.c |  8 +++----
>  tests/tcg/hexagon/hvx_misc.c            | 31 +++++++++++++++++++++++++
>  2 files changed, 35 insertions(+), 4 deletions(-)
> 
> diff --git a/target/hexagon/mmvec/decode_ext_mmvec.c
> b/target/hexagon/mmvec/decode_ext_mmvec.c
> index 061a65ab88..174eb3b78b 100644
> --- a/target/hexagon/mmvec/decode_ext_mmvec.c
> +++ b/target/hexagon/mmvec/decode_ext_mmvec.c
> @@ -1,5 +1,5 @@
>  /*
> - *  Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights
> Reserved.
> + *  Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights
> Reserved.
>   *
>   *  This program is free software; you can redistribute it and/or modify
>   *  it under the terms of the GNU General Public License as published by
> @@ -148,9 +148,9 @@ decode_shuffle_for_execution_vops(Packet *pkt)
>      int i;
>      for (i = 0; i < pkt->num_insns; i++) {
>          uint16_t opcode = pkt->insn[i].opcode;
> -        if (GET_ATTRIB(opcode, A_LOAD) &&
> -            (GET_ATTRIB(opcode, A_CVI_NEW) ||
> -             GET_ATTRIB(opcode, A_CVI_TMP))) {
> +        if ((GET_ATTRIB(opcode, A_LOAD) &&
> +             GET_ATTRIB(opcode, A_CVI_NEW)) ||
> +            GET_ATTRIB(opcode, A_CVI_TMP)) {
>              /*
>               * Find prior consuming vector instructions
>               * Move to end of packet
> diff --git a/tests/tcg/hexagon/hvx_misc.c b/tests/tcg/hexagon/hvx_misc.c
> index 09dec8d7a1..b45170acd1 100644
> --- a/tests/tcg/hexagon/hvx_misc.c
> +++ b/tests/tcg/hexagon/hvx_misc.c
> @@ -60,6 +60,36 @@ static void test_load_tmp(void)
>      check_output_w(__LINE__, BUFSIZE);
>  }
> 
> +static void test_load_tmp2(void)
> +{
> +    void *pout0 = &output[0];
> +    void *pout1 = &output[1];
> +
> +    asm volatile(
> +        "r0 = #0x03030303\n\t"
> +        "v16 = vsplat(r0)\n\t"
> +        "r0 = #0x04040404\n\t"
> +        "v18 = vsplat(r0)\n\t"
> +        "r0 = #0x05050505\n\t"
> +        "v21 = vsplat(r0)\n\t"
> +        "{\n\t"
> +        "   v25:24 += vmpyo(v18.w, v14.h)\n\t"
> +        "   v15:14.tmp = vcombine(v21, v16)\n\t"
> +        "}\n\t"
> +        "vmem(%0 + #0) = v24\n\t"
> +        "vmem(%1 + #0) = v25\n\t"
> +        : : "r"(pout0), "r"(pout1)
> +        : "r0", "v16", "v18", "v21", "v24", "v25", "memory"
> +    );
> +
> +    for (int i = 0; i < MAX_VEC_SIZE_BYTES / 4; ++i) {
> +        expect[0].w[i] = 0x180c0000;
> +        expect[1].w[i] = 0x000c1818;
> +    }
> +
> +    check_output_w(__LINE__, 2);
> +}
> +
>  static void test_load_cur(void)
>  {
>      void *p0 = buffer0;
> @@ -435,6 +465,7 @@ int main()
>      init_buffers();
> 
>      test_load_tmp();
> +    test_load_tmp2();
>      test_load_cur();
>      test_load_aligned();
>      test_load_unaligned();
> --
> 2.25.1


Reviewed-by: Brian Cain <bcain@quicinc.com>