[PATCH 02/20] Hexagon HVX (target/hexagon) add Hexagon Vector eXtensions (HVX) to core

Taylor Simpson posted 20 patches 4 years, 5 months ago
There is a newer version of this series
[PATCH 02/20] Hexagon HVX (target/hexagon) add Hexagon Vector eXtensions (HVX) to core
Posted by Taylor Simpson 4 years, 5 months ago
HVX is a set of wide vector instructions.  Machine state includes
    vector registers (VRegs)
    vector predicate registers (QRegs)
    temporary registers for intermediate values
    store buffer (masked stores and scatter/gather)

Signed-off-by: Taylor Simpson <tsimpson@quicinc.com>
---
 target/hexagon/cpu.h            | 35 ++++++++++++++++-
 target/hexagon/hex_arch_types.h |  5 +++
 target/hexagon/insn.h           |  3 ++
 target/hexagon/internal.h       |  3 ++
 target/hexagon/mmvec/mmvec.h    | 83 +++++++++++++++++++++++++++++++++++++++++
 target/hexagon/cpu.c            | 72 ++++++++++++++++++++++++++++++++++-
 6 files changed, 198 insertions(+), 3 deletions(-)
 create mode 100644 target/hexagon/mmvec/mmvec.h

diff --git a/target/hexagon/cpu.h b/target/hexagon/cpu.h
index 2855dd3..0b377c3 100644
--- a/target/hexagon/cpu.h
+++ b/target/hexagon/cpu.h
@@ -26,6 +26,7 @@ typedef struct CPUHexagonState CPUHexagonState;
 #include "qemu-common.h"
 #include "exec/cpu-defs.h"
 #include "hex_regs.h"
+#include "mmvec/mmvec.h"
 
 #define NUM_PREGS 4
 #define TOTAL_PER_THREAD_REGS 64
@@ -34,6 +35,7 @@ typedef struct CPUHexagonState CPUHexagonState;
 #define STORES_MAX 2
 #define REG_WRITES_MAX 32
 #define PRED_WRITES_MAX 5                   /* 4 insns + endloop */
+#define VSTORES_MAX 2
 
 #define TYPE_HEXAGON_CPU "hexagon-cpu"
 
@@ -52,6 +54,13 @@ typedef struct {
     uint64_t data64;
 } MemLog;
 
+typedef struct {
+    target_ulong va;
+    int size;
+    MMVector mask QEMU_ALIGNED(16);
+    MMVector data QEMU_ALIGNED(16);
+} VStoreLog;
+
 #define EXEC_STATUS_OK          0x0000
 #define EXEC_STATUS_STOP        0x0002
 #define EXEC_STATUS_REPLAY      0x0010
@@ -98,7 +107,31 @@ struct CPUHexagonState {
     uint64_t     llsc_val_i64;
 
     target_ulong is_gather_store_insn;
-    target_ulong gather_issued;
+    bool gather_issued;
+
+    MMVector VRegs[NUM_VREGS] QEMU_ALIGNED(16);
+    MMVector future_VRegs[NUM_VREGS] QEMU_ALIGNED(16);
+    MMVector tmp_VRegs[NUM_VREGS] QEMU_ALIGNED(16);
+
+    VRegMask VRegs_updated_tmp;
+    VRegMask VRegs_updated;
+    VRegMask VRegs_select;
+
+    MMQReg QRegs[NUM_QREGS] QEMU_ALIGNED(16);
+    MMQReg future_QRegs[NUM_QREGS] QEMU_ALIGNED(16);
+    QRegMask QRegs_updated;
+
+    /* Temporaries used within instructions */
+    MMVector zero_vector QEMU_ALIGNED(16);
+    MMVectorPair VddV QEMU_ALIGNED(16),
+                 VuuV QEMU_ALIGNED(16),
+                 VvvV QEMU_ALIGNED(16),
+                 VxxV QEMU_ALIGNED(16);
+
+    VStoreLog vstore[VSTORES_MAX];
+    target_ulong vstore_pending[VSTORES_MAX];
+    bool vtcm_pending;
+    VTCMStoreLog vtcm_log;
 };
 
 #define HEXAGON_CPU_CLASS(klass) \
diff --git a/target/hexagon/hex_arch_types.h b/target/hexagon/hex_arch_types.h
index d721e1f..78ad607 100644
--- a/target/hexagon/hex_arch_types.h
+++ b/target/hexagon/hex_arch_types.h
@@ -19,6 +19,7 @@
 #define HEXAGON_ARCH_TYPES_H
 
 #include "qemu/osdep.h"
+#include "mmvec/mmvec.h"
 #include "qemu/int128.h"
 
 /*
@@ -35,4 +36,8 @@ typedef uint64_t    size8u_t;
 typedef int64_t     size8s_t;
 typedef Int128      size16s_t;
 
+typedef MMVector          mmvector_t;
+typedef MMVectorPair      mmvector_pair_t;
+typedef MMQReg            mmqret_t;
+
 #endif
diff --git a/target/hexagon/insn.h b/target/hexagon/insn.h
index 2e34591..3872f90 100644
--- a/target/hexagon/insn.h
+++ b/target/hexagon/insn.h
@@ -67,6 +67,9 @@ struct Packet {
     bool pkt_has_store_s0;
     bool pkt_has_store_s1;
 
+    bool pkt_has_hvx;
+    bool pkt_has_vhist;
+
     Insn insn[INSTRUCTIONS_MAX];
 };
 
diff --git a/target/hexagon/internal.h b/target/hexagon/internal.h
index 6b20aff..82ac304 100644
--- a/target/hexagon/internal.h
+++ b/target/hexagon/internal.h
@@ -31,6 +31,9 @@
 
 int hexagon_gdb_read_register(CPUState *cpu, GByteArray *buf, int reg);
 int hexagon_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg);
+
+void hexagon_debug_vreg(CPUHexagonState *env, int regnum);
+void hexagon_debug_qreg(CPUHexagonState *env, int regnum);
 void hexagon_debug(CPUHexagonState *env);
 
 extern const char * const hexagon_regnames[TOTAL_PER_THREAD_REGS];
diff --git a/target/hexagon/mmvec/mmvec.h b/target/hexagon/mmvec/mmvec.h
new file mode 100644
index 0000000..061ccce
--- /dev/null
+++ b/target/hexagon/mmvec/mmvec.h
@@ -0,0 +1,83 @@
+/*
+ *  Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HEXAGON_MMVEC_H
+#define HEXAGON_MMVEC_H
+
+#define MAX_VEC_SIZE_LOGBYTES 7
+#define MAX_VEC_SIZE_BYTES  (1 << MAX_VEC_SIZE_LOGBYTES)
+
+#define NUM_VREGS           32
+#define NUM_QREGS           4
+
+typedef uint32_t VRegMask; /* at least NUM_VREGS bits */
+typedef uint32_t QRegMask; /* at least NUM_QREGS bits */
+
+#define VECTOR_SIZE_BYTE    (fVECSIZE())
+
+typedef union {
+    uint64_t ud[MAX_VEC_SIZE_BYTES / 8];
+    int64_t   d[MAX_VEC_SIZE_BYTES / 8];
+    uint32_t uw[MAX_VEC_SIZE_BYTES / 4];
+    int32_t   w[MAX_VEC_SIZE_BYTES / 4];
+    uint16_t uh[MAX_VEC_SIZE_BYTES / 2];
+    int16_t   h[MAX_VEC_SIZE_BYTES / 2];
+    uint8_t  ub[MAX_VEC_SIZE_BYTES / 1];
+    int8_t    b[MAX_VEC_SIZE_BYTES / 1];
+} MMVector;
+
+typedef union {
+    uint64_t ud[2 * MAX_VEC_SIZE_BYTES / 8];
+    int64_t   d[2 * MAX_VEC_SIZE_BYTES / 8];
+    uint32_t uw[2 * MAX_VEC_SIZE_BYTES / 4];
+    int32_t   w[2 * MAX_VEC_SIZE_BYTES / 4];
+    uint16_t uh[2 * MAX_VEC_SIZE_BYTES / 2];
+    int16_t   h[2 * MAX_VEC_SIZE_BYTES / 2];
+    uint8_t  ub[2 * MAX_VEC_SIZE_BYTES / 1];
+    int8_t    b[2 * MAX_VEC_SIZE_BYTES / 1];
+    MMVector v[2];
+} MMVectorPair;
+
+typedef union {
+    uint64_t ud[MAX_VEC_SIZE_BYTES / 8 / 8];
+    int64_t   d[MAX_VEC_SIZE_BYTES / 8 / 8];
+    uint32_t uw[MAX_VEC_SIZE_BYTES / 4 / 8];
+    int32_t   w[MAX_VEC_SIZE_BYTES / 4 / 8];
+    uint16_t uh[MAX_VEC_SIZE_BYTES / 2 / 8];
+    int16_t   h[MAX_VEC_SIZE_BYTES / 2 / 8];
+    uint8_t  ub[MAX_VEC_SIZE_BYTES / 1 / 8];
+    int8_t    b[MAX_VEC_SIZE_BYTES / 1 / 8];
+} MMQReg;
+
+typedef struct {
+    MMVector data;
+    MMVector mask;
+    int size;
+    target_ulong va[MAX_VEC_SIZE_BYTES];
+    bool op;
+    int op_size;
+} VTCMStoreLog;
+
+
+/* Types of vector register assignment */
+typedef enum {
+    EXT_DFL,      /* Default */
+    EXT_NEW,      /* New - value used in the same packet */
+    EXT_TMP       /* Temp - value used but not stored to register */
+} VRegWriteType;
+
+#endif
diff --git a/target/hexagon/cpu.c b/target/hexagon/cpu.c
index 3338365..4c7eaff 100644
--- a/target/hexagon/cpu.c
+++ b/target/hexagon/cpu.c
@@ -59,7 +59,7 @@ const char * const hexagon_regnames[TOTAL_PER_THREAD_REGS] = {
   "r24", "r25", "r26", "r27", "r28",  "r29", "r30", "r31",
   "sa0", "lc0", "sa1", "lc1", "p3_0", "c5",  "m0",  "m1",
   "usr", "pc",  "ugp", "gp",  "cs0",  "cs1", "c14", "c15",
-  "c16", "c17", "c18", "c19", "pkt_cnt",  "insn_cnt", "c22", "c23",
+  "c16", "c17", "c18", "c19", "pkt_cnt",  "insn_cnt", "hvx_cnt", "c23",
   "c24", "c25", "c26", "c27", "c28",  "c29", "c30", "c31",
 };
 
@@ -113,6 +113,57 @@ static void print_reg(FILE *f, CPUHexagonState *env, int regnum)
                  hexagon_regnames[regnum], value);
 }
 
+static void print_vreg(FILE *f, CPUHexagonState *env, int regnum)
+{
+    bool nonzero_found = false;
+    int i;
+    qemu_fprintf(f, "  v%d = ( ", regnum);
+    qemu_fprintf(f, "0x%02x", env->VRegs[regnum].ub[MAX_VEC_SIZE_BYTES - 1]);
+    for (i = MAX_VEC_SIZE_BYTES - 2; i >= 0; i--) {
+        if (env->VRegs[regnum].ub[i] != 0) {
+            nonzero_found = true;
+            break;
+        }
+    }
+    if (nonzero_found) {
+        for (i = MAX_VEC_SIZE_BYTES - 2; i >= 0; i--) {
+            qemu_fprintf(f, ", 0x%02x", env->VRegs[regnum].ub[i]);
+        }
+    }
+    qemu_fprintf(f, " )\n");
+}
+
+void hexagon_debug_vreg(CPUHexagonState *env, int regnum)
+{
+    print_vreg(stdout, env, regnum);
+}
+
+static void print_qreg(FILE *f, CPUHexagonState *env, int regnum)
+{
+    bool nonzero_found = false;
+    int i;
+    qemu_fprintf(f, "  q%d = ( ", regnum);
+    qemu_fprintf(f, "0x%02x",
+                 env->QRegs[regnum].ub[MAX_VEC_SIZE_BYTES / 8 - 1]);
+    for (i = MAX_VEC_SIZE_BYTES / 8 - 2; i >= 0; i--) {
+        if (env->QRegs[regnum].ub[i] != 0) {
+            nonzero_found = true;
+            break;
+        }
+    }
+    if (nonzero_found) {
+        for (i = MAX_VEC_SIZE_BYTES / 8 - 2; i >= 0; i--) {
+            qemu_fprintf(f, ", 0x%02x", env->QRegs[regnum].ub[i]);
+        }
+    }
+    qemu_fprintf(f, " )\n");
+}
+
+void hexagon_debug_qreg(CPUHexagonState *env, int regnum)
+{
+    print_qreg(stdout, env, regnum);
+}
+
 static void hexagon_dump(CPUHexagonState *env, FILE *f)
 {
     HexagonCPU *cpu = env_archcpu(env);
@@ -159,6 +210,22 @@ static void hexagon_dump(CPUHexagonState *env, FILE *f)
     print_reg(f, env, HEX_REG_CS1);
 #endif
     qemu_fprintf(f, "}\n");
+
+/*
+ * The HVX register dump takes up a ton of space in the log
+ * Don't print it unless it is needed
+ */
+#define DUMP_HVX 0
+#if DUMP_HVX
+    qemu_fprintf(f, "Vector Registers = {\n");
+    for (int i = 0; i < NUM_VREGS; i++) {
+        print_vreg(f, env, i);
+    }
+    for (int i = 0; i < NUM_QREGS; i++) {
+        print_qreg(f, env, i);
+    }
+    qemu_fprintf(f, "}\n");
+#endif
 }
 
 static void hexagon_dump_state(CPUState *cs, FILE *f, int flags)
@@ -211,6 +278,7 @@ static void hexagon_cpu_reset(DeviceState *dev)
 
     set_default_nan_mode(1, &env->fp_status);
     set_float_detect_tininess(float_tininess_before_rounding, &env->fp_status);
+    memset(&env->zero_vector, 0, sizeof(MMVector));
 }
 
 static void hexagon_cpu_disas_set_info(CPUState *s, disassemble_info *info)
@@ -292,7 +360,7 @@ static void hexagon_cpu_class_init(ObjectClass *c, void *data)
     cc->set_pc = hexagon_cpu_set_pc;
     cc->gdb_read_register = hexagon_gdb_read_register;
     cc->gdb_write_register = hexagon_gdb_write_register;
-    cc->gdb_num_core_regs = TOTAL_PER_THREAD_REGS;
+    cc->gdb_num_core_regs = TOTAL_PER_THREAD_REGS + NUM_VREGS + NUM_QREGS;
     cc->gdb_stop_before_watchpoint = true;
     cc->disas_set_info = hexagon_cpu_disas_set_info;
     cc->tcg_ops = &hexagon_tcg_ops;
-- 
2.7.4

Re: [PATCH 02/20] Hexagon HVX (target/hexagon) add Hexagon Vector eXtensions (HVX) to core
Posted by Richard Henderson 4 years, 4 months ago
On 7/5/21 1:34 PM, Taylor Simpson wrote:
> HVX is a set of wide vector instructions.  Machine state includes
>      vector registers (VRegs)
>      vector predicate registers (QRegs)
>      temporary registers for intermediate values
>      store buffer (masked stores and scatter/gather)
> 
> Signed-off-by: Taylor Simpson <tsimpson@quicinc.com>
> ---
>   target/hexagon/cpu.h            | 35 ++++++++++++++++-
>   target/hexagon/hex_arch_types.h |  5 +++
>   target/hexagon/insn.h           |  3 ++
>   target/hexagon/internal.h       |  3 ++
>   target/hexagon/mmvec/mmvec.h    | 83 +++++++++++++++++++++++++++++++++++++++++
>   target/hexagon/cpu.c            | 72 ++++++++++++++++++++++++++++++++++-
>   6 files changed, 198 insertions(+), 3 deletions(-)
>   create mode 100644 target/hexagon/mmvec/mmvec.h
> 
> diff --git a/target/hexagon/cpu.h b/target/hexagon/cpu.h
> index 2855dd3..0b377c3 100644
> --- a/target/hexagon/cpu.h
> +++ b/target/hexagon/cpu.h
> @@ -26,6 +26,7 @@ typedef struct CPUHexagonState CPUHexagonState;
>   #include "qemu-common.h"
>   #include "exec/cpu-defs.h"
>   #include "hex_regs.h"
> +#include "mmvec/mmvec.h"
>   
>   #define NUM_PREGS 4
>   #define TOTAL_PER_THREAD_REGS 64
> @@ -34,6 +35,7 @@ typedef struct CPUHexagonState CPUHexagonState;
>   #define STORES_MAX 2
>   #define REG_WRITES_MAX 32
>   #define PRED_WRITES_MAX 5                   /* 4 insns + endloop */
> +#define VSTORES_MAX 2
>   
>   #define TYPE_HEXAGON_CPU "hexagon-cpu"
>   
> @@ -52,6 +54,13 @@ typedef struct {
>       uint64_t data64;
>   } MemLog;
>   
> +typedef struct {
> +    target_ulong va;
> +    int size;
> +    MMVector mask QEMU_ALIGNED(16);
> +    MMVector data QEMU_ALIGNED(16);
> +} VStoreLog;

Do you really need a MMVector mask, or should this be a QRegMask?

> -    target_ulong gather_issued;
> +    bool gather_issued;

Surely unrelated to adding state.

> +    MMVector VRegs[NUM_VREGS] QEMU_ALIGNED(16);
> +    MMVector future_VRegs[NUM_VREGS] QEMU_ALIGNED(16);
> +    MMVector tmp_VRegs[NUM_VREGS] QEMU_ALIGNED(16);

Ok, this is where I'm not keen on how you handle this for integer code, and for vector 
code has got to be past the realm of acceptable.

You have exactly 4 slots in your vliw packet.  You cannot possibly use 32 future or tmp 
slots.  For integers this wastage was at least small, but for vectors these waste just shy 
of 8k.

All you need to do is to be smarter about mapping e.g. 5 to 8 temp slots during translation.

> +    MMQReg QRegs[NUM_QREGS] QEMU_ALIGNED(16);
> +    MMQReg future_QRegs[NUM_QREGS] QEMU_ALIGNED(16);

Likewise.

> +    /* Temporaries used within instructions */
> +    MMVector zero_vector QEMU_ALIGNED(16);

You must be doing something wrong to need zero in memory.

> +/*
> + * The HVX register dump takes up a ton of space in the log
> + * Don't print it unless it is needed
> + */
> +#define DUMP_HVX 0
> +#if DUMP_HVX
> +    qemu_fprintf(f, "Vector Registers = {\n");
> +    for (int i = 0; i < NUM_VREGS; i++) {
> +        print_vreg(f, env, i);
> +    }
> +    for (int i = 0; i < NUM_QREGS; i++) {
> +        print_qreg(f, env, i);
> +    }
> +    qemu_fprintf(f, "}\n");
> +#endif

Use CPU_DUMP_FPU, controlled by -d fpu.

> -    cc->gdb_num_core_regs = TOTAL_PER_THREAD_REGS;
> +    cc->gdb_num_core_regs = TOTAL_PER_THREAD_REGS + NUM_VREGS + NUM_QREGS;

They're not really core regs though are they?
Surely gdb_register_coprocessor is a better representation.


r~

RE: [PATCH 02/20] Hexagon HVX (target/hexagon) add Hexagon Vector eXtensions (HVX) to core
Posted by Taylor Simpson 4 years, 4 months ago

> -----Original Message-----
> From: Richard Henderson <richard.henderson@linaro.org>
> Sent: Sunday, July 25, 2021 8:08 AM
> To: Taylor Simpson <tsimpson@quicinc.com>; qemu-devel@nongnu.org
> Cc: philmd@redhat.com; ale@rev.ng; Brian Cain <bcain@quicinc.com>;
> peter.maydell@linaro.org
> Subject: Re: [PATCH 02/20] Hexagon HVX (target/hexagon) add Hexagon
> Vector eXtensions (HVX) to core
> 
> On 7/5/21 1:34 PM, Taylor Simpson wrote:
> > HVX is a set of wide vector instructions.  Machine state includes
> >      vector registers (VRegs)
> >      vector predicate registers (QRegs)
> >      temporary registers for intermediate values
> >      store buffer (masked stores and scatter/gather)
> >
> > Signed-off-by: Taylor Simpson <tsimpson@quicinc.com>
> > ---
> >   target/hexagon/cpu.h            | 35 ++++++++++++++++-
> >   target/hexagon/hex_arch_types.h |  5 +++
> >   target/hexagon/insn.h           |  3 ++
> >   target/hexagon/internal.h       |  3 ++
> >   target/hexagon/mmvec/mmvec.h    | 83
> +++++++++++++++++++++++++++++++++++++++++
> >   target/hexagon/cpu.c            | 72
> ++++++++++++++++++++++++++++++++++-
> >   6 files changed, 198 insertions(+), 3 deletions(-)
> >   create mode 100644 target/hexagon/mmvec/mmvec.h
> >
> > diff --git a/target/hexagon/cpu.h b/target/hexagon/cpu.h index
> > 2855dd3..0b377c3 100644
> > --- a/target/hexagon/cpu.h
> > +++ b/target/hexagon/cpu.h
> > @@ -26,6 +26,7 @@ typedef struct CPUHexagonState CPUHexagonState;
> >   #include "qemu-common.h"
> >   #include "exec/cpu-defs.h"
> >   #include "hex_regs.h"
> > +#include "mmvec/mmvec.h"
> >
> >   #define NUM_PREGS 4
> >   #define TOTAL_PER_THREAD_REGS 64
> > @@ -34,6 +35,7 @@ typedef struct CPUHexagonState CPUHexagonState;
> >   #define STORES_MAX 2
> >   #define REG_WRITES_MAX 32
> >   #define PRED_WRITES_MAX 5                   /* 4 insns + endloop */
> > +#define VSTORES_MAX 2
> >
> >   #define TYPE_HEXAGON_CPU "hexagon-cpu"
> >
> > @@ -52,6 +54,13 @@ typedef struct {
> >       uint64_t data64;
> >   } MemLog;
> >
> > +typedef struct {
> > +    target_ulong va;
> > +    int size;
> > +    MMVector mask QEMU_ALIGNED(16);
> > +    MMVector data QEMU_ALIGNED(16);
> > +} VStoreLog;
> 
> Do you really need a MMVector mask, or should this be a QRegMask?

You are correct.  I'll change this.

> 
> > -    target_ulong gather_issued;
> > +    bool gather_issued;
> 
> Surely unrelated to adding state.

This was unintentionally included in the patch series for the scalar core.  Based on previous feedback, I know it should be a bool.  However, this can actually be removed altogether.  So, in the next iteration of this series, you'll see it removed.

> 
> > +    MMVector VRegs[NUM_VREGS] QEMU_ALIGNED(16);
> > +    MMVector future_VRegs[NUM_VREGS] QEMU_ALIGNED(16);
> > +    MMVector tmp_VRegs[NUM_VREGS] QEMU_ALIGNED(16);
> 
> Ok, this is where I'm not keen on how you handle this for integer code, and
> for vector code has got to be past the realm of acceptable.
> 
> You have exactly 4 slots in your vliw packet.  You cannot possibly use 32
> future or tmp slots.  For integers this wastage was at least small, but for
> vectors these waste just shy of 8k.
> 
> All you need to do is to be smarter about mapping e.g. 5 to 8 temp slots
> during translation.

OK

> 
> > +    MMQReg QRegs[NUM_QREGS] QEMU_ALIGNED(16);
> > +    MMQReg future_QRegs[NUM_QREGS] QEMU_ALIGNED(16);
> 
> Likewise.
> 
> > +    /* Temporaries used within instructions */
> > +    MMVector zero_vector QEMU_ALIGNED(16);
> 
> You must be doing something wrong to need zero in memory.

The architecture specifies that if you use a .new in a packet where the vector register isn't  defined, it gets zero.  So, the generator produces the following for .new references
    const intptr_t OsN_off =
        test_bit(OsN_num, ctx->vregs_updated)
            ? offsetof(CPUHexagonState, future_VRegs[OsN_num])
            : offsetof(CPUHexagonState, zero_vector);

> 
> > +/*
> > + * The HVX register dump takes up a ton of space in the log
> > + * Don't print it unless it is needed  */ #define DUMP_HVX 0 #if
> > +DUMP_HVX
> > +    qemu_fprintf(f, "Vector Registers = {\n");
> > +    for (int i = 0; i < NUM_VREGS; i++) {
> > +        print_vreg(f, env, i);
> > +    }
> > +    for (int i = 0; i < NUM_QREGS; i++) {
> > +        print_qreg(f, env, i);
> > +    }
> > +    qemu_fprintf(f, "}\n");
> > +#endif
> 
> Use CPU_DUMP_FPU, controlled by -d fpu.

These aren't FP registers, but OK.


> 
> > -    cc->gdb_num_core_regs = TOTAL_PER_THREAD_REGS;
> > +    cc->gdb_num_core_regs = TOTAL_PER_THREAD_REGS + NUM_VREGS
> +
> > + NUM_QREGS;
> 
> They're not really core regs though are they?
> Surely gdb_register_coprocessor is a better representation.

I'll look into this.


Thanks,
Taylor

RE: [PATCH 02/20] Hexagon HVX (target/hexagon) add Hexagon Vector eXtensions (HVX) to core
Posted by Taylor Simpson 4 years, 4 months ago

> -----Original Message-----
> From: Taylor Simpson
> Sent: Sunday, July 25, 2021 10:02 PM
> To: Richard Henderson <richard.henderson@linaro.org>; qemu-
> devel@nongnu.org
> Cc: philmd@redhat.com; ale@rev.ng; Brian Cain <bcain@quicinc.com>;
> peter.maydell@linaro.org
> Subject: RE: [PATCH 02/20] Hexagon HVX (target/hexagon) add Hexagon
> Vector eXtensions (HVX) to core
> 
> 
> 
> > -----Original Message-----
> > From: Richard Henderson <richard.henderson@linaro.org>
> > Sent: Sunday, July 25, 2021 8:08 AM
> > To: Taylor Simpson <tsimpson@quicinc.com>; qemu-devel@nongnu.org
> > Cc: philmd@redhat.com; ale@rev.ng; Brian Cain <bcain@quicinc.com>;
> > peter.maydell@linaro.org
> > Subject: Re: [PATCH 02/20] Hexagon HVX (target/hexagon) add Hexagon
> > Vector eXtensions (HVX) to core
> >


> > > +    /* Temporaries used within instructions */
> > > +    MMVector zero_vector QEMU_ALIGNED(16);
> >
> > You must be doing something wrong to need zero in memory.
> 
> The architecture specifies that if you use a .new in a packet where the vector
> register isn't  defined, it gets zero.  So, the generator produces the following
> for .new references
>     const intptr_t OsN_off =
>         test_bit(OsN_num, ctx->vregs_updated)
>             ? offsetof(CPUHexagonState, future_VRegs[OsN_num])
>             : offsetof(CPUHexagonState, zero_vector);

Correction - the architecture does NOT allow a .new where the vector register isn't defined.

This code is being overly cautious, so I'll change it to always return future_VRegs and remove zero_vector from CPUHexagonState.

Thanks,
Taylor