[PATCH 12/39] target/hexagon: Add implementation of cycle counters

Brian Cain posted 39 patches 1 month ago
Only 37 patches received!
[PATCH 12/39] target/hexagon: Add implementation of cycle counters
Posted by Brian Cain 1 month ago
From: Brian Cain <bcain@quicinc.com>

Co-authored-by: Sid Manning <sidneym@quicinc.com>
Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com>
---
 target/hexagon/cpu.h        | 25 ++++++++++++++++++++++---
 target/hexagon/translate.h  |  2 ++
 target/hexagon/cpu_helper.c | 12 +++++++++---
 target/hexagon/translate.c  | 27 +++++++++++++++++++++++++++
 4 files changed, 60 insertions(+), 6 deletions(-)

diff --git a/target/hexagon/cpu.h b/target/hexagon/cpu.h
index 4b9c9873dc..7e2ea838c5 100644
--- a/target/hexagon/cpu.h
+++ b/target/hexagon/cpu.h
@@ -27,11 +27,15 @@
 
 #include "cpu-qom.h"
 #include "exec/cpu-defs.h"
+#include "exec/cpu-common.h"
 #include "hex_regs.h"
 #include "mmvec/mmvec.h"
 #include "hw/registerfields.h"
 
+#ifndef CONFIG_USER_ONLY
+#include "reg_fields.h"
 typedef struct CPUHexagonTLBContext CPUHexagonTLBContext;
+#endif
 
 #define NUM_PREGS 4
 #define TOTAL_PER_THREAD_REGS 64
@@ -188,6 +192,7 @@ struct ArchCPU {
 
 FIELD(TB_FLAGS, IS_TIGHT_LOOP, 0, 1)
 FIELD(TB_FLAGS, MMU_INDEX, 1, 3)
+FIELD(TB_FLAGS, PCYCLE_ENABLED, 4, 1)
 
 G_NORETURN void hexagon_raise_exception_err(CPUHexagonState *env,
                                             uint32_t exception,
@@ -201,6 +206,11 @@ void hexagon_cpu_soft_reset(CPUHexagonState *env);
 #endif
 
 #include "exec/cpu-all.h"
+
+#ifndef CONFIG_USER_ONLY
+#include "cpu_helper.h"
+#endif
+
 static inline void cpu_get_tb_cpu_state(CPUHexagonState *env, vaddr *pc,
                                         uint64_t *cs_base, uint32_t *flags)
 {
@@ -210,16 +220,27 @@ static inline void cpu_get_tb_cpu_state(CPUHexagonState *env, vaddr *pc,
     if (*pc == env->gpr[HEX_REG_SA0]) {
         hex_flags = FIELD_DP32(hex_flags, TB_FLAGS, IS_TIGHT_LOOP, 1);
     }
-    *flags = hex_flags;
     if (*pc & PCALIGN_MASK) {
         hexagon_raise_exception_err(env, HEX_CAUSE_PC_NOT_ALIGNED, 0);
     }
 #ifndef CONFIG_USER_ONLY
+    target_ulong syscfg = arch_get_system_reg(env, HEX_SREG_SYSCFG);
+
+    bool pcycle_enabled = extract32(syscfg,
+                                    reg_field_info[SYSCFG_PCYCLEEN].offset,
+                                    reg_field_info[SYSCFG_PCYCLEEN].width);
+
     hex_flags = FIELD_DP32(hex_flags, TB_FLAGS, MMU_INDEX,
                            cpu_mmu_index(env_cpu(env), false));
+
+    if (pcycle_enabled) {
+        hex_flags = FIELD_DP32(hex_flags, TB_FLAGS, PCYCLE_ENABLED, 1);
+    }
 #else
+    hex_flags = FIELD_DP32(hex_flags, TB_FLAGS, PCYCLE_ENABLED, true);
     hex_flags = FIELD_DP32(hex_flags, TB_FLAGS, MMU_INDEX, MMU_USER_IDX);
 #endif
+    *flags = hex_flags;
 }
 
 typedef HexagonCPU ArchCPU;
@@ -228,6 +249,4 @@ void hexagon_translate_init(void);
 void hexagon_translate_code(CPUState *cs, TranslationBlock *tb,
                             int *max_insns, vaddr pc, void *host_pc);
 
-#include "exec/cpu-all.h"
-
 #endif /* HEXAGON_CPU_H */
diff --git a/target/hexagon/translate.h b/target/hexagon/translate.h
index 0eaa3db03e..9bc4b3ce8b 100644
--- a/target/hexagon/translate.h
+++ b/target/hexagon/translate.h
@@ -83,6 +83,8 @@ typedef struct DisasContext {
     TCGv new_pred_value[NUM_PREGS];
     TCGv branch_taken;
     TCGv dczero_addr;
+    bool pcycle_enabled;
+    uint32_t num_cycles;
 } DisasContext;
 
 bool is_gather_store_insn(DisasContext *ctx);
diff --git a/target/hexagon/cpu_helper.c b/target/hexagon/cpu_helper.c
index 0b0802bfb9..1d9b9f8bef 100644
--- a/target/hexagon/cpu_helper.c
+++ b/target/hexagon/cpu_helper.c
@@ -48,17 +48,23 @@ uint32_t arch_get_system_reg(CPUHexagonState *env, uint32_t reg)
 
 uint64_t hexagon_get_sys_pcycle_count(CPUHexagonState *env)
 {
-    g_assert_not_reached();
+    uint64_t cycles = 0;
+    CPUState *cs;
+    CPU_FOREACH(cs) {
+        CPUHexagonState *env_ = cpu_env(cs);
+        cycles += env_->t_cycle_count;
+    }
+    return *(env->g_pcycle_base) + cycles;
 }
 
 uint32_t hexagon_get_sys_pcycle_count_high(CPUHexagonState *env)
 {
-    g_assert_not_reached();
+    return hexagon_get_sys_pcycle_count(env) >> 32;
 }
 
 uint32_t hexagon_get_sys_pcycle_count_low(CPUHexagonState *env)
 {
-    g_assert_not_reached();
+    return extract64(hexagon_get_sys_pcycle_count(env), 0, 32);
 }
 
 void hexagon_set_sys_pcycle_count_high(CPUHexagonState *env,
diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c
index 9119e42ff7..060df6e5eb 100644
--- a/target/hexagon/translate.c
+++ b/target/hexagon/translate.c
@@ -57,6 +57,7 @@ TCGv_i64 hex_store_val64[STORES_MAX];
 TCGv hex_llsc_addr;
 TCGv hex_llsc_val;
 TCGv_i64 hex_llsc_val_i64;
+TCGv_i64 hex_cycle_count;
 TCGv hex_vstore_addr[VSTORES_MAX];
 TCGv hex_vstore_size[VSTORES_MAX];
 TCGv hex_vstore_pending[VSTORES_MAX];
@@ -125,6 +126,22 @@ static void gen_exception_raw(int excp)
     gen_helper_raise_exception(tcg_env, tcg_constant_i32(excp));
 }
 
+#ifndef CONFIG_USER_ONLY
+static inline void gen_precise_exception(int excp, target_ulong PC)
+{
+    tcg_gen_movi_tl(hex_cause_code, excp);
+    gen_exception(HEX_EVENT_PRECISE, PC);
+}
+
+static inline void gen_pcycle_counters(DisasContext *ctx)
+{
+    if (ctx->pcycle_enabled) {
+        tcg_gen_addi_i64(hex_cycle_count, hex_cycle_count, ctx->num_cycles);
+        ctx->num_cycles = 0;
+    }
+}
+#endif
+
 static void gen_exec_counters(DisasContext *ctx)
 {
     tcg_gen_addi_tl(hex_gpr[HEX_REG_QEMU_PKT_CNT],
@@ -133,6 +150,10 @@ static void gen_exec_counters(DisasContext *ctx)
                     hex_gpr[HEX_REG_QEMU_INSN_CNT], ctx->num_insns);
     tcg_gen_addi_tl(hex_gpr[HEX_REG_QEMU_HVX_CNT],
                     hex_gpr[HEX_REG_QEMU_HVX_CNT], ctx->num_hvx_insns);
+
+#ifndef CONFIG_USER_ONLY
+   gen_pcycle_counters(ctx);
+#endif
 }
 
 static bool use_goto_tb(DisasContext *ctx, target_ulong dest)
@@ -785,6 +806,7 @@ static void gen_commit_hvx(DisasContext *ctx)
     }
 }
 
+static const int PCYCLES_PER_PACKET = 3;
 static void update_exec_counters(DisasContext *ctx)
 {
     Packet *pkt = ctx->pkt;
@@ -804,6 +826,7 @@ static void update_exec_counters(DisasContext *ctx)
     }
 
     ctx->num_packets++;
+    ctx->num_cycles += PCYCLES_PER_PACKET;
     ctx->num_insns += num_real_insns;
     ctx->num_hvx_insns += num_hvx_insns;
 }
@@ -946,11 +969,13 @@ static void hexagon_tr_init_disas_context(DisasContextBase *dcbase,
 
     ctx->mem_idx = FIELD_EX32(hex_flags, TB_FLAGS, MMU_INDEX);
     ctx->num_packets = 0;
+    ctx->num_cycles = 0;
     ctx->num_insns = 0;
     ctx->num_hvx_insns = 0;
     ctx->branch_cond = TCG_COND_NEVER;
     ctx->is_tight_loop = FIELD_EX32(hex_flags, TB_FLAGS, IS_TIGHT_LOOP);
     ctx->short_circuit = hex_cpu->short_circuit;
+    ctx->pcycle_enabled = FIELD_EX32(hex_flags, TB_FLAGS, PCYCLE_ENABLED);
 }
 
 static void hexagon_tr_tb_start(DisasContextBase *db, CPUState *cpu)
@@ -1077,6 +1102,8 @@ void hexagon_translate_init(void)
         offsetof(CPUHexagonState, llsc_val), "llsc_val");
     hex_llsc_val_i64 = tcg_global_mem_new_i64(tcg_env,
         offsetof(CPUHexagonState, llsc_val_i64), "llsc_val_i64");
+    hex_cycle_count = tcg_global_mem_new_i64(tcg_env,
+            offsetof(CPUHexagonState, t_cycle_count), "t_cycle_count");
     for (i = 0; i < STORES_MAX; i++) {
         snprintf(store_addr_names[i], NAME_LEN, "store_addr_%d", i);
         hex_store_addr[i] = tcg_global_mem_new(tcg_env,
-- 
2.34.1

RE: [PATCH 12/39] target/hexagon: Add implementation of cycle counters
Posted by ltaylorsimpson@gmail.com 2 weeks ago

> -----Original Message-----
> From: Brian Cain <brian.cain@oss.qualcomm.com>
> Sent: Friday, February 28, 2025 11:28 PM
> To: qemu-devel@nongnu.org
> Cc: brian.cain@oss.qualcomm.com; richard.henderson@linaro.org;
> philmd@linaro.org; quic_mathbern@quicinc.com; ale@rev.ng; anjo@rev.ng;
> quic_mliebel@quicinc.com; ltaylorsimpson@gmail.com;
> alex.bennee@linaro.org; quic_mburton@quicinc.com;
> sidneym@quicinc.com; Brian Cain <bcain@quicinc.com>
> Subject: [PATCH 12/39] target/hexagon: Add implementation of cycle
> counters
> 
> From: Brian Cain <bcain@quicinc.com>
> 
> Co-authored-by: Sid Manning <sidneym@quicinc.com>
> Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com>
> ---
>  target/hexagon/cpu.h        | 25 ++++++++++++++++++++++---
>  target/hexagon/translate.h  |  2 ++
>  target/hexagon/cpu_helper.c | 12 +++++++++---
> target/hexagon/translate.c  | 27 +++++++++++++++++++++++++++
>  4 files changed, 60 insertions(+), 6 deletions(-)
> 
> diff --git a/target/hexagon/cpu.h b/target/hexagon/cpu.h index
> 4b9c9873dc..7e2ea838c5 100644
> --- a/target/hexagon/cpu.h
> +++ b/target/hexagon/cpu.h
> @@ -27,11 +27,15 @@
> 
>  #include "cpu-qom.h"
>  #include "exec/cpu-defs.h"
> +#include "exec/cpu-common.h"
>  #include "hex_regs.h"
>  #include "mmvec/mmvec.h"
>  #include "hw/registerfields.h"
> 
> +#ifndef CONFIG_USER_ONLY
> +#include "reg_fields.h"
>  typedef struct CPUHexagonTLBContext CPUHexagonTLBContext;
> +#endif

Why is reg_fields.h guarded by #ifndef CONFIG_USER_ONLY?

Also, why wasn't the CPUHexagonTLBContext guarded when it was first inserted?

> 
>  #define NUM_PREGS 4
>  #define TOTAL_PER_THREAD_REGS 64
> @@ -188,6 +192,7 @@ struct ArchCPU {
> 
>  FIELD(TB_FLAGS, IS_TIGHT_LOOP, 0, 1)
>  FIELD(TB_FLAGS, MMU_INDEX, 1, 3)
> +FIELD(TB_FLAGS, PCYCLE_ENABLED, 4, 1)
> 
>  G_NORETURN void hexagon_raise_exception_err(CPUHexagonState *env,
>                                              uint32_t exception, @@ -201,6 +206,11 @@ void
> hexagon_cpu_soft_reset(CPUHexagonState *env);  #endif
> 
>  #include "exec/cpu-all.h"
> +
> +#ifndef CONFIG_USER_ONLY
> +#include "cpu_helper.h"
> +#endif
> +
>  static inline void cpu_get_tb_cpu_state(CPUHexagonState *env, vaddr *pc,
>                                          uint64_t *cs_base, uint32_t *flags)  { @@ -210,16
> +220,27 @@ static inline void cpu_get_tb_cpu_state(CPUHexagonState
> *env, vaddr *pc,
>      if (*pc == env->gpr[HEX_REG_SA0]) {
>          hex_flags = FIELD_DP32(hex_flags, TB_FLAGS, IS_TIGHT_LOOP, 1);
>      }
> -    *flags = hex_flags;
>      if (*pc & PCALIGN_MASK) {
>          hexagon_raise_exception_err(env, HEX_CAUSE_PC_NOT_ALIGNED, 0);
>      }
>  #ifndef CONFIG_USER_ONLY
> +    target_ulong syscfg = arch_get_system_reg(env, HEX_SREG_SYSCFG);
> +
> +    bool pcycle_enabled = extract32(syscfg,
> +                                    reg_field_info[SYSCFG_PCYCLEEN].offset,
> +
> + reg_field_info[SYSCFG_PCYCLEEN].width);
> +
>      hex_flags = FIELD_DP32(hex_flags, TB_FLAGS, MMU_INDEX,
>                             cpu_mmu_index(env_cpu(env), false));
> +
> +    if (pcycle_enabled) {
> +        hex_flags = FIELD_DP32(hex_flags, TB_FLAGS, PCYCLE_ENABLED, 1);
> +    }
>  #else
> +    hex_flags = FIELD_DP32(hex_flags, TB_FLAGS, PCYCLE_ENABLED, true);

Are pcycles exposed in linux-user mode?  If not, make this flag system-mode only. 

>      hex_flags = FIELD_DP32(hex_flags, TB_FLAGS, MMU_INDEX,
> MMU_USER_IDX);  #endif
> +    *flags = hex_flags;
>  }
> 
>  typedef HexagonCPU ArchCPU;
> @@ -228,6 +249,4 @@ void hexagon_translate_init(void);  void
> hexagon_translate_code(CPUState *cs, TranslationBlock *tb,
>                              int *max_insns, vaddr pc, void *host_pc);
> 
> -#include "exec/cpu-all.h"
> -
>  #endif /* HEXAGON_CPU_H */
> diff --git a/target/hexagon/translate.h b/target/hexagon/translate.h index
> 0eaa3db03e..9bc4b3ce8b 100644
> --- a/target/hexagon/translate.h
> +++ b/target/hexagon/translate.h
> @@ -83,6 +83,8 @@ typedef struct DisasContext {
>      TCGv new_pred_value[NUM_PREGS];
>      TCGv branch_taken;
>      TCGv dczero_addr;
> +    bool pcycle_enabled;

Guard with #ifndef CONFIG_USER_ONLY

> +    uint32_t num_cycles;
>  } DisasContext;
> 
>  bool is_gather_store_insn(DisasContext *ctx); diff --git
> a/target/hexagon/cpu_helper.c b/target/hexagon/cpu_helper.c index
> 0b0802bfb9..1d9b9f8bef 100644
> --- a/target/hexagon/cpu_helper.c
> +++ b/target/hexagon/cpu_helper.c
> @@ -48,17 +48,23 @@ uint32_t arch_get_system_reg(CPUHexagonState
> *env, uint32_t reg)
> 
>  uint64_t hexagon_get_sys_pcycle_count(CPUHexagonState *env)  {
> -    g_assert_not_reached();

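Do we need a lock here?  The loop below reads t_cycle_count from every CPU,
and the other vCPU threads may be updating theirs at the same time.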

> +    uint64_t cycles = 0;
> +    CPUState *cs;
> +    CPU_FOREACH(cs) {
> +        CPUHexagonState *env_ = cpu_env(cs);
> +        cycles += env_->t_cycle_count;
> +    }
> +    return *(env->g_pcycle_base) + cycles;
>  }
> 
>  uint32_t hexagon_get_sys_pcycle_count_high(CPUHexagonState *env)  {
> -    g_assert_not_reached();
> +    return hexagon_get_sys_pcycle_count(env) >> 32;
>  }
> 
>  uint32_t hexagon_get_sys_pcycle_count_low(CPUHexagonState *env)  {
> -    g_assert_not_reached();
> +    return extract64(hexagon_get_sys_pcycle_count(env), 0, 32);
>  }
> 
>  void hexagon_set_sys_pcycle_count_high(CPUHexagonState *env, diff --git
> a/target/hexagon/translate.c b/target/hexagon/translate.c index
> 9119e42ff7..060df6e5eb 100644
> --- a/target/hexagon/translate.c
> +++ b/target/hexagon/translate.c
> @@ -57,6 +57,7 @@ TCGv_i64 hex_store_val64[STORES_MAX];  TCGv
> hex_llsc_addr;  TCGv hex_llsc_val;
>  TCGv_i64 hex_llsc_val_i64;
> +TCGv_i64 hex_cycle_count;

Guard with #ifndef CONFIG_USER_ONLY

>  TCGv hex_vstore_addr[VSTORES_MAX];
>  TCGv hex_vstore_size[VSTORES_MAX];
>  TCGv hex_vstore_pending[VSTORES_MAX];
> @@ -125,6 +126,22 @@ static void gen_exception_raw(int excp)
>      gen_helper_raise_exception(tcg_env, tcg_constant_i32(excp));  }
> 
> +#ifndef CONFIG_USER_ONLY
> +static inline void gen_precise_exception(int excp, target_ulong PC) {
> +    tcg_gen_movi_tl(hex_cause_code, excp);
> +    gen_exception(HEX_EVENT_PRECISE, PC); }

gen_precise_exception() belongs in a different patch.

> +
> +static inline void gen_pcycle_counters(DisasContext *ctx) {
> +    if (ctx->pcycle_enabled) {
> +        tcg_gen_addi_i64(hex_cycle_count, hex_cycle_count, ctx-
> >num_cycles);
> +        ctx->num_cycles = 0;
> +    }
> +}
> +#endif
> +
>  static void gen_exec_counters(DisasContext *ctx)  {
>      tcg_gen_addi_tl(hex_gpr[HEX_REG_QEMU_PKT_CNT],
> @@ -133,6 +150,10 @@ static void gen_exec_counters(DisasContext *ctx)
>                      hex_gpr[HEX_REG_QEMU_INSN_CNT], ctx->num_insns);
>      tcg_gen_addi_tl(hex_gpr[HEX_REG_QEMU_HVX_CNT],
>                      hex_gpr[HEX_REG_QEMU_HVX_CNT], ctx->num_hvx_insns);
> +
> +#ifndef CONFIG_USER_ONLY
> +   gen_pcycle_counters(ctx);
> +#endif
>  }
> 
>  static bool use_goto_tb(DisasContext *ctx, target_ulong dest) @@ -785,6
> +806,7 @@ static void gen_commit_hvx(DisasContext *ctx)
>      }
>  }
> 
> +static const int PCYCLES_PER_PACKET = 3;
>  static void update_exec_counters(DisasContext *ctx)  {
>      Packet *pkt = ctx->pkt;
> @@ -804,6 +826,7 @@ static void update_exec_counters(DisasContext *ctx)
>      }
> 
>      ctx->num_packets++;
> +    ctx->num_cycles += PCYCLES_PER_PACKET;

Guard with #ifndef CONFIG_USER_ONLY

>      ctx->num_insns += num_real_insns;
>      ctx->num_hvx_insns += num_hvx_insns;  } @@ -946,11 +969,13 @@ static
> void hexagon_tr_init_disas_context(DisasContextBase *dcbase,
> 
>      ctx->mem_idx = FIELD_EX32(hex_flags, TB_FLAGS, MMU_INDEX);
>      ctx->num_packets = 0;
> +    ctx->num_cycles = 0;

Guard with #ifndef CONFIG_USER_ONLY

>      ctx->num_insns = 0;
>      ctx->num_hvx_insns = 0;
>      ctx->branch_cond = TCG_COND_NEVER;
>      ctx->is_tight_loop = FIELD_EX32(hex_flags, TB_FLAGS, IS_TIGHT_LOOP);
>      ctx->short_circuit = hex_cpu->short_circuit;
> +    ctx->pcycle_enabled = FIELD_EX32(hex_flags, TB_FLAGS,
> + PCYCLE_ENABLED);

Guard with #ifndef CONFIG_USER_ONLY

>  }
> 
>  static void hexagon_tr_tb_start(DisasContextBase *db, CPUState *cpu) @@
> -1077,6 +1102,8 @@ void hexagon_translate_init(void)
>          offsetof(CPUHexagonState, llsc_val), "llsc_val");
>      hex_llsc_val_i64 = tcg_global_mem_new_i64(tcg_env,
>          offsetof(CPUHexagonState, llsc_val_i64), "llsc_val_i64");
> +    hex_cycle_count = tcg_global_mem_new_i64(tcg_env,
> +            offsetof(CPUHexagonState, t_cycle_count), "t_cycle_count");

Guard with #ifndef CONFIG_USER_ONLY

>      for (i = 0; i < STORES_MAX; i++) {
>          snprintf(store_addr_names[i], NAME_LEN, "store_addr_%d", i);
>          hex_store_addr[i] = tcg_global_mem_new(tcg_env,
> --
> 2.34.1
Re: [PATCH 12/39] target/hexagon: Add implementation of cycle counters
Posted by Brian Cain 1 day, 9 hours ago
On 3/19/2025 2:50 PM, ltaylorsimpson@gmail.com wrote:
>
>> -----Original Message-----
>> From: Brian Cain <brian.cain@oss.qualcomm.com>
>> Sent: Friday, February 28, 2025 11:28 PM
>> To: qemu-devel@nongnu.org
>> Cc: brian.cain@oss.qualcomm.com; richard.henderson@linaro.org;
>> philmd@linaro.org; quic_mathbern@quicinc.com; ale@rev.ng; anjo@rev.ng;
>> quic_mliebel@quicinc.com; ltaylorsimpson@gmail.com;
>> alex.bennee@linaro.org; quic_mburton@quicinc.com;
>> sidneym@quicinc.com; Brian Cain <bcain@quicinc.com>
>> Subject: [PATCH 12/39] target/hexagon: Add implementation of cycle
>> counters
>>
>> From: Brian Cain <bcain@quicinc.com>
>>
>> Co-authored-by: Sid Manning <sidneym@quicinc.com>
>> Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com>
>> ---
>>   target/hexagon/cpu.h        | 25 ++++++++++++++++++++++---
>>   target/hexagon/translate.h  |  2 ++
>>   target/hexagon/cpu_helper.c | 12 +++++++++---
>> target/hexagon/translate.c  | 27 +++++++++++++++++++++++++++
>>   4 files changed, 60 insertions(+), 6 deletions(-)
>>
>> diff --git a/target/hexagon/cpu.h b/target/hexagon/cpu.h index
>> 4b9c9873dc..7e2ea838c5 100644
>> --- a/target/hexagon/cpu.h
>> +++ b/target/hexagon/cpu.h
>> @@ -27,11 +27,15 @@
>>
>>   #include "cpu-qom.h"
>>   #include "exec/cpu-defs.h"
>> +#include "exec/cpu-common.h"
>>   #include "hex_regs.h"
>>   #include "mmvec/mmvec.h"
>>   #include "hw/registerfields.h"
>>
>> +#ifndef CONFIG_USER_ONLY
>> +#include "reg_fields.h"
>>   typedef struct CPUHexagonTLBContext CPUHexagonTLBContext;
>> +#endif
> Why is reg_fields.h guarded by #ifndef CONFIG_USER_ONLY?


It's there to get syscfg field definitions like "SYSCFG_PCYCLEEN", which
this patch only uses in the system-mode branch of cpu_get_tb_cpu_state().
We can move the include to a more general place, though.


> Also, why wasn't the CPUHexagonTLBContext guarded when it was first inserted?


It should have been.  I will fix this.


>>   #define NUM_PREGS 4
>>   #define TOTAL_PER_THREAD_REGS 64
>> @@ -188,6 +192,7 @@ struct ArchCPU {
>>
>>   FIELD(TB_FLAGS, IS_TIGHT_LOOP, 0, 1)
>>   FIELD(TB_FLAGS, MMU_INDEX, 1, 3)
>> +FIELD(TB_FLAGS, PCYCLE_ENABLED, 4, 1)
>>
>>   G_NORETURN void hexagon_raise_exception_err(CPUHexagonState *env,
>>                                               uint32_t exception, @@ -201,6 +206,11 @@ void
>> hexagon_cpu_soft_reset(CPUHexagonState *env);  #endif
>>
>>   #include "exec/cpu-all.h"
>> +
>> +#ifndef CONFIG_USER_ONLY
>> +#include "cpu_helper.h"
>> +#endif
>> +
>>   static inline void cpu_get_tb_cpu_state(CPUHexagonState *env, vaddr *pc,
>>                                           uint64_t *cs_base, uint32_t *flags)  { @@ -210,16
>> +220,27 @@ static inline void cpu_get_tb_cpu_state(CPUHexagonState
>> *env, vaddr *pc,
>>       if (*pc == env->gpr[HEX_REG_SA0]) {
>>           hex_flags = FIELD_DP32(hex_flags, TB_FLAGS, IS_TIGHT_LOOP, 1);
>>       }
>> -    *flags = hex_flags;
>>       if (*pc & PCALIGN_MASK) {
>>           hexagon_raise_exception_err(env, HEX_CAUSE_PC_NOT_ALIGNED, 0);
>>       }
>>   #ifndef CONFIG_USER_ONLY
>> +    target_ulong syscfg = arch_get_system_reg(env, HEX_SREG_SYSCFG);
>> +
>> +    bool pcycle_enabled = extract32(syscfg,
>> +                                    reg_field_info[SYSCFG_PCYCLEEN].offset,
>> +
>> + reg_field_info[SYSCFG_PCYCLEEN].width);
>> +
>>       hex_flags = FIELD_DP32(hex_flags, TB_FLAGS, MMU_INDEX,
>>                              cpu_mmu_index(env_cpu(env), false));
>> +
>> +    if (pcycle_enabled) {
>> +        hex_flags = FIELD_DP32(hex_flags, TB_FLAGS, PCYCLE_ENABLED, 1);
>> +    }
>>   #else
>> +    hex_flags = FIELD_DP32(hex_flags, TB_FLAGS, PCYCLE_ENABLED, true);
> Are pcycles exposed in linux-user mode?  If not, make this flag system-mode only.


Yes, they are (for the "upcycle" registers).


>>       hex_flags = FIELD_DP32(hex_flags, TB_FLAGS, MMU_INDEX,
>> MMU_USER_IDX);  #endif
>> +    *flags = hex_flags;
>>   }
>>
>>   typedef HexagonCPU ArchCPU;
>> @@ -228,6 +249,4 @@ void hexagon_translate_init(void);  void
>> hexagon_translate_code(CPUState *cs, TranslationBlock *tb,
>>                               int *max_insns, vaddr pc, void *host_pc);
>>
>> -#include "exec/cpu-all.h"
>> -
>>   #endif /* HEXAGON_CPU_H */
>> diff --git a/target/hexagon/translate.h b/target/hexagon/translate.h index
>> 0eaa3db03e..9bc4b3ce8b 100644
>> --- a/target/hexagon/translate.h
>> +++ b/target/hexagon/translate.h
>> @@ -83,6 +83,8 @@ typedef struct DisasContext {
>>       TCGv new_pred_value[NUM_PREGS];
>>       TCGv branch_taken;
>>       TCGv dczero_addr;
>> +    bool pcycle_enabled;
> Guard with #ifndef CONFIG_USER_ONLY
>
>> +    uint32_t num_cycles;
>>   } DisasContext;
>>
>>   bool is_gather_store_insn(DisasContext *ctx); diff --git
>> a/target/hexagon/cpu_helper.c b/target/hexagon/cpu_helper.c index
>> 0b0802bfb9..1d9b9f8bef 100644
>> --- a/target/hexagon/cpu_helper.c
>> +++ b/target/hexagon/cpu_helper.c
>> @@ -48,17 +48,23 @@ uint32_t arch_get_system_reg(CPUHexagonState
>> *env, uint32_t reg)
>>
>>   uint64_t hexagon_get_sys_pcycle_count(CPUHexagonState *env)  {
>> -    g_assert_not_reached();
> Do we need a lock here?


I didn't think we did.  But now that you mention it, we may indeed.


>> +    uint64_t cycles = 0;
>> +    CPUState *cs;
>> +    CPU_FOREACH(cs) {
>> +        CPUHexagonState *env_ = cpu_env(cs);
>> +        cycles += env_->t_cycle_count;
>> +    }
>> +    return *(env->g_pcycle_base) + cycles;
>>   }
>>
>>   uint32_t hexagon_get_sys_pcycle_count_high(CPUHexagonState *env)  {
>> -    g_assert_not_reached();
>> +    return hexagon_get_sys_pcycle_count(env) >> 32;
>>   }
>>
>>   uint32_t hexagon_get_sys_pcycle_count_low(CPUHexagonState *env)  {
>> -    g_assert_not_reached();
>> +    return extract64(hexagon_get_sys_pcycle_count(env), 0, 32);
>>   }
>>
>>   void hexagon_set_sys_pcycle_count_high(CPUHexagonState *env, diff --git
>> a/target/hexagon/translate.c b/target/hexagon/translate.c index
>> 9119e42ff7..060df6e5eb 100644
>> --- a/target/hexagon/translate.c
>> +++ b/target/hexagon/translate.c
>> @@ -57,6 +57,7 @@ TCGv_i64 hex_store_val64[STORES_MAX];  TCGv
>> hex_llsc_addr;  TCGv hex_llsc_val;
>>   TCGv_i64 hex_llsc_val_i64;
>> +TCGv_i64 hex_cycle_count;
> Guard with #ifndef CONFIG_USER_ONLY


This feature belongs to both user-mode and system emulation, so it should
stay unguarded.


>>   TCGv hex_vstore_addr[VSTORES_MAX];
>>   TCGv hex_vstore_size[VSTORES_MAX];
>>   TCGv hex_vstore_pending[VSTORES_MAX];
>> @@ -125,6 +126,22 @@ static void gen_exception_raw(int excp)
>>       gen_helper_raise_exception(tcg_env, tcg_constant_i32(excp));  }
>>
>> +#ifndef CONFIG_USER_ONLY
>> +static inline void gen_precise_exception(int excp, target_ulong PC) {
>> +    tcg_gen_movi_tl(hex_cause_code, excp);
>> +    gen_exception(HEX_EVENT_PRECISE, PC); }
> Belongs in a different patch.


I will fix this.


>> +
>> +static inline void gen_pcycle_counters(DisasContext *ctx) {
>> +    if (ctx->pcycle_enabled) {
>> +        tcg_gen_addi_i64(hex_cycle_count, hex_cycle_count, ctx-
>>> num_cycles);
>> +        ctx->num_cycles = 0;
>> +    }
>> +}
>> +#endif
>> +
>>   static void gen_exec_counters(DisasContext *ctx)  {
>>       tcg_gen_addi_tl(hex_gpr[HEX_REG_QEMU_PKT_CNT],
>> @@ -133,6 +150,10 @@ static void gen_exec_counters(DisasContext *ctx)
>>                       hex_gpr[HEX_REG_QEMU_INSN_CNT], ctx->num_insns);
>>       tcg_gen_addi_tl(hex_gpr[HEX_REG_QEMU_HVX_CNT],
>>                       hex_gpr[HEX_REG_QEMU_HVX_CNT], ctx->num_hvx_insns);
>> +
>> +#ifndef CONFIG_USER_ONLY
>> +   gen_pcycle_counters(ctx);
>> +#endif
>>   }
>>
>>   static bool use_goto_tb(DisasContext *ctx, target_ulong dest) @@ -785,6
>> +806,7 @@ static void gen_commit_hvx(DisasContext *ctx)
>>       }
>>   }
>>
>> +static const int PCYCLES_PER_PACKET = 3;
>>   static void update_exec_counters(DisasContext *ctx)  {
>>       Packet *pkt = ctx->pkt;
>> @@ -804,6 +826,7 @@ static void update_exec_counters(DisasContext *ctx)
>>       }
>>
>>       ctx->num_packets++;
>> +    ctx->num_cycles += PCYCLES_PER_PACKET;
> Guard
>
>>       ctx->num_insns += num_real_insns;
>>       ctx->num_hvx_insns += num_hvx_insns;  } @@ -946,11 +969,13 @@ static
>> void hexagon_tr_init_disas_context(DisasContextBase *dcbase,
>>
>>       ctx->mem_idx = FIELD_EX32(hex_flags, TB_FLAGS, MMU_INDEX);
>>       ctx->num_packets = 0;
>> +    ctx->num_cycles = 0;
> Guard
>
>>       ctx->num_insns = 0;
>>       ctx->num_hvx_insns = 0;
>>       ctx->branch_cond = TCG_COND_NEVER;
>>       ctx->is_tight_loop = FIELD_EX32(hex_flags, TB_FLAGS, IS_TIGHT_LOOP);
>>       ctx->short_circuit = hex_cpu->short_circuit;
>> +    ctx->pcycle_enabled = FIELD_EX32(hex_flags, TB_FLAGS,
>> + PCYCLE_ENABLED);
> Guard
>
>>   }
>>
>>   static void hexagon_tr_tb_start(DisasContextBase *db, CPUState *cpu) @@
>> -1077,6 +1102,8 @@ void hexagon_translate_init(void)
>>           offsetof(CPUHexagonState, llsc_val), "llsc_val");
>>       hex_llsc_val_i64 = tcg_global_mem_new_i64(tcg_env,
>>           offsetof(CPUHexagonState, llsc_val_i64), "llsc_val_i64");
>> +    hex_cycle_count = tcg_global_mem_new_i64(tcg_env,
>> +            offsetof(CPUHexagonState, t_cycle_count), "t_cycle_count");
> Guard


These will remain unguarded, as explained above: cycle counting is a general
feature, used in both user-mode and system emulation.


>>       for (i = 0; i < STORES_MAX; i++) {
>>           snprintf(store_addr_names[i], NAME_LEN, "store_addr_%d", i);
>>           hex_store_addr[i] = tcg_global_mem_new(tcg_env,
>> --
>> 2.34.1
>