[PATCH v7 10/11] target/riscv: More accurately model priv mode filtering.

Atish Patra posted 11 patches 5 months ago
Maintainers: Palmer Dabbelt <palmer@dabbelt.com>, Alistair Francis <alistair.francis@wdc.com>, Bin Meng <bmeng.cn@gmail.com>, Weiwei Li <liwei1518@gmail.com>, Daniel Henrique Barboza <dbarboza@ventanamicro.com>, Liu Zhiwei <zhiwei_liu@linux.alibaba.com>
There is a newer version of this series
[PATCH v7 10/11] target/riscv: More accurately model priv mode filtering.
Posted by Atish Patra 5 months ago
From: Rajnesh Kanwal <rkanwal@rivosinc.com>

In case of programmable counters configured to count inst/cycles
we often end-up with counter not incrementing at all from kernel's
perspective.

For example:
- Kernel configures hpm3 to count instructions and sets hpmcounter
  to -10000 and all modes except U mode are inhibited.
- In QEMU we configure a timer to expire after ~10000 instructions.
- Problem is, it's often the case that kernel might not even schedule
  Umode task and we hit the timer callback in QEMU.
- In the timer callback we inject the interrupt into kernel, kernel
  runs the handler and reads hpmcounter3 value.
- Given QEMU maintains individual counters to count for each privilege
  mode, and given umode never ran, the umode counter didn't increment
  and QEMU returns same value as was programmed by the kernel when
  starting the counter.
- Kernel checks for overflow using previous and current value of the
  counter and reprograms the counter given there wasn't an overflow
  as per the counter value. (Which itself is a problem. We have QEMU
  telling kernel that counter3 overflowed but the counter value
  returned by QEMU doesn't seem to reflect that.).

This change makes sure that timer is reprogrammed from the handler
if the counter didn't overflow based on the counter value.

Second, this change makes sure that whenever the counter is read,
it's value is updated to reflect the latest count.

Signed-off-by: Rajnesh Kanwal <rkanwal@rivosinc.com>
---
 target/riscv/csr.c |  5 ++++-
 target/riscv/pmu.c | 30 +++++++++++++++++++++++++++---
 target/riscv/pmu.h |  2 ++
 3 files changed, 33 insertions(+), 4 deletions(-)

diff --git a/target/riscv/csr.c b/target/riscv/csr.c
index 150e02f080ec..91172b90e000 100644
--- a/target/riscv/csr.c
+++ b/target/riscv/csr.c
@@ -970,6 +970,9 @@ static target_ulong riscv_pmu_ctr_get_fixed_counters_val(CPURISCVState *env,
         goto done;
     }
 
+    /* Update counter before reading. */
+    riscv_pmu_update_fixed_ctrs(env, env->priv, env->virt_enabled);
+
     if (!(cfg_val & MCYCLECFG_BIT_MINH)) {
         curr_val += counter_arr[PRV_M];
     }
@@ -1053,7 +1056,7 @@ static RISCVException write_mhpmcounterh(CPURISCVState *env, int csrno,
     return RISCV_EXCP_NONE;
 }
 
-static RISCVException riscv_pmu_read_ctr(CPURISCVState *env, target_ulong *val,
+RISCVException riscv_pmu_read_ctr(CPURISCVState *env, target_ulong *val,
                                          bool upper_half, uint32_t ctr_idx)
 {
     PMUCTRState *counter = &env->pmu_ctrs[ctr_idx];
diff --git a/target/riscv/pmu.c b/target/riscv/pmu.c
index 63420d9f3679..a4729f6c53bb 100644
--- a/target/riscv/pmu.c
+++ b/target/riscv/pmu.c
@@ -425,6 +425,8 @@ static void pmu_timer_trigger_irq(RISCVCPU *cpu,
     target_ulong *mhpmevent_val;
     uint64_t of_bit_mask;
     int64_t irq_trigger_at;
+    uint64_t curr_ctr_val, curr_ctrh_val;
+    uint64_t ctr_val;
 
     if (evt_idx != RISCV_PMU_EVENT_HW_CPU_CYCLES &&
         evt_idx != RISCV_PMU_EVENT_HW_INSTRUCTIONS) {
@@ -454,6 +456,26 @@ static void pmu_timer_trigger_irq(RISCVCPU *cpu,
         return;
     }
 
+    riscv_pmu_read_ctr(env, (target_ulong *)&curr_ctr_val, false, ctr_idx);
+    ctr_val = counter->mhpmcounter_val;
+    if (riscv_cpu_mxl(env) == MXL_RV32) {
+        riscv_pmu_read_ctr(env, (target_ulong *)&curr_ctrh_val, true, ctr_idx);
+        curr_ctr_val = curr_ctr_val | (curr_ctrh_val << 32);
+        ctr_val = ctr_val |
+                ((uint64_t)counter->mhpmcounterh_val << 32);
+    }
+
+    /*
+     * We can not accommodate for inhibited modes when setting up timer. Check
+     * if the counter has actually overflowed or not by comparing current
+     * counter value (accommodated for inhibited modes) with software written
+     * counter value.
+     */
+    if (curr_ctr_val >= ctr_val) {
+        riscv_pmu_setup_timer(env, curr_ctr_val, ctr_idx);
+        return;
+    }
+
     if (cpu->pmu_avail_ctrs & BIT(ctr_idx)) {
         /* Generate interrupt only if OF bit is clear */
         if (!(*mhpmevent_val & of_bit_mask)) {
@@ -475,7 +497,7 @@ void riscv_pmu_timer_cb(void *priv)
 
 int riscv_pmu_setup_timer(CPURISCVState *env, uint64_t value, uint32_t ctr_idx)
 {
-    uint64_t overflow_delta, overflow_at;
+    uint64_t overflow_delta, overflow_at, curr_ns;
     int64_t overflow_ns, overflow_left = 0;
     RISCVCPU *cpu = env_archcpu(env);
     PMUCTRState *counter = &env->pmu_ctrs[ctr_idx];
@@ -506,8 +528,10 @@ int riscv_pmu_setup_timer(CPURISCVState *env, uint64_t value, uint32_t ctr_idx)
     } else {
         return -1;
     }
-    overflow_at = (uint64_t)qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
-                  overflow_ns;
+    curr_ns = (uint64_t)qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+    overflow_at =  curr_ns + overflow_ns;
+    if (overflow_at <= curr_ns)
+        overflow_at = UINT64_MAX;
 
     if (overflow_at > INT64_MAX) {
         overflow_left += overflow_at - INT64_MAX;
diff --git a/target/riscv/pmu.h b/target/riscv/pmu.h
index ca40cfeed647..3853d0e2629e 100644
--- a/target/riscv/pmu.h
+++ b/target/riscv/pmu.h
@@ -36,5 +36,7 @@ int riscv_pmu_setup_timer(CPURISCVState *env, uint64_t value,
                           uint32_t ctr_idx);
 void riscv_pmu_update_fixed_ctrs(CPURISCVState *env, target_ulong newpriv,
                                  bool new_virt);
+RISCVException riscv_pmu_read_ctr(CPURISCVState *env, target_ulong *val,
+                                  bool upper_half, uint32_t ctr_idx);
 
 #endif /* RISCV_PMU_H */

-- 
2.34.1
Re: [PATCH v7 10/11] target/riscv: More accurately model priv mode filtering.
Posted by Daniel Henrique Barboza 4 months, 3 weeks ago

On 6/26/24 8:57 PM, Atish Patra wrote:
> From: Rajnesh Kanwal <rkanwal@rivosinc.com>
> 
> In case of programmable counters configured to count inst/cycles
> we often end-up with counter not incrementing at all from kernel's
> perspective.
> 
> For example:
> - Kernel configures hpm3 to count instructions and sets hpmcounter
>    to -10000 and all modes except U mode are inhibited.
> - In QEMU we configure a timer to expire after ~10000 instructions.
> - Problem is, it's often the case that kernel might not even schedule
>    Umode task and we hit the timer callback in QEMU.
> - In the timer callback we inject the interrupt into kernel, kernel
>    runs the handler and reads hpmcounter3 value.
> - Given QEMU maintains individual counters to count for each privilege
>    mode, and given umode never ran, the umode counter didn't increment
>    and QEMU returns same value as was programmed by the kernel when
>    starting the counter.
> - Kernel checks for overflow using previous and current value of the
>    counter and reprograms the counter given there wasn't an overflow
>    as per the counter value. (Which itself is a problem. We have QEMU
>    telling kernel that counter3 overflowed but the counter value
>    returned by QEMU doesn't seem to reflect that.).
> 
> This change makes sure that timer is reprogrammed from the handler
> if the counter didn't overflow based on the counter value.
> 
> Second, this change makes sure that whenever the counter is read,
> it's value is updated to reflect the latest count.
> 
> Signed-off-by: Rajnesh Kanwal <rkanwal@rivosinc.com>
> ---

Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com>

>   target/riscv/csr.c |  5 ++++-
>   target/riscv/pmu.c | 30 +++++++++++++++++++++++++++---
>   target/riscv/pmu.h |  2 ++
>   3 files changed, 33 insertions(+), 4 deletions(-)
> 
> diff --git a/target/riscv/csr.c b/target/riscv/csr.c
> index 150e02f080ec..91172b90e000 100644
> --- a/target/riscv/csr.c
> +++ b/target/riscv/csr.c
> @@ -970,6 +970,9 @@ static target_ulong riscv_pmu_ctr_get_fixed_counters_val(CPURISCVState *env,
>           goto done;
>       }
>   
> +    /* Update counter before reading. */
> +    riscv_pmu_update_fixed_ctrs(env, env->priv, env->virt_enabled);
> +
>       if (!(cfg_val & MCYCLECFG_BIT_MINH)) {
>           curr_val += counter_arr[PRV_M];
>       }
> @@ -1053,7 +1056,7 @@ static RISCVException write_mhpmcounterh(CPURISCVState *env, int csrno,
>       return RISCV_EXCP_NONE;
>   }
>   
> -static RISCVException riscv_pmu_read_ctr(CPURISCVState *env, target_ulong *val,
> +RISCVException riscv_pmu_read_ctr(CPURISCVState *env, target_ulong *val,
>                                            bool upper_half, uint32_t ctr_idx)
>   {
>       PMUCTRState *counter = &env->pmu_ctrs[ctr_idx];
> diff --git a/target/riscv/pmu.c b/target/riscv/pmu.c
> index 63420d9f3679..a4729f6c53bb 100644
> --- a/target/riscv/pmu.c
> +++ b/target/riscv/pmu.c
> @@ -425,6 +425,8 @@ static void pmu_timer_trigger_irq(RISCVCPU *cpu,
>       target_ulong *mhpmevent_val;
>       uint64_t of_bit_mask;
>       int64_t irq_trigger_at;
> +    uint64_t curr_ctr_val, curr_ctrh_val;
> +    uint64_t ctr_val;
>   
>       if (evt_idx != RISCV_PMU_EVENT_HW_CPU_CYCLES &&
>           evt_idx != RISCV_PMU_EVENT_HW_INSTRUCTIONS) {
> @@ -454,6 +456,26 @@ static void pmu_timer_trigger_irq(RISCVCPU *cpu,
>           return;
>       }
>   
> +    riscv_pmu_read_ctr(env, (target_ulong *)&curr_ctr_val, false, ctr_idx);
> +    ctr_val = counter->mhpmcounter_val;
> +    if (riscv_cpu_mxl(env) == MXL_RV32) {
> +        riscv_pmu_read_ctr(env, (target_ulong *)&curr_ctrh_val, true, ctr_idx);
> +        curr_ctr_val = curr_ctr_val | (curr_ctrh_val << 32);
> +        ctr_val = ctr_val |
> +                ((uint64_t)counter->mhpmcounterh_val << 32);
> +    }
> +
> +    /*
> +     * We can not accommodate for inhibited modes when setting up timer. Check
> +     * if the counter has actually overflowed or not by comparing current
> +     * counter value (accommodated for inhibited modes) with software written
> +     * counter value.
> +     */
> +    if (curr_ctr_val >= ctr_val) {
> +        riscv_pmu_setup_timer(env, curr_ctr_val, ctr_idx);
> +        return;
> +    }
> +
>       if (cpu->pmu_avail_ctrs & BIT(ctr_idx)) {
>           /* Generate interrupt only if OF bit is clear */
>           if (!(*mhpmevent_val & of_bit_mask)) {
> @@ -475,7 +497,7 @@ void riscv_pmu_timer_cb(void *priv)
>   
>   int riscv_pmu_setup_timer(CPURISCVState *env, uint64_t value, uint32_t ctr_idx)
>   {
> -    uint64_t overflow_delta, overflow_at;
> +    uint64_t overflow_delta, overflow_at, curr_ns;
>       int64_t overflow_ns, overflow_left = 0;
>       RISCVCPU *cpu = env_archcpu(env);
>       PMUCTRState *counter = &env->pmu_ctrs[ctr_idx];
> @@ -506,8 +528,10 @@ int riscv_pmu_setup_timer(CPURISCVState *env, uint64_t value, uint32_t ctr_idx)
>       } else {
>           return -1;
>       }
> -    overflow_at = (uint64_t)qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
> -                  overflow_ns;
> +    curr_ns = (uint64_t)qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
> +    overflow_at =  curr_ns + overflow_ns;
> +    if (overflow_at <= curr_ns)
> +        overflow_at = UINT64_MAX;
>   
>       if (overflow_at > INT64_MAX) {
>           overflow_left += overflow_at - INT64_MAX;
> diff --git a/target/riscv/pmu.h b/target/riscv/pmu.h
> index ca40cfeed647..3853d0e2629e 100644
> --- a/target/riscv/pmu.h
> +++ b/target/riscv/pmu.h
> @@ -36,5 +36,7 @@ int riscv_pmu_setup_timer(CPURISCVState *env, uint64_t value,
>                             uint32_t ctr_idx);
>   void riscv_pmu_update_fixed_ctrs(CPURISCVState *env, target_ulong newpriv,
>                                    bool new_virt);
> +RISCVException riscv_pmu_read_ctr(CPURISCVState *env, target_ulong *val,
> +                                  bool upper_half, uint32_t ctr_idx);
>   
>   #endif /* RISCV_PMU_H */
>