[PATCH v2 3/5] perf/x86/amd/uncore: Use hrtimer for handling overflows
Posted by Sandipan Das 8 months ago
Uncore counters do not provide mechanisms like interrupts to report
overflows, and the accumulated user-visible count is incorrect if there
is more than one overflow between two successive read requests for the
same event, because the value of prev_count goes out-of-date for
calculating the correct delta.

To avoid this, start a hrtimer to periodically initiate a pmu->read()
of the active counters for keeping prev_count up-to-date. Note that the
hrtimer duration must be less than the shortest time it takes for a
counter to overflow for this approach to be effective.
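
As an illustration (a sketch, not part of the patch), the read path
recovers the delta from a wrapped counter roughly as below, assuming
the 48-bit counter width of these PMUs. A second wrap between two
reads silently loses 2^48 events, which is what the periodic read
prevents. Even at an assumed extreme rate of 10^10 events/sec, a
48-bit counter takes 2^48 / 10^10 ~= 28000 seconds (almost 8 hours)
to wrap, so the default period of 60 seconds chosen below leaves a
wide margin.

#include <stdint.h>
#include <stdio.h>

/* Illustrative: COUNTER_SHIFT discards the unimplemented upper 16
 * bits of a 48-bit counter, as the driver's read path does. */
#define COUNTER_SHIFT 16

static int64_t counter_delta(uint64_t prev, uint64_t now)
{
	/* A single wrap between reads is absorbed by the shifts. */
	return ((int64_t)(now << COUNTER_SHIFT) -
		(int64_t)(prev << COUNTER_SHIFT)) >> COUNTER_SHIFT;
}

int main(void)
{
	/* One wrap: 0x10 - 0xFFFFFFFFFFF0 (mod 2^48) = 32. */
	printf("%lld\n",
	       (long long)counter_delta(0xFFFFFFFFFFF0ULL, 0x10ULL));
	/* Two wraps between the same two reads would also report 32:
	 * the extra 2^48 events are lost. */
	return 0;
}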

Signed-off-by: Sandipan Das <sandipan.das@amd.com>
---
 arch/x86/events/amd/uncore.c | 63 ++++++++++++++++++++++++++++++++++++
 1 file changed, 63 insertions(+)

diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c
index 010024f09f2c..e09bfbb4a4cd 100644
--- a/arch/x86/events/amd/uncore.c
+++ b/arch/x86/events/amd/uncore.c
@@ -21,6 +21,7 @@
 #define NUM_COUNTERS_NB		4
 #define NUM_COUNTERS_L2		4
 #define NUM_COUNTERS_L3		6
+#define NUM_COUNTERS_MAX	64
 
 #define RDPMC_BASE_NB		6
 #define RDPMC_BASE_LLC		10
@@ -38,6 +39,10 @@ struct amd_uncore_ctx {
 	int refcnt;
 	int cpu;
 	struct perf_event **events;
+	unsigned long active_mask[BITS_TO_LONGS(NUM_COUNTERS_MAX)];
+	int nr_active;
+	struct hrtimer hrtimer;
+	u64 hrtimer_duration;
 };
 
 struct amd_uncore_pmu {
@@ -87,6 +92,42 @@ static struct amd_uncore_pmu *event_to_amd_uncore_pmu(struct perf_event *event)
 	return container_of(event->pmu, struct amd_uncore_pmu, pmu);
 }
 
+static enum hrtimer_restart amd_uncore_hrtimer(struct hrtimer *hrtimer)
+{
+	struct amd_uncore_ctx *ctx;
+	struct perf_event *event;
+	int bit;
+
+	ctx = container_of(hrtimer, struct amd_uncore_ctx, hrtimer);
+
+	if (!ctx->nr_active || ctx->cpu != smp_processor_id())
+		return HRTIMER_NORESTART;
+
+	for_each_set_bit(bit, ctx->active_mask, NUM_COUNTERS_MAX) {
+		event = ctx->events[bit];
+		event->pmu->read(event);
+	}
+
+	hrtimer_forward_now(hrtimer, ns_to_ktime(ctx->hrtimer_duration));
+	return HRTIMER_RESTART;
+}
+
+static void amd_uncore_start_hrtimer(struct amd_uncore_ctx *ctx)
+{
+	hrtimer_start(&ctx->hrtimer, ns_to_ktime(ctx->hrtimer_duration),
+		      HRTIMER_MODE_REL_PINNED_HARD);
+}
+
+static void amd_uncore_cancel_hrtimer(struct amd_uncore_ctx *ctx)
+{
+	hrtimer_cancel(&ctx->hrtimer);
+}
+
+static void amd_uncore_init_hrtimer(struct amd_uncore_ctx *ctx)
+{
+	hrtimer_setup(&ctx->hrtimer, amd_uncore_hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
+}
+
 static void amd_uncore_read(struct perf_event *event)
 {
 	struct hw_perf_event *hwc = &event->hw;
@@ -117,18 +158,26 @@ static void amd_uncore_read(struct perf_event *event)
 
 static void amd_uncore_start(struct perf_event *event, int flags)
 {
+	struct amd_uncore_pmu *pmu = event_to_amd_uncore_pmu(event);
+	struct amd_uncore_ctx *ctx = *per_cpu_ptr(pmu->ctx, event->cpu);
 	struct hw_perf_event *hwc = &event->hw;
 
+	if (!ctx->nr_active++)
+		amd_uncore_start_hrtimer(ctx);
+
 	if (flags & PERF_EF_RELOAD)
 		wrmsrl(hwc->event_base, (u64)local64_read(&hwc->prev_count));
 
 	hwc->state = 0;
+	__set_bit(hwc->idx, ctx->active_mask);
 	wrmsrl(hwc->config_base, (hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE));
 	perf_event_update_userpage(event);
 }
 
 static void amd_uncore_stop(struct perf_event *event, int flags)
 {
+	struct amd_uncore_pmu *pmu = event_to_amd_uncore_pmu(event);
+	struct amd_uncore_ctx *ctx = *per_cpu_ptr(pmu->ctx, event->cpu);
 	struct hw_perf_event *hwc = &event->hw;
 
 	wrmsrl(hwc->config_base, hwc->config);
@@ -138,6 +187,11 @@ static void amd_uncore_stop(struct perf_event *event, int flags)
 		event->pmu->read(event);
 		hwc->state |= PERF_HES_UPTODATE;
 	}
+
+	if (!--ctx->nr_active)
+		amd_uncore_cancel_hrtimer(ctx);
+
+	__clear_bit(hwc->idx, ctx->active_mask);
 }
 
 static int amd_uncore_add(struct perf_event *event, int flags)
@@ -490,6 +544,9 @@ static int amd_uncore_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
 				goto fail;
 			}
 
+			amd_uncore_init_hrtimer(curr);
+			curr->hrtimer_duration = 60LL * NSEC_PER_SEC;
+
 			cpumask_set_cpu(cpu, &pmu->active_mask);
 		}
 
@@ -879,12 +936,18 @@ static int amd_uncore_umc_event_init(struct perf_event *event)
 
 static void amd_uncore_umc_start(struct perf_event *event, int flags)
 {
+	struct amd_uncore_pmu *pmu = event_to_amd_uncore_pmu(event);
+	struct amd_uncore_ctx *ctx = *per_cpu_ptr(pmu->ctx, event->cpu);
 	struct hw_perf_event *hwc = &event->hw;
 
+	if (!ctx->nr_active++)
+		amd_uncore_start_hrtimer(ctx);
+
 	if (flags & PERF_EF_RELOAD)
 		wrmsrl(hwc->event_base, (u64)local64_read(&hwc->prev_count));
 
 	hwc->state = 0;
+	__set_bit(hwc->idx, ctx->active_mask);
 	wrmsrl(hwc->config_base, (hwc->config | AMD64_PERFMON_V2_ENABLE_UMC));
 	perf_event_update_userpage(event);
 }
-- 
2.43.0
Re: [PATCH v2 3/5] perf/x86/amd/uncore: Use hrtimer for handling overflows
Posted by Stephane Eranian 8 months ago
On Thu, Apr 17, 2025 at 8:44 PM Sandipan Das <sandipan.das@amd.com> wrote:
>
> Uncore counters do not provide mechanisms like interrupts to report
> overflows, and the accumulated user-visible count is incorrect if there
> is more than one overflow between two successive read requests for the
> same event, because the value of prev_count goes out-of-date for
> calculating the correct delta.
>
> To avoid this, start a hrtimer to periodically initiate a pmu->read()
> of the active counters for keeping prev_count up-to-date. Note that the
> hrtimer duration must be less than the shortest time it takes for a
> counter to overflow for this approach to be effective.
>
The problem I see is that the number of uncore PMUs varies a lot based
on the CPU model, in particular due to the L3 PMU.
Is there a timer armed per CCX, or only a global one that will generate
IPIs to all other CPUs?

Re: [PATCH v2 3/5] perf/x86/amd/uncore: Use hrtimer for handling overflows
Posted by Sandipan Das 8 months ago
On 4/18/2025 10:08 AM, Stephane Eranian wrote:
> On Thu, Apr 17, 2025 at 8:44 PM Sandipan Das <sandipan.das@amd.com> wrote:
>> [...]
> The problem I see is that the number of uncore PMU varies a lot based
> on the CPU model, in particular due to the L3 PMU.
> Is there a timer armed per CCX or only a global one that will generate
> IPI to all other CPUs?
> 

For the L3 PMU, it's on a per-CCX basis.
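
A minimal sketch of the resulting timer topology (hypothetical names,
mirroring the patch): each per-CCX context owns one hrtimer, armed
with HRTIMER_MODE_REL_PINNED_HARD from the CPU designated to manage
that context, so the callback always fires locally and no cross-CPU
IPIs are generated.

#include <linux/container_of.h>
#include <linux/hrtimer.h>
#include <linux/ktime.h>
#include <linux/smp.h>

struct ccx_ctx {
	struct hrtimer timer;
	int cpu;		/* CPU designated to manage this CCX */
	u64 period_ns;
};

static enum hrtimer_restart ccx_timer_fn(struct hrtimer *t)
{
	struct ccx_ctx *ctx = container_of(t, struct ccx_ctx, timer);

	/* Stop quietly if the designated CPU has changed under us. */
	if (ctx->cpu != smp_processor_id())
		return HRTIMER_NORESTART;

	/* ... pmu->read() the active counters of this CCX here ... */

	hrtimer_forward_now(t, ns_to_ktime(ctx->period_ns));
	return HRTIMER_RESTART;
}

static void ccx_timer_arm(struct ccx_ctx *ctx)
{
	/* One-time setup, then arm from ctx->cpu; PINNED keeps the
	 * timer (and its callback) on the CPU that arms it. */
	hrtimer_setup(&ctx->timer, ccx_timer_fn, CLOCK_MONOTONIC,
		      HRTIMER_MODE_REL_HARD);
	hrtimer_start(&ctx->timer, ns_to_ktime(ctx->period_ns),
		      HRTIMER_MODE_REL_PINNED_HARD);
}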
