Uncore counters do not provide mechanisms like interrupts to report
overflows, and the accumulated user-visible count is incorrect if there
is more than one overflow between two successive read requests for the
same event, because the value of prev_count goes out of date for
calculating the correct delta.

To avoid this, start a hrtimer to periodically initiate a pmu->read()
of the active counters to keep prev_count up to date. Note that for
this approach to be effective, the hrtimer duration must be less than
the shortest time it takes for a counter to overflow.
Signed-off-by: Sandipan Das <sandipan.das@amd.com>
---
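As an aside (not part of the patch or the commit message), a small
self-contained illustration of the failure mode: the usual masked delta
silently drops one full counter period for every extra wrap between two
reads. The 48-bit counter width and the 4 GHz increment rate below are
assumptions picked purely for illustration; at that rate a 48-bit counter
takes about 2^48 / 4e9 ~= 70,000 seconds (~19.5 hours) to wrap, so a
60-second poll stays comfortably below the shortest overflow time.

#include <stdint.h>
#include <stdio.h>

/* Assumed 48-bit counter width, for illustration only */
#define CNT_MASK ((1ULL << 48) - 1)

/* Delta derived from two raw counter reads, modulo the counter width */
static uint64_t counter_delta(uint64_t prev, uint64_t now)
{
	return (now - prev) & CNT_MASK;
}

int main(void)
{
	uint64_t prev = 100;

	/* The counter advances by one full period plus 5 before the next read */
	uint64_t now = (prev + (1ULL << 48) + 5) & CNT_MASK;

	/* Prints 5: the 2^48 events from the extra wrap are silently lost */
	printf("observed delta = %llu\n",
	       (unsigned long long)counter_delta(prev, now));
	return 0;
}
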
arch/x86/events/amd/uncore.c | 72 ++++++++++++++++++++++++++++++++++++
1 file changed, 72 insertions(+)
diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c
index 010024f09f2c..ee6528f2189f 100644
--- a/arch/x86/events/amd/uncore.c
+++ b/arch/x86/events/amd/uncore.c
@@ -21,6 +21,7 @@
#define NUM_COUNTERS_NB 4
#define NUM_COUNTERS_L2 4
#define NUM_COUNTERS_L3 6
+#define NUM_COUNTERS_MAX 64
#define RDPMC_BASE_NB 6
#define RDPMC_BASE_LLC 10
@@ -38,6 +39,10 @@ struct amd_uncore_ctx {
int refcnt;
int cpu;
struct perf_event **events;
+ unsigned long active_mask[BITS_TO_LONGS(NUM_COUNTERS_MAX)];
+ int nr_active;
+ struct hrtimer hrtimer;
+ u64 hrtimer_duration;
};
struct amd_uncore_pmu {
@@ -87,6 +92,51 @@ static struct amd_uncore_pmu *event_to_amd_uncore_pmu(struct perf_event *event)
return container_of(event->pmu, struct amd_uncore_pmu, pmu);
}
+static enum hrtimer_restart amd_uncore_hrtimer(struct hrtimer *hrtimer)
+{
+ struct amd_uncore_ctx *ctx;
+ struct perf_event *event;
+ unsigned long flags;
+ int bit;
+
+ ctx = container_of(hrtimer, struct amd_uncore_ctx, hrtimer);
+
+ if (!ctx->nr_active || ctx->cpu != smp_processor_id())
+ return HRTIMER_NORESTART;
+
+ /*
+ * Disable local interrupts to prevent pmu->start() or pmu->stop()
+ * from interrupting the update process
+ */
+ local_irq_save(flags);
+
+ for_each_set_bit(bit, ctx->active_mask, NUM_COUNTERS_MAX) {
+ event = ctx->events[bit];
+ event->pmu->read(event);
+ }
+
+ local_irq_restore(flags);
+
+ hrtimer_forward_now(hrtimer, ns_to_ktime(ctx->hrtimer_duration));
+ return HRTIMER_RESTART;
+}
+
+static void amd_uncore_start_hrtimer(struct amd_uncore_ctx *ctx)
+{
+ hrtimer_start(&ctx->hrtimer, ns_to_ktime(ctx->hrtimer_duration),
+ HRTIMER_MODE_REL_PINNED);
+}
+
+static void amd_uncore_cancel_hrtimer(struct amd_uncore_ctx *ctx)
+{
+ hrtimer_cancel(&ctx->hrtimer);
+}
+
+static void amd_uncore_init_hrtimer(struct amd_uncore_ctx *ctx)
+{
+ hrtimer_setup(&ctx->hrtimer, amd_uncore_hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+}
+
static void amd_uncore_read(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
@@ -117,18 +167,26 @@ static void amd_uncore_read(struct perf_event *event)
static void amd_uncore_start(struct perf_event *event, int flags)
{
+ struct amd_uncore_pmu *pmu = event_to_amd_uncore_pmu(event);
+ struct amd_uncore_ctx *ctx = *per_cpu_ptr(pmu->ctx, event->cpu);
struct hw_perf_event *hwc = &event->hw;
+ if (!ctx->nr_active++)
+ amd_uncore_start_hrtimer(ctx);
+
if (flags & PERF_EF_RELOAD)
wrmsrl(hwc->event_base, (u64)local64_read(&hwc->prev_count));
hwc->state = 0;
+ __set_bit(hwc->idx, ctx->active_mask);
wrmsrl(hwc->config_base, (hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE));
perf_event_update_userpage(event);
}
static void amd_uncore_stop(struct perf_event *event, int flags)
{
+ struct amd_uncore_pmu *pmu = event_to_amd_uncore_pmu(event);
+ struct amd_uncore_ctx *ctx = *per_cpu_ptr(pmu->ctx, event->cpu);
struct hw_perf_event *hwc = &event->hw;
wrmsrl(hwc->config_base, hwc->config);
@@ -138,6 +196,11 @@ static void amd_uncore_stop(struct perf_event *event, int flags)
event->pmu->read(event);
hwc->state |= PERF_HES_UPTODATE;
}
+
+ if (!--ctx->nr_active)
+ amd_uncore_cancel_hrtimer(ctx);
+
+ __clear_bit(hwc->idx, ctx->active_mask);
}
static int amd_uncore_add(struct perf_event *event, int flags)
@@ -490,6 +553,9 @@ static int amd_uncore_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
goto fail;
}
+ amd_uncore_init_hrtimer(curr);
+ curr->hrtimer_duration = 60LL * NSEC_PER_SEC;
+
cpumask_set_cpu(cpu, &pmu->active_mask);
}
@@ -879,12 +945,18 @@ static int amd_uncore_umc_event_init(struct perf_event *event)
static void amd_uncore_umc_start(struct perf_event *event, int flags)
{
+ struct amd_uncore_pmu *pmu = event_to_amd_uncore_pmu(event);
+ struct amd_uncore_ctx *ctx = *per_cpu_ptr(pmu->ctx, event->cpu);
struct hw_perf_event *hwc = &event->hw;
+ if (!ctx->nr_active++)
+ amd_uncore_start_hrtimer(ctx);
+
if (flags & PERF_EF_RELOAD)
wrmsrl(hwc->event_base, (u64)local64_read(&hwc->prev_count));
hwc->state = 0;
+ __set_bit(hwc->idx, ctx->active_mask);
wrmsrl(hwc->config_base, (hwc->config | AMD64_PERFMON_V2_ENABLE_UMC));
perf_event_update_userpage(event);
}
--
2.43.0
On Wed, Apr 09, 2025 at 01:27:07PM +0530, Sandipan Das wrote:
> [...]
> +static enum hrtimer_restart amd_uncore_hrtimer(struct hrtimer *hrtimer)
> +{
> + struct amd_uncore_ctx *ctx;
> + struct perf_event *event;
> + unsigned long flags;
> + int bit;
> +
> + ctx = container_of(hrtimer, struct amd_uncore_ctx, hrtimer);
> +
> + if (!ctx->nr_active || ctx->cpu != smp_processor_id())
> + return HRTIMER_NORESTART;
> +
> + /*
> + * Disable local interrupts to prevent pmu->start() or pmu->stop()
> + * from interrupting the update process
> + */
> + local_irq_save(flags);
> +
> + for_each_set_bit(bit, ctx->active_mask, NUM_COUNTERS_MAX) {
> + event = ctx->events[bit];
> + event->pmu->read(event);
> + }
> +
> + local_irq_restore(flags);
> +
> + hrtimer_forward_now(hrtimer, ns_to_ktime(ctx->hrtimer_duration));
> + return HRTIMER_RESTART;
> +}
> +
> +static void amd_uncore_start_hrtimer(struct amd_uncore_ctx *ctx)
> +{
> + hrtimer_start(&ctx->hrtimer, ns_to_ktime(ctx->hrtimer_duration),
> + HRTIMER_MODE_REL_PINNED);
> +}

So I know you copied this from the Intel uncore driver; but would not
both be improved by using HRTIMER_MODE_HARD?

It makes no sense to me to bounce the thing to SoftIRQ only to then
disable IRQs in the handler again. Not to mention that the whole SoftIRQ
thing delays things further, giving more room/time to reach overflow
wrap.
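
Concretely, that suggestion could look roughly like the following for the
two helpers (a sketch only, using the hard-expiry hrtimer modes; the
actual change may differ):

static void amd_uncore_init_hrtimer(struct amd_uncore_ctx *ctx)
{
	/* Expire in hard interrupt context instead of SoftIRQ */
	hrtimer_setup(&ctx->hrtimer, amd_uncore_hrtimer, CLOCK_MONOTONIC,
		      HRTIMER_MODE_REL_HARD);
}

static void amd_uncore_start_hrtimer(struct amd_uncore_ctx *ctx)
{
	hrtimer_start(&ctx->hrtimer, ns_to_ktime(ctx->hrtimer_duration),
		      HRTIMER_MODE_REL_PINNED_HARD);
}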
On 4/10/2025 1:50 PM, Peter Zijlstra wrote:
> On Wed, Apr 09, 2025 at 01:27:07PM +0530, Sandipan Das wrote:
>> [...]
>> +static void amd_uncore_start_hrtimer(struct amd_uncore_ctx *ctx)
>> +{
>> + hrtimer_start(&ctx->hrtimer, ns_to_ktime(ctx->hrtimer_duration),
>> + HRTIMER_MODE_REL_PINNED);
>> +}
>
> So I know you copied this from the Intel uncore driver; but would not
> both be improved by using HRTIMER_MODE_HARD?
>
> It makes no sense to me to bounce the thing to SoftIRQ only to then
> disable IRQs in the handler again. Not to mention that the whole SoftIRQ
> thing delays things further, giving more room/time to reach overflow
> wrap.

Agreed. Will address this in v2 for both the drivers.
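
A rough sketch of how the handler itself could then be simplified once
the timer expires in hard interrupt context (illustrative only; v2 may
end up different):

static enum hrtimer_restart amd_uncore_hrtimer(struct hrtimer *hrtimer)
{
	struct amd_uncore_ctx *ctx;
	struct perf_event *event;
	int bit;

	ctx = container_of(hrtimer, struct amd_uncore_ctx, hrtimer);

	if (!ctx->nr_active || ctx->cpu != smp_processor_id())
		return HRTIMER_NORESTART;

	/*
	 * With hard-irq expiry the callback already runs with interrupts
	 * disabled, so pmu->start() and pmu->stop() cannot interrupt the
	 * update and the local_irq_save()/restore() pair can be dropped.
	 */
	for_each_set_bit(bit, ctx->active_mask, NUM_COUNTERS_MAX) {
		event = ctx->events[bit];
		event->pmu->read(event);
	}

	hrtimer_forward_now(hrtimer, ns_to_ktime(ctx->hrtimer_duration));
	return HRTIMER_RESTART;
}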