Provide all the necessary bits in the generic riscv pmu driver to be
able to mmap perf events in userspace: the heavy lifting lies in the
driver backend, namely the legacy and sbi implementations.
Note that arch_perf_update_userpage is almost a copy of arm64 code.
Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
---
arch/riscv/kernel/Makefile | 2 +-
arch/riscv/kernel/perf_event.c | 58 ++++++++++++++++++++++++++++++++++
drivers/perf/riscv_pmu.c | 41 ++++++++++++++++++++++++
include/linux/perf/riscv_pmu.h | 4 +++
4 files changed, 104 insertions(+), 1 deletion(-)
create mode 100644 arch/riscv/kernel/perf_event.c
diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
index 4cf303a779ab..0d215fd9860d 100644
--- a/arch/riscv/kernel/Makefile
+++ b/arch/riscv/kernel/Makefile
@@ -70,7 +70,7 @@ obj-$(CONFIG_DYNAMIC_FTRACE) += mcount-dyn.o
obj-$(CONFIG_TRACE_IRQFLAGS) += trace_irq.o
-obj-$(CONFIG_PERF_EVENTS) += perf_callchain.o
+obj-$(CONFIG_PERF_EVENTS) += perf_callchain.o perf_event.o
obj-$(CONFIG_HAVE_PERF_REGS) += perf_regs.o
obj-$(CONFIG_RISCV_SBI) += sbi.o
ifeq ($(CONFIG_RISCV_SBI), y)
diff --git a/arch/riscv/kernel/perf_event.c b/arch/riscv/kernel/perf_event.c
new file mode 100644
index 000000000000..94174a0fc251
--- /dev/null
+++ b/arch/riscv/kernel/perf_event.c
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/sched_clock.h>
+
+void arch_perf_update_userpage(struct perf_event *event,
+ struct perf_event_mmap_page *userpg, u64 now)
+{
+ struct clock_read_data *rd;
+ unsigned int seq;
+ u64 ns;
+
+ userpg->cap_user_time = 0;
+ userpg->cap_user_time_zero = 0;
+ userpg->cap_user_time_short = 0;
+ userpg->cap_user_rdpmc =
+ !!(event->hw.flags & PERF_EVENT_FLAG_USER_READ_CNT);
+
+ userpg->pmc_width = 64;
+
+ do {
+ rd = sched_clock_read_begin(&seq);
+
+ userpg->time_mult = rd->mult;
+ userpg->time_shift = rd->shift;
+ userpg->time_zero = rd->epoch_ns;
+ userpg->time_cycles = rd->epoch_cyc;
+ userpg->time_mask = rd->sched_clock_mask;
+
+ /*
+ * Subtract the cycle base, such that software that
+ * doesn't know about cap_user_time_short still 'works'
+ * assuming no wraps.
+ */
+ ns = mul_u64_u32_shr(rd->epoch_cyc, rd->mult, rd->shift);
+ userpg->time_zero -= ns;
+
+ } while (sched_clock_read_retry(seq));
+
+ userpg->time_offset = userpg->time_zero - now;
+
+ /*
+ * time_shift is not expected to be greater than 31 due to
+ * the original published conversion algorithm shifting a
+ * 32-bit value (now specifies a 64-bit value) - refer
+ * perf_event_mmap_page documentation in perf_event.h.
+ */
+ if (userpg->time_shift == 32) {
+ userpg->time_shift = 31;
+ userpg->time_mult >>= 1;
+ }
+
+ /*
+ * Internal timekeeping for enabled/running/stopped times
+ * is always computed with the sched_clock.
+ */
+ userpg->cap_user_time = 1;
+ userpg->cap_user_time_zero = 1;
+ userpg->cap_user_time_short = 1;
+}
diff --git a/drivers/perf/riscv_pmu.c b/drivers/perf/riscv_pmu.c
index ebca5eab9c9b..af69da268246 100644
--- a/drivers/perf/riscv_pmu.c
+++ b/drivers/perf/riscv_pmu.c
@@ -171,6 +171,8 @@ int riscv_pmu_event_set_period(struct perf_event *event)
local64_set(&hwc->prev_count, (u64)-left);
+ perf_event_update_userpage(event);
+
return overflow;
}
@@ -267,6 +269,9 @@ static int riscv_pmu_event_init(struct perf_event *event)
hwc->idx = -1;
hwc->event_base = mapped_event;
+ if (rvpmu->event_init)
+ rvpmu->event_init(event);
+
if (!is_sampling_event(event)) {
/*
* For non-sampling runs, limit the sample_period to half
@@ -283,6 +288,39 @@ static int riscv_pmu_event_init(struct perf_event *event)
return 0;
}
+static int riscv_pmu_event_idx(struct perf_event *event)
+{
+ struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
+
+ if (!(event->hw.flags & PERF_EVENT_FLAG_USER_READ_CNT))
+ return 0;
+
+ if (rvpmu->csr_index)
+ return rvpmu->csr_index(event) + 1;
+
+ return 0;
+}
+
+static void riscv_pmu_event_mapped(struct perf_event *event, struct mm_struct *mm)
+{
+ struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
+
+ if (rvpmu->event_mapped) {
+ rvpmu->event_mapped(event, mm);
+ perf_event_update_userpage(event);
+ }
+}
+
+static void riscv_pmu_event_unmapped(struct perf_event *event, struct mm_struct *mm)
+{
+ struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
+
+ if (rvpmu->event_unmapped) {
+ rvpmu->event_unmapped(event, mm);
+ perf_event_update_userpage(event);
+ }
+}
+
struct riscv_pmu *riscv_pmu_alloc(void)
{
struct riscv_pmu *pmu;
@@ -307,6 +345,9 @@ struct riscv_pmu *riscv_pmu_alloc(void)
}
pmu->pmu = (struct pmu) {
.event_init = riscv_pmu_event_init,
+ .event_mapped = riscv_pmu_event_mapped,
+ .event_unmapped = riscv_pmu_event_unmapped,
+ .event_idx = riscv_pmu_event_idx,
.add = riscv_pmu_add,
.del = riscv_pmu_del,
.start = riscv_pmu_start,
diff --git a/include/linux/perf/riscv_pmu.h b/include/linux/perf/riscv_pmu.h
index 9f70d94942e0..1452c8af3b67 100644
--- a/include/linux/perf/riscv_pmu.h
+++ b/include/linux/perf/riscv_pmu.h
@@ -55,6 +55,10 @@ struct riscv_pmu {
void (*ctr_start)(struct perf_event *event, u64 init_val);
void (*ctr_stop)(struct perf_event *event, unsigned long flag);
int (*event_map)(struct perf_event *event, u64 *config);
+ void (*event_init)(struct perf_event *event);
+ void (*event_mapped)(struct perf_event *event, struct mm_struct *mm);
+ void (*event_unmapped)(struct perf_event *event, struct mm_struct *mm);
+ uint8_t (*csr_index)(struct perf_event *event);
struct cpu_hw_events __percpu *hw_events;
struct hlist_node node;
--
2.37.2
On Fri, May 12, 2023 at 1:58 AM Alexandre Ghiti <alexghiti@rivosinc.com> wrote:
>
> Provide all the necessary bits in the generic riscv pmu driver to be
> able to mmap perf events in userspace: the heavy lifting lies in the
> driver backend, namely the legacy and sbi implementations.
>
> Note that arch_perf_update_userpage is almost a copy of arm64 code.
>
> Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
> ---
> arch/riscv/kernel/Makefile | 2 +-
> arch/riscv/kernel/perf_event.c | 58 ++++++++++++++++++++++++++++++++++
> drivers/perf/riscv_pmu.c | 41 ++++++++++++++++++++++++
> include/linux/perf/riscv_pmu.h | 4 +++
> 4 files changed, 104 insertions(+), 1 deletion(-)
> create mode 100644 arch/riscv/kernel/perf_event.c
>
> diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
> index 4cf303a779ab..0d215fd9860d 100644
> --- a/arch/riscv/kernel/Makefile
> +++ b/arch/riscv/kernel/Makefile
> @@ -70,7 +70,7 @@ obj-$(CONFIG_DYNAMIC_FTRACE) += mcount-dyn.o
>
> obj-$(CONFIG_TRACE_IRQFLAGS) += trace_irq.o
>
> -obj-$(CONFIG_PERF_EVENTS) += perf_callchain.o
> +obj-$(CONFIG_PERF_EVENTS) += perf_callchain.o perf_event.o
> obj-$(CONFIG_HAVE_PERF_REGS) += perf_regs.o
> obj-$(CONFIG_RISCV_SBI) += sbi.o
> ifeq ($(CONFIG_RISCV_SBI), y)
> diff --git a/arch/riscv/kernel/perf_event.c b/arch/riscv/kernel/perf_event.c
> new file mode 100644
> index 000000000000..94174a0fc251
> --- /dev/null
> +++ b/arch/riscv/kernel/perf_event.c
> @@ -0,0 +1,58 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +#include <linux/sched_clock.h>
> +
> +void arch_perf_update_userpage(struct perf_event *event,
> + struct perf_event_mmap_page *userpg, u64 now)
> +{
> + struct clock_read_data *rd;
> + unsigned int seq;
> + u64 ns;
> +
> + userpg->cap_user_time = 0;
> + userpg->cap_user_time_zero = 0;
> + userpg->cap_user_time_short = 0;
> + userpg->cap_user_rdpmc =
> + !!(event->hw.flags & PERF_EVENT_FLAG_USER_READ_CNT);
> +
> + userpg->pmc_width = 64;
> +
The counter width is 64 for cycle & instret. Other hpmcounters can have
a different width.
This information should be retrieved from the counter info.
> + do {
> + rd = sched_clock_read_begin(&seq);
> +
> + userpg->time_mult = rd->mult;
> + userpg->time_shift = rd->shift;
> + userpg->time_zero = rd->epoch_ns;
> + userpg->time_cycles = rd->epoch_cyc;
> + userpg->time_mask = rd->sched_clock_mask;
> +
> + /*
> + * Subtract the cycle base, such that software that
> + * doesn't know about cap_user_time_short still 'works'
> + * assuming no wraps.
> + */
> + ns = mul_u64_u32_shr(rd->epoch_cyc, rd->mult, rd->shift);
> + userpg->time_zero -= ns;
> +
> + } while (sched_clock_read_retry(seq));
> +
> + userpg->time_offset = userpg->time_zero - now;
> +
> + /*
> + * time_shift is not expected to be greater than 31 due to
> + * the original published conversion algorithm shifting a
> + * 32-bit value (now specifies a 64-bit value) - refer
> + * perf_event_mmap_page documentation in perf_event.h.
> + */
> + if (userpg->time_shift == 32) {
> + userpg->time_shift = 31;
> + userpg->time_mult >>= 1;
> + }
> +
> + /*
> + * Internal timekeeping for enabled/running/stopped times
> + * is always computed with the sched_clock.
> + */
> + userpg->cap_user_time = 1;
> + userpg->cap_user_time_zero = 1;
> + userpg->cap_user_time_short = 1;
> +}
> diff --git a/drivers/perf/riscv_pmu.c b/drivers/perf/riscv_pmu.c
> index ebca5eab9c9b..af69da268246 100644
> --- a/drivers/perf/riscv_pmu.c
> +++ b/drivers/perf/riscv_pmu.c
> @@ -171,6 +171,8 @@ int riscv_pmu_event_set_period(struct perf_event *event)
>
> local64_set(&hwc->prev_count, (u64)-left);
>
> + perf_event_update_userpage(event);
> +
> return overflow;
> }
>
> @@ -267,6 +269,9 @@ static int riscv_pmu_event_init(struct perf_event *event)
> hwc->idx = -1;
> hwc->event_base = mapped_event;
>
> + if (rvpmu->event_init)
> + rvpmu->event_init(event);
> +
> if (!is_sampling_event(event)) {
> /*
> * For non-sampling runs, limit the sample_period to half
> @@ -283,6 +288,39 @@ static int riscv_pmu_event_init(struct perf_event *event)
> return 0;
> }
>
> +static int riscv_pmu_event_idx(struct perf_event *event)
> +{
> + struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
> +
> + if (!(event->hw.flags & PERF_EVENT_FLAG_USER_READ_CNT))
> + return 0;
> +
> + if (rvpmu->csr_index)
> + return rvpmu->csr_index(event) + 1;
> +
> + return 0;
> +}
> +
> +static void riscv_pmu_event_mapped(struct perf_event *event, struct mm_struct *mm)
> +{
> + struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
> +
> + if (rvpmu->event_mapped) {
> + rvpmu->event_mapped(event, mm);
> + perf_event_update_userpage(event);
> + }
> +}
> +
> +static void riscv_pmu_event_unmapped(struct perf_event *event, struct mm_struct *mm)
> +{
> + struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
> +
> + if (rvpmu->event_unmapped) {
> + rvpmu->event_unmapped(event, mm);
> + perf_event_update_userpage(event);
> + }
> +}
> +
> struct riscv_pmu *riscv_pmu_alloc(void)
> {
> struct riscv_pmu *pmu;
> @@ -307,6 +345,9 @@ struct riscv_pmu *riscv_pmu_alloc(void)
> }
> pmu->pmu = (struct pmu) {
> .event_init = riscv_pmu_event_init,
> + .event_mapped = riscv_pmu_event_mapped,
> + .event_unmapped = riscv_pmu_event_unmapped,
> + .event_idx = riscv_pmu_event_idx,
> .add = riscv_pmu_add,
> .del = riscv_pmu_del,
> .start = riscv_pmu_start,
> diff --git a/include/linux/perf/riscv_pmu.h b/include/linux/perf/riscv_pmu.h
> index 9f70d94942e0..1452c8af3b67 100644
> --- a/include/linux/perf/riscv_pmu.h
> +++ b/include/linux/perf/riscv_pmu.h
> @@ -55,6 +55,10 @@ struct riscv_pmu {
> void (*ctr_start)(struct perf_event *event, u64 init_val);
> void (*ctr_stop)(struct perf_event *event, unsigned long flag);
> int (*event_map)(struct perf_event *event, u64 *config);
> + void (*event_init)(struct perf_event *event);
> + void (*event_mapped)(struct perf_event *event, struct mm_struct *mm);
> + void (*event_unmapped)(struct perf_event *event, struct mm_struct *mm);
> + uint8_t (*csr_index)(struct perf_event *event);
>
> struct cpu_hw_events __percpu *hw_events;
> struct hlist_node node;
> --
> 2.37.2
>
--
Regards,
Atish
On Fri, Jun 16, 2023 at 10:28 AM Atish Patra <atishp@atishpatra.org> wrote:
>
> On Fri, May 12, 2023 at 1:58 AM Alexandre Ghiti <alexghiti@rivosinc.com> wrote:
> >
> > Provide all the necessary bits in the generic riscv pmu driver to be
> > able to mmap perf events in userspace: the heavy lifting lies in the
> > driver backend, namely the legacy and sbi implementations.
> >
> > Note that arch_perf_update_userpage is almost a copy of arm64 code.
> >
> > Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
> > ---
> > arch/riscv/kernel/Makefile | 2 +-
> > arch/riscv/kernel/perf_event.c | 58 ++++++++++++++++++++++++++++++++++
> > drivers/perf/riscv_pmu.c | 41 ++++++++++++++++++++++++
> > include/linux/perf/riscv_pmu.h | 4 +++
> > 4 files changed, 104 insertions(+), 1 deletion(-)
> > create mode 100644 arch/riscv/kernel/perf_event.c
> >
> > diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
> > index 4cf303a779ab..0d215fd9860d 100644
> > --- a/arch/riscv/kernel/Makefile
> > +++ b/arch/riscv/kernel/Makefile
> > @@ -70,7 +70,7 @@ obj-$(CONFIG_DYNAMIC_FTRACE) += mcount-dyn.o
> >
> > obj-$(CONFIG_TRACE_IRQFLAGS) += trace_irq.o
> >
> > -obj-$(CONFIG_PERF_EVENTS) += perf_callchain.o
> > +obj-$(CONFIG_PERF_EVENTS) += perf_callchain.o perf_event.o
> > obj-$(CONFIG_HAVE_PERF_REGS) += perf_regs.o
> > obj-$(CONFIG_RISCV_SBI) += sbi.o
> > ifeq ($(CONFIG_RISCV_SBI), y)
> > diff --git a/arch/riscv/kernel/perf_event.c b/arch/riscv/kernel/perf_event.c
> > new file mode 100644
> > index 000000000000..94174a0fc251
> > --- /dev/null
> > +++ b/arch/riscv/kernel/perf_event.c
> > @@ -0,0 +1,58 @@
> > +// SPDX-License-Identifier: GPL-2.0-only
> > +#include <linux/sched_clock.h>
> > +
> > +void arch_perf_update_userpage(struct perf_event *event,
> > + struct perf_event_mmap_page *userpg, u64 now)
> > +{
> > + struct clock_read_data *rd;
> > + unsigned int seq;
> > + u64 ns;
> > +
> > + userpg->cap_user_time = 0;
> > + userpg->cap_user_time_zero = 0;
> > + userpg->cap_user_time_short = 0;
> > + userpg->cap_user_rdpmc =
> > + !!(event->hw.flags & PERF_EVENT_FLAG_USER_READ_CNT);
> > +
> > + userpg->pmc_width = 64;
> > +
>
> The counter width is 64 for cycle & instret. Other hpmcounters can have
> a different width.
> This information should be retrieved from the counter info.
Yes, this is done in patch 7 when I adapt the perf SBI backend to
allow the user access.
>
> > + do {
> > + rd = sched_clock_read_begin(&seq);
> > +
> > + userpg->time_mult = rd->mult;
> > + userpg->time_shift = rd->shift;
> > + userpg->time_zero = rd->epoch_ns;
> > + userpg->time_cycles = rd->epoch_cyc;
> > + userpg->time_mask = rd->sched_clock_mask;
> > +
> > + /*
> > + * Subtract the cycle base, such that software that
> > + * doesn't know about cap_user_time_short still 'works'
> > + * assuming no wraps.
> > + */
> > + ns = mul_u64_u32_shr(rd->epoch_cyc, rd->mult, rd->shift);
> > + userpg->time_zero -= ns;
> > +
> > + } while (sched_clock_read_retry(seq));
> > +
> > + userpg->time_offset = userpg->time_zero - now;
> > +
> > + /*
> > + * time_shift is not expected to be greater than 31 due to
> > + * the original published conversion algorithm shifting a
> > + * 32-bit value (now specifies a 64-bit value) - refer
> > + * perf_event_mmap_page documentation in perf_event.h.
> > + */
> > + if (userpg->time_shift == 32) {
> > + userpg->time_shift = 31;
> > + userpg->time_mult >>= 1;
> > + }
> > +
> > + /*
> > + * Internal timekeeping for enabled/running/stopped times
> > + * is always computed with the sched_clock.
> > + */
> > + userpg->cap_user_time = 1;
> > + userpg->cap_user_time_zero = 1;
> > + userpg->cap_user_time_short = 1;
> > +}
> > diff --git a/drivers/perf/riscv_pmu.c b/drivers/perf/riscv_pmu.c
> > index ebca5eab9c9b..af69da268246 100644
> > --- a/drivers/perf/riscv_pmu.c
> > +++ b/drivers/perf/riscv_pmu.c
> > @@ -171,6 +171,8 @@ int riscv_pmu_event_set_period(struct perf_event *event)
> >
> > local64_set(&hwc->prev_count, (u64)-left);
> >
> > + perf_event_update_userpage(event);
> > +
> > return overflow;
> > }
> >
> > @@ -267,6 +269,9 @@ static int riscv_pmu_event_init(struct perf_event *event)
> > hwc->idx = -1;
> > hwc->event_base = mapped_event;
> >
> > + if (rvpmu->event_init)
> > + rvpmu->event_init(event);
> > +
> > if (!is_sampling_event(event)) {
> > /*
> > * For non-sampling runs, limit the sample_period to half
> > @@ -283,6 +288,39 @@ static int riscv_pmu_event_init(struct perf_event *event)
> > return 0;
> > }
> >
> > +static int riscv_pmu_event_idx(struct perf_event *event)
> > +{
> > + struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
> > +
> > + if (!(event->hw.flags & PERF_EVENT_FLAG_USER_READ_CNT))
> > + return 0;
> > +
> > + if (rvpmu->csr_index)
> > + return rvpmu->csr_index(event) + 1;
> > +
> > + return 0;
> > +}
> > +
> > +static void riscv_pmu_event_mapped(struct perf_event *event, struct mm_struct *mm)
> > +{
> > + struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
> > +
> > + if (rvpmu->event_mapped) {
> > + rvpmu->event_mapped(event, mm);
> > + perf_event_update_userpage(event);
> > + }
> > +}
> > +
> > +static void riscv_pmu_event_unmapped(struct perf_event *event, struct mm_struct *mm)
> > +{
> > + struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
> > +
> > + if (rvpmu->event_unmapped) {
> > + rvpmu->event_unmapped(event, mm);
> > + perf_event_update_userpage(event);
> > + }
> > +}
> > +
> > struct riscv_pmu *riscv_pmu_alloc(void)
> > {
> > struct riscv_pmu *pmu;
> > @@ -307,6 +345,9 @@ struct riscv_pmu *riscv_pmu_alloc(void)
> > }
> > pmu->pmu = (struct pmu) {
> > .event_init = riscv_pmu_event_init,
> > + .event_mapped = riscv_pmu_event_mapped,
> > + .event_unmapped = riscv_pmu_event_unmapped,
> > + .event_idx = riscv_pmu_event_idx,
> > .add = riscv_pmu_add,
> > .del = riscv_pmu_del,
> > .start = riscv_pmu_start,
> > diff --git a/include/linux/perf/riscv_pmu.h b/include/linux/perf/riscv_pmu.h
> > index 9f70d94942e0..1452c8af3b67 100644
> > --- a/include/linux/perf/riscv_pmu.h
> > +++ b/include/linux/perf/riscv_pmu.h
> > @@ -55,6 +55,10 @@ struct riscv_pmu {
> > void (*ctr_start)(struct perf_event *event, u64 init_val);
> > void (*ctr_stop)(struct perf_event *event, unsigned long flag);
> > int (*event_map)(struct perf_event *event, u64 *config);
> > + void (*event_init)(struct perf_event *event);
> > + void (*event_mapped)(struct perf_event *event, struct mm_struct *mm);
> > + void (*event_unmapped)(struct perf_event *event, struct mm_struct *mm);
> > + uint8_t (*csr_index)(struct perf_event *event);
> >
> > struct cpu_hw_events __percpu *hw_events;
> > struct hlist_node node;
> > --
> > 2.37.2
> >
>
>
> --
> Regards,
> Atish
On Fri, Jun 16, 2023 at 1:57 AM Alexandre Ghiti <alexghiti@rivosinc.com> wrote:
>
> On Fri, Jun 16, 2023 at 10:28 AM Atish Patra <atishp@atishpatra.org> wrote:
> >
> > On Fri, May 12, 2023 at 1:58 AM Alexandre Ghiti <alexghiti@rivosinc.com> wrote:
> > >
> > > Provide all the necessary bits in the generic riscv pmu driver to be
> > > able to mmap perf events in userspace: the heavy lifting lies in the
> > > driver backend, namely the legacy and sbi implementations.
> > >
> > > Note that arch_perf_update_userpage is almost a copy of arm64 code.
> > >
> > > Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
> > > ---
> > > arch/riscv/kernel/Makefile | 2 +-
> > > arch/riscv/kernel/perf_event.c | 58 ++++++++++++++++++++++++++++++++++
> > > drivers/perf/riscv_pmu.c | 41 ++++++++++++++++++++++++
> > > include/linux/perf/riscv_pmu.h | 4 +++
> > > 4 files changed, 104 insertions(+), 1 deletion(-)
> > > create mode 100644 arch/riscv/kernel/perf_event.c
> > >
> > > diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
> > > index 4cf303a779ab..0d215fd9860d 100644
> > > --- a/arch/riscv/kernel/Makefile
> > > +++ b/arch/riscv/kernel/Makefile
> > > @@ -70,7 +70,7 @@ obj-$(CONFIG_DYNAMIC_FTRACE) += mcount-dyn.o
> > >
> > > obj-$(CONFIG_TRACE_IRQFLAGS) += trace_irq.o
> > >
> > > -obj-$(CONFIG_PERF_EVENTS) += perf_callchain.o
> > > +obj-$(CONFIG_PERF_EVENTS) += perf_callchain.o perf_event.o
> > > obj-$(CONFIG_HAVE_PERF_REGS) += perf_regs.o
> > > obj-$(CONFIG_RISCV_SBI) += sbi.o
> > > ifeq ($(CONFIG_RISCV_SBI), y)
> > > diff --git a/arch/riscv/kernel/perf_event.c b/arch/riscv/kernel/perf_event.c
> > > new file mode 100644
> > > index 000000000000..94174a0fc251
> > > --- /dev/null
> > > +++ b/arch/riscv/kernel/perf_event.c
> > > @@ -0,0 +1,58 @@
> > > +// SPDX-License-Identifier: GPL-2.0-only
> > > +#include <linux/sched_clock.h>
> > > +
> > > +void arch_perf_update_userpage(struct perf_event *event,
> > > + struct perf_event_mmap_page *userpg, u64 now)
> > > +{
> > > + struct clock_read_data *rd;
> > > + unsigned int seq;
> > > + u64 ns;
> > > +
> > > + userpg->cap_user_time = 0;
> > > + userpg->cap_user_time_zero = 0;
> > > + userpg->cap_user_time_short = 0;
> > > + userpg->cap_user_rdpmc =
> > > + !!(event->hw.flags & PERF_EVENT_FLAG_USER_READ_CNT);
> > > +
> > > + userpg->pmc_width = 64;
> > > +
> >
> > The counter width is 64 for cycle & instret. Other hpmcounters can have
> > a different width.
> > This information should be retrieved from the counter info.
>
> Yes, this is done in patch 7 when I adapt the perf SBI backend to
> allow the user access.
>
Yes. I missed that earlier. Thanks.
> >
> > > + do {
> > > + rd = sched_clock_read_begin(&seq);
> > > +
> > > + userpg->time_mult = rd->mult;
> > > + userpg->time_shift = rd->shift;
> > > + userpg->time_zero = rd->epoch_ns;
> > > + userpg->time_cycles = rd->epoch_cyc;
> > > + userpg->time_mask = rd->sched_clock_mask;
> > > +
> > > + /*
> > > + * Subtract the cycle base, such that software that
> > > + * doesn't know about cap_user_time_short still 'works'
> > > + * assuming no wraps.
> > > + */
> > > + ns = mul_u64_u32_shr(rd->epoch_cyc, rd->mult, rd->shift);
> > > + userpg->time_zero -= ns;
> > > +
> > > + } while (sched_clock_read_retry(seq));
> > > +
> > > + userpg->time_offset = userpg->time_zero - now;
> > > +
> > > + /*
> > > + * time_shift is not expected to be greater than 31 due to
> > > + * the original published conversion algorithm shifting a
> > > + * 32-bit value (now specifies a 64-bit value) - refer
> > > + * perf_event_mmap_page documentation in perf_event.h.
> > > + */
> > > + if (userpg->time_shift == 32) {
> > > + userpg->time_shift = 31;
> > > + userpg->time_mult >>= 1;
> > > + }
> > > +
> > > + /*
> > > + * Internal timekeeping for enabled/running/stopped times
> > > + * is always computed with the sched_clock.
> > > + */
> > > + userpg->cap_user_time = 1;
> > > + userpg->cap_user_time_zero = 1;
> > > + userpg->cap_user_time_short = 1;
> > > +}
> > > diff --git a/drivers/perf/riscv_pmu.c b/drivers/perf/riscv_pmu.c
> > > index ebca5eab9c9b..af69da268246 100644
> > > --- a/drivers/perf/riscv_pmu.c
> > > +++ b/drivers/perf/riscv_pmu.c
> > > @@ -171,6 +171,8 @@ int riscv_pmu_event_set_period(struct perf_event *event)
> > >
> > > local64_set(&hwc->prev_count, (u64)-left);
> > >
> > > + perf_event_update_userpage(event);
> > > +
> > > return overflow;
> > > }
> > >
> > > @@ -267,6 +269,9 @@ static int riscv_pmu_event_init(struct perf_event *event)
> > > hwc->idx = -1;
> > > hwc->event_base = mapped_event;
> > >
> > > + if (rvpmu->event_init)
> > > + rvpmu->event_init(event);
> > > +
> > > if (!is_sampling_event(event)) {
> > > /*
> > > * For non-sampling runs, limit the sample_period to half
> > > @@ -283,6 +288,39 @@ static int riscv_pmu_event_init(struct perf_event *event)
> > > return 0;
> > > }
> > >
> > > +static int riscv_pmu_event_idx(struct perf_event *event)
> > > +{
> > > + struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
> > > +
> > > + if (!(event->hw.flags & PERF_EVENT_FLAG_USER_READ_CNT))
> > > + return 0;
> > > +
> > > + if (rvpmu->csr_index)
> > > + return rvpmu->csr_index(event) + 1;
> > > +
> > > + return 0;
> > > +}
> > > +
> > > +static void riscv_pmu_event_mapped(struct perf_event *event, struct mm_struct *mm)
> > > +{
> > > + struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
> > > +
> > > + if (rvpmu->event_mapped) {
> > > + rvpmu->event_mapped(event, mm);
> > > + perf_event_update_userpage(event);
> > > + }
> > > +}
> > > +
> > > +static void riscv_pmu_event_unmapped(struct perf_event *event, struct mm_struct *mm)
> > > +{
> > > + struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
> > > +
> > > + if (rvpmu->event_unmapped) {
> > > + rvpmu->event_unmapped(event, mm);
> > > + perf_event_update_userpage(event);
> > > + }
> > > +}
> > > +
> > > struct riscv_pmu *riscv_pmu_alloc(void)
> > > {
> > > struct riscv_pmu *pmu;
> > > @@ -307,6 +345,9 @@ struct riscv_pmu *riscv_pmu_alloc(void)
> > > }
> > > pmu->pmu = (struct pmu) {
> > > .event_init = riscv_pmu_event_init,
> > > + .event_mapped = riscv_pmu_event_mapped,
> > > + .event_unmapped = riscv_pmu_event_unmapped,
> > > + .event_idx = riscv_pmu_event_idx,
> > > .add = riscv_pmu_add,
> > > .del = riscv_pmu_del,
> > > .start = riscv_pmu_start,
> > > diff --git a/include/linux/perf/riscv_pmu.h b/include/linux/perf/riscv_pmu.h
> > > index 9f70d94942e0..1452c8af3b67 100644
> > > --- a/include/linux/perf/riscv_pmu.h
> > > +++ b/include/linux/perf/riscv_pmu.h
> > > @@ -55,6 +55,10 @@ struct riscv_pmu {
> > > void (*ctr_start)(struct perf_event *event, u64 init_val);
> > > void (*ctr_stop)(struct perf_event *event, unsigned long flag);
> > > int (*event_map)(struct perf_event *event, u64 *config);
> > > + void (*event_init)(struct perf_event *event);
> > > + void (*event_mapped)(struct perf_event *event, struct mm_struct *mm);
> > > + void (*event_unmapped)(struct perf_event *event, struct mm_struct *mm);
> > > + uint8_t (*csr_index)(struct perf_event *event);
> > >
> > > struct cpu_hw_events __percpu *hw_events;
> > > struct hlist_node node;
> > > --
> > > 2.37.2
> > >
> >
> >
> > --
> > Regards,
> > Atish
--
Regards,
Atish
On Fri, May 12, 2023 at 10:53:16AM +0200, Alexandre Ghiti wrote:
> Provide all the necessary bits in the generic riscv pmu driver to be
> able to mmap perf events in userspace: the heavy lifting lies in the
> driver backend, namely the legacy and sbi implementations.
>
> Note that arch_perf_update_userpage is almost a copy of arm64 code.
>
> Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
> ---
> arch/riscv/kernel/Makefile | 2 +-
> arch/riscv/kernel/perf_event.c | 58 ++++++++++++++++++++++++++++++++++
> drivers/perf/riscv_pmu.c | 41 ++++++++++++++++++++++++
> include/linux/perf/riscv_pmu.h | 4 +++
> 4 files changed, 104 insertions(+), 1 deletion(-)
> create mode 100644 arch/riscv/kernel/perf_event.c
>
Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
© 2016 - 2026 Red Hat, Inc.