[PATCH 1/2] iommu/riscv: add RISC-V IOMMU PMU support

Zong Li posted 2 patches 11 months ago
Posted by Zong Li 11 months ago
Add support for the RISC-V IOMMU hardware performance monitor,
covering both counting and sampling modes.

The specification does not define an event ID for counting clock
cycles, so there is no iohpmevt0 register associated with the
iohpmcycles counter. However, an event encoding is still needed for
cycle counting, so reserve the maximum event ID (0x7fff) for this
purpose.
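
For illustration, assuming the PMU instance registers as
riscv_iommu_pmu_<dev_name> (the suffix depends on the device name
passed to riscv_iommu_pmu_init()), usage could look roughly like:

  # counting mode
  perf stat -a -e riscv_iommu_pmu_<dev_name>/cycle/ -- sleep 1

  # sampling mode, with an explicit sample period
  perf record -a -e riscv_iommu_pmu_<dev_name>/tlb_miss/ -c 1000 -- sleep 1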

Signed-off-by: Zong Li <zong.li@sifive.com>
Tested-by: Xu Lu <luxu.kernel@bytedance.com>
---
 drivers/iommu/riscv/Makefile     |   2 +-
 drivers/iommu/riscv/iommu-bits.h |  16 +
 drivers/iommu/riscv/iommu-pmu.c  | 486 +++++++++++++++++++++++++++++++
 drivers/iommu/riscv/iommu.h      |   8 +
 4 files changed, 511 insertions(+), 1 deletion(-)
 create mode 100644 drivers/iommu/riscv/iommu-pmu.c

diff --git a/drivers/iommu/riscv/Makefile b/drivers/iommu/riscv/Makefile
index f54c9ed17d41..d36625a1fd08 100644
--- a/drivers/iommu/riscv/Makefile
+++ b/drivers/iommu/riscv/Makefile
@@ -1,3 +1,3 @@
 # SPDX-License-Identifier: GPL-2.0-only
-obj-$(CONFIG_RISCV_IOMMU) += iommu.o iommu-platform.o
+obj-$(CONFIG_RISCV_IOMMU) += iommu.o iommu-platform.o iommu-pmu.o
 obj-$(CONFIG_RISCV_IOMMU_PCI) += iommu-pci.o
diff --git a/drivers/iommu/riscv/iommu-bits.h b/drivers/iommu/riscv/iommu-bits.h
index 98daf0e1a306..60523449f016 100644
--- a/drivers/iommu/riscv/iommu-bits.h
+++ b/drivers/iommu/riscv/iommu-bits.h
@@ -17,6 +17,7 @@
 #include <linux/types.h>
 #include <linux/bitfield.h>
 #include <linux/bits.h>
+#include <linux/perf_event.h>
 
 /*
  * Chapter 5: Memory Mapped register interface
@@ -207,6 +208,7 @@ enum riscv_iommu_ddtp_modes {
 /* 5.22 Performance monitoring event counters (31 * 64bits) */
 #define RISCV_IOMMU_REG_IOHPMCTR_BASE	0x0068
 #define RISCV_IOMMU_REG_IOHPMCTR(_n)	(RISCV_IOMMU_REG_IOHPMCTR_BASE + ((_n) * 0x8))
+#define RISCV_IOMMU_IOHPMCTR_COUNTER	GENMASK_ULL(63, 0)
 
 /* 5.23 Performance monitoring event selectors (31 * 64bits) */
 #define RISCV_IOMMU_REG_IOHPMEVT_BASE	0x0160
@@ -250,6 +252,20 @@ enum riscv_iommu_hpmevent_id {
 	RISCV_IOMMU_HPMEVENT_MAX        = 9
 };
 
+/* Use maximum event ID for cycle event */
+#define RISCV_IOMMU_HPMEVENT_CYCLE	GENMASK_ULL(14, 0)
+
+#define RISCV_IOMMU_HPM_COUNTER_NUM	32
+
+struct riscv_iommu_pmu {
+	struct pmu pmu;
+	void __iomem *reg;
+	int num_counters;
+	u64 mask_counter;
+	struct perf_event *events[RISCV_IOMMU_IOHPMEVT_CNT + 1];
+	DECLARE_BITMAP(used_counters, RISCV_IOMMU_IOHPMEVT_CNT + 1);
+};
+
 /* 5.24 Translation request IOVA (64bits) */
 #define RISCV_IOMMU_REG_TR_REQ_IOVA     0x0258
 #define RISCV_IOMMU_TR_REQ_IOVA_VPN	GENMASK_ULL(63, 12)
diff --git a/drivers/iommu/riscv/iommu-pmu.c b/drivers/iommu/riscv/iommu-pmu.c
new file mode 100644
index 000000000000..74eb1525cd32
--- /dev/null
+++ b/drivers/iommu/riscv/iommu-pmu.c
@@ -0,0 +1,486 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2024 SiFive
+ *
+ * Authors
+ *	Zong Li <zong.li@sifive.com>
+ */
+
+#include <linux/io-64-nonatomic-hi-lo.h>
+
+#include "iommu.h"
+#include "iommu-bits.h"
+
+#define to_riscv_iommu_pmu(p) (container_of(p, struct riscv_iommu_pmu, pmu))
+
+#define RISCV_IOMMU_PMU_ATTR_EXTRACTOR(_name, _mask)			\
+	static inline u32 get_##_name(struct perf_event *event)		\
+	{								\
+		return FIELD_GET(_mask, event->attr.config);		\
+	}								\
+
+RISCV_IOMMU_PMU_ATTR_EXTRACTOR(event, RISCV_IOMMU_IOHPMEVT_EVENTID);
+RISCV_IOMMU_PMU_ATTR_EXTRACTOR(partial_matching, RISCV_IOMMU_IOHPMEVT_DMASK);
+RISCV_IOMMU_PMU_ATTR_EXTRACTOR(pid_pscid, RISCV_IOMMU_IOHPMEVT_PID_PSCID);
+RISCV_IOMMU_PMU_ATTR_EXTRACTOR(did_gscid, RISCV_IOMMU_IOHPMEVT_DID_GSCID);
+RISCV_IOMMU_PMU_ATTR_EXTRACTOR(filter_pid_pscid, RISCV_IOMMU_IOHPMEVT_PV_PSCV);
+RISCV_IOMMU_PMU_ATTR_EXTRACTOR(filter_did_gscid, RISCV_IOMMU_IOHPMEVT_DV_GSCV);
+RISCV_IOMMU_PMU_ATTR_EXTRACTOR(filter_id_type, RISCV_IOMMU_IOHPMEVT_IDT);
+
+/* Formats */
+PMU_FORMAT_ATTR(event, "config:0-14");
+PMU_FORMAT_ATTR(partial_matching, "config:15");
+PMU_FORMAT_ATTR(pid_pscid, "config:16-35");
+PMU_FORMAT_ATTR(did_gscid, "config:36-59");
+PMU_FORMAT_ATTR(filter_pid_pscid, "config:60");
+PMU_FORMAT_ATTR(filter_did_gscid, "config:61");
+PMU_FORMAT_ATTR(filter_id_type, "config:62");
+
+static struct attribute *riscv_iommu_pmu_formats[] = {
+	&format_attr_event.attr,
+	&format_attr_partial_matching.attr,
+	&format_attr_pid_pscid.attr,
+	&format_attr_did_gscid.attr,
+	&format_attr_filter_pid_pscid.attr,
+	&format_attr_filter_did_gscid.attr,
+	&format_attr_filter_id_type.attr,
+	NULL,
+};
+
+static const struct attribute_group riscv_iommu_pmu_format_group = {
+	.name = "format",
+	.attrs = riscv_iommu_pmu_formats,
+};
+
+/* Events */
+static ssize_t riscv_iommu_pmu_event_show(struct device *dev,
+					  struct device_attribute *attr,
+					  char *page)
+{
+	struct perf_pmu_events_attr *pmu_attr;
+
+	pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
+
+	return sprintf(page, "event=0x%02llx\n", pmu_attr->id);
+}
+
+PMU_EVENT_ATTR(cycle, event_attr_cycle,
+	       RISCV_IOMMU_HPMEVENT_CYCLE, riscv_iommu_pmu_event_show);
+PMU_EVENT_ATTR(dont_count, event_attr_dont_count,
+	       RISCV_IOMMU_HPMEVENT_INVALID, riscv_iommu_pmu_event_show);
+PMU_EVENT_ATTR(untranslated_req, event_attr_untranslated_req,
+	       RISCV_IOMMU_HPMEVENT_URQ, riscv_iommu_pmu_event_show);
+PMU_EVENT_ATTR(translated_req, event_attr_translated_req,
+	       RISCV_IOMMU_HPMEVENT_TRQ, riscv_iommu_pmu_event_show);
+PMU_EVENT_ATTR(ats_trans_req, event_attr_ats_trans_req,
+	       RISCV_IOMMU_HPMEVENT_ATS_RQ, riscv_iommu_pmu_event_show);
+PMU_EVENT_ATTR(tlb_miss, event_attr_tlb_miss,
+	       RISCV_IOMMU_HPMEVENT_TLB_MISS, riscv_iommu_pmu_event_show);
+PMU_EVENT_ATTR(ddt_walks, event_attr_ddt_walks,
+	       RISCV_IOMMU_HPMEVENT_DD_WALK, riscv_iommu_pmu_event_show);
+PMU_EVENT_ATTR(pdt_walks, event_attr_pdt_walks,
+	       RISCV_IOMMU_HPMEVENT_PD_WALK, riscv_iommu_pmu_event_show);
+PMU_EVENT_ATTR(s_vs_pt_walks, event_attr_s_vs_pt_walks,
+	       RISCV_IOMMU_HPMEVENT_S_VS_WALKS, riscv_iommu_pmu_event_show);
+PMU_EVENT_ATTR(g_pt_walks, event_attr_g_pt_walks,
+	       RISCV_IOMMU_HPMEVENT_G_WALKS, riscv_iommu_pmu_event_show);
+
+static struct attribute *riscv_iommu_pmu_events[] = {
+	&event_attr_cycle.attr.attr,
+	&event_attr_dont_count.attr.attr,
+	&event_attr_untranslated_req.attr.attr,
+	&event_attr_translated_req.attr.attr,
+	&event_attr_ats_trans_req.attr.attr,
+	&event_attr_tlb_miss.attr.attr,
+	&event_attr_ddt_walks.attr.attr,
+	&event_attr_pdt_walks.attr.attr,
+	&event_attr_s_vs_pt_walks.attr.attr,
+	&event_attr_g_pt_walks.attr.attr,
+	NULL,
+};
+
+static const struct attribute_group riscv_iommu_pmu_events_group = {
+	.name = "events",
+	.attrs = riscv_iommu_pmu_events,
+};
+
+static const struct attribute_group *riscv_iommu_pmu_attr_grps[] = {
+	&riscv_iommu_pmu_format_group,
+	&riscv_iommu_pmu_events_group,
+	NULL,
+};
+
+/* PMU Operations */
+static void riscv_iommu_pmu_set_counter(struct riscv_iommu_pmu *pmu, u32 idx,
+					u64 value)
+{
+	void __iomem *addr = pmu->reg + RISCV_IOMMU_REG_IOHPMCYCLES;
+
+	if (WARN_ON_ONCE(idx >= pmu->num_counters))
+		return;
+
+	if (idx == 0)
+		value = (value & ~RISCV_IOMMU_IOHPMCYCLES_OF) |
+			 (readq(addr) & RISCV_IOMMU_IOHPMCYCLES_OF);
+
+	writeq(FIELD_PREP(RISCV_IOMMU_IOHPMCTR_COUNTER, value), addr + idx * 8);
+}
+
+static u64 riscv_iommu_pmu_get_counter(struct riscv_iommu_pmu *pmu, u32 idx)
+{
+	void __iomem *addr = pmu->reg + RISCV_IOMMU_REG_IOHPMCYCLES;
+	u64 value;
+
+	if (WARN_ON_ONCE(idx >= pmu->num_counters))
+		return -EINVAL;
+
+	value = readq(addr + idx * 8);
+
+	if (idx == 0)
+		return FIELD_GET(RISCV_IOMMU_IOHPMCYCLES_COUNTER, value);
+
+	return FIELD_GET(RISCV_IOMMU_IOHPMCTR_COUNTER, value);
+}
+
+static u64 riscv_iommu_pmu_get_event(struct riscv_iommu_pmu *pmu, u32 idx)
+{
+	void __iomem *addr = pmu->reg + RISCV_IOMMU_REG_IOHPMEVT_BASE;
+
+	if (WARN_ON_ONCE(idx >= pmu->num_counters))
+		return 0;
+
+	/* There is no associated IOHPMEVT0 for IOHPMCYCLES */
+	if (idx == 0)
+		return 0;
+
+	return readq(addr + (idx - 1) * 8);
+}
+
+static void riscv_iommu_pmu_set_event(struct riscv_iommu_pmu *pmu, u32 idx,
+				      u64 value)
+{
+	void __iomem *addr = pmu->reg + RISCV_IOMMU_REG_IOHPMEVT_BASE;
+
+	if (WARN_ON_ONCE(idx >= pmu->num_counters))
+		return;
+
+	/* There is no associated IOHPMEVT0 for IOHPMCYCLES */
+	if (idx == 0)
+		return;
+
+	writeq(value, addr + (idx - 1) * 8);
+}
+
+static void riscv_iommu_pmu_enable_counter(struct riscv_iommu_pmu *pmu, u32 idx)
+{
+	void __iomem *addr = pmu->reg + RISCV_IOMMU_REG_IOCOUNTINH;
+	u32 value = readl(addr);
+
+	writel(value & ~BIT(idx), addr);
+}
+
+static void riscv_iommu_pmu_disable_counter(struct riscv_iommu_pmu *pmu, u32 idx)
+{
+	void __iomem *addr = pmu->reg + RISCV_IOMMU_REG_IOCOUNTINH;
+	u32 value = readl(addr);
+
+	writel(value | BIT(idx), addr);
+}
+
+static void riscv_iommu_pmu_enable_ovf_intr(struct riscv_iommu_pmu *pmu, u32 idx)
+{
+	u64 value;
+
+	if (get_event(pmu->events[idx]) == RISCV_IOMMU_HPMEVENT_CYCLE) {
+		value = riscv_iommu_pmu_get_counter(pmu, idx) & ~RISCV_IOMMU_IOHPMCYCLES_OF;
+		writeq(value, pmu->reg + RISCV_IOMMU_REG_IOHPMCYCLES);
+	} else {
+		value = riscv_iommu_pmu_get_event(pmu, idx) & ~RISCV_IOMMU_IOHPMEVT_OF;
+		writeq(value, pmu->reg + RISCV_IOMMU_REG_IOHPMEVT_BASE + (idx - 1) * 8);
+	}
+}
+
+static void riscv_iommu_pmu_disable_ovf_intr(struct riscv_iommu_pmu *pmu, u32 idx)
+{
+	u64 value;
+
+	if (get_event(pmu->events[idx]) == RISCV_IOMMU_HPMEVENT_CYCLE) {
+		value = riscv_iommu_pmu_get_counter(pmu, idx) | RISCV_IOMMU_IOHPMCYCLES_OF;
+		writeq(value, pmu->reg + RISCV_IOMMU_REG_IOHPMCYCLES);
+	} else {
+		value = riscv_iommu_pmu_get_event(pmu, idx) | RISCV_IOMMU_IOHPMEVT_OF;
+		writeq(value, pmu->reg + RISCV_IOMMU_REG_IOHPMEVT_BASE + (idx - 1) * 8);
+	}
+}
+
+static void riscv_iommu_pmu_start_all(struct riscv_iommu_pmu *pmu)
+{
+	int idx;
+
+	for_each_set_bit(idx, pmu->used_counters, pmu->num_counters) {
+		riscv_iommu_pmu_enable_ovf_intr(pmu, idx);
+		riscv_iommu_pmu_enable_counter(pmu, idx);
+	}
+}
+
+static void riscv_iommu_pmu_stop_all(struct riscv_iommu_pmu *pmu)
+{
+	writel(GENMASK_ULL(pmu->num_counters - 1, 0),
+	       pmu->reg + RISCV_IOMMU_REG_IOCOUNTINH);
+}
+
+/* PMU APIs */
+static int riscv_iommu_pmu_set_period(struct perf_event *event)
+{
+	struct riscv_iommu_pmu *pmu = to_riscv_iommu_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	s64 left = local64_read(&hwc->period_left);
+	s64 period = hwc->sample_period;
+	u64 max_period = pmu->mask_counter;
+	int ret = 0;
+
+	if (unlikely(left <= -period)) {
+		left = period;
+		local64_set(&hwc->period_left, left);
+		hwc->last_period = period;
+		ret = 1;
+	}
+
+	if (unlikely(left <= 0)) {
+		left += period;
+		local64_set(&hwc->period_left, left);
+		hwc->last_period = period;
+		ret = 1;
+	}
+
+	/*
+	 * Limit the maximum period to prevent the counter value
+	 * from overtaking the one we are about to program. In
+	 * effect we are reducing max_period to account for
+	 * interrupt latency (and we are being very conservative).
+	 */
+	if (left > (max_period >> 1))
+		left = (max_period >> 1);
+
+	local64_set(&hwc->prev_count, (u64)-left);
+	riscv_iommu_pmu_set_counter(pmu, hwc->idx, (u64)(-left) & max_period);
+	perf_event_update_userpage(event);
+
+	return ret;
+}
+
+static int riscv_iommu_pmu_event_init(struct perf_event *event)
+{
+	struct riscv_iommu_pmu *pmu = to_riscv_iommu_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+
+	hwc->idx = -1;
+	hwc->config = event->attr.config;
+
+	if (!is_sampling_event(event)) {
+		/*
+		 * For non-sampling runs, limit the sample_period to half
+		 * of the counter width. That way, the new counter value
+		 * is far less likely to overtake the previous one unless
+		 * you have some serious IRQ latency issues.
+		 */
+		hwc->sample_period = pmu->mask_counter >> 1;
+		hwc->last_period = hwc->sample_period;
+		local64_set(&hwc->period_left, hwc->sample_period);
+	}
+
+	return 0;
+}
+
+static void riscv_iommu_pmu_update(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	struct riscv_iommu_pmu *pmu = to_riscv_iommu_pmu(event->pmu);
+	u64 delta, prev, now;
+	u32 idx = hwc->idx;
+
+	do {
+		prev = local64_read(&hwc->prev_count);
+		now = riscv_iommu_pmu_get_counter(pmu, idx);
+	} while (local64_cmpxchg(&hwc->prev_count, prev, now) != prev);
+
+	delta = FIELD_GET(RISCV_IOMMU_IOHPMCTR_COUNTER, now - prev) & pmu->mask_counter;
+	local64_add(delta, &event->count);
+	local64_sub(delta, &hwc->period_left);
+}
+
+static void riscv_iommu_pmu_start(struct perf_event *event, int flags)
+{
+	struct riscv_iommu_pmu *pmu = to_riscv_iommu_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+
+	if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
+		return;
+
+	if (flags & PERF_EF_RELOAD)
+		WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
+
+	hwc->state = 0;
+	riscv_iommu_pmu_set_period(event);
+	riscv_iommu_pmu_set_event(pmu, hwc->idx, hwc->config);
+	riscv_iommu_pmu_enable_ovf_intr(pmu, hwc->idx);
+	riscv_iommu_pmu_enable_counter(pmu, hwc->idx);
+
+	perf_event_update_userpage(event);
+}
+
+static void riscv_iommu_pmu_stop(struct perf_event *event, int flags)
+{
+	struct riscv_iommu_pmu *pmu = to_riscv_iommu_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+
+	if (hwc->state & PERF_HES_STOPPED)
+		return;
+
+	riscv_iommu_pmu_set_event(pmu, hwc->idx, RISCV_IOMMU_HPMEVENT_INVALID);
+	riscv_iommu_pmu_disable_counter(pmu, hwc->idx);
+
+	if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE))
+		riscv_iommu_pmu_update(event);
+
+	hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
+}
+
+static int riscv_iommu_pmu_add(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	struct riscv_iommu_pmu *pmu = to_riscv_iommu_pmu(event->pmu);
+	unsigned int num_counters = pmu->num_counters;
+	int idx;
+
+	/* Reserve index zero for iohpmcycles */
+	if (get_event(event) == RISCV_IOMMU_HPMEVENT_CYCLE)
+		idx = 0;
+	else
+		idx = find_next_zero_bit(pmu->used_counters, num_counters, 1);
+
+	if (idx == num_counters)
+		return -EAGAIN;
+
+	set_bit(idx, pmu->used_counters);
+
+	pmu->events[idx] = event;
+	hwc->idx = idx;
+	hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
+
+	if (flags & PERF_EF_START)
+		riscv_iommu_pmu_start(event, flags);
+
+	/* Propagate changes to the userspace mapping. */
+	perf_event_update_userpage(event);
+
+	return 0;
+}
+
+static void riscv_iommu_pmu_read(struct perf_event *event)
+{
+	riscv_iommu_pmu_update(event);
+}
+
+static void riscv_iommu_pmu_del(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	struct riscv_iommu_pmu *pmu = to_riscv_iommu_pmu(event->pmu);
+	int idx = hwc->idx;
+
+	riscv_iommu_pmu_stop(event, PERF_EF_UPDATE);
+	pmu->events[idx] = NULL;
+	clear_bit(idx, pmu->used_counters);
+	perf_event_update_userpage(event);
+}
+
+irqreturn_t riscv_iommu_pmu_handle_irq(struct riscv_iommu_pmu *pmu)
+{
+	struct perf_sample_data data;
+	struct pt_regs *regs;
+	unsigned long ovf = readl(pmu->reg + RISCV_IOMMU_REG_IOCOUNTOVF);
+	int idx;
+
+	if (!ovf)
+		return IRQ_NONE;
+
+	riscv_iommu_pmu_stop_all(pmu);
+
+	regs = get_irq_regs();
+
+	for_each_set_bit(idx, &ovf, pmu->num_counters) {
+		struct perf_event *event = pmu->events[idx];
+		struct hw_perf_event *hwc;
+
+		if (WARN_ON_ONCE(!event) || !is_sampling_event(event))
+			continue;
+
+		hwc = &event->hw;
+
+		riscv_iommu_pmu_update(event);
+		perf_sample_data_init(&data, 0, hwc->last_period);
+		if (!riscv_iommu_pmu_set_period(event))
+			continue;
+
+		if (perf_event_overflow(event, &data, regs))
+			riscv_iommu_pmu_stop(event, 0);
+	}
+
+	riscv_iommu_pmu_start_all(pmu);
+
+	return IRQ_HANDLED;
+}
+
+int riscv_iommu_pmu_init(struct riscv_iommu_pmu *pmu, void __iomem *reg,
+			 const char *dev_name)
+{
+	char *name;
+	int ret;
+
+	pmu->reg = reg;
+	pmu->num_counters = RISCV_IOMMU_HPM_COUNTER_NUM;
+	pmu->mask_counter = RISCV_IOMMU_IOHPMCTR_COUNTER;
+
+	pmu->pmu = (struct pmu) {
+		.task_ctx_nr	= perf_invalid_context,
+		.event_init	= riscv_iommu_pmu_event_init,
+		.add		= riscv_iommu_pmu_add,
+		.del		= riscv_iommu_pmu_del,
+		.start		= riscv_iommu_pmu_start,
+		.stop		= riscv_iommu_pmu_stop,
+		.read		= riscv_iommu_pmu_read,
+		.attr_groups	= riscv_iommu_pmu_attr_grps,
+		.capabilities	= PERF_PMU_CAP_NO_EXCLUDE,
+		.module		= THIS_MODULE,
+	};
+
+	name = kasprintf(GFP_KERNEL, "riscv_iommu_pmu_%s", dev_name);
+
+	ret = perf_pmu_register(&pmu->pmu, name, -1);
+	if (ret) {
+		pr_err("Failed to register riscv_iommu_pmu_%s: %d\n",
+		       dev_name, ret);
+		return ret;
+	}
+
+	/* Stop all counters and later start the counter with perf */
+	riscv_iommu_pmu_stop_all(pmu);
+
+	pr_info("riscv_iommu_pmu_%s: Registered with %d counters\n",
+		dev_name, pmu->num_counters);
+
+	return 0;
+}
+
+void riscv_iommu_pmu_uninit(struct riscv_iommu_pmu *pmu)
+{
+	int idx;
+
+	/* Disable all used counters and their overflow interrupts */
+	for_each_set_bit(idx, pmu->used_counters, pmu->num_counters) {
+		riscv_iommu_pmu_disable_counter(pmu, idx);
+		riscv_iommu_pmu_disable_ovf_intr(pmu, idx);
+	}
+
+	perf_pmu_unregister(&pmu->pmu);
+}
diff --git a/drivers/iommu/riscv/iommu.h b/drivers/iommu/riscv/iommu.h
index b1c4664542b4..92659a8a75ae 100644
--- a/drivers/iommu/riscv/iommu.h
+++ b/drivers/iommu/riscv/iommu.h
@@ -60,11 +60,19 @@ struct riscv_iommu_device {
 	unsigned int ddt_mode;
 	dma_addr_t ddt_phys;
 	u64 *ddt_root;
+
+	/* hardware performance monitor */
+	struct riscv_iommu_pmu pmu;
 };
 
 int riscv_iommu_init(struct riscv_iommu_device *iommu);
 void riscv_iommu_remove(struct riscv_iommu_device *iommu);
 
+int riscv_iommu_pmu_init(struct riscv_iommu_pmu *pmu, void __iomem *reg,
+			 const char *name);
+void riscv_iommu_pmu_uninit(struct riscv_iommu_pmu *pmu);
+irqreturn_t riscv_iommu_pmu_handle_irq(struct riscv_iommu_pmu *pmu);
+
 #define riscv_iommu_readl(iommu, addr) \
 	readl_relaxed((iommu)->reg + (addr))
 
-- 
2.17.1
Re: [PATCH 1/2] iommu/riscv: add RISC-V IOMMU PMU support
Posted by kernel test robot 10 months, 3 weeks ago
Hi Zong,

kernel test robot noticed the following build errors:

[auto build test ERROR on linus/master]
[also build test ERROR on v6.13 next-20250123]
[cannot apply to joro-iommu/next]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Zong-Li/iommu-riscv-add-RISC-V-IOMMU-PMU-support/20250115-110456
base:   linus/master
patch link:    https://lore.kernel.org/r/20250115030306.29735-2-zong.li%40sifive.com
patch subject: [PATCH 1/2] iommu/riscv: add RISC-V IOMMU PMU support
config: riscv-randconfig-r062-20250123 (https://download.01.org/0day-ci/archive/20250123/202501231928.CmUsMTXF-lkp@intel.com/config)
compiler: riscv64-linux-gcc (GCC) 14.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250123/202501231928.CmUsMTXF-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202501231928.CmUsMTXF-lkp@intel.com/

All errors (new ones prefixed by >>):

   In file included from <command-line>:
   drivers/iommu/riscv/iommu-pmu.c: In function 'get_event':
>> drivers/iommu/riscv/iommu-pmu.c:19:46: error: 'struct perf_event' has no member named 'attr'
      19 |                 return FIELD_GET(_mask, event->attr.config);            \
         |                                              ^~
   include/linux/compiler_types.h:522:23: note: in definition of macro '__compiletime_assert'
     522 |                 if (!(condition))                                       \
         |                       ^~~~~~~~~
   include/linux/compiler_types.h:542:9: note: in expansion of macro '_compiletime_assert'
     542 |         _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)
         |         ^~~~~~~~~~~~~~~~~~~
   include/linux/build_bug.h:39:37: note: in expansion of macro 'compiletime_assert'
      39 | #define BUILD_BUG_ON_MSG(cond, msg) compiletime_assert(!(cond), msg)
         |                                     ^~~~~~~~~~~~~~~~~~
   include/linux/bitfield.h:72:17: note: in expansion of macro 'BUILD_BUG_ON_MSG'
      72 |                 BUILD_BUG_ON_MSG(__bf_cast_unsigned(_mask, _mask) >     \
         |                 ^~~~~~~~~~~~~~~~
   include/linux/bitfield.h:61:43: note: in expansion of macro '__unsigned_scalar_typeof'
      61 | #define __bf_cast_unsigned(type, x)     ((__unsigned_scalar_typeof(type))(x))
         |                                           ^~~~~~~~~~~~~~~~~~~~~~~~
   include/linux/bitfield.h:73:34: note: in expansion of macro '__bf_cast_unsigned'
      73 |                                  __bf_cast_unsigned(_reg, ~0ull),       \
         |                                  ^~~~~~~~~~~~~~~~~~
   include/linux/bitfield.h:155:17: note: in expansion of macro '__BF_FIELD_CHECK'
     155 |                 __BF_FIELD_CHECK(_mask, _reg, 0U, "FIELD_GET: ");       \
         |                 ^~~~~~~~~~~~~~~~
   drivers/iommu/riscv/iommu-pmu.c:19:24: note: in expansion of macro 'FIELD_GET'
      19 |                 return FIELD_GET(_mask, event->attr.config);            \
         |                        ^~~~~~~~~
   drivers/iommu/riscv/iommu-pmu.c:22:1: note: in expansion of macro 'RISCV_IOMMU_PMU_ATTR_EXTRACTOR'
      22 | RISCV_IOMMU_PMU_ATTR_EXTRACTOR(event, RISCV_IOMMU_IOHPMEVT_EVENTID);
         | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   In file included from include/linux/fortify-string.h:5,
                    from include/linux/string.h:389,
                    from include/linux/bitmap.h:13,
                    from include/linux/cpumask.h:12,
                    from arch/riscv/include/asm/processor.h:55,
                    from arch/riscv/include/asm/thread_info.h:42,
                    from include/linux/thread_info.h:60,
                    from include/asm-generic/preempt.h:5,
                    from ./arch/riscv/include/generated/asm/preempt.h:1,
                    from include/linux/preempt.h:79,
                    from include/linux/spinlock.h:56,
                    from include/linux/mmzone.h:8,
                    from arch/riscv/include/asm/pgtable.h:9,
                    from include/linux/pgtable.h:6,
                    from arch/riscv/include/asm/io.h:15,
                    from include/linux/io.h:14,
                    from include/linux/io-64-nonatomic-hi-lo.h:5,
                    from drivers/iommu/riscv/iommu-pmu.c:9:
>> drivers/iommu/riscv/iommu-pmu.c:19:46: error: 'struct perf_event' has no member named 'attr'
      19 |                 return FIELD_GET(_mask, event->attr.config);            \
         |                                              ^~
   include/linux/bitfield.h:156:35: note: in definition of macro 'FIELD_GET'
     156 |                 (typeof(_mask))(((_reg) & (_mask)) >> __bf_shf(_mask)); \
         |                                   ^~~~
   drivers/iommu/riscv/iommu-pmu.c:22:1: note: in expansion of macro 'RISCV_IOMMU_PMU_ATTR_EXTRACTOR'
      22 | RISCV_IOMMU_PMU_ATTR_EXTRACTOR(event, RISCV_IOMMU_IOHPMEVT_EVENTID);
         | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   drivers/iommu/riscv/iommu-pmu.c: In function 'get_partial_matching':
>> drivers/iommu/riscv/iommu-pmu.c:19:46: error: 'struct perf_event' has no member named 'attr'
      19 |                 return FIELD_GET(_mask, event->attr.config);            \
         |                                              ^~
   include/linux/compiler_types.h:522:23: note: in definition of macro '__compiletime_assert'
     522 |                 if (!(condition))                                       \
         |                       ^~~~~~~~~
   include/linux/compiler_types.h:542:9: note: in expansion of macro '_compiletime_assert'
     542 |         _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)
         |         ^~~~~~~~~~~~~~~~~~~
   include/linux/build_bug.h:39:37: note: in expansion of macro 'compiletime_assert'
      39 | #define BUILD_BUG_ON_MSG(cond, msg) compiletime_assert(!(cond), msg)
         |                                     ^~~~~~~~~~~~~~~~~~
   include/linux/bitfield.h:72:17: note: in expansion of macro 'BUILD_BUG_ON_MSG'
      72 |                 BUILD_BUG_ON_MSG(__bf_cast_unsigned(_mask, _mask) >     \
         |                 ^~~~~~~~~~~~~~~~
   include/linux/bitfield.h:61:43: note: in expansion of macro '__unsigned_scalar_typeof'
      61 | #define __bf_cast_unsigned(type, x)     ((__unsigned_scalar_typeof(type))(x))
         |                                           ^~~~~~~~~~~~~~~~~~~~~~~~
   include/linux/bitfield.h:73:34: note: in expansion of macro '__bf_cast_unsigned'
      73 |                                  __bf_cast_unsigned(_reg, ~0ull),       \
         |                                  ^~~~~~~~~~~~~~~~~~
   include/linux/bitfield.h:155:17: note: in expansion of macro '__BF_FIELD_CHECK'
     155 |                 __BF_FIELD_CHECK(_mask, _reg, 0U, "FIELD_GET: ");       \
         |                 ^~~~~~~~~~~~~~~~
   drivers/iommu/riscv/iommu-pmu.c:19:24: note: in expansion of macro 'FIELD_GET'
      19 |                 return FIELD_GET(_mask, event->attr.config);            \
         |                        ^~~~~~~~~
   drivers/iommu/riscv/iommu-pmu.c:23:1: note: in expansion of macro 'RISCV_IOMMU_PMU_ATTR_EXTRACTOR'
      23 | RISCV_IOMMU_PMU_ATTR_EXTRACTOR(partial_matching, RISCV_IOMMU_IOHPMEVT_DMASK);
         | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
>> drivers/iommu/riscv/iommu-pmu.c:19:46: error: 'struct perf_event' has no member named 'attr'
      19 |                 return FIELD_GET(_mask, event->attr.config);            \
         |                                              ^~
   include/linux/bitfield.h:156:35: note: in definition of macro 'FIELD_GET'
     156 |                 (typeof(_mask))(((_reg) & (_mask)) >> __bf_shf(_mask)); \
         |                                   ^~~~
   drivers/iommu/riscv/iommu-pmu.c:23:1: note: in expansion of macro 'RISCV_IOMMU_PMU_ATTR_EXTRACTOR'
      23 | RISCV_IOMMU_PMU_ATTR_EXTRACTOR(partial_matching, RISCV_IOMMU_IOHPMEVT_DMASK);
         | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   drivers/iommu/riscv/iommu-pmu.c: In function 'get_pid_pscid':
>> drivers/iommu/riscv/iommu-pmu.c:19:46: error: 'struct perf_event' has no member named 'attr'
      19 |                 return FIELD_GET(_mask, event->attr.config);            \
         |                                              ^~
   include/linux/compiler_types.h:522:23: note: in definition of macro '__compiletime_assert'
     522 |                 if (!(condition))                                       \
         |                       ^~~~~~~~~
   include/linux/compiler_types.h:542:9: note: in expansion of macro '_compiletime_assert'
     542 |         _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)
         |         ^~~~~~~~~~~~~~~~~~~
   include/linux/build_bug.h:39:37: note: in expansion of macro 'compiletime_assert'
      39 | #define BUILD_BUG_ON_MSG(cond, msg) compiletime_assert(!(cond), msg)
         |                                     ^~~~~~~~~~~~~~~~~~
   include/linux/bitfield.h:72:17: note: in expansion of macro 'BUILD_BUG_ON_MSG'
      72 |                 BUILD_BUG_ON_MSG(__bf_cast_unsigned(_mask, _mask) >     \
         |                 ^~~~~~~~~~~~~~~~
   include/linux/bitfield.h:61:43: note: in expansion of macro '__unsigned_scalar_typeof'
      61 | #define __bf_cast_unsigned(type, x)     ((__unsigned_scalar_typeof(type))(x))
         |                                           ^~~~~~~~~~~~~~~~~~~~~~~~
   include/linux/bitfield.h:73:34: note: in expansion of macro '__bf_cast_unsigned'
      73 |                                  __bf_cast_unsigned(_reg, ~0ull),       \
         |                                  ^~~~~~~~~~~~~~~~~~
   include/linux/bitfield.h:155:17: note: in expansion of macro '__BF_FIELD_CHECK'
     155 |                 __BF_FIELD_CHECK(_mask, _reg, 0U, "FIELD_GET: ");       \
         |                 ^~~~~~~~~~~~~~~~
   drivers/iommu/riscv/iommu-pmu.c:19:24: note: in expansion of macro 'FIELD_GET'
      19 |                 return FIELD_GET(_mask, event->attr.config);            \
         |                        ^~~~~~~~~
   drivers/iommu/riscv/iommu-pmu.c:24:1: note: in expansion of macro 'RISCV_IOMMU_PMU_ATTR_EXTRACTOR'
      24 | RISCV_IOMMU_PMU_ATTR_EXTRACTOR(pid_pscid, RISCV_IOMMU_IOHPMEVT_PID_PSCID);
         | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
>> drivers/iommu/riscv/iommu-pmu.c:19:46: error: 'struct perf_event' has no member named 'attr'
      19 |                 return FIELD_GET(_mask, event->attr.config);            \
         |                                              ^~
   include/linux/bitfield.h:156:35: note: in definition of macro 'FIELD_GET'
     156 |                 (typeof(_mask))(((_reg) & (_mask)) >> __bf_shf(_mask)); \
         |                                   ^~~~
   drivers/iommu/riscv/iommu-pmu.c:24:1: note: in expansion of macro 'RISCV_IOMMU_PMU_ATTR_EXTRACTOR'
      24 | RISCV_IOMMU_PMU_ATTR_EXTRACTOR(pid_pscid, RISCV_IOMMU_IOHPMEVT_PID_PSCID);
         | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   drivers/iommu/riscv/iommu-pmu.c: In function 'get_did_gscid':
>> drivers/iommu/riscv/iommu-pmu.c:19:46: error: 'struct perf_event' has no member named 'attr'
      19 |                 return FIELD_GET(_mask, event->attr.config);            \
         |                                              ^~
   include/linux/compiler_types.h:522:23: note: in definition of macro '__compiletime_assert'
     522 |                 if (!(condition))                                       \
         |                       ^~~~~~~~~
   include/linux/compiler_types.h:542:9: note: in expansion of macro '_compiletime_assert'
     542 |         _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)
         |         ^~~~~~~~~~~~~~~~~~~
   include/linux/build_bug.h:39:37: note: in expansion of macro 'compiletime_assert'
      39 | #define BUILD_BUG_ON_MSG(cond, msg) compiletime_assert(!(cond), msg)
         |                                     ^~~~~~~~~~~~~~~~~~
   include/linux/bitfield.h:72:17: note: in expansion of macro 'BUILD_BUG_ON_MSG'
      72 |                 BUILD_BUG_ON_MSG(__bf_cast_unsigned(_mask, _mask) >     \
         |                 ^~~~~~~~~~~~~~~~
   include/linux/bitfield.h:61:43: note: in expansion of macro '__unsigned_scalar_typeof'
      61 | #define __bf_cast_unsigned(type, x)     ((__unsigned_scalar_typeof(type))(x))
         |                                           ^~~~~~~~~~~~~~~~~~~~~~~~
   include/linux/bitfield.h:73:34: note: in expansion of macro '__bf_cast_unsigned'
      73 |                                  __bf_cast_unsigned(_reg, ~0ull),       \
         |                                  ^~~~~~~~~~~~~~~~~~
   include/linux/bitfield.h:155:17: note: in expansion of macro '__BF_FIELD_CHECK'
     155 |                 __BF_FIELD_CHECK(_mask, _reg, 0U, "FIELD_GET: ");       \
         |                 ^~~~~~~~~~~~~~~~
   drivers/iommu/riscv/iommu-pmu.c:19:24: note: in expansion of macro 'FIELD_GET'
      19 |                 return FIELD_GET(_mask, event->attr.config);            \
         |                        ^~~~~~~~~
   drivers/iommu/riscv/iommu-pmu.c:25:1: note: in expansion of macro 'RISCV_IOMMU_PMU_ATTR_EXTRACTOR'
      25 | RISCV_IOMMU_PMU_ATTR_EXTRACTOR(did_gscid, RISCV_IOMMU_IOHPMEVT_DID_GSCID);
         | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
>> drivers/iommu/riscv/iommu-pmu.c:19:46: error: 'struct perf_event' has no member named 'attr'
      19 |                 return FIELD_GET(_mask, event->attr.config);            \
         |                                              ^~
   include/linux/bitfield.h:156:35: note: in definition of macro 'FIELD_GET'
     156 |                 (typeof(_mask))(((_reg) & (_mask)) >> __bf_shf(_mask)); \
         |                                   ^~~~
   drivers/iommu/riscv/iommu-pmu.c:25:1: note: in expansion of macro 'RISCV_IOMMU_PMU_ATTR_EXTRACTOR'
      25 | RISCV_IOMMU_PMU_ATTR_EXTRACTOR(did_gscid, RISCV_IOMMU_IOHPMEVT_DID_GSCID);
         | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   drivers/iommu/riscv/iommu-pmu.c: In function 'get_filter_pid_pscid':
>> drivers/iommu/riscv/iommu-pmu.c:19:46: error: 'struct perf_event' has no member named 'attr'
      19 |                 return FIELD_GET(_mask, event->attr.config);            \
         |                                              ^~
   include/linux/compiler_types.h:522:23: note: in definition of macro '__compiletime_assert'
     522 |                 if (!(condition))                                       \
         |                       ^~~~~~~~~
   include/linux/compiler_types.h:542:9: note: in expansion of macro '_compiletime_assert'
     542 |         _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)
         |         ^~~~~~~~~~~~~~~~~~~
   include/linux/build_bug.h:39:37: note: in expansion of macro 'compiletime_assert'
      39 | #define BUILD_BUG_ON_MSG(cond, msg) compiletime_assert(!(cond), msg)
         |                                     ^~~~~~~~~~~~~~~~~~
   include/linux/bitfield.h:72:17: note: in expansion of macro 'BUILD_BUG_ON_MSG'
      72 |                 BUILD_BUG_ON_MSG(__bf_cast_unsigned(_mask, _mask) >     \
         |                 ^~~~~~~~~~~~~~~~
   include/linux/bitfield.h:61:43: note: in expansion of macro '__unsigned_scalar_typeof'
      61 | #define __bf_cast_unsigned(type, x)     ((__unsigned_scalar_typeof(type))(x))
         |                                           ^~~~~~~~~~~~~~~~~~~~~~~~
   include/linux/bitfield.h:73:34: note: in expansion of macro '__bf_cast_unsigned'
      73 |                                  __bf_cast_unsigned(_reg, ~0ull),       \
         |                                  ^~~~~~~~~~~~~~~~~~
   include/linux/bitfield.h:155:17: note: in expansion of macro '__BF_FIELD_CHECK'
     155 |                 __BF_FIELD_CHECK(_mask, _reg, 0U, "FIELD_GET: ");       \
         |                 ^~~~~~~~~~~~~~~~
   drivers/iommu/riscv/iommu-pmu.c:19:24: note: in expansion of macro 'FIELD_GET'
      19 |                 return FIELD_GET(_mask, event->attr.config);            \
         |                        ^~~~~~~~~
   drivers/iommu/riscv/iommu-pmu.c:26:1: note: in expansion of macro 'RISCV_IOMMU_PMU_ATTR_EXTRACTOR'
      26 | RISCV_IOMMU_PMU_ATTR_EXTRACTOR(filter_pid_pscid, RISCV_IOMMU_IOHPMEVT_PV_PSCV);
         | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
>> drivers/iommu/riscv/iommu-pmu.c:19:46: error: 'struct perf_event' has no member named 'attr'
      19 |                 return FIELD_GET(_mask, event->attr.config);            \
         |                                              ^~
   include/linux/bitfield.h:156:35: note: in definition of macro 'FIELD_GET'
     156 |                 (typeof(_mask))(((_reg) & (_mask)) >> __bf_shf(_mask)); \
         |                                   ^~~~
   drivers/iommu/riscv/iommu-pmu.c:26:1: note: in expansion of macro 'RISCV_IOMMU_PMU_ATTR_EXTRACTOR'
      26 | RISCV_IOMMU_PMU_ATTR_EXTRACTOR(filter_pid_pscid, RISCV_IOMMU_IOHPMEVT_PV_PSCV);
         | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   drivers/iommu/riscv/iommu-pmu.c: In function 'get_filter_did_gscid':
>> drivers/iommu/riscv/iommu-pmu.c:19:46: error: 'struct perf_event' has no member named 'attr'
      19 |                 return FIELD_GET(_mask, event->attr.config);            \
         |                                              ^~
   include/linux/compiler_types.h:522:23: note: in definition of macro '__compiletime_assert'
     522 |                 if (!(condition))                                       \
         |                       ^~~~~~~~~
   include/linux/compiler_types.h:542:9: note: in expansion of macro '_compiletime_assert'
     542 |         _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)
         |         ^~~~~~~~~~~~~~~~~~~
   include/linux/build_bug.h:39:37: note: in expansion of macro 'compiletime_assert'
      39 | #define BUILD_BUG_ON_MSG(cond, msg) compiletime_assert(!(cond), msg)
         |                                     ^~~~~~~~~~~~~~~~~~
   include/linux/bitfield.h:72:17: note: in expansion of macro 'BUILD_BUG_ON_MSG'
      72 |                 BUILD_BUG_ON_MSG(__bf_cast_unsigned(_mask, _mask) >     \
         |                 ^~~~~~~~~~~~~~~~
   include/linux/bitfield.h:61:43: note: in expansion of macro '__unsigned_scalar_typeof'
      61 | #define __bf_cast_unsigned(type, x)     ((__unsigned_scalar_typeof(type))(x))
         |                                           ^~~~~~~~~~~~~~~~~~~~~~~~
   include/linux/bitfield.h:73:34: note: in expansion of macro '__bf_cast_unsigned'
      73 |                                  __bf_cast_unsigned(_reg, ~0ull),       \
         |                                  ^~~~~~~~~~~~~~~~~~
   include/linux/bitfield.h:155:17: note: in expansion of macro '__BF_FIELD_CHECK'
     155 |                 __BF_FIELD_CHECK(_mask, _reg, 0U, "FIELD_GET: ");       \
         |                 ^~~~~~~~~~~~~~~~
   drivers/iommu/riscv/iommu-pmu.c:19:24: note: in expansion of macro 'FIELD_GET'
      19 |                 return FIELD_GET(_mask, event->attr.config);            \
         |                        ^~~~~~~~~
   drivers/iommu/riscv/iommu-pmu.c:27:1: note: in expansion of macro 'RISCV_IOMMU_PMU_ATTR_EXTRACTOR'
      27 | RISCV_IOMMU_PMU_ATTR_EXTRACTOR(filter_did_gscid, RISCV_IOMMU_IOHPMEVT_DV_GSCV);
         | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
>> drivers/iommu/riscv/iommu-pmu.c:19:46: error: 'struct perf_event' has no member named 'attr'
      19 |                 return FIELD_GET(_mask, event->attr.config);            \
         |                                              ^~
   include/linux/bitfield.h:156:35: note: in definition of macro 'FIELD_GET'
     156 |                 (typeof(_mask))(((_reg) & (_mask)) >> __bf_shf(_mask)); \
         |                                   ^~~~
   drivers/iommu/riscv/iommu-pmu.c:27:1: note: in expansion of macro 'RISCV_IOMMU_PMU_ATTR_EXTRACTOR'
      27 | RISCV_IOMMU_PMU_ATTR_EXTRACTOR(filter_did_gscid, RISCV_IOMMU_IOHPMEVT_DV_GSCV);
         | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   drivers/iommu/riscv/iommu-pmu.c: In function 'get_filter_id_type':
>> drivers/iommu/riscv/iommu-pmu.c:19:46: error: 'struct perf_event' has no member named 'attr'
      19 |                 return FIELD_GET(_mask, event->attr.config);            \
         |                                              ^~
   include/linux/compiler_types.h:522:23: note: in definition of macro '__compiletime_assert'
     522 |                 if (!(condition))                                       \
         |                       ^~~~~~~~~
   include/linux/compiler_types.h:542:9: note: in expansion of macro '_compiletime_assert'
     542 |         _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)
         |         ^~~~~~~~~~~~~~~~~~~
   include/linux/build_bug.h:39:37: note: in expansion of macro 'compiletime_assert'
      39 | #define BUILD_BUG_ON_MSG(cond, msg) compiletime_assert(!(cond), msg)
         |                                     ^~~~~~~~~~~~~~~~~~
   include/linux/bitfield.h:72:17: note: in expansion of macro 'BUILD_BUG_ON_MSG'
      72 |                 BUILD_BUG_ON_MSG(__bf_cast_unsigned(_mask, _mask) >     \
         |                 ^~~~~~~~~~~~~~~~
   include/linux/bitfield.h:61:43: note: in expansion of macro '__unsigned_scalar_typeof'
      61 | #define __bf_cast_unsigned(type, x)     ((__unsigned_scalar_typeof(type))(x))
         |                                           ^~~~~~~~~~~~~~~~~~~~~~~~
   include/linux/bitfield.h:73:34: note: in expansion of macro '__bf_cast_unsigned'
      73 |                                  __bf_cast_unsigned(_reg, ~0ull),       \
         |                                  ^~~~~~~~~~~~~~~~~~
   include/linux/bitfield.h:155:17: note: in expansion of macro '__BF_FIELD_CHECK'
     155 |                 __BF_FIELD_CHECK(_mask, _reg, 0U, "FIELD_GET: ");       \
         |                 ^~~~~~~~~~~~~~~~
   drivers/iommu/riscv/iommu-pmu.c:19:24: note: in expansion of macro 'FIELD_GET'
      19 |                 return FIELD_GET(_mask, event->attr.config);            \
         |                        ^~~~~~~~~
   drivers/iommu/riscv/iommu-pmu.c:28:1: note: in expansion of macro 'RISCV_IOMMU_PMU_ATTR_EXTRACTOR'
      28 | RISCV_IOMMU_PMU_ATTR_EXTRACTOR(filter_id_type, RISCV_IOMMU_IOHPMEVT_IDT);
         | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
>> drivers/iommu/riscv/iommu-pmu.c:19:46: error: 'struct perf_event' has no member named 'attr'
      19 |                 return FIELD_GET(_mask, event->attr.config);            \
         |                                              ^~
   include/linux/bitfield.h:156:35: note: in definition of macro 'FIELD_GET'
     156 |                 (typeof(_mask))(((_reg) & (_mask)) >> __bf_shf(_mask)); \
         |                                   ^~~~
   drivers/iommu/riscv/iommu-pmu.c:28:1: note: in expansion of macro 'RISCV_IOMMU_PMU_ATTR_EXTRACTOR'
      28 | RISCV_IOMMU_PMU_ATTR_EXTRACTOR(filter_id_type, RISCV_IOMMU_IOHPMEVT_IDT);
         | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   In file included from include/linux/kernel.h:22,
                    from include/linux/cpumask.h:11:
   drivers/iommu/riscv/iommu-pmu.c: In function 'riscv_iommu_pmu_set_period':
>> drivers/iommu/riscv/iommu-pmu.c:235:63: error: 'struct perf_event' has no member named 'pmu'
     235 |         struct riscv_iommu_pmu *pmu = to_riscv_iommu_pmu(event->pmu);
         |                                                               ^~
   include/linux/container_of.h:19:33: note: in definition of macro 'container_of'
      19 |         void *__mptr = (void *)(ptr);                                   \
         |                                 ^~~
   drivers/iommu/riscv/iommu-pmu.c:235:39: note: in expansion of macro 'to_riscv_iommu_pmu'
     235 |         struct riscv_iommu_pmu *pmu = to_riscv_iommu_pmu(event->pmu);
         |                                       ^~~~~~~~~~~~~~~~~~
   In file included from include/linux/init.h:5,
                    from include/linux/io.h:11:
>> drivers/iommu/riscv/iommu-pmu.c:235:63: error: 'struct perf_event' has no member named 'pmu'
     235 |         struct riscv_iommu_pmu *pmu = to_riscv_iommu_pmu(event->pmu);
         |                                                               ^~
   include/linux/build_bug.h:78:56: note: in definition of macro '__static_assert'
      78 | #define __static_assert(expr, msg, ...) _Static_assert(expr, msg)
         |                                                        ^~~~
   include/linux/container_of.h:20:9: note: in expansion of macro 'static_assert'
      20 |         static_assert(__same_type(*(ptr), ((type *)0)->member) ||       \
         |         ^~~~~~~~~~~~~
   include/linux/container_of.h:20:23: note: in expansion of macro '__same_type'
      20 |         static_assert(__same_type(*(ptr), ((type *)0)->member) ||       \
         |                       ^~~~~~~~~~~
   drivers/iommu/riscv/iommu-pmu.c:14:32: note: in expansion of macro 'container_of'
      14 | #define to_riscv_iommu_pmu(p) (container_of(p, struct riscv_iommu_pmu, pmu))
         |                                ^~~~~~~~~~~~
   drivers/iommu/riscv/iommu-pmu.c:235:39: note: in expansion of macro 'to_riscv_iommu_pmu'
     235 |         struct riscv_iommu_pmu *pmu = to_riscv_iommu_pmu(event->pmu);
         |                                       ^~~~~~~~~~~~~~~~~~
>> drivers/iommu/riscv/iommu-pmu.c:235:63: error: 'struct perf_event' has no member named 'pmu'
     235 |         struct riscv_iommu_pmu *pmu = to_riscv_iommu_pmu(event->pmu);
         |                                                               ^~
   include/linux/build_bug.h:78:56: note: in definition of macro '__static_assert'
      78 | #define __static_assert(expr, msg, ...) _Static_assert(expr, msg)
         |                                                        ^~~~
   include/linux/container_of.h:20:9: note: in expansion of macro 'static_assert'
      20 |         static_assert(__same_type(*(ptr), ((type *)0)->member) ||       \
         |         ^~~~~~~~~~~~~
   include/linux/container_of.h:21:23: note: in expansion of macro '__same_type'
      21 |                       __same_type(*(ptr), void),                        \
         |                       ^~~~~~~~~~~
   drivers/iommu/riscv/iommu-pmu.c:14:32: note: in expansion of macro 'container_of'
      14 | #define to_riscv_iommu_pmu(p) (container_of(p, struct riscv_iommu_pmu, pmu))
         |                                ^~~~~~~~~~~~
   drivers/iommu/riscv/iommu-pmu.c:235:39: note: in expansion of macro 'to_riscv_iommu_pmu'
     235 |         struct riscv_iommu_pmu *pmu = to_riscv_iommu_pmu(event->pmu);
         |                                       ^~~~~~~~~~~~~~~~~~
   include/linux/compiler_types.h:483:27: error: expression in static assertion is not an integer
     483 | #define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b))
         |                           ^~~~~~~~~~~~~~~~~~~~~~~~~~~~
   include/linux/build_bug.h:78:56: note: in definition of macro '__static_assert'
      78 | #define __static_assert(expr, msg, ...) _Static_assert(expr, msg)
         |                                                        ^~~~
   include/linux/container_of.h:20:9: note: in expansion of macro 'static_assert'
      20 |         static_assert(__same_type(*(ptr), ((type *)0)->member) ||       \
         |         ^~~~~~~~~~~~~
   include/linux/container_of.h:20:23: note: in expansion of macro '__same_type'
      20 |         static_assert(__same_type(*(ptr), ((type *)0)->member) ||       \
         |                       ^~~~~~~~~~~
   drivers/iommu/riscv/iommu-pmu.c:14:32: note: in expansion of macro 'container_of'
      14 | #define to_riscv_iommu_pmu(p) (container_of(p, struct riscv_iommu_pmu, pmu))
         |                                ^~~~~~~~~~~~
   drivers/iommu/riscv/iommu-pmu.c:235:39: note: in expansion of macro 'to_riscv_iommu_pmu'
     235 |         struct riscv_iommu_pmu *pmu = to_riscv_iommu_pmu(event->pmu);
         |                                       ^~~~~~~~~~~~~~~~~~
>> drivers/iommu/riscv/iommu-pmu.c:236:43: error: 'struct perf_event' has no member named 'hw'
     236 |         struct hw_perf_event *hwc = &event->hw;
         |                                           ^~
   In file included from ./arch/riscv/include/generated/asm/local.h:1,
                    from include/asm-generic/local64.h:22,
                    from ./arch/riscv/include/generated/asm/local64.h:1,
                    from include/linux/u64_stats_sync.h:71,
                    from include/linux/cgroup-defs.h:20,
                    from include/linux/cgroup.h:28,
                    from include/linux/perf_event.h:60,
                    from drivers/iommu/riscv/iommu-bits.h:20,
                    from drivers/iommu/riscv/iommu.h:18,
                    from drivers/iommu/riscv/iommu-pmu.c:11:
>> drivers/iommu/riscv/iommu-pmu.c:237:37: error: 'struct hw_perf_event' has no member named 'period_left'
     237 |         s64 left = local64_read(&hwc->period_left);
         |                                     ^~
   include/asm-generic/local.h:29:44: note: in definition of macro 'local_read'
      29 | #define local_read(l)   atomic_long_read(&(l)->a)
         |                                            ^
   drivers/iommu/riscv/iommu-pmu.c:237:20: note: in expansion of macro 'local64_read'
     237 |         s64 left = local64_read(&hwc->period_left);
         |                    ^~~~~~~~~~~~
>> drivers/iommu/riscv/iommu-pmu.c:238:25: error: 'struct hw_perf_event' has no member named 'sample_period'
     238 |         s64 period = hwc->sample_period;
         |                         ^~
   drivers/iommu/riscv/iommu-pmu.c:244:33: error: 'struct hw_perf_event' has no member named 'period_left'
     244 |                 local64_set(&hwc->period_left, left);
         |                                 ^~
   include/asm-generic/local.h:30:44: note: in definition of macro 'local_set'
      30 | #define local_set(l,i)  atomic_long_set((&(l)->a),(i))
         |                                            ^
   drivers/iommu/riscv/iommu-pmu.c:244:17: note: in expansion of macro 'local64_set'
     244 |                 local64_set(&hwc->period_left, left);
         |                 ^~~~~~~~~~~
   drivers/iommu/riscv/iommu-pmu.c:245:20: error: 'struct hw_perf_event' has no member named 'last_period'
     245 |                 hwc->last_period = period;
         |                    ^~
   drivers/iommu/riscv/iommu-pmu.c:251:33: error: 'struct hw_perf_event' has no member named 'period_left'
     251 |                 local64_set(&hwc->period_left, left);
         |                                 ^~
   include/asm-generic/local.h:30:44: note: in definition of macro 'local_set'
      30 | #define local_set(l,i)  atomic_long_set((&(l)->a),(i))
         |                                            ^
   drivers/iommu/riscv/iommu-pmu.c:251:17: note: in expansion of macro 'local64_set'
     251 |                 local64_set(&hwc->period_left, left);
         |                 ^~~~~~~~~~~
   drivers/iommu/riscv/iommu-pmu.c:252:20: error: 'struct hw_perf_event' has no member named 'last_period'
     252 |                 hwc->last_period = period;
         |                    ^~
   drivers/iommu/riscv/iommu-pmu.c:265:25: error: 'struct hw_perf_event' has no member named 'prev_count'
     265 |         local64_set(&hwc->prev_count, (u64)-left);
         |                         ^~
   include/asm-generic/local.h:30:44: note: in definition of macro 'local_set'
      30 | #define local_set(l,i)  atomic_long_set((&(l)->a),(i))
         |                                            ^
   drivers/iommu/riscv/iommu-pmu.c:265:9: note: in expansion of macro 'local64_set'
     265 |         local64_set(&hwc->prev_count, (u64)-left);
         |         ^~~~~~~~~~~
   drivers/iommu/riscv/iommu-pmu.c:266:45: error: 'struct hw_perf_event' has no member named 'idx'
     266 |         riscv_iommu_pmu_set_counter(pmu, hwc->idx, (u64)(-left) & max_period);
         |                                             ^~
   drivers/iommu/riscv/iommu-pmu.c:267:9: error: implicit declaration of function 'perf_event_update_userpage'; did you mean 'arch_perf_update_userpage'? [-Wimplicit-function-declaration]
     267 |         perf_event_update_userpage(event);
         |         ^~~~~~~~~~~~~~~~~~~~~~~~~~
         |         arch_perf_update_userpage
   drivers/iommu/riscv/iommu-pmu.c: In function 'riscv_iommu_pmu_event_init':
   drivers/iommu/riscv/iommu-pmu.c:274:63: error: 'struct perf_event' has no member named 'pmu'
     274 |         struct riscv_iommu_pmu *pmu = to_riscv_iommu_pmu(event->pmu);
         |                                                               ^~
   include/linux/container_of.h:19:33: note: in definition of macro 'container_of'
      19 |         void *__mptr = (void *)(ptr);                                   \
         |                                 ^~~
   drivers/iommu/riscv/iommu-pmu.c:274:39: note: in expansion of macro 'to_riscv_iommu_pmu'
     274 |         struct riscv_iommu_pmu *pmu = to_riscv_iommu_pmu(event->pmu);
         |                                       ^~~~~~~~~~~~~~~~~~
   drivers/iommu/riscv/iommu-pmu.c:274:63: error: 'struct perf_event' has no member named 'pmu'
     274 |         struct riscv_iommu_pmu *pmu = to_riscv_iommu_pmu(event->pmu);
         |                                                               ^~
   include/linux/build_bug.h:78:56: note: in definition of macro '__static_assert'
      78 | #define __static_assert(expr, msg, ...) _Static_assert(expr, msg)
         |                                                        ^~~~
   include/linux/container_of.h:20:9: note: in expansion of macro 'static_assert'
      20 |         static_assert(__same_type(*(ptr), ((type *)0)->member) ||       \
         |         ^~~~~~~~~~~~~
   include/linux/container_of.h:20:23: note: in expansion of macro '__same_type'
      20 |         static_assert(__same_type(*(ptr), ((type *)0)->member) ||       \
         |                       ^~~~~~~~~~~
   drivers/iommu/riscv/iommu-pmu.c:14:32: note: in expansion of macro 'container_of'
      14 | #define to_riscv_iommu_pmu(p) (container_of(p, struct riscv_iommu_pmu, pmu))
         |                                ^~~~~~~~~~~~
   drivers/iommu/riscv/iommu-pmu.c:274:39: note: in expansion of macro 'to_riscv_iommu_pmu'
     274 |         struct riscv_iommu_pmu *pmu = to_riscv_iommu_pmu(event->pmu);
         |                                       ^~~~~~~~~~~~~~~~~~
   drivers/iommu/riscv/iommu-pmu.c:274:63: error: 'struct perf_event' has no member named 'pmu'
     274 |         struct riscv_iommu_pmu *pmu = to_riscv_iommu_pmu(event->pmu);
         |                                                               ^~
   include/linux/build_bug.h:78:56: note: in definition of macro '__static_assert'
      78 | #define __static_assert(expr, msg, ...) _Static_assert(expr, msg)
         |                                                        ^~~~
   include/linux/container_of.h:20:9: note: in expansion of macro 'static_assert'
      20 |         static_assert(__same_type(*(ptr), ((type *)0)->member) ||       \
         |         ^~~~~~~~~~~~~
   include/linux/container_of.h:21:23: note: in expansion of macro '__same_type'
      21 |                       __same_type(*(ptr), void),                        \
         |                       ^~~~~~~~~~~
   drivers/iommu/riscv/iommu-pmu.c:14:32: note: in expansion of macro 'container_of'
      14 | #define to_riscv_iommu_pmu(p) (container_of(p, struct riscv_iommu_pmu, pmu))
         |                                ^~~~~~~~~~~~
   drivers/iommu/riscv/iommu-pmu.c:274:39: note: in expansion of macro 'to_riscv_iommu_pmu'
     274 |         struct riscv_iommu_pmu *pmu = to_riscv_iommu_pmu(event->pmu);
         |                                       ^~~~~~~~~~~~~~~~~~
   include/linux/compiler_types.h:483:27: error: expression in static assertion is not an integer
     483 | #define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b))
         |                           ^~~~~~~~~~~~~~~~~~~~~~~~~~~~
   include/linux/build_bug.h:78:56: note: in definition of macro '__static_assert'
      78 | #define __static_assert(expr, msg, ...) _Static_assert(expr, msg)
         |                                                        ^~~~
   include/linux/container_of.h:20:9: note: in expansion of macro 'static_assert'
      20 |         static_assert(__same_type(*(ptr), ((type *)0)->member) ||       \
         |         ^~~~~~~~~~~~~
   include/linux/container_of.h:20:23: note: in expansion of macro '__same_type'
      20 |         static_assert(__same_type(*(ptr), ((type *)0)->member) ||       \
         |                       ^~~~~~~~~~~


vim +19 drivers/iommu/riscv/iommu-pmu.c

    15	
    16	#define RISCV_IOMMU_PMU_ATTR_EXTRACTOR(_name, _mask)			\
    17		static inline u32 get_##_name(struct perf_event *event)		\
    18		{								\
  > 19			return FIELD_GET(_mask, event->attr.config);		\
    20		}								\
    21	

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
Re: [PATCH 1/2] iommu/riscv: add RISC-V IOMMU PMU support
Posted by kernel test robot 10 months, 3 weeks ago
Hi Zong,

kernel test robot noticed the following build warnings:

[auto build test WARNING on linus/master]
[also build test WARNING on v6.13 next-20250122]
[cannot apply to joro-iommu/next]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Zong-Li/iommu-riscv-add-RISC-V-IOMMU-PMU-support/20250115-110456
base:   linus/master
patch link:    https://lore.kernel.org/r/20250115030306.29735-2-zong.li%40sifive.com
patch subject: [PATCH 1/2] iommu/riscv: add RISC-V IOMMU PMU support
config: riscv-randconfig-r062-20250123 (https://download.01.org/0day-ci/archive/20250123/202501231613.OiXGj7hO-lkp@intel.com/config)
compiler: riscv64-linux-gcc (GCC) 14.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250123/202501231613.OiXGj7hO-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202501231613.OiXGj7hO-lkp@intel.com/

All warnings (new ones prefixed by >>):

   drivers/iommu/riscv/iommu-pmu.c:368:12: error: 'struct hw_perf_event' has no member named 'idx'
     368 |         hwc->idx = idx;
         |            ^~
   drivers/iommu/riscv/iommu-pmu.c:369:12: error: 'struct hw_perf_event' has no member named 'state'
     369 |         hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
         |            ^~
   drivers/iommu/riscv/iommu-pmu.c:369:22: error: 'PERF_HES_STOPPED' undeclared (first use in this function)
     369 |         hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
         |                      ^~~~~~~~~~~~~~~~
   drivers/iommu/riscv/iommu-pmu.c:369:41: error: 'PERF_HES_UPTODATE' undeclared (first use in this function); did you mean 'PERF_EF_UPDATE'?
     369 |         hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
         |                                         ^~~~~~~~~~~~~~~~~
         |                                         PERF_EF_UPDATE
   drivers/iommu/riscv/iommu-pmu.c: In function 'riscv_iommu_pmu_del':
   drivers/iommu/riscv/iommu-pmu.c:387:43: error: 'struct perf_event' has no member named 'hw'
     387 |         struct hw_perf_event *hwc = &event->hw;
         |                                           ^~
   drivers/iommu/riscv/iommu-pmu.c:388:63: error: 'struct perf_event' has no member named 'pmu'
     388 |         struct riscv_iommu_pmu *pmu = to_riscv_iommu_pmu(event->pmu);
         |                                                               ^~
   include/linux/container_of.h:19:33: note: in definition of macro 'container_of'
      19 |         void *__mptr = (void *)(ptr);                                   \
         |                                 ^~~
   drivers/iommu/riscv/iommu-pmu.c:388:39: note: in expansion of macro 'to_riscv_iommu_pmu'
     388 |         struct riscv_iommu_pmu *pmu = to_riscv_iommu_pmu(event->pmu);
         |                                       ^~~~~~~~~~~~~~~~~~
   drivers/iommu/riscv/iommu-pmu.c:388:63: error: 'struct perf_event' has no member named 'pmu'
     388 |         struct riscv_iommu_pmu *pmu = to_riscv_iommu_pmu(event->pmu);
         |                                                               ^~
   include/linux/build_bug.h:78:56: note: in definition of macro '__static_assert'
      78 | #define __static_assert(expr, msg, ...) _Static_assert(expr, msg)
         |                                                        ^~~~
   include/linux/container_of.h:20:9: note: in expansion of macro 'static_assert'
      20 |         static_assert(__same_type(*(ptr), ((type *)0)->member) ||       \
         |         ^~~~~~~~~~~~~
   include/linux/container_of.h:20:23: note: in expansion of macro '__same_type'
      20 |         static_assert(__same_type(*(ptr), ((type *)0)->member) ||       \
         |                       ^~~~~~~~~~~
   drivers/iommu/riscv/iommu-pmu.c:14:32: note: in expansion of macro 'container_of'
      14 | #define to_riscv_iommu_pmu(p) (container_of(p, struct riscv_iommu_pmu, pmu))
         |                                ^~~~~~~~~~~~
   drivers/iommu/riscv/iommu-pmu.c:388:39: note: in expansion of macro 'to_riscv_iommu_pmu'
     388 |         struct riscv_iommu_pmu *pmu = to_riscv_iommu_pmu(event->pmu);
         |                                       ^~~~~~~~~~~~~~~~~~
   drivers/iommu/riscv/iommu-pmu.c:388:63: error: 'struct perf_event' has no member named 'pmu'
     388 |         struct riscv_iommu_pmu *pmu = to_riscv_iommu_pmu(event->pmu);
         |                                                               ^~
   include/linux/build_bug.h:78:56: note: in definition of macro '__static_assert'
      78 | #define __static_assert(expr, msg, ...) _Static_assert(expr, msg)
         |                                                        ^~~~
   include/linux/container_of.h:20:9: note: in expansion of macro 'static_assert'
      20 |         static_assert(__same_type(*(ptr), ((type *)0)->member) ||       \
         |         ^~~~~~~~~~~~~
   include/linux/container_of.h:21:23: note: in expansion of macro '__same_type'
      21 |                       __same_type(*(ptr), void),                        \
         |                       ^~~~~~~~~~~
   drivers/iommu/riscv/iommu-pmu.c:14:32: note: in expansion of macro 'container_of'
      14 | #define to_riscv_iommu_pmu(p) (container_of(p, struct riscv_iommu_pmu, pmu))
         |                                ^~~~~~~~~~~~
   drivers/iommu/riscv/iommu-pmu.c:388:39: note: in expansion of macro 'to_riscv_iommu_pmu'
     388 |         struct riscv_iommu_pmu *pmu = to_riscv_iommu_pmu(event->pmu);
         |                                       ^~~~~~~~~~~~~~~~~~
   include/linux/compiler_types.h:483:27: error: expression in static assertion is not an integer
     483 | #define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b))
         |                           ^~~~~~~~~~~~~~~~~~~~~~~~~~~~
   include/linux/build_bug.h:78:56: note: in definition of macro '__static_assert'
      78 | #define __static_assert(expr, msg, ...) _Static_assert(expr, msg)
         |                                                        ^~~~
   include/linux/container_of.h:20:9: note: in expansion of macro 'static_assert'
      20 |         static_assert(__same_type(*(ptr), ((type *)0)->member) ||       \
         |         ^~~~~~~~~~~~~
   include/linux/container_of.h:20:23: note: in expansion of macro '__same_type'
      20 |         static_assert(__same_type(*(ptr), ((type *)0)->member) ||       \
         |                       ^~~~~~~~~~~
   drivers/iommu/riscv/iommu-pmu.c:14:32: note: in expansion of macro 'container_of'
      14 | #define to_riscv_iommu_pmu(p) (container_of(p, struct riscv_iommu_pmu, pmu))
         |                                ^~~~~~~~~~~~
   drivers/iommu/riscv/iommu-pmu.c:388:39: note: in expansion of macro 'to_riscv_iommu_pmu'
     388 |         struct riscv_iommu_pmu *pmu = to_riscv_iommu_pmu(event->pmu);
         |                                       ^~~~~~~~~~~~~~~~~~
   drivers/iommu/riscv/iommu-pmu.c:389:22: error: 'struct hw_perf_event' has no member named 'idx'
     389 |         int idx = hwc->idx;
         |                      ^~
   drivers/iommu/riscv/iommu-pmu.c: In function 'riscv_iommu_pmu_handle_irq':
   drivers/iommu/riscv/iommu-pmu.c:399:33: error: storage size of 'data' isn't known
     399 |         struct perf_sample_data data;
         |                                 ^~~~
   drivers/iommu/riscv/iommu-pmu.c:418:29: error: 'struct perf_event' has no member named 'hw'
     418 |                 hwc = &event->hw;
         |                             ^~
   drivers/iommu/riscv/iommu-pmu.c:421:17: error: implicit declaration of function 'perf_sample_data_init' [-Wimplicit-function-declaration]
     421 |                 perf_sample_data_init(&data, 0, hwc->last_period);
         |                 ^~~~~~~~~~~~~~~~~~~~~
   drivers/iommu/riscv/iommu-pmu.c:421:52: error: 'struct hw_perf_event' has no member named 'last_period'
     421 |                 perf_sample_data_init(&data, 0, hwc->last_period);
         |                                                    ^~
   drivers/iommu/riscv/iommu-pmu.c:425:21: error: implicit declaration of function 'perf_event_overflow'; did you mean 'perf_event_period'? [-Wimplicit-function-declaration]
     425 |                 if (perf_event_overflow(event, &data, regs))
         |                     ^~~~~~~~~~~~~~~~~~~
         |                     perf_event_period
>> drivers/iommu/riscv/iommu-pmu.c:399:33: warning: unused variable 'data' [-Wunused-variable]
     399 |         struct perf_sample_data data;
         |                                 ^~~~
   drivers/iommu/riscv/iommu-pmu.c: In function 'riscv_iommu_pmu_init':
   drivers/iommu/riscv/iommu-pmu.c:459:15: error: implicit declaration of function 'perf_pmu_register' [-Wimplicit-function-declaration]
     459 |         ret = perf_pmu_register(&pmu->pmu, name, -1);
         |               ^~~~~~~~~~~~~~~~~
   drivers/iommu/riscv/iommu-pmu.c: In function 'riscv_iommu_pmu_uninit':
   drivers/iommu/riscv/iommu-pmu.c:485:9: error: implicit declaration of function 'perf_pmu_unregister'; did you mean 'device_unregister'? [-Wimplicit-function-declaration]
     485 |         perf_pmu_unregister(&pmu->pmu);
         |         ^~~~~~~~~~~~~~~~~~~
         |         device_unregister
   drivers/iommu/riscv/iommu-pmu.c: In function 'get_event':
>> drivers/iommu/riscv/iommu-pmu.c:20:9: warning: control reaches end of non-void function [-Wreturn-type]
      20 |         }                                                               \
         |         ^
   drivers/iommu/riscv/iommu-pmu.c:22:1: note: in expansion of macro 'RISCV_IOMMU_PMU_ATTR_EXTRACTOR'
      22 | RISCV_IOMMU_PMU_ATTR_EXTRACTOR(event, RISCV_IOMMU_IOHPMEVT_EVENTID);
         | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~


vim +/data +399 drivers/iommu/riscv/iommu-pmu.c

   396	
   397	irqreturn_t riscv_iommu_pmu_handle_irq(struct riscv_iommu_pmu *pmu)
   398	{
 > 399		struct perf_sample_data data;
   400		struct pt_regs *regs;
   401		u32 ovf = readl(pmu->reg + RISCV_IOMMU_REG_IOCOUNTOVF);
   402		int idx;
   403	
   404		if (!ovf)
   405			return IRQ_NONE;
   406	
   407		riscv_iommu_pmu_stop_all(pmu);
   408	
   409		regs = get_irq_regs();
   410	
   411		for_each_set_bit(idx, (unsigned long *)&ovf, pmu->num_counters) {
   412			struct perf_event *event = pmu->events[idx];
   413			struct hw_perf_event *hwc;
   414	
   415			if (WARN_ON_ONCE(!event) || !is_sampling_event(event))
   416				continue;
   417	
   418			hwc = &event->hw;
   419	
   420			riscv_iommu_pmu_update(event);
   421			perf_sample_data_init(&data, 0, hwc->last_period);
   422			if (!riscv_iommu_pmu_set_period(event))
   423				continue;
   424	
   425			if (perf_event_overflow(event, &data, regs))
   426				riscv_iommu_pmu_stop(event, 0);
   427		}
   428	
   429		riscv_iommu_pmu_start_all(pmu);
   430	
   431		return IRQ_HANDLED;
   432	}
   433	

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
Re: [PATCH 1/2] iommu/riscv: add RISC-V IOMMU PMU support
Posted by Robin Murphy 11 months ago
On 2025-01-15 3:03 am, Zong Li wrote:
> Support for the RISC-V IOMMU hardware performance monitor includes
> both counting and sampling modes.
> 
> The specification does not define an event ID for counting the
> number of clock cycles, meaning there is no associated `iohpmevt0`.
> However, we need an event for counting cycle, so we reserve the
> maximum event ID for this purpose.

Why not use an extra config bit to encode cycles events completely 
independently of the regular eventID space? Or even just use 0 since by 
definition that cannot overlap a valid eventID?

> Signed-off-by: Zong Li <zong.li@sifive.com>
> Tested-by: Xu Lu <luxu.kernel@bytedance.com>
> ---
>   drivers/iommu/riscv/Makefile     |   2 +-
>   drivers/iommu/riscv/iommu-bits.h |  16 +
>   drivers/iommu/riscv/iommu-pmu.c  | 486 +++++++++++++++++++++++++++++++
>   drivers/iommu/riscv/iommu.h      |   8 +
>   4 files changed, 511 insertions(+), 1 deletion(-)
>   create mode 100644 drivers/iommu/riscv/iommu-pmu.c
> 
> diff --git a/drivers/iommu/riscv/Makefile b/drivers/iommu/riscv/Makefile
> index f54c9ed17d41..d36625a1fd08 100644
> --- a/drivers/iommu/riscv/Makefile
> +++ b/drivers/iommu/riscv/Makefile
> @@ -1,3 +1,3 @@
>   # SPDX-License-Identifier: GPL-2.0-only
> -obj-$(CONFIG_RISCV_IOMMU) += iommu.o iommu-platform.o
> +obj-$(CONFIG_RISCV_IOMMU) += iommu.o iommu-platform.o iommu-pmu.o
>   obj-$(CONFIG_RISCV_IOMMU_PCI) += iommu-pci.o
> diff --git a/drivers/iommu/riscv/iommu-bits.h b/drivers/iommu/riscv/iommu-bits.h
> index 98daf0e1a306..60523449f016 100644
> --- a/drivers/iommu/riscv/iommu-bits.h
> +++ b/drivers/iommu/riscv/iommu-bits.h
> @@ -17,6 +17,7 @@
>   #include <linux/types.h>
>   #include <linux/bitfield.h>
>   #include <linux/bits.h>
> +#include <linux/perf_event.h>
>   
>   /*
>    * Chapter 5: Memory Mapped register interface
> @@ -207,6 +208,7 @@ enum riscv_iommu_ddtp_modes {
>   /* 5.22 Performance monitoring event counters (31 * 64bits) */
>   #define RISCV_IOMMU_REG_IOHPMCTR_BASE	0x0068
>   #define RISCV_IOMMU_REG_IOHPMCTR(_n)	(RISCV_IOMMU_REG_IOHPMCTR_BASE + ((_n) * 0x8))
> +#define RISCV_IOMMU_IOHPMCTR_COUNTER	GENMASK_ULL(63, 0)
>   
>   /* 5.23 Performance monitoring event selectors (31 * 64bits) */
>   #define RISCV_IOMMU_REG_IOHPMEVT_BASE	0x0160
> @@ -250,6 +252,20 @@ enum riscv_iommu_hpmevent_id {
>   	RISCV_IOMMU_HPMEVENT_MAX        = 9
>   };
>   
> +/* Use maximum event ID for cycle event */
> +#define RISCV_IOMMU_HPMEVENT_CYCLE	GENMASK_ULL(14, 0)

It's also horribly confusing to use GENMASK for something which is not 
actually a mask at all.

> +#define RISCV_IOMMU_HPM_COUNTER_NUM	32
> +
> +struct riscv_iommu_pmu {
> +	struct pmu pmu;
> +	void __iomem *reg;
> +	int num_counters;
> +	u64 mask_counter;
> +	struct perf_event *events[RISCV_IOMMU_IOHPMEVT_CNT + 1];
> +	DECLARE_BITMAP(used_counters, RISCV_IOMMU_IOHPMEVT_CNT + 1);

Hmm, from experience I would expect these variables to be 
number-of-counters related, but I guess there must be some special 
cleverness going on since that number-of-counters looking 
RISCV_IOMMU_HPM_COUNTER_NUM definition is conspicuously not used, so it 
must be significant that these are instead related to the number of...

[ goes off to search code... ]

...event selectors? But with a magic +1 for reasons so obvious they 
clearly don't need explaining.

> +};
> +
>   /* 5.24 Translation request IOVA (64bits) */
>   #define RISCV_IOMMU_REG_TR_REQ_IOVA     0x0258
>   #define RISCV_IOMMU_TR_REQ_IOVA_VPN	GENMASK_ULL(63, 12)
> diff --git a/drivers/iommu/riscv/iommu-pmu.c b/drivers/iommu/riscv/iommu-pmu.c
> new file mode 100644
> index 000000000000..74eb1525cd32
> --- /dev/null
> +++ b/drivers/iommu/riscv/iommu-pmu.c
> @@ -0,0 +1,486 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Copyright (C) 2024 SiFive
> + *
> + * Authors
> + *	Zong Li <zong.li@sifive.com>
> + */
> +
> +#include <linux/io-64-nonatomic-hi-lo.h>
> +
> +#include "iommu.h"
> +#include "iommu-bits.h"
> +
> +#define to_riscv_iommu_pmu(p) (container_of(p, struct riscv_iommu_pmu, pmu))
> +
> +#define RISCV_IOMMU_PMU_ATTR_EXTRACTOR(_name, _mask)			\
> +	static inline u32 get_##_name(struct perf_event *event)		\
> +	{								\
> +		return FIELD_GET(_mask, event->attr.config);		\
> +	}								\
> +
> +RISCV_IOMMU_PMU_ATTR_EXTRACTOR(event, RISCV_IOMMU_IOHPMEVT_EVENTID);
> +RISCV_IOMMU_PMU_ATTR_EXTRACTOR(partial_matching, RISCV_IOMMU_IOHPMEVT_DMASK);
> +RISCV_IOMMU_PMU_ATTR_EXTRACTOR(pid_pscid, RISCV_IOMMU_IOHPMEVT_PID_PSCID);
> +RISCV_IOMMU_PMU_ATTR_EXTRACTOR(did_gscid, RISCV_IOMMU_IOHPMEVT_DID_GSCID);
> +RISCV_IOMMU_PMU_ATTR_EXTRACTOR(filter_pid_pscid, RISCV_IOMMU_IOHPMEVT_PV_PSCV);
> +RISCV_IOMMU_PMU_ATTR_EXTRACTOR(filter_did_gscid, RISCV_IOMMU_IOHPMEVT_DV_GSCV);
> +RISCV_IOMMU_PMU_ATTR_EXTRACTOR(filter_id_type, RISCV_IOMMU_IOHPMEVT_IDT);
> +
> +/* Formats */
> +PMU_FORMAT_ATTR(event, "config:0-14");
> +PMU_FORMAT_ATTR(partial_matching, "config:15");
> +PMU_FORMAT_ATTR(pid_pscid, "config:16-35");
> +PMU_FORMAT_ATTR(did_gscid, "config:36-59");
> +PMU_FORMAT_ATTR(filter_pid_pscid, "config:60");
> +PMU_FORMAT_ATTR(filter_did_gscid, "config:61");
> +PMU_FORMAT_ATTR(filter_id_type, "config:62");
> +
> +static struct attribute *riscv_iommu_pmu_formats[] = {
> +	&format_attr_event.attr,
> +	&format_attr_partial_matching.attr,
> +	&format_attr_pid_pscid.attr,
> +	&format_attr_did_gscid.attr,
> +	&format_attr_filter_pid_pscid.attr,
> +	&format_attr_filter_did_gscid.attr,
> +	&format_attr_filter_id_type.attr,
> +	NULL,
> +};
> +
> +static const struct attribute_group riscv_iommu_pmu_format_group = {
> +	.name = "format",
> +	.attrs = riscv_iommu_pmu_formats,
> +};
> +
> +/* Events */
> +static ssize_t riscv_iommu_pmu_event_show(struct device *dev,
> +					  struct device_attribute *attr,
> +					  char *page)
> +{
> +	struct perf_pmu_events_attr *pmu_attr;
> +
> +	pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
> +
> +	return sprintf(page, "event=0x%02llx\n", pmu_attr->id);

sysfs_emit()

> +}
> +
> +PMU_EVENT_ATTR(cycle, event_attr_cycle,
> +	       RISCV_IOMMU_HPMEVENT_CYCLE, riscv_iommu_pmu_event_show);
> +PMU_EVENT_ATTR(dont_count, event_attr_dont_count,
> +	       RISCV_IOMMU_HPMEVENT_INVALID, riscv_iommu_pmu_event_show);
> +PMU_EVENT_ATTR(untranslated_req, event_attr_untranslated_req,
> +	       RISCV_IOMMU_HPMEVENT_URQ, riscv_iommu_pmu_event_show);
> +PMU_EVENT_ATTR(translated_req, event_attr_translated_req,
> +	       RISCV_IOMMU_HPMEVENT_TRQ, riscv_iommu_pmu_event_show);
> +PMU_EVENT_ATTR(ats_trans_req, event_attr_ats_trans_req,
> +	       RISCV_IOMMU_HPMEVENT_ATS_RQ, riscv_iommu_pmu_event_show);
> +PMU_EVENT_ATTR(tlb_miss, event_attr_tlb_miss,
> +	       RISCV_IOMMU_HPMEVENT_TLB_MISS, riscv_iommu_pmu_event_show);
> +PMU_EVENT_ATTR(ddt_walks, event_attr_ddt_walks,
> +	       RISCV_IOMMU_HPMEVENT_DD_WALK, riscv_iommu_pmu_event_show);
> +PMU_EVENT_ATTR(pdt_walks, event_attr_pdt_walks,
> +	       RISCV_IOMMU_HPMEVENT_PD_WALK, riscv_iommu_pmu_event_show);
> +PMU_EVENT_ATTR(s_vs_pt_walks, event_attr_s_vs_pt_walks,
> +	       RISCV_IOMMU_HPMEVENT_S_VS_WALKS, riscv_iommu_pmu_event_show);
> +PMU_EVENT_ATTR(g_pt_walks, event_attr_g_pt_walks,
> +	       RISCV_IOMMU_HPMEVENT_G_WALKS, riscv_iommu_pmu_event_show);
> +
> +static struct attribute *riscv_iommu_pmu_events[] = {
> +	&event_attr_cycle.attr.attr,
> +	&event_attr_dont_count.attr.attr,
> +	&event_attr_untranslated_req.attr.attr,
> +	&event_attr_translated_req.attr.attr,
> +	&event_attr_ats_trans_req.attr.attr,
> +	&event_attr_tlb_miss.attr.attr,
> +	&event_attr_ddt_walks.attr.attr,
> +	&event_attr_pdt_walks.attr.attr,
> +	&event_attr_s_vs_pt_walks.attr.attr,
> +	&event_attr_g_pt_walks.attr.attr,
> +	NULL,
> +};
> +
> +static const struct attribute_group riscv_iommu_pmu_events_group = {
> +	.name = "events",
> +	.attrs = riscv_iommu_pmu_events,
> +};
> +
> +static const struct attribute_group *riscv_iommu_pmu_attr_grps[] = {
> +	&riscv_iommu_pmu_format_group,
> +	&riscv_iommu_pmu_events_group,
> +	NULL,
> +};

You also need a "cpumask" attribute to tell userspace this is a 
system/uncore PMU.
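
The usual shape is something like the below (untested, and hard-coding
CPU 0 purely for illustration - in practice you'd track whichever CPU
you actually use for event management and migrate it on hotplug),
hooked into riscv_iommu_pmu_attr_grps[]:

	static ssize_t cpumask_show(struct device *dev,
				    struct device_attribute *attr, char *buf)
	{
		/* report the CPU userspace should open events on */
		return cpumap_print_to_pagebuf(true, buf, cpumask_of(0));
	}
	static DEVICE_ATTR_RO(cpumask);

	static struct attribute *riscv_iommu_pmu_cpumask_attrs[] = {
		&dev_attr_cpumask.attr,
		NULL,
	};

	static const struct attribute_group riscv_iommu_pmu_cpumask_group = {
		.attrs = riscv_iommu_pmu_cpumask_attrs,
	};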

> +
> +/* PMU Operations */
> +static void riscv_iommu_pmu_set_counter(struct riscv_iommu_pmu *pmu, u32 idx,
> +					u64 value)
> +{
> +	void __iomem *addr = pmu->reg + RISCV_IOMMU_REG_IOHPMCYCLES;
> +
> +	if (WARN_ON_ONCE(idx < 0 || idx > pmu->num_counters))

One of those conditions is literally impossible, the other should 
already be avoided by construction.

> +		return;
> +
> +	if (idx == 0)
> +		value = (value & ~RISCV_IOMMU_IOHPMCYCLES_OF) |
> +			 (readq(addr) & RISCV_IOMMU_IOHPMCYCLES_OF);

I don't think you need this - as best I can tell, you never initialise a 
counter without also (re)enabling the interrupt (which is logical), so 
since "value" for the cycle counter should always implicitly have OF=0 
anyway, it should work to just write it like the other counters.

> +	writeq(FIELD_PREP(RISCV_IOMMU_IOHPMCTR_COUNTER, value), addr + idx * 8);

Is RISCV_IOMMU_IOHPMCTR_COUNTER honestly useful? Or is it actively 
hurting readability by obfuscating that we are in fact just using the 
full 64-bit value in all those places?

> +}
> +
> +static u64 riscv_iommu_pmu_get_counter(struct riscv_iommu_pmu *pmu, u32 idx)
> +{
> +	void __iomem *addr = pmu->reg + RISCV_IOMMU_REG_IOHPMCYCLES;
> +	u64 value;
> +
> +	if (WARN_ON_ONCE(idx < 0 || idx > pmu->num_counters))
> +		return -EINVAL;
> +
> +	value = readq(addr + idx * 8);

Might be worth leaving a comment just in case anyone does try to enable 
this for 32-bit that the io-64-nonatomic readq() isn't enough to work 
properly here.
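
e.g. something like:

	/*
	 * Note: with io-64-nonatomic-hi-lo, this readq() is two separate
	 * 32-bit accesses on 32-bit hosts, so a counter wider than 32 bits
	 * could be torn between the two halves while it is still counting.
	 */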

> +
> +	if (idx == 0)
> +		return FIELD_GET(RISCV_IOMMU_IOHPMCYCLES_COUNTER, value);
> +
> +	return FIELD_GET(RISCV_IOMMU_IOHPMCTR_COUNTER, value);
> +}
> +
> +static u64 riscv_iommu_pmu_get_event(struct riscv_iommu_pmu *pmu, u32 idx)
> +{
> +	void __iomem *addr = pmu->reg + RISCV_IOMMU_REG_IOHPMEVT_BASE;
> +
> +	if (WARN_ON_ONCE(idx < 0 || idx > pmu->num_counters))
> +		return 0;
> +
> +	/* There is no associtated IOHPMEVT0 for IOHPMCYCLES */
> +	if (idx == 0)
> +		return 0;
> +
> +	return readq(addr + (idx - 1) * 8);
> +}

That's a wonderfully expensive way to spell "pmu->events[idx]->hw.config"...

> +
> +static void riscv_iommu_pmu_set_event(struct riscv_iommu_pmu *pmu, u32 idx,
> +				      u64 value)
> +{
> +	void __iomem *addr = pmu->reg + RISCV_IOMMU_REG_IOHPMEVT_BASE;
> +
> +	if (WARN_ON_ONCE(idx < 0 || idx > pmu->num_counters))
> +		return;
> +
> +	/* There is no associtated IOHPMEVT0 for IOHPMCYCLES */
> +	if (idx == 0)
> +		return;
> +
> +	writeq(value, addr + (idx - 1) * 8);
> +}
> +
> +static void riscv_iommu_pmu_enable_counter(struct riscv_iommu_pmu *pmu, u32 idx)
> +{
> +	void __iomem *addr = pmu->reg + RISCV_IOMMU_REG_IOCOUNTINH;
> +	u32 value = readl(addr);
> +
> +	writel(value & ~BIT(idx), addr);
> +}
> +
> +static void riscv_iommu_pmu_disable_counter(struct riscv_iommu_pmu *pmu, u32 idx)
> +{
> +	void __iomem *addr = pmu->reg + RISCV_IOMMU_REG_IOCOUNTINH;
> +	u32 value = readl(addr);
> +
> +	writel(value | BIT(idx), addr);
> +}
> +
> +static void riscv_iommu_pmu_enable_ovf_intr(struct riscv_iommu_pmu *pmu, u32 idx)
> +{
> +	u64 value;
> +
> +	if (get_event(pmu->events[idx]) == RISCV_IOMMU_HPMEVENT_CYCLE) {

And that's a very creative way to spell "if (idx == 0)".

> +		value = riscv_iommu_pmu_get_counter(pmu, idx) & ~RISCV_IOMMU_IOHPMCYCLES_OF;
> +		writeq(value, pmu->reg + RISCV_IOMMU_REG_IOHPMCYCLES);

(and as above, I think you can make this a no-op for the cycle counter)

> +	} else {
> +		value = riscv_iommu_pmu_get_event(pmu, idx) & ~RISCV_IOMMU_IOHPMEVT_OF;
> +		writeq(value, pmu->reg + RISCV_IOMMU_REG_IOHPMEVT_BASE + (idx - 1) * 8);
> +	}
> +}
> +
> +static void riscv_iommu_pmu_disable_ovf_intr(struct riscv_iommu_pmu *pmu, u32 idx)
> +{
> +	u64 value;
> +
> +	if (get_event(pmu->events[idx]) == RISCV_IOMMU_HPMEVENT_CYCLE) {
> +		value = riscv_iommu_pmu_get_counter(pmu, idx) | RISCV_IOMMU_IOHPMCYCLES_OF;
> +		writeq(value, pmu->reg + RISCV_IOMMU_REG_IOHPMCYCLES);
> +	} else {
> +		value = riscv_iommu_pmu_get_event(pmu, idx) | RISCV_IOMMU_IOHPMEVT_OF;
> +		writeq(value, pmu->reg + RISCV_IOMMU_REG_IOHPMEVT_BASE + (idx - 1) * 8);
> +	}
> +}

TBH I'd be inclined to just leave out all the dead cleanup code if the 
PMU driver is tied to the IOMMU driver and can never realistically be 
removed.

> +static void riscv_iommu_pmu_start_all(struct riscv_iommu_pmu *pmu)
> +{
> +	int idx;
> +
> +	for_each_set_bit(idx, pmu->used_counters, pmu->num_counters) {
> +		riscv_iommu_pmu_enable_ovf_intr(pmu, idx);

Surely this should only touch the counter(s) that overflowed and have 
been handled? It might be cleaner to keep that within the IRQ handler 
itself.

> +		riscv_iommu_pmu_enable_counter(pmu, idx);

This needs to start all active counters together, not one-by-one.

> +	}
> +}
> +
> +static void riscv_iommu_pmu_stop_all(struct riscv_iommu_pmu *pmu)
> +{
> +	writel(GENMASK_ULL(pmu->num_counters - 1, 0),
> +	       pmu->reg + RISCV_IOMMU_REG_IOCOUNTINH);
> +}
> +
> +/* PMU APIs */
> +static int riscv_iommu_pmu_set_period(struct perf_event *event)
> +{
> +	struct riscv_iommu_pmu *pmu = to_riscv_iommu_pmu(event->pmu);
> +	struct hw_perf_event *hwc = &event->hw;
> +	s64 left = local64_read(&hwc->period_left);
> +	s64 period = hwc->sample_period;
> +	u64 max_period = pmu->mask_counter;
> +	int ret = 0;
> +
> +	if (unlikely(left <= -period)) {
> +		left = period;
> +		local64_set(&hwc->period_left, left);
> +		hwc->last_period = period;
> +		ret = 1;
> +	}
> +
> +	if (unlikely(left <= 0)) {
> +		left += period;
> +		local64_set(&hwc->period_left, left);
> +		hwc->last_period = period;
> +		ret = 1;
> +	}

None of this is relevant or necessary. This is not a CPU PMU, so it 
can't support sampling because it doesn't have a meaningful context to 
sample.

> +	/*
> +	 * Limit the maximum period to prevent the counter value
> +	 * from overtaking the one we are about to program. In
> +	 * effect we are reducing max_period to account for
> +	 * interrupt latency (and we are being very conservative).
> +	 */
> +	if (left > (max_period >> 1))
> +		left = (max_period >> 1);
> +
> +	local64_set(&hwc->prev_count, (u64)-left);
> +	riscv_iommu_pmu_set_counter(pmu, hwc->idx, (u64)(-left) & max_period);
> +	perf_event_update_userpage(event);
> +
> +	return ret;
> +}
> +
> +static int riscv_iommu_pmu_event_init(struct perf_event *event)
> +{
> +	struct riscv_iommu_pmu *pmu = to_riscv_iommu_pmu(event->pmu);
> +	struct hw_perf_event *hwc = &event->hw;

You first need to validate that the event is for this PMU at all, and 
return -ENOENT if not.
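
i.e. the standard check at the top of event_init(), something like:

	if (event->attr.type != event->pmu->type)
		return -ENOENT;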

> +	hwc->idx = -1;
> +	hwc->config = event->attr.config;
> +
> +	if (!is_sampling_event(event)) {

As above, you can't support sampling events anyway, so you should reject 
them with -EINVAL.

You also need to validate event groups to ensure they don't contain more 
events than could ever be scheduled at once.
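
Roughly along these lines (an untested sketch, ignoring the extra
wrinkle that at most one event in the group can be the cycle counter):

	static int riscv_iommu_pmu_validate_group(struct perf_event *event)
	{
		struct riscv_iommu_pmu *pmu = to_riscv_iommu_pmu(event->pmu);
		struct perf_event *sibling, *leader = event->group_leader;
		int counters = 1;	/* the new event itself */

		if (leader != event && leader->pmu == event->pmu)
			counters++;

		for_each_sibling_event(sibling, leader)
			if (sibling->pmu == event->pmu)
				counters++;

		return counters > pmu->num_counters ? -EINVAL : 0;
	}

with event_init() returning -EINVAL for is_sampling_event(event) and
whenever this check fails.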

> +		/*
> +		 * For non-sampling runs, limit the sample_period to half
> +		 * of the counter width. That way, the new counter value
> +		 * is far less likely to overtake the previous one unless
> +		 * you have some serious IRQ latency issues.
> +		 */
> +		hwc->sample_period = pmu->mask_counter >> 1;
> +		hwc->last_period = hwc->sample_period;
> +		local64_set(&hwc->period_left, hwc->sample_period);
> +	}
> +
> +	return 0;
> +}
> +
> +static void riscv_iommu_pmu_update(struct perf_event *event)
> +{
> +	struct hw_perf_event *hwc = &event->hw;
> +	struct riscv_iommu_pmu *pmu = to_riscv_iommu_pmu(event->pmu);
> +	u64 delta, prev, now;
> +	u32 idx = hwc->idx;
> +
> +	do {
> +		prev = local64_read(&hwc->prev_count);
> +		now = riscv_iommu_pmu_get_counter(pmu, idx);
> +	} while (local64_cmpxchg(&hwc->prev_count, prev, now) != prev);
> +
> +	delta = FIELD_GET(RISCV_IOMMU_IOHPMCTR_COUNTER, now - prev) & pmu->mask_counter;
> +	local64_add(delta, &event->count);
> +	local64_sub(delta, &hwc->period_left);
> +}
> +
> +static void riscv_iommu_pmu_start(struct perf_event *event, int flags)
> +{
> +	struct riscv_iommu_pmu *pmu = to_riscv_iommu_pmu(event->pmu);
> +	struct hw_perf_event *hwc = &event->hw;
> +
> +	if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
> +		return;
> +
> +	if (flags & PERF_EF_RELOAD)
> +		WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
> +
> +	hwc->state = 0;
> +	riscv_iommu_pmu_set_period(event);
> +	riscv_iommu_pmu_set_event(pmu, hwc->idx, hwc->config);
> +	riscv_iommu_pmu_enable_ovf_intr(pmu, hwc->idx);

This does nothing, since set_event has just implicitly written OF=0.

...unless, that is, the user was mischievous and also set bit 63 in 
event->attr.config, since you never sanitised the input ;)

> +	riscv_iommu_pmu_enable_counter(pmu, hwc->idx);
> +
> +	perf_event_update_userpage(event);
> +}
> +
> +static void riscv_iommu_pmu_stop(struct perf_event *event, int flags)
> +{
> +	struct riscv_iommu_pmu *pmu = to_riscv_iommu_pmu(event->pmu);
> +	struct hw_perf_event *hwc = &event->hw;
> +
> +	if (hwc->state & PERF_HES_STOPPED)
> +		return;
> +
> +	riscv_iommu_pmu_set_event(pmu, hwc->idx, RISCV_IOMMU_HPMEVENT_INVALID);
> +	riscv_iommu_pmu_disable_counter(pmu, hwc->idx);
> +
> +	if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE))
> +		riscv_iommu_pmu_update(event);
> +
> +	hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
> +}
> +
> +static int riscv_iommu_pmu_add(struct perf_event *event, int flags)
> +{
> +	struct hw_perf_event *hwc = &event->hw;
> +	struct riscv_iommu_pmu *pmu = to_riscv_iommu_pmu(event->pmu);
> +	unsigned int num_counters = pmu->num_counters;
> +	int idx;
> +
> +	/* Reserve index zero for iohpmcycles */
> +	if (get_event(event) == RISCV_IOMMU_HPMEVENT_CYCLE)
> +		idx = 0;
> +	else
> +		idx = find_next_zero_bit(pmu->used_counters, num_counters, 1);
> +
> +	if (idx == num_counters)
> +		return -EAGAIN;

But stomping a cycles event on top of an existing cycles event is fine?

> +	set_bit(idx, pmu->used_counters);
> +
> +	pmu->events[idx] = event;
> +	hwc->idx = idx;
> +	hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
> +
> +	if (flags & PERF_EF_START)
> +		riscv_iommu_pmu_start(event, flags);
> +
> +	/* Propagate changes to the userspace mapping. */
> +	perf_event_update_userpage(event);
> +
> +	return 0;
> +}
> +
> +static void riscv_iommu_pmu_read(struct perf_event *event)
> +{
> +	riscv_iommu_pmu_update(event);
> +}
> +
> +static void riscv_iommu_pmu_del(struct perf_event *event, int flags)
> +{
> +	struct hw_perf_event *hwc = &event->hw;
> +	struct riscv_iommu_pmu *pmu = to_riscv_iommu_pmu(event->pmu);
> +	int idx = hwc->idx;
> +
> +	riscv_iommu_pmu_stop(event, PERF_EF_UPDATE);
> +	pmu->events[idx] = NULL;
> +	clear_bit(idx, pmu->used_counters);
> +	perf_event_update_userpage(event);
> +}
> +
> +irqreturn_t riscv_iommu_pmu_handle_irq(struct riscv_iommu_pmu *pmu)
> +{
> +	struct perf_sample_data data;
> +	struct pt_regs *regs;
> +	u32 ovf = readl(pmu->reg + RISCV_IOMMU_REG_IOCOUNTOVF);
> +	int idx;
> +
> +	if (!ovf)
> +		return IRQ_NONE;
> +
> +	riscv_iommu_pmu_stop_all(pmu);
> +
> +	regs = get_irq_regs();

What do these regs represent? If you look at the perf_event_open ABI, 
you'll see that events can target various combinations of CPU and/or 
pid, but there is no encoding for "whichever CPU takes the IOMMU PMU 
interrupt". Thus whatever you get here is more than likely not what the 
user asked for, and this is why non-CPU PMUs cannot reasonably support 
sampling ;)

> +
> +	for_each_set_bit(idx, (unsigned long *)&ovf, pmu->num_counters) {
> +		struct perf_event *event = pmu->events[idx];
> +		struct hw_perf_event *hwc;
> +
> +		if (WARN_ON_ONCE(!event) || !is_sampling_event(event))

You still need to handle counter rollover for all events, otherwise they 
can start losing counts and rapidly turn to nonsense. Admittedly it's 
largely theoretical with full 64-bit counters, but still...

> +			continue;
> +
> +		hwc = &event->hw;
> +
> +		riscv_iommu_pmu_update(event);
> +		perf_sample_data_init(&data, 0, hwc->last_period);
> +		if (!riscv_iommu_pmu_set_period(event))
> +			continue;
> +
> +		if (perf_event_overflow(event, &data, regs))
> +			riscv_iommu_pmu_stop(event, 0);
> +	}
> +
> +	riscv_iommu_pmu_start_all(pmu);
> +
> +	return IRQ_HANDLED;
> +}
> +
> +int riscv_iommu_pmu_init(struct riscv_iommu_pmu *pmu, void __iomem *reg,
> +			 const char *dev_name)
> +{
> +	char *name;
> +	int ret;
> +
> +	pmu->reg = reg;
> +	pmu->num_counters = RISCV_IOMMU_HPM_COUNTER_NUM;
> +	pmu->mask_counter = RISCV_IOMMU_IOHPMCTR_COUNTER;

Initially this looks weird - why bother storing constants in memory if 
they're constant? - but I see the spec implies they are not necessarily 
fixed, and we can only actually assume at least one counter at least 32 
bits wide, so I guess this is really more of a placeholder? Is there a 
well-defined way we're supposed to be able to discover these, like some 
more ID register fields somewhere, or writing all 1s to various 
registers to see what sticks, or is it liable to be a mess of just 
having to know what each implementation has by matching DT compatibles 
and/or PCI IDs?

> +
> +	pmu->pmu = (struct pmu) {
> +		.task_ctx_nr	= perf_invalid_context,
> +		.event_init	= riscv_iommu_pmu_event_init,
> +		.add		= riscv_iommu_pmu_add,
> +		.del		= riscv_iommu_pmu_del,
> +		.start		= riscv_iommu_pmu_start,
> +		.stop		= riscv_iommu_pmu_stop,
> +		.read		= riscv_iommu_pmu_read,
> +		.attr_groups	= riscv_iommu_pmu_attr_grps,
> +		.capabilities	= PERF_PMU_CAP_NO_EXCLUDE,
> +		.module		= THIS_MODULE,
> +	};

The new thing is that you can now set pmu.parent to the IOMMU device so 
their relationship is clear in sysfs, rather than having to play tricks 
with the PMU name.
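
For example, assuming the init function is also passed the IOMMU's
struct device (it currently only gets the registers and a name), adding

	.parent		= dev,

to the struct pmu initializer above should be enough.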

> +
> +	name = kasprintf(GFP_KERNEL, "riscv_iommu_pmu_%s", dev_name);
> +
> +	ret = perf_pmu_register(&pmu->pmu, name, -1);
> +	if (ret) {
> +		pr_err("Failed to register riscv_iommu_pmu_%s: %d\n",
> +		       dev_name, ret);
> +		return ret;
> +	}
> +
> +	/* Stop all counters and later start the counter with perf */
> +	riscv_iommu_pmu_stop_all(pmu);
> +
> +	pr_info("riscv_iommu_pmu_%s: Registered with %d counters\n",
> +		dev_name, pmu->num_counters);
> +
> +	return 0;
> +}
> +
> +void riscv_iommu_pmu_uninit(struct riscv_iommu_pmu *pmu)
> +{
> +	int idx;
> +
> +	/* Disable interrupt and functions */
> +	for_each_set_bit(idx, pmu->used_counters, pmu->num_counters) {

This will never do anything, since even if we could ever get here, it 
would not be at a time when there are any active events. Unregistering 
an in-use PMU does not end well (hence why standalone PMU drivers need 
to use suppress_bind_attrs)...

Thanks,
Robin.

> +		riscv_iommu_pmu_disable_counter(pmu, idx);
> +		riscv_iommu_pmu_disable_ovf_intr(pmu, idx);
> +	}
> +
> +	perf_pmu_unregister(&pmu->pmu);
> +}
> diff --git a/drivers/iommu/riscv/iommu.h b/drivers/iommu/riscv/iommu.h
> index b1c4664542b4..92659a8a75ae 100644
> --- a/drivers/iommu/riscv/iommu.h
> +++ b/drivers/iommu/riscv/iommu.h
> @@ -60,11 +60,19 @@ struct riscv_iommu_device {
>   	unsigned int ddt_mode;
>   	dma_addr_t ddt_phys;
>   	u64 *ddt_root;
> +
> +	/* hardware performance monitor */
> +	struct riscv_iommu_pmu pmu;
>   };
>   
>   int riscv_iommu_init(struct riscv_iommu_device *iommu);
>   void riscv_iommu_remove(struct riscv_iommu_device *iommu);
>   
> +int riscv_iommu_pmu_init(struct riscv_iommu_pmu *pmu, void __iomem *reg,
> +			 const char *name);
> +void riscv_iommu_pmu_uninit(struct riscv_iommu_pmu *pmu);
> +irqreturn_t riscv_iommu_pmu_handle_irq(struct riscv_iommu_pmu *pmu);
> +
>   #define riscv_iommu_readl(iommu, addr) \
>   	readl_relaxed((iommu)->reg + (addr))
>
Re: [PATCH 1/2] iommu/riscv: add RISC-V IOMMU PMU support
Posted by Zong Li 10 months, 3 weeks ago
Hi Robin,
Thanks for your review, and sorry for the late response; there was a
lot of information to digest, and it took me some time to work through it.

On Thu, Jan 16, 2025 at 5:32 AM Robin Murphy <robin.murphy@arm.com> wrote:
>
> On 2025-01-15 3:03 am, Zong Li wrote:
> > Support for the RISC-V IOMMU hardware performance monitor includes
> > both counting and sampling modes.
> >
> > The specification does not define an event ID for counting the
> > number of clock cycles, meaning there is no associated `iohpmevt0`.
> > However, we need an event for counting cycle, so we reserve the
> > maximum event ID for this purpose.
>
> Why not use an extra config bit to encode cycles events completely
> independently of the regular eventID space? Or even just use 0 since by
> definition that cannot overlap a valid eventID?

Event ID 0 means "Don't count", so reusing it for the cycle event might
be a bit confusing when compared against the spec. I guess we could use
another bit for the cycle event instead, such as bit 15.

>
> > Signed-off-by: Zong Li <zong.li@sifive.com>
> > Tested-by: Xu Lu <luxu.kernel@bytedance.com>
> > ---
> >   drivers/iommu/riscv/Makefile     |   2 +-
> >   drivers/iommu/riscv/iommu-bits.h |  16 +
> >   drivers/iommu/riscv/iommu-pmu.c  | 486 +++++++++++++++++++++++++++++++
> >   drivers/iommu/riscv/iommu.h      |   8 +
> >   4 files changed, 511 insertions(+), 1 deletion(-)
> >   create mode 100644 drivers/iommu/riscv/iommu-pmu.c
> >
> > diff --git a/drivers/iommu/riscv/Makefile b/drivers/iommu/riscv/Makefile
> > index f54c9ed17d41..d36625a1fd08 100644
> > --- a/drivers/iommu/riscv/Makefile
> > +++ b/drivers/iommu/riscv/Makefile
> > @@ -1,3 +1,3 @@
> >   # SPDX-License-Identifier: GPL-2.0-only
> > -obj-$(CONFIG_RISCV_IOMMU) += iommu.o iommu-platform.o
> > +obj-$(CONFIG_RISCV_IOMMU) += iommu.o iommu-platform.o iommu-pmu.o
> >   obj-$(CONFIG_RISCV_IOMMU_PCI) += iommu-pci.o
> > diff --git a/drivers/iommu/riscv/iommu-bits.h b/drivers/iommu/riscv/iommu-bits.h
> > index 98daf0e1a306..60523449f016 100644
> > --- a/drivers/iommu/riscv/iommu-bits.h
> > +++ b/drivers/iommu/riscv/iommu-bits.h
> > @@ -17,6 +17,7 @@
> >   #include <linux/types.h>
> >   #include <linux/bitfield.h>
> >   #include <linux/bits.h>
> > +#include <linux/perf_event.h>
> >
> >   /*
> >    * Chapter 5: Memory Mapped register interface
> > @@ -207,6 +208,7 @@ enum riscv_iommu_ddtp_modes {
> >   /* 5.22 Performance monitoring event counters (31 * 64bits) */
> >   #define RISCV_IOMMU_REG_IOHPMCTR_BASE       0x0068
> >   #define RISCV_IOMMU_REG_IOHPMCTR(_n)        (RISCV_IOMMU_REG_IOHPMCTR_BASE + ((_n) * 0x8))
> > +#define RISCV_IOMMU_IOHPMCTR_COUNTER GENMASK_ULL(63, 0)
> >
> >   /* 5.23 Performance monitoring event selectors (31 * 64bits) */
> >   #define RISCV_IOMMU_REG_IOHPMEVT_BASE       0x0160
> > @@ -250,6 +252,20 @@ enum riscv_iommu_hpmevent_id {
> >       RISCV_IOMMU_HPMEVENT_MAX        = 9
> >   };
> >
> > +/* Use maximum event ID for cycle event */
> > +#define RISCV_IOMMU_HPMEVENT_CYCLE   GENMASK_ULL(14, 0)
>
> It's also horribly confusing to use GENMASK for something which is not
> actually a mask at all.

Let's define a constant value for it, or maybe use BIT(15) as I mentioned above.

>
> > +#define RISCV_IOMMU_HPM_COUNTER_NUM  32
> > +
> > +struct riscv_iommu_pmu {
> > +     struct pmu pmu;
> > +     void __iomem *reg;
> > +     int num_counters;
> > +     u64 mask_counter;
> > +     struct perf_event *events[RISCV_IOMMU_IOHPMEVT_CNT + 1];
> > +     DECLARE_BITMAP(used_counters, RISCV_IOMMU_IOHPMEVT_CNT + 1);
>
> Hmm, from experience I would expect these variables to be
> number-of-counters related, but I guess there must be some special
> cleverness going on since that number-of-counters looking
> RISCV_IOMMU_HPM_COUNTER_NUM definition is conspicuously not used, so it
> must be significant that these are instead related to the number of...
>
> [ goes off to search code... ]
>
> ...event selectors? But with a magic +1 for reasons so obvious they
> clearly don't need explaining.

Yes, I think RISCV_IOMMU_HPM_COUNTER_NUM is better. I can't remember
clearly why I preferred to use RISCV_IOMMU_IOHPMEVT_CNT + 1 before.
Maybe I wanted to emphasize that the number of counters is N event
counters plus 1 cycle counter.

>
> > +};
> > +
> >   /* 5.24 Translation request IOVA (64bits) */
> >   #define RISCV_IOMMU_REG_TR_REQ_IOVA     0x0258
> >   #define RISCV_IOMMU_TR_REQ_IOVA_VPN GENMASK_ULL(63, 12)
> > diff --git a/drivers/iommu/riscv/iommu-pmu.c b/drivers/iommu/riscv/iommu-pmu.c
> > new file mode 100644
> > index 000000000000..74eb1525cd32
> > --- /dev/null
> > +++ b/drivers/iommu/riscv/iommu-pmu.c
> > @@ -0,0 +1,486 @@
> > +// SPDX-License-Identifier: GPL-2.0-only
> > +/*
> > + * Copyright (C) 2024 SiFive
> > + *
> > + * Authors
> > + *   Zong Li <zong.li@sifive.com>
> > + */
> > +
> > +#include <linux/io-64-nonatomic-hi-lo.h>
> > +
> > +#include "iommu.h"
> > +#include "iommu-bits.h"
> > +
> > +#define to_riscv_iommu_pmu(p) (container_of(p, struct riscv_iommu_pmu, pmu))
> > +
> > +#define RISCV_IOMMU_PMU_ATTR_EXTRACTOR(_name, _mask)                 \
> > +     static inline u32 get_##_name(struct perf_event *event)         \
> > +     {                                                               \
> > +             return FIELD_GET(_mask, event->attr.config);            \
> > +     }                                                               \
> > +
> > +RISCV_IOMMU_PMU_ATTR_EXTRACTOR(event, RISCV_IOMMU_IOHPMEVT_EVENTID);
> > +RISCV_IOMMU_PMU_ATTR_EXTRACTOR(partial_matching, RISCV_IOMMU_IOHPMEVT_DMASK);
> > +RISCV_IOMMU_PMU_ATTR_EXTRACTOR(pid_pscid, RISCV_IOMMU_IOHPMEVT_PID_PSCID);
> > +RISCV_IOMMU_PMU_ATTR_EXTRACTOR(did_gscid, RISCV_IOMMU_IOHPMEVT_DID_GSCID);
> > +RISCV_IOMMU_PMU_ATTR_EXTRACTOR(filter_pid_pscid, RISCV_IOMMU_IOHPMEVT_PV_PSCV);
> > +RISCV_IOMMU_PMU_ATTR_EXTRACTOR(filter_did_gscid, RISCV_IOMMU_IOHPMEVT_DV_GSCV);
> > +RISCV_IOMMU_PMU_ATTR_EXTRACTOR(filter_id_type, RISCV_IOMMU_IOHPMEVT_IDT);
> > +
> > +/* Formats */
> > +PMU_FORMAT_ATTR(event, "config:0-14");
> > +PMU_FORMAT_ATTR(partial_matching, "config:15");
> > +PMU_FORMAT_ATTR(pid_pscid, "config:16-35");
> > +PMU_FORMAT_ATTR(did_gscid, "config:36-59");
> > +PMU_FORMAT_ATTR(filter_pid_pscid, "config:60");
> > +PMU_FORMAT_ATTR(filter_did_gscid, "config:61");
> > +PMU_FORMAT_ATTR(filter_id_type, "config:62");
> > +
> > +static struct attribute *riscv_iommu_pmu_formats[] = {
> > +     &format_attr_event.attr,
> > +     &format_attr_partial_matching.attr,
> > +     &format_attr_pid_pscid.attr,
> > +     &format_attr_did_gscid.attr,
> > +     &format_attr_filter_pid_pscid.attr,
> > +     &format_attr_filter_did_gscid.attr,
> > +     &format_attr_filter_id_type.attr,
> > +     NULL,
> > +};
> > +
> > +static const struct attribute_group riscv_iommu_pmu_format_group = {
> > +     .name = "format",
> > +     .attrs = riscv_iommu_pmu_formats,
> > +};
> > +
> > +/* Events */
> > +static ssize_t riscv_iommu_pmu_event_show(struct device *dev,
> > +                                       struct device_attribute *attr,
> > +                                       char *page)
> > +{
> > +     struct perf_pmu_events_attr *pmu_attr;
> > +
> > +     pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
> > +
> > +     return sprintf(page, "event=0x%02llx\n", pmu_attr->id);
>
> sysfs_emit()
>

Thanks for pointing it out.

> > +}
> > +
> > +PMU_EVENT_ATTR(cycle, event_attr_cycle,
> > +            RISCV_IOMMU_HPMEVENT_CYCLE, riscv_iommu_pmu_event_show);
> > +PMU_EVENT_ATTR(dont_count, event_attr_dont_count,
> > +            RISCV_IOMMU_HPMEVENT_INVALID, riscv_iommu_pmu_event_show);
> > +PMU_EVENT_ATTR(untranslated_req, event_attr_untranslated_req,
> > +            RISCV_IOMMU_HPMEVENT_URQ, riscv_iommu_pmu_event_show);
> > +PMU_EVENT_ATTR(translated_req, event_attr_translated_req,
> > +            RISCV_IOMMU_HPMEVENT_TRQ, riscv_iommu_pmu_event_show);
> > +PMU_EVENT_ATTR(ats_trans_req, event_attr_ats_trans_req,
> > +            RISCV_IOMMU_HPMEVENT_ATS_RQ, riscv_iommu_pmu_event_show);
> > +PMU_EVENT_ATTR(tlb_miss, event_attr_tlb_miss,
> > +            RISCV_IOMMU_HPMEVENT_TLB_MISS, riscv_iommu_pmu_event_show);
> > +PMU_EVENT_ATTR(ddt_walks, event_attr_ddt_walks,
> > +            RISCV_IOMMU_HPMEVENT_DD_WALK, riscv_iommu_pmu_event_show);
> > +PMU_EVENT_ATTR(pdt_walks, event_attr_pdt_walks,
> > +            RISCV_IOMMU_HPMEVENT_PD_WALK, riscv_iommu_pmu_event_show);
> > +PMU_EVENT_ATTR(s_vs_pt_walks, event_attr_s_vs_pt_walks,
> > +            RISCV_IOMMU_HPMEVENT_S_VS_WALKS, riscv_iommu_pmu_event_show);
> > +PMU_EVENT_ATTR(g_pt_walks, event_attr_g_pt_walks,
> > +            RISCV_IOMMU_HPMEVENT_G_WALKS, riscv_iommu_pmu_event_show);
> > +
> > +static struct attribute *riscv_iommu_pmu_events[] = {
> > +     &event_attr_cycle.attr.attr,
> > +     &event_attr_dont_count.attr.attr,
> > +     &event_attr_untranslated_req.attr.attr,
> > +     &event_attr_translated_req.attr.attr,
> > +     &event_attr_ats_trans_req.attr.attr,
> > +     &event_attr_tlb_miss.attr.attr,
> > +     &event_attr_ddt_walks.attr.attr,
> > +     &event_attr_pdt_walks.attr.attr,
> > +     &event_attr_s_vs_pt_walks.attr.attr,
> > +     &event_attr_g_pt_walks.attr.attr,
> > +     NULL,
> > +};
> > +
> > +static const struct attribute_group riscv_iommu_pmu_events_group = {
> > +     .name = "events",
> > +     .attrs = riscv_iommu_pmu_events,
> > +};
> > +
> > +static const struct attribute_group *riscv_iommu_pmu_attr_grps[] = {
> > +     &riscv_iommu_pmu_format_group,
> > +     &riscv_iommu_pmu_events_group,
> > +     NULL,
> > +};
>
> You also need a "cpumask" attribute to tell userspace this is a
> system/uncore PMU.

Thanks.

>
> > +
> > +/* PMU Operations */
> > +static void riscv_iommu_pmu_set_counter(struct riscv_iommu_pmu *pmu, u32 idx,
> > +                                     u64 value)
> > +{
> > +     void __iomem *addr = pmu->reg + RISCV_IOMMU_REG_IOHPMCYCLES;
> > +
> > +     if (WARN_ON_ONCE(idx < 0 || idx > pmu->num_counters))
>
> One of those conditions is literally impossible, the other should
> already be avoided by construction.
>

Let me remove this condition check.

> > +             return;
> > +
> > +     if (idx == 0)
> > +             value = (value & ~RISCV_IOMMU_IOHPMCYCLES_OF) |
> > +                      (readq(addr) & RISCV_IOMMU_IOHPMCYCLES_OF);
>
> I don't think you need this - as best I can tell, you never initialise a
> counter without also (re)enabling the interrupt (which is logical), so
> since "value" for the cycle counter should always implicitly have OF=0
> anyway, it should work to just write it like the other counters.
>
> > +     writeq(FIELD_PREP(RISCV_IOMMU_IOHPMCTR_COUNTER, value), addr + idx * 8);
>
> Is RISCV_IOMMU_IOHPMCTR_COUNTER honestly useful? Or is it actively
> hurting readability by obfuscating that we are in fact just using the
> full 64-bit value in all those places?

It should be riscv_iommu_pmu->mask_counter, as the valid width is
implementation-defined by the hardware. Although we could still write a
full 64-bit value here since the register is WARL, I think it is more
readable to mask with the valid width explicitly. That way it stays
obvious that the valid width might not be the full 64 bits.
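
i.e. something like:

	writeq(value & pmu->mask_counter, addr + idx * 8);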

>
> > +}
> > +
> > +static u64 riscv_iommu_pmu_get_counter(struct riscv_iommu_pmu *pmu, u32 idx)
> > +{
> > +     void __iomem *addr = pmu->reg + RISCV_IOMMU_REG_IOHPMCYCLES;
> > +     u64 value;
> > +
> > +     if (WARN_ON_ONCE(idx < 0 || idx > pmu->num_counters))
> > +             return -EINVAL;
> > +
> > +     value = readq(addr + idx * 8);
>
> Might be worth leaving a comment just in case anyone does try to enable
> this for 32-bit that the io-64-nonatomic readq() isn't enough to work
> properly here.

I guess you are referring to a counter with a 32-bit width, is that
correct? It should be fine on a 32-bit system, since readq will be
defined as hi_lo_readq. For a 32-bit-wide counter, the return value
will be masked with the valid width, so I think it should work as
expected. Does that make sense?

>
> > +
> > +     if (idx == 0)
> > +             return FIELD_GET(RISCV_IOMMU_IOHPMCYCLES_COUNTER, value);
> > +
> > +     return FIELD_GET(RISCV_IOMMU_IOHPMCTR_COUNTER, value);
> > +}
> > +
> > +static u64 riscv_iommu_pmu_get_event(struct riscv_iommu_pmu *pmu, u32 idx)
> > +{
> > +     void __iomem *addr = pmu->reg + RISCV_IOMMU_REG_IOHPMEVT_BASE;
> > +
> > +     if (WARN_ON_ONCE(idx < 0 || idx > pmu->num_counters))
> > +             return 0;
> > +
> > +     /* There is no associtated IOHPMEVT0 for IOHPMCYCLES */
> > +     if (idx == 0)
> > +             return 0;
> > +
> > +     return readq(addr + (idx - 1) * 8);
> > +}
>
> That's a wonderfully expensive way to spell "pmu->events[idx]->hw.config"...
>

Yes, I think we can use what we already store in memory
(pmu->events[idx]->hw.config); the contents are consistent.

> > +
> > +static void riscv_iommu_pmu_set_event(struct riscv_iommu_pmu *pmu, u32 idx,
> > +                                   u64 value)
> > +{
> > +     void __iomem *addr = pmu->reg + RISCV_IOMMU_REG_IOHPMEVT_BASE;
> > +
> > +     if (WARN_ON_ONCE(idx < 0 || idx > pmu->num_counters))
> > +             return;
> > +
> > +     /* There is no associtated IOHPMEVT0 for IOHPMCYCLES */
> > +     if (idx == 0)
> > +             return;
> > +
> > +     writeq(value, addr + (idx - 1) * 8);
> > +}
> > +
> > +static void riscv_iommu_pmu_enable_counter(struct riscv_iommu_pmu *pmu, u32 idx)
> > +{
> > +     void __iomem *addr = pmu->reg + RISCV_IOMMU_REG_IOCOUNTINH;
> > +     u32 value = readl(addr);
> > +
> > +     writel(value & ~BIT(idx), addr);
> > +}
> > +
> > +static void riscv_iommu_pmu_disable_counter(struct riscv_iommu_pmu *pmu, u32 idx)
> > +{
> > +     void __iomem *addr = pmu->reg + RISCV_IOMMU_REG_IOCOUNTINH;
> > +     u32 value = readl(addr);
> > +
> > +     writel(value | BIT(idx), addr);
> > +}
> > +
> > +static void riscv_iommu_pmu_enable_ovf_intr(struct riscv_iommu_pmu *pmu, u32 idx)
> > +{
> > +     u64 value;
> > +
> > +     if (get_event(pmu->events[idx]) == RISCV_IOMMU_HPMEVENT_CYCLE) {
>
> And that's a very creative way to spell "if (idx == 0)".

Let's use the value of idx directly.

>
> > +             value = riscv_iommu_pmu_get_counter(pmu, idx) & ~RISCV_IOMMU_IOHPMCYCLES_OF;
> > +             writeq(value, pmu->reg + RISCV_IOMMU_REG_IOHPMCYCLES);
>
> (and as above, I think you can make this a no-op for the cycle counter)

Let's remove it.

>
> > +     } else {
> > +             value = riscv_iommu_pmu_get_event(pmu, idx) & ~RISCV_IOMMU_IOHPMEVT_OF;
> > +             writeq(value, pmu->reg + RISCV_IOMMU_REG_IOHPMEVT_BASE + (idx - 1) * 8);
> > +     }
> > +}
> > +
> > +static void riscv_iommu_pmu_disable_ovf_intr(struct riscv_iommu_pmu *pmu, u32 idx)
> > +{
> > +     u64 value;
> > +
> > +     if (get_event(pmu->events[idx]) == RISCV_IOMMU_HPMEVENT_CYCLE) {
> > +             value = riscv_iommu_pmu_get_counter(pmu, idx) | RISCV_IOMMU_IOHPMCYCLES_OF;
> > +             writeq(value, pmu->reg + RISCV_IOMMU_REG_IOHPMCYCLES);
> > +     } else {
> > +             value = riscv_iommu_pmu_get_event(pmu, idx) | RISCV_IOMMU_IOHPMEVT_OF;
> > +             writeq(value, pmu->reg + RISCV_IOMMU_REG_IOHPMEVT_BASE + (idx - 1) * 8);
> > +     }
> > +}
>
> TBH I'd be inclined to just leave out all the dead cleanup code if the
> PMU driver is tied to the IOMMU driver and can never realistically be
> removed.

Yes, but riscv_iommu_remove() is already implemented. Does it still
make sense to clean up the PMU in the cases where riscv_iommu_remove()
is actually reached?

>
> > +static void riscv_iommu_pmu_start_all(struct riscv_iommu_pmu *pmu)
> > +{
> > +     int idx;
> > +
> > +     for_each_set_bit(idx, pmu->used_counters, pmu->num_counters) {
> > +             riscv_iommu_pmu_enable_ovf_intr(pmu, idx);
>
> Surely this should only touch the counter(s) that overflowed and have
> been handled? It might be cleaner to keep that within the IRQ handler
> itself.
>

Yes, let me move it into the IRQ handler and handle only the counters
that have overflowed.
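
Something like this rough sketch inside the loop over the overflowed
counters (using hw.config as you suggested, and leaving the cycle
counter's OF bit to be cleared when iohpmcycles is reprogrammed):

	riscv_iommu_pmu_update(event);

	/* clear OF only for the event counter that actually overflowed */
	if (idx > 0)
		writeq(event->hw.config & ~RISCV_IOMMU_IOHPMEVT_OF,
		       pmu->reg + RISCV_IOMMU_REG_IOHPMEVT_BASE + (idx - 1) * 8);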

> > +             riscv_iommu_pmu_enable_counter(pmu, idx);
>
> This needs to start all active counters together, not one-by-one.
>

Ok, I think we need a new wrapper to achieve it.
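
A rough sketch of what I have in mind, assuming at most 32 counters so
the used_counters bitmap fits in its first word:

	static void riscv_iommu_pmu_start_all(struct riscv_iommu_pmu *pmu)
	{
		/* single write: inhibit every unused counter, enable the rest */
		writel(~(u32)pmu->used_counters[0],
		       pmu->reg + RISCV_IOMMU_REG_IOCOUNTINH);
	}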

> > +     }
> > +}
> > +
> > +static void riscv_iommu_pmu_stop_all(struct riscv_iommu_pmu *pmu)
> > +{
> > +     writel(GENMASK_ULL(pmu->num_counters - 1, 0),
> > +            pmu->reg + RISCV_IOMMU_REG_IOCOUNTINH);
> > +}
> > +
> > +/* PMU APIs */
> > +static int riscv_iommu_pmu_set_period(struct perf_event *event)
> > +{
> > +     struct riscv_iommu_pmu *pmu = to_riscv_iommu_pmu(event->pmu);
> > +     struct hw_perf_event *hwc = &event->hw;
> > +     s64 left = local64_read(&hwc->period_left);
> > +     s64 period = hwc->sample_period;
> > +     u64 max_period = pmu->mask_counter;
> > +     int ret = 0;
> > +
> > +     if (unlikely(left <= -period)) {
> > +             left = period;
> > +             local64_set(&hwc->period_left, left);
> > +             hwc->last_period = period;
> > +             ret = 1;
> > +     }
> > +
> > +     if (unlikely(left <= 0)) {
> > +             left += period;
> > +             local64_set(&hwc->period_left, left);
> > +             hwc->last_period = period;
> > +             ret = 1;
> > +     }
>
> None of this is relevant or necessary. This is not a CPU PMU, so it
> can't support sampling because it doesn't have a meaningful context to
> sample.
>
> > +     /*
> > +      * Limit the maximum period to prevent the counter value
> > +      * from overtaking the one we are about to program. In
> > +      * effect we are reducing max_period to account for
> > +      * interrupt latency (and we are being very conservative).
> > +      */
> > +     if (left > (max_period >> 1))
> > +             left = (max_period >> 1);
> > +
> > +     local64_set(&hwc->prev_count, (u64)-left);
> > +     riscv_iommu_pmu_set_counter(pmu, hwc->idx, (u64)(-left) & max_period);
> > +     perf_event_update_userpage(event);
> > +
> > +     return ret;
> > +}
> > +
> > +static int riscv_iommu_pmu_event_init(struct perf_event *event)
> > +{
> > +     struct riscv_iommu_pmu *pmu = to_riscv_iommu_pmu(event->pmu);
> > +     struct hw_perf_event *hwc = &event->hw;
>
> You first need to validate that the event is for this PMU at all, and
> return -ENOENT if not.

Thanks for pointing it out. I will add the event type check here.

>
> > +     hwc->idx = -1;
> > +     hwc->config = event->attr.config;
> > +
> > +     if (!is_sampling_event(event)) {
>
> As above, you can't support sampling events anyway, so you should reject
> them with -EINVAL.
>
> You also need to validate event groups to ensure they don't contain more
> events than could ever be scheduled at once.

I will add event group validation in the next version.

>
> > +             /*
> > +              * For non-sampling runs, limit the sample_period to half
> > +              * of the counter width. That way, the new counter value
> > +              * is far less likely to overtake the previous one unless
> > +              * you have some serious IRQ latency issues.
> > +              */
> > +             hwc->sample_period = pmu->mask_counter >> 1;
> > +             hwc->last_period = hwc->sample_period;
> > +             local64_set(&hwc->period_left, hwc->sample_period);
> > +     }
> > +
> > +     return 0;
> > +}
> > +
> > +static void riscv_iommu_pmu_update(struct perf_event *event)
> > +{
> > +     struct hw_perf_event *hwc = &event->hw;
> > +     struct riscv_iommu_pmu *pmu = to_riscv_iommu_pmu(event->pmu);
> > +     u64 delta, prev, now;
> > +     u32 idx = hwc->idx;
> > +
> > +     do {
> > +             prev = local64_read(&hwc->prev_count);
> > +             now = riscv_iommu_pmu_get_counter(pmu, idx);
> > +     } while (local64_cmpxchg(&hwc->prev_count, prev, now) != prev);
> > +
> > +     delta = FIELD_GET(RISCV_IOMMU_IOHPMCTR_COUNTER, now - prev) & pmu->mask_counter;
> > +     local64_add(delta, &event->count);
> > +     local64_sub(delta, &hwc->period_left);
> > +}
> > +
> > +static void riscv_iommu_pmu_start(struct perf_event *event, int flags)
> > +{
> > +     struct riscv_iommu_pmu *pmu = to_riscv_iommu_pmu(event->pmu);
> > +     struct hw_perf_event *hwc = &event->hw;
> > +
> > +     if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
> > +             return;
> > +
> > +     if (flags & PERF_EF_RELOAD)
> > +             WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
> > +
> > +     hwc->state = 0;
> > +     riscv_iommu_pmu_set_period(event);
> > +     riscv_iommu_pmu_set_event(pmu, hwc->idx, hwc->config);
> > +     riscv_iommu_pmu_enable_ovf_intr(pmu, hwc->idx);
>
> This does nothing, since set_event has just implicitly written OF=0.
>
> ...unless, that is, the user was mischievous and also set bit 63 in
> event->attr.config, since you never sanitised the input ;)

Ok, and I think we need to mask the input with the valid width.
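
For example, something like this in riscv_iommu_pmu_event_init():

	hwc->config = event->attr.config & ~RISCV_IOMMU_IOHPMEVT_OF;

(or masking with the full set of valid iohpmevt fields rather than just
the OF bit).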

>
> > +     riscv_iommu_pmu_enable_counter(pmu, hwc->idx);
> > +
> > +     perf_event_update_userpage(event);
> > +}
> > +
> > +static void riscv_iommu_pmu_stop(struct perf_event *event, int flags)
> > +{
> > +     struct riscv_iommu_pmu *pmu = to_riscv_iommu_pmu(event->pmu);
> > +     struct hw_perf_event *hwc = &event->hw;
> > +
> > +     if (hwc->state & PERF_HES_STOPPED)
> > +             return;
> > +
> > +     riscv_iommu_pmu_set_event(pmu, hwc->idx, RISCV_IOMMU_HPMEVENT_INVALID);
> > +     riscv_iommu_pmu_disable_counter(pmu, hwc->idx);
> > +
> > +     if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE))
> > +             riscv_iommu_pmu_update(event);
> > +
> > +     hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
> > +}
> > +
> > +static int riscv_iommu_pmu_add(struct perf_event *event, int flags)
> > +{
> > +     struct hw_perf_event *hwc = &event->hw;
> > +     struct riscv_iommu_pmu *pmu = to_riscv_iommu_pmu(event->pmu);
> > +     unsigned int num_counters = pmu->num_counters;
> > +     int idx;
> > +
> > +     /* Reserve index zero for iohpmcycles */
> > +     if (get_event(event) == RISCV_IOMMU_HPMEVENT_CYCLE)
> > +             idx = 0;
> > +     else
> > +             idx = find_next_zero_bit(pmu->used_counters, num_counters, 1);
> > +
> > +     if (idx == num_counters)
> > +             return -EAGAIN;
>
> But stomping a cycles event on top of an existing cycles event is fine?

Yes, we need a way to check this situation and return failure.
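
Perhaps something along these lines in riscv_iommu_pmu_add() (a rough
sketch):

	/* reserve index zero for iohpmcycles; fail if it is already taken */
	if (get_event(event) == RISCV_IOMMU_HPMEVENT_CYCLE) {
		if (test_and_set_bit(0, pmu->used_counters))
			return -EAGAIN;
		idx = 0;
	} else {
		idx = find_next_zero_bit(pmu->used_counters, num_counters, 1);
		if (idx == num_counters)
			return -EAGAIN;
		set_bit(idx, pmu->used_counters);
	}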

>
> > +     set_bit(idx, pmu->used_counters);
> > +
> > +     pmu->events[idx] = event;
> > +     hwc->idx = idx;
> > +     hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
> > +
> > +     if (flags & PERF_EF_START)
> > +             riscv_iommu_pmu_start(event, flags);
> > +
> > +     /* Propagate changes to the userspace mapping. */
> > +     perf_event_update_userpage(event);
> > +
> > +     return 0;
> > +}
> > +
> > +static void riscv_iommu_pmu_read(struct perf_event *event)
> > +{
> > +     riscv_iommu_pmu_update(event);
> > +}
> > +
> > +static void riscv_iommu_pmu_del(struct perf_event *event, int flags)
> > +{
> > +     struct hw_perf_event *hwc = &event->hw;
> > +     struct riscv_iommu_pmu *pmu = to_riscv_iommu_pmu(event->pmu);
> > +     int idx = hwc->idx;
> > +
> > +     riscv_iommu_pmu_stop(event, PERF_EF_UPDATE);
> > +     pmu->events[idx] = NULL;
> > +     clear_bit(idx, pmu->used_counters);
> > +     perf_event_update_userpage(event);
> > +}
> > +
> > +irqreturn_t riscv_iommu_pmu_handle_irq(struct riscv_iommu_pmu *pmu)
> > +{
> > +     struct perf_sample_data data;
> > +     struct pt_regs *regs;
> > +     u32 ovf = readl(pmu->reg + RISCV_IOMMU_REG_IOCOUNTOVF);
> > +     int idx;
> > +
> > +     if (!ovf)
> > +             return IRQ_NONE;
> > +
> > +     riscv_iommu_pmu_stop_all(pmu);
> > +
> > +     regs = get_irq_regs();
>
> What do these regs represent? If you look at the perf_event_open ABI,
> you'll see that events can target various combinations of CPU and/or
> pid, but there is no encoding for "whichever CPU takes the IOMMU PMU
> interrupt". Thus whatever you get here is more than likely not what the
> user asked for, and this is why non-CPU PMUs cannot reasonably support
> sampling ;)
>

There are some comments here about removing sampling support that I
don't completely understand, so please correct me if I'm misreading them.
If the user doesn't specify a target CPU, does that usage make sense for
a non-CPU PMU at all? If so, should the user be expected to know that
they are dealing with a non-CPU PMU and only use it system-wide?
Otherwise the behavior would be unexpected.
Could you please help me understand this better? Thank you

> > +
> > +     for_each_set_bit(idx, (unsigned long *)&ovf, pmu->num_counters) {
> > +             struct perf_event *event = pmu->events[idx];
> > +             struct hw_perf_event *hwc;
> > +
> > +             if (WARN_ON_ONCE(!event) || !is_sampling_event(event))
>
> You still need to handle counter rollover for all events, otherwise they
> can start losing counts and rapidly turn to nonsense. Admittedly it's
> largely theoretical with full 64-bit counters, but still...

Ok. Let's fix it in the next version.
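
The overflow loop could then look roughly like this (sketch only, with
data/regs coming from the surrounding handler): fold the wrapped count
into every overflowed counter, and keep the sampling path only for
sampling events:

	for_each_set_bit(idx, (unsigned long *)&ovf, pmu->num_counters) {
		struct perf_event *event = pmu->events[idx];

		if (WARN_ON_ONCE(!event))
			continue;

		/* Account the rollover for all events, sampling or not. */
		riscv_iommu_pmu_update(event);

		if (!is_sampling_event(event))
			continue;

		perf_sample_data_init(&data, 0, event->hw.last_period);
		if (!riscv_iommu_pmu_set_period(event))
			continue;

		if (perf_event_overflow(event, &data, regs))
			riscv_iommu_pmu_stop(event, 0);
	}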

>
> > +                     continue;
> > +
> > +             hwc = &event->hw;
> > +
> > +             riscv_iommu_pmu_update(event);
> > +             perf_sample_data_init(&data, 0, hwc->last_period);
> > +             if (!riscv_iommu_pmu_set_period(event))
> > +                     continue;
> > +
> > +             if (perf_event_overflow(event, &data, regs))
> > +                     riscv_iommu_pmu_stop(event, 0);
> > +     }
> > +
> > +     riscv_iommu_pmu_start_all(pmu);
> > +
> > +     return IRQ_HANDLED;
> > +}
> > +
> > +int riscv_iommu_pmu_init(struct riscv_iommu_pmu *pmu, void __iomem *reg,
> > +                      const char *dev_name)
> > +{
> > +     char *name;
> > +     int ret;
> > +
> > +     pmu->reg = reg;
> > +     pmu->num_counters = RISCV_IOMMU_HPM_COUNTER_NUM;
> > +     pmu->mask_counter = RISCV_IOMMU_IOHPMCTR_COUNTER;
>
> Initially this looks weird - why bother storing constants in memory if
> they're constant? - but I see the spec implies they are not necessarily
> fixed, and we can only actually assume at least one counter at least 32
> bits wide, so I guess this is really more of a placeholder? Is there a
> well-defined way we're supposed to be able to discover these, like some
> more ID register fields somewhere, or writing all 1s to various
> registers to see what sticks, or is it liable to be a mess of just
> having to know what each implementation has by matching DT compatibles
> and/or PCI IDs?

As you mentioned, the idea is that the counter width and the number of
counters are implementation-defined.
My original plan was to submit another series adding vendor-specific
implementations, using match data to supply these two values.
However, based on your suggestion, I think we only need match data for
the number of counters, since mask_counter can be discovered by writing
all 1s and reading back.

In the next version, I could check whether pmu->num_counters is 0 before
falling back to the default value, which seems more reasonable. Does
that make sense to you?
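
For the width, something like this at init time could work, with the
counters already inhibited (rough sketch, untested; I would still keep
the spec-defined mask as the fallback):

	u64 probe;

	/* Inhibit counting, write all 1s to iohpmctr1, then read it back. */
	writel(GENMASK(31, 0), pmu->reg + RISCV_IOMMU_REG_IOCOUNTINH);
	writeq(GENMASK_ULL(63, 0), pmu->reg + RISCV_IOMMU_REG_IOHPMCTR_BASE);
	probe = readq(pmu->reg + RISCV_IOMMU_REG_IOHPMCTR_BASE);
	writeq(0, pmu->reg + RISCV_IOMMU_REG_IOHPMCTR_BASE);

	pmu->mask_counter = probe ? GENMASK_ULL(fls64(probe) - 1, 0) :
				    RISCV_IOMMU_IOHPMCTR_COUNTER;

	if (!pmu->num_counters)
		pmu->num_counters = RISCV_IOMMU_HPM_COUNTER_NUM;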

>
> > +
> > +     pmu->pmu = (struct pmu) {
> > +             .task_ctx_nr    = perf_invalid_context,
> > +             .event_init     = riscv_iommu_pmu_event_init,
> > +             .add            = riscv_iommu_pmu_add,
> > +             .del            = riscv_iommu_pmu_del,
> > +             .start          = riscv_iommu_pmu_start,
> > +             .stop           = riscv_iommu_pmu_stop,
> > +             .read           = riscv_iommu_pmu_read,
> > +             .attr_groups    = riscv_iommu_pmu_attr_grps,
> > +             .capabilities   = PERF_PMU_CAP_NO_EXCLUDE,
> > +             .module         = THIS_MODULE,
> > +     };
>
> The new thing is that you can now set pmu.parent to the IOMMU device so
> their relationship is clear in sysfs, rather than having to play tricks
> with the PMU name.

Let me add it.
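
e.g. (assuming riscv_iommu_pmu_init() also gets the IOMMU's struct
device passed in as 'dev'):

	/* Make the PMU show up under the IOMMU device in sysfs. */
	pmu->pmu.parent = dev;

	ret = perf_pmu_register(&pmu->pmu, name, -1);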

>
> > +
> > +     name = kasprintf(GFP_KERNEL, "riscv_iommu_pmu_%s", dev_name);
> > +
> > +     ret = perf_pmu_register(&pmu->pmu, name, -1);
> > +     if (ret) {
> > +             pr_err("Failed to register riscv_iommu_pmu_%s: %d\n",
> > +                    dev_name, ret);
> > +             return ret;
> > +     }
> > +
> > +     /* Stop all counters and later start the counter with perf */
> > +     riscv_iommu_pmu_stop_all(pmu);
> > +
> > +     pr_info("riscv_iommu_pmu_%s: Registered with %d counters\n",
> > +             dev_name, pmu->num_counters);
> > +
> > +     return 0;
> > +}
> > +
> > +void riscv_iommu_pmu_uninit(struct riscv_iommu_pmu *pmu)
> > +{
> > +     int idx;
> > +
> > +     /* Disable interrupt and functions */
> > +     for_each_set_bit(idx, pmu->used_counters, pmu->num_counters) {
>
> This will never do anything, since even if we could ever get here, it
> would not be at a time when there are any active events. Unregistering
> an in-use PMU does not end well (hence why standalone PMU drivers need
> to use suppress_bind_attrs)...

Okay, let's remove it. Thanks
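
So the uninit path would shrink to something like this (sketch):

void riscv_iommu_pmu_uninit(struct riscv_iommu_pmu *pmu)
{
	/* No events can still be active here; just quiesce and unregister. */
	riscv_iommu_pmu_stop_all(pmu);
	perf_pmu_unregister(&pmu->pmu);
}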

>
> Thanks,
> Robin.
>
> > +             riscv_iommu_pmu_disable_counter(pmu, idx);
> > +             riscv_iommu_pmu_disable_ovf_intr(pmu, idx);
> > +     }
> > +
> > +     perf_pmu_unregister(&pmu->pmu);
> > +}
> > diff --git a/drivers/iommu/riscv/iommu.h b/drivers/iommu/riscv/iommu.h
> > index b1c4664542b4..92659a8a75ae 100644
> > --- a/drivers/iommu/riscv/iommu.h
> > +++ b/drivers/iommu/riscv/iommu.h
> > @@ -60,11 +60,19 @@ struct riscv_iommu_device {
> >       unsigned int ddt_mode;
> >       dma_addr_t ddt_phys;
> >       u64 *ddt_root;
> > +
> > +     /* hardware performance monitor */
> > +     struct riscv_iommu_pmu pmu;
> >   };
> >
> >   int riscv_iommu_init(struct riscv_iommu_device *iommu);
> >   void riscv_iommu_remove(struct riscv_iommu_device *iommu);
> >
> > +int riscv_iommu_pmu_init(struct riscv_iommu_pmu *pmu, void __iomem *reg,
> > +                      const char *name);
> > +void riscv_iommu_pmu_uninit(struct riscv_iommu_pmu *pmu);
> > +irqreturn_t riscv_iommu_pmu_handle_irq(struct riscv_iommu_pmu *pmu);
> > +
> >   #define riscv_iommu_readl(iommu, addr) \
> >       readl_relaxed((iommu)->reg + (addr))
> >
>
Re: [External] [PATCH 1/2] iommu/riscv: add RISC-V IOMMU PMU support
Posted by Xu Lu 11 months ago
Hi Zong,

On Wed, Jan 15, 2025 at 11:03 AM Zong Li <zong.li@sifive.com> wrote:
>
> Support for the RISC-V IOMMU hardware performance monitor includes
> both counting and sampling modes.
>
> The specification does not define an event ID for counting the
> number of clock cycles, meaning there is no associated `iohpmevt0`.
> However, we need an event for counting cycle, so we reserve the
> maximum event ID for this purpose.
>
> Signed-off-by: Zong Li <zong.li@sifive.com>
> Tested-by: Xu Lu <luxu.kernel@bytedance.com>
> ---
>  drivers/iommu/riscv/Makefile     |   2 +-
>  drivers/iommu/riscv/iommu-bits.h |  16 +
>  drivers/iommu/riscv/iommu-pmu.c  | 486 +++++++++++++++++++++++++++++++
>  drivers/iommu/riscv/iommu.h      |   8 +
>  4 files changed, 511 insertions(+), 1 deletion(-)
>  create mode 100644 drivers/iommu/riscv/iommu-pmu.c
>
> diff --git a/drivers/iommu/riscv/Makefile b/drivers/iommu/riscv/Makefile
> index f54c9ed17d41..d36625a1fd08 100644
> --- a/drivers/iommu/riscv/Makefile
> +++ b/drivers/iommu/riscv/Makefile
> @@ -1,3 +1,3 @@
>  # SPDX-License-Identifier: GPL-2.0-only
> -obj-$(CONFIG_RISCV_IOMMU) += iommu.o iommu-platform.o
> +obj-$(CONFIG_RISCV_IOMMU) += iommu.o iommu-platform.o iommu-pmu.o
>  obj-$(CONFIG_RISCV_IOMMU_PCI) += iommu-pci.o
> diff --git a/drivers/iommu/riscv/iommu-bits.h b/drivers/iommu/riscv/iommu-bits.h
> index 98daf0e1a306..60523449f016 100644
> --- a/drivers/iommu/riscv/iommu-bits.h
> +++ b/drivers/iommu/riscv/iommu-bits.h
> @@ -17,6 +17,7 @@
>  #include <linux/types.h>
>  #include <linux/bitfield.h>
>  #include <linux/bits.h>
> +#include <linux/perf_event.h>
>
>  /*
>   * Chapter 5: Memory Mapped register interface
> @@ -207,6 +208,7 @@ enum riscv_iommu_ddtp_modes {
>  /* 5.22 Performance monitoring event counters (31 * 64bits) */
>  #define RISCV_IOMMU_REG_IOHPMCTR_BASE  0x0068
>  #define RISCV_IOMMU_REG_IOHPMCTR(_n)   (RISCV_IOMMU_REG_IOHPMCTR_BASE + ((_n) * 0x8))
> +#define RISCV_IOMMU_IOHPMCTR_COUNTER   GENMASK_ULL(63, 0)
>
>  /* 5.23 Performance monitoring event selectors (31 * 64bits) */
>  #define RISCV_IOMMU_REG_IOHPMEVT_BASE  0x0160
> @@ -250,6 +252,20 @@ enum riscv_iommu_hpmevent_id {
>         RISCV_IOMMU_HPMEVENT_MAX        = 9
>  };
>
> +/* Use maximum event ID for cycle event */
> +#define RISCV_IOMMU_HPMEVENT_CYCLE     GENMASK_ULL(14, 0)
> +
> +#define RISCV_IOMMU_HPM_COUNTER_NUM    32
> +
> +struct riscv_iommu_pmu {
> +       struct pmu pmu;
> +       void __iomem *reg;
> +       int num_counters;
> +       u64 mask_counter;
> +       struct perf_event *events[RISCV_IOMMU_IOHPMEVT_CNT + 1];
> +       DECLARE_BITMAP(used_counters, RISCV_IOMMU_IOHPMEVT_CNT + 1);
> +};
> +
>  /* 5.24 Translation request IOVA (64bits) */
>  #define RISCV_IOMMU_REG_TR_REQ_IOVA     0x0258
>  #define RISCV_IOMMU_TR_REQ_IOVA_VPN    GENMASK_ULL(63, 12)
> diff --git a/drivers/iommu/riscv/iommu-pmu.c b/drivers/iommu/riscv/iommu-pmu.c
> new file mode 100644
> index 000000000000..74eb1525cd32
> --- /dev/null
> +++ b/drivers/iommu/riscv/iommu-pmu.c
> @@ -0,0 +1,486 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Copyright (C) 2024 SiFive
> + *
> + * Authors
> + *     Zong Li <zong.li@sifive.com>
> + */
> +
> +#include <linux/io-64-nonatomic-hi-lo.h>
> +
> +#include "iommu.h"
> +#include "iommu-bits.h"
> +
> +#define to_riscv_iommu_pmu(p) (container_of(p, struct riscv_iommu_pmu, pmu))
> +
> +#define RISCV_IOMMU_PMU_ATTR_EXTRACTOR(_name, _mask)                   \
> +       static inline u32 get_##_name(struct perf_event *event)         \
> +       {                                                               \
> +               return FIELD_GET(_mask, event->attr.config);            \
> +       }                                                               \
> +
> +RISCV_IOMMU_PMU_ATTR_EXTRACTOR(event, RISCV_IOMMU_IOHPMEVT_EVENTID);
> +RISCV_IOMMU_PMU_ATTR_EXTRACTOR(partial_matching, RISCV_IOMMU_IOHPMEVT_DMASK);
> +RISCV_IOMMU_PMU_ATTR_EXTRACTOR(pid_pscid, RISCV_IOMMU_IOHPMEVT_PID_PSCID);
> +RISCV_IOMMU_PMU_ATTR_EXTRACTOR(did_gscid, RISCV_IOMMU_IOHPMEVT_DID_GSCID);
> +RISCV_IOMMU_PMU_ATTR_EXTRACTOR(filter_pid_pscid, RISCV_IOMMU_IOHPMEVT_PV_PSCV);
> +RISCV_IOMMU_PMU_ATTR_EXTRACTOR(filter_did_gscid, RISCV_IOMMU_IOHPMEVT_DV_GSCV);
> +RISCV_IOMMU_PMU_ATTR_EXTRACTOR(filter_id_type, RISCV_IOMMU_IOHPMEVT_IDT);
> +
> +/* Formats */
> +PMU_FORMAT_ATTR(event, "config:0-14");
> +PMU_FORMAT_ATTR(partial_matching, "config:15");
> +PMU_FORMAT_ATTR(pid_pscid, "config:16-35");
> +PMU_FORMAT_ATTR(did_gscid, "config:36-59");
> +PMU_FORMAT_ATTR(filter_pid_pscid, "config:60");
> +PMU_FORMAT_ATTR(filter_did_gscid, "config:61");
> +PMU_FORMAT_ATTR(filter_id_type, "config:62");
> +
> +static struct attribute *riscv_iommu_pmu_formats[] = {
> +       &format_attr_event.attr,
> +       &format_attr_partial_matching.attr,
> +       &format_attr_pid_pscid.attr,
> +       &format_attr_did_gscid.attr,
> +       &format_attr_filter_pid_pscid.attr,
> +       &format_attr_filter_did_gscid.attr,
> +       &format_attr_filter_id_type.attr,
> +       NULL,
> +};
> +
> +static const struct attribute_group riscv_iommu_pmu_format_group = {
> +       .name = "format",
> +       .attrs = riscv_iommu_pmu_formats,
> +};
> +
> +/* Events */
> +static ssize_t riscv_iommu_pmu_event_show(struct device *dev,
> +                                         struct device_attribute *attr,
> +                                         char *page)
> +{
> +       struct perf_pmu_events_attr *pmu_attr;
> +
> +       pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
> +
> +       return sprintf(page, "event=0x%02llx\n", pmu_attr->id);
> +}
> +
> +PMU_EVENT_ATTR(cycle, event_attr_cycle,
> +              RISCV_IOMMU_HPMEVENT_CYCLE, riscv_iommu_pmu_event_show);
> +PMU_EVENT_ATTR(dont_count, event_attr_dont_count,
> +              RISCV_IOMMU_HPMEVENT_INVALID, riscv_iommu_pmu_event_show);
> +PMU_EVENT_ATTR(untranslated_req, event_attr_untranslated_req,
> +              RISCV_IOMMU_HPMEVENT_URQ, riscv_iommu_pmu_event_show);
> +PMU_EVENT_ATTR(translated_req, event_attr_translated_req,
> +              RISCV_IOMMU_HPMEVENT_TRQ, riscv_iommu_pmu_event_show);
> +PMU_EVENT_ATTR(ats_trans_req, event_attr_ats_trans_req,
> +              RISCV_IOMMU_HPMEVENT_ATS_RQ, riscv_iommu_pmu_event_show);
> +PMU_EVENT_ATTR(tlb_miss, event_attr_tlb_miss,
> +              RISCV_IOMMU_HPMEVENT_TLB_MISS, riscv_iommu_pmu_event_show);
> +PMU_EVENT_ATTR(ddt_walks, event_attr_ddt_walks,
> +              RISCV_IOMMU_HPMEVENT_DD_WALK, riscv_iommu_pmu_event_show);
> +PMU_EVENT_ATTR(pdt_walks, event_attr_pdt_walks,
> +              RISCV_IOMMU_HPMEVENT_PD_WALK, riscv_iommu_pmu_event_show);
> +PMU_EVENT_ATTR(s_vs_pt_walks, event_attr_s_vs_pt_walks,
> +              RISCV_IOMMU_HPMEVENT_S_VS_WALKS, riscv_iommu_pmu_event_show);
> +PMU_EVENT_ATTR(g_pt_walks, event_attr_g_pt_walks,
> +              RISCV_IOMMU_HPMEVENT_G_WALKS, riscv_iommu_pmu_event_show);
> +
> +static struct attribute *riscv_iommu_pmu_events[] = {
> +       &event_attr_cycle.attr.attr,
> +       &event_attr_dont_count.attr.attr,
> +       &event_attr_untranslated_req.attr.attr,
> +       &event_attr_translated_req.attr.attr,
> +       &event_attr_ats_trans_req.attr.attr,
> +       &event_attr_tlb_miss.attr.attr,
> +       &event_attr_ddt_walks.attr.attr,
> +       &event_attr_pdt_walks.attr.attr,
> +       &event_attr_s_vs_pt_walks.attr.attr,
> +       &event_attr_g_pt_walks.attr.attr,
> +       NULL,
> +};
> +
> +static const struct attribute_group riscv_iommu_pmu_events_group = {
> +       .name = "events",
> +       .attrs = riscv_iommu_pmu_events,
> +};
> +
> +static const struct attribute_group *riscv_iommu_pmu_attr_grps[] = {
> +       &riscv_iommu_pmu_format_group,
> +       &riscv_iommu_pmu_events_group,
> +       NULL,
> +};
> +
> +/* PMU Operations */
> +static void riscv_iommu_pmu_set_counter(struct riscv_iommu_pmu *pmu, u32 idx,
> +                                       u64 value)
> +{
> +       void __iomem *addr = pmu->reg + RISCV_IOMMU_REG_IOHPMCYCLES;
> +
> +       if (WARN_ON_ONCE(idx < 0 || idx > pmu->num_counters))
> +               return;
> +
> +       if (idx == 0)
> +               value = (value & ~RISCV_IOMMU_IOHPMCYCLES_OF) |
> +                        (readq(addr) & RISCV_IOMMU_IOHPMCYCLES_OF);
> +
> +       writeq(FIELD_PREP(RISCV_IOMMU_IOHPMCTR_COUNTER, value), addr + idx * 8);
> +}
> +
> +static u64 riscv_iommu_pmu_get_counter(struct riscv_iommu_pmu *pmu, u32 idx)
> +{
> +       void __iomem *addr = pmu->reg + RISCV_IOMMU_REG_IOHPMCYCLES;
> +       u64 value;
> +
> +       if (WARN_ON_ONCE(idx < 0 || idx > pmu->num_counters))
> +               return -EINVAL;
> +
> +       value = readq(addr + idx * 8);
> +
> +       if (idx == 0)
> +               return FIELD_GET(RISCV_IOMMU_IOHPMCYCLES_COUNTER, value);
> +
> +       return FIELD_GET(RISCV_IOMMU_IOHPMCTR_COUNTER, value);
> +}
> +
> +static u64 riscv_iommu_pmu_get_event(struct riscv_iommu_pmu *pmu, u32 idx)
> +{
> +       void __iomem *addr = pmu->reg + RISCV_IOMMU_REG_IOHPMEVT_BASE;
> +
> +       if (WARN_ON_ONCE(idx < 0 || idx > pmu->num_counters))
> +               return 0;
> +
> +       /* There is no associated IOHPMEVT0 for IOHPMCYCLES */
> +       if (idx == 0)
> +               return 0;
> +
> +       return readq(addr + (idx - 1) * 8);
> +}
> +
> +static void riscv_iommu_pmu_set_event(struct riscv_iommu_pmu *pmu, u32 idx,
> +                                     u64 value)
> +{
> +       void __iomem *addr = pmu->reg + RISCV_IOMMU_REG_IOHPMEVT_BASE;
> +
> +       if (WARN_ON_ONCE(idx < 0 || idx > pmu->num_counters))
> +               return;
> +
> +       /* There is no associated IOHPMEVT0 for IOHPMCYCLES */
> +       if (idx == 0)
> +               return;
> +
> +       writeq(value, addr + (idx - 1) * 8);
> +}
> +
> +static void riscv_iommu_pmu_enable_counter(struct riscv_iommu_pmu *pmu, u32 idx)
> +{
> +       void __iomem *addr = pmu->reg + RISCV_IOMMU_REG_IOCOUNTINH;
> +       u32 value = readl(addr);
> +
> +       writel(value & ~BIT(idx), addr);
> +}
> +
> +static void riscv_iommu_pmu_disable_counter(struct riscv_iommu_pmu *pmu, u32 idx)
> +{
> +       void __iomem *addr = pmu->reg + RISCV_IOMMU_REG_IOCOUNTINH;
> +       u32 value = readl(addr);
> +
> +       writel(value | BIT(idx), addr);
> +}
> +
> +static void riscv_iommu_pmu_enable_ovf_intr(struct riscv_iommu_pmu *pmu, u32 idx)
> +{
> +       u64 value;
> +
> +       if (get_event(pmu->events[idx]) == RISCV_IOMMU_HPMEVENT_CYCLE) {
> +               value = riscv_iommu_pmu_get_counter(pmu, idx) & ~RISCV_IOMMU_IOHPMCYCLES_OF;
> +               writeq(value, pmu->reg + RISCV_IOMMU_REG_IOHPMCYCLES);
> +       } else {
> +               value = riscv_iommu_pmu_get_event(pmu, idx) & ~RISCV_IOMMU_IOHPMEVT_OF;
> +               writeq(value, pmu->reg + RISCV_IOMMU_REG_IOHPMEVT_BASE + (idx - 1) * 8);
> +       }
> +}
> +
> +static void riscv_iommu_pmu_disable_ovf_intr(struct riscv_iommu_pmu *pmu, u32 idx)
> +{
> +       u64 value;
> +
> +       if (get_event(pmu->events[idx]) == RISCV_IOMMU_HPMEVENT_CYCLE) {
> +               value = riscv_iommu_pmu_get_counter(pmu, idx) | RISCV_IOMMU_IOHPMCYCLES_OF;
> +               writeq(value, pmu->reg + RISCV_IOMMU_REG_IOHPMCYCLES);
> +       } else {
> +               value = riscv_iommu_pmu_get_event(pmu, idx) | RISCV_IOMMU_IOHPMEVT_OF;
> +               writeq(value, pmu->reg + RISCV_IOMMU_REG_IOHPMEVT_BASE + (idx - 1) * 8);
> +       }
> +}
> +
> +static void riscv_iommu_pmu_start_all(struct riscv_iommu_pmu *pmu)
> +{
> +       int idx;
> +
> +       for_each_set_bit(idx, pmu->used_counters, pmu->num_counters) {
> +               riscv_iommu_pmu_enable_ovf_intr(pmu, idx);
> +               riscv_iommu_pmu_enable_counter(pmu, idx);
> +       }
> +}
> +
> +static void riscv_iommu_pmu_stop_all(struct riscv_iommu_pmu *pmu)
> +{
> +       writel(GENMASK_ULL(pmu->num_counters - 1, 0),
> +              pmu->reg + RISCV_IOMMU_REG_IOCOUNTINH);
> +}
> +
> +/* PMU APIs */
> +static int riscv_iommu_pmu_set_period(struct perf_event *event)
> +{
> +       struct riscv_iommu_pmu *pmu = to_riscv_iommu_pmu(event->pmu);
> +       struct hw_perf_event *hwc = &event->hw;
> +       s64 left = local64_read(&hwc->period_left);
> +       s64 period = hwc->sample_period;
> +       u64 max_period = pmu->mask_counter;
> +       int ret = 0;
> +
> +       if (unlikely(left <= -period)) {
> +               left = period;
> +               local64_set(&hwc->period_left, left);
> +               hwc->last_period = period;
> +               ret = 1;
> +       }
> +
> +       if (unlikely(left <= 0)) {
> +               left += period;
> +               local64_set(&hwc->period_left, left);
> +               hwc->last_period = period;
> +               ret = 1;
> +       }
> +
> +       /*
> +        * Limit the maximum period to prevent the counter value
> +        * from overtaking the one we are about to program. In
> +        * effect we are reducing max_period to account for
> +        * interrupt latency (and we are being very conservative).
> +        */
> +       if (left > (max_period >> 1))
> +               left = (max_period >> 1);
> +
> +       local64_set(&hwc->prev_count, (u64)-left);
> +       riscv_iommu_pmu_set_counter(pmu, hwc->idx, (u64)(-left) & max_period);
> +       perf_event_update_userpage(event);
> +
> +       return ret;
> +}
> +
> +static int riscv_iommu_pmu_event_init(struct perf_event *event)
> +{
> +       struct riscv_iommu_pmu *pmu = to_riscv_iommu_pmu(event->pmu);
> +       struct hw_perf_event *hwc = &event->hw;
> +
> +       hwc->idx = -1;
> +       hwc->config = event->attr.config;
> +
> +       if (!is_sampling_event(event)) {
> +               /*
> +                * For non-sampling runs, limit the sample_period to half
> +                * of the counter width. That way, the new counter value
> +                * is far less likely to overtake the previous one unless
> +                * you have some serious IRQ latency issues.
> +                */
> +               hwc->sample_period = pmu->mask_counter >> 1;
> +               hwc->last_period = hwc->sample_period;
> +               local64_set(&hwc->period_left, hwc->sample_period);
> +       }
> +
> +       return 0;
> +}
> +
> +static void riscv_iommu_pmu_update(struct perf_event *event)
> +{
> +       struct hw_perf_event *hwc = &event->hw;
> +       struct riscv_iommu_pmu *pmu = to_riscv_iommu_pmu(event->pmu);
> +       u64 delta, prev, now;
> +       u32 idx = hwc->idx;
> +
> +       do {
> +               prev = local64_read(&hwc->prev_count);
> +               now = riscv_iommu_pmu_get_counter(pmu, idx);
> +       } while (local64_cmpxchg(&hwc->prev_count, prev, now) != prev);
> +
> +       delta = FIELD_GET(RISCV_IOMMU_IOHPMCTR_COUNTER, now - prev) & pmu->mask_counter;
> +       local64_add(delta, &event->count);
> +       local64_sub(delta, &hwc->period_left);
> +}
> +
> +static void riscv_iommu_pmu_start(struct perf_event *event, int flags)
> +{
> +       struct riscv_iommu_pmu *pmu = to_riscv_iommu_pmu(event->pmu);
> +       struct hw_perf_event *hwc = &event->hw;
> +
> +       if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
> +               return;
> +
> +       if (flags & PERF_EF_RELOAD)
> +               WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
> +
> +       hwc->state = 0;
> +       riscv_iommu_pmu_set_period(event);
> +       riscv_iommu_pmu_set_event(pmu, hwc->idx, hwc->config);
> +       riscv_iommu_pmu_enable_ovf_intr(pmu, hwc->idx);
> +       riscv_iommu_pmu_enable_counter(pmu, hwc->idx);
> +
> +       perf_event_update_userpage(event);
> +}
> +
> +static void riscv_iommu_pmu_stop(struct perf_event *event, int flags)
> +{
> +       struct riscv_iommu_pmu *pmu = to_riscv_iommu_pmu(event->pmu);
> +       struct hw_perf_event *hwc = &event->hw;
> +
> +       if (hwc->state & PERF_HES_STOPPED)
> +               return;
> +
> +       riscv_iommu_pmu_set_event(pmu, hwc->idx, RISCV_IOMMU_HPMEVENT_INVALID);
> +       riscv_iommu_pmu_disable_counter(pmu, hwc->idx);
> +
> +       if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE))
> +               riscv_iommu_pmu_update(event);
> +
> +       hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
> +}
> +
> +static int riscv_iommu_pmu_add(struct perf_event *event, int flags)
> +{
> +       struct hw_perf_event *hwc = &event->hw;
> +       struct riscv_iommu_pmu *pmu = to_riscv_iommu_pmu(event->pmu);
> +       unsigned int num_counters = pmu->num_counters;
> +       int idx;
> +
> +       /* Reserve index zero for iohpmcycles */
> +       if (get_event(event) == RISCV_IOMMU_HPMEVENT_CYCLE)
> +               idx = 0;
> +       else
> +               idx = find_next_zero_bit(pmu->used_counters, num_counters, 1);
> +
> +       if (idx == num_counters)
> +               return -EAGAIN;
> +
> +       set_bit(idx, pmu->used_counters);
> +
> +       pmu->events[idx] = event;
> +       hwc->idx = idx;
> +       hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
> +
> +       if (flags & PERF_EF_START)
> +               riscv_iommu_pmu_start(event, flags);
> +
> +       /* Propagate changes to the userspace mapping. */
> +       perf_event_update_userpage(event);
> +
> +       return 0;
> +}
> +
> +static void riscv_iommu_pmu_read(struct perf_event *event)
> +{
> +       riscv_iommu_pmu_update(event);
> +}
> +
> +static void riscv_iommu_pmu_del(struct perf_event *event, int flags)
> +{
> +       struct hw_perf_event *hwc = &event->hw;
> +       struct riscv_iommu_pmu *pmu = to_riscv_iommu_pmu(event->pmu);
> +       int idx = hwc->idx;
> +
> +       riscv_iommu_pmu_stop(event, PERF_EF_UPDATE);
> +       pmu->events[idx] = NULL;
> +       clear_bit(idx, pmu->used_counters);
> +       perf_event_update_userpage(event);
> +}
> +
> +irqreturn_t riscv_iommu_pmu_handle_irq(struct riscv_iommu_pmu *pmu)
> +{
> +       struct perf_sample_data data;
> +       struct pt_regs *regs;
> +       u32 ovf = readl(pmu->reg + RISCV_IOMMU_REG_IOCOUNTOVF);
> +       int idx;
> +
> +       if (!ovf)
> +               return IRQ_NONE;
> +
> +       riscv_iommu_pmu_stop_all(pmu);
> +
> +       regs = get_irq_regs();
> +
> +       for_each_set_bit(idx, (unsigned long *)&ovf, pmu->num_counters) {
> +               struct perf_event *event = pmu->events[idx];
> +               struct hw_perf_event *hwc;
> +
> +               if (WARN_ON_ONCE(!event) || !is_sampling_event(event))
> +                       continue;
> +
> +               hwc = &event->hw;
> +
> +               riscv_iommu_pmu_update(event);
> +               perf_sample_data_init(&data, 0, hwc->last_period);
> +               if (!riscv_iommu_pmu_set_period(event))
> +                       continue;
> +
> +               if (perf_event_overflow(event, &data, regs))
> +                       riscv_iommu_pmu_stop(event, 0);
> +       }
> +
> +       riscv_iommu_pmu_start_all(pmu);
> +
> +       return IRQ_HANDLED;
> +}
> +
> +int riscv_iommu_pmu_init(struct riscv_iommu_pmu *pmu, void __iomem *reg,
> +                        const char *dev_name)
> +{
> +       char *name;
> +       int ret;
> +
> +       pmu->reg = reg;
> +       pmu->num_counters = RISCV_IOMMU_HPM_COUNTER_NUM;
> +       pmu->mask_counter = RISCV_IOMMU_IOHPMCTR_COUNTER;
> +
> +       pmu->pmu = (struct pmu) {
> +               .task_ctx_nr    = perf_invalid_context,
> +               .event_init     = riscv_iommu_pmu_event_init,
> +               .add            = riscv_iommu_pmu_add,
> +               .del            = riscv_iommu_pmu_del,
> +               .start          = riscv_iommu_pmu_start,
> +               .stop           = riscv_iommu_pmu_stop,
> +               .read           = riscv_iommu_pmu_read,
> +               .attr_groups    = riscv_iommu_pmu_attr_grps,
> +               .capabilities   = PERF_PMU_CAP_NO_EXCLUDE,
> +               .module         = THIS_MODULE,
> +       };
> +
> +       name = kasprintf(GFP_KERNEL, "riscv_iommu_pmu_%s", dev_name);

The dev_name of the RISC-V IOMMU is usually 'riscv,iommu'. If we compose
the IOMMU PMU name from the IOMMU dev_name, the perf subsystem may not
handle the PMU event name correctly because of the ',' in it.

Best Regards,

Xu Lu

> +
> +       ret = perf_pmu_register(&pmu->pmu, name, -1);
> +       if (ret) {
> +               pr_err("Failed to register riscv_iommu_pmu_%s: %d\n",
> +                      dev_name, ret);
> +               return ret;
> +       }
> +
> +       /* Stop all counters and later start the counter with perf */
> +       riscv_iommu_pmu_stop_all(pmu);
> +
> +       pr_info("riscv_iommu_pmu_%s: Registered with %d counters\n",
> +               dev_name, pmu->num_counters);
> +
> +       return 0;
> +}
> +
> +void riscv_iommu_pmu_uninit(struct riscv_iommu_pmu *pmu)
> +{
> +       int idx;
> +
> +       /* Disable interrupt and functions */
> +       for_each_set_bit(idx, pmu->used_counters, pmu->num_counters) {
> +               riscv_iommu_pmu_disable_counter(pmu, idx);
> +               riscv_iommu_pmu_disable_ovf_intr(pmu, idx);
> +       }
> +
> +       perf_pmu_unregister(&pmu->pmu);
> +}
> diff --git a/drivers/iommu/riscv/iommu.h b/drivers/iommu/riscv/iommu.h
> index b1c4664542b4..92659a8a75ae 100644
> --- a/drivers/iommu/riscv/iommu.h
> +++ b/drivers/iommu/riscv/iommu.h
> @@ -60,11 +60,19 @@ struct riscv_iommu_device {
>         unsigned int ddt_mode;
>         dma_addr_t ddt_phys;
>         u64 *ddt_root;
> +
> +       /* hardware performance monitor */
> +       struct riscv_iommu_pmu pmu;
>  };
>
>  int riscv_iommu_init(struct riscv_iommu_device *iommu);
>  void riscv_iommu_remove(struct riscv_iommu_device *iommu);
>
> +int riscv_iommu_pmu_init(struct riscv_iommu_pmu *pmu, void __iomem *reg,
> +                        const char *name);
> +void riscv_iommu_pmu_uninit(struct riscv_iommu_pmu *pmu);
> +irqreturn_t riscv_iommu_pmu_handle_irq(struct riscv_iommu_pmu *pmu);
> +
>  #define riscv_iommu_readl(iommu, addr) \
>         readl_relaxed((iommu)->reg + (addr))
>
> --
> 2.17.1
>
Re: [External] [PATCH 1/2] iommu/riscv: add RISC-V IOMMU PMU support
Posted by Zong Li 11 months ago
On Wed, Jan 15, 2025 at 11:45 AM Xu Lu <luxu.kernel@bytedance.com> wrote:
>
> Hi Zong,
>
> On Wed, Jan 15, 2025 at 11:03 AM Zong Li <zong.li@sifive.com> wrote:
> >
> > Support for the RISC-V IOMMU hardware performance monitor includes
> > both counting and sampling modes.
> >
> > The specification does not define an event ID for counting the
> > number of clock cycles, meaning there is no associated `iohpmevt0`.
> > However, we need an event for counting cycle, so we reserve the
> > maximum event ID for this purpose.
> >
> > Signed-off-by: Zong Li <zong.li@sifive.com>
> > Tested-by: Xu Lu <luxu.kernel@bytedance.com>
> > ---
> >  drivers/iommu/riscv/Makefile     |   2 +-
> >  drivers/iommu/riscv/iommu-bits.h |  16 +
> >  drivers/iommu/riscv/iommu-pmu.c  | 486 +++++++++++++++++++++++++++++++
> >  drivers/iommu/riscv/iommu.h      |   8 +
> >  4 files changed, 511 insertions(+), 1 deletion(-)
> >  create mode 100644 drivers/iommu/riscv/iommu-pmu.c
> >
> > diff --git a/drivers/iommu/riscv/Makefile b/drivers/iommu/riscv/Makefile
> > index f54c9ed17d41..d36625a1fd08 100644
> > --- a/drivers/iommu/riscv/Makefile
> > +++ b/drivers/iommu/riscv/Makefile
> > @@ -1,3 +1,3 @@
> >  # SPDX-License-Identifier: GPL-2.0-only
> > -obj-$(CONFIG_RISCV_IOMMU) += iommu.o iommu-platform.o
> > +obj-$(CONFIG_RISCV_IOMMU) += iommu.o iommu-platform.o iommu-pmu.o
> >  obj-$(CONFIG_RISCV_IOMMU_PCI) += iommu-pci.o
> > diff --git a/drivers/iommu/riscv/iommu-bits.h b/drivers/iommu/riscv/iommu-bits.h
> > index 98daf0e1a306..60523449f016 100644
> > --- a/drivers/iommu/riscv/iommu-bits.h
> > +++ b/drivers/iommu/riscv/iommu-bits.h
> > @@ -17,6 +17,7 @@
> >  #include <linux/types.h>
> >  #include <linux/bitfield.h>
> >  #include <linux/bits.h>
> > +#include <linux/perf_event.h>
> >
> >  /*
> >   * Chapter 5: Memory Mapped register interface
> > @@ -207,6 +208,7 @@ enum riscv_iommu_ddtp_modes {
> >  /* 5.22 Performance monitoring event counters (31 * 64bits) */
> >  #define RISCV_IOMMU_REG_IOHPMCTR_BASE  0x0068
> >  #define RISCV_IOMMU_REG_IOHPMCTR(_n)   (RISCV_IOMMU_REG_IOHPMCTR_BASE + ((_n) * 0x8))
> > +#define RISCV_IOMMU_IOHPMCTR_COUNTER   GENMASK_ULL(63, 0)
> >
> >  /* 5.23 Performance monitoring event selectors (31 * 64bits) */
> >  #define RISCV_IOMMU_REG_IOHPMEVT_BASE  0x0160
> > @@ -250,6 +252,20 @@ enum riscv_iommu_hpmevent_id {
> >         RISCV_IOMMU_HPMEVENT_MAX        = 9
> >  };
> >
> > +/* Use maximum event ID for cycle event */
> > +#define RISCV_IOMMU_HPMEVENT_CYCLE     GENMASK_ULL(14, 0)
> > +
> > +#define RISCV_IOMMU_HPM_COUNTER_NUM    32
> > +
> > +struct riscv_iommu_pmu {
> > +       struct pmu pmu;
> > +       void __iomem *reg;
> > +       int num_counters;
> > +       u64 mask_counter;
> > +       struct perf_event *events[RISCV_IOMMU_IOHPMEVT_CNT + 1];
> > +       DECLARE_BITMAP(used_counters, RISCV_IOMMU_IOHPMEVT_CNT + 1);
> > +};
> > +
> >  /* 5.24 Translation request IOVA (64bits) */
> >  #define RISCV_IOMMU_REG_TR_REQ_IOVA     0x0258
> >  #define RISCV_IOMMU_TR_REQ_IOVA_VPN    GENMASK_ULL(63, 12)
> > diff --git a/drivers/iommu/riscv/iommu-pmu.c b/drivers/iommu/riscv/iommu-pmu.c
> > new file mode 100644
> > index 000000000000..74eb1525cd32
> > --- /dev/null
> > +++ b/drivers/iommu/riscv/iommu-pmu.c
> > @@ -0,0 +1,486 @@
> > +// SPDX-License-Identifier: GPL-2.0-only
> > +/*
> > + * Copyright (C) 2024 SiFive
> > + *
> > + * Authors
> > + *     Zong Li <zong.li@sifive.com>
> > + */
> > +
> > +#include <linux/io-64-nonatomic-hi-lo.h>
> > +
> > +#include "iommu.h"
> > +#include "iommu-bits.h"
> > +
> > +#define to_riscv_iommu_pmu(p) (container_of(p, struct riscv_iommu_pmu, pmu))
> > +
> > +#define RISCV_IOMMU_PMU_ATTR_EXTRACTOR(_name, _mask)                   \
> > +       static inline u32 get_##_name(struct perf_event *event)         \
> > +       {                                                               \
> > +               return FIELD_GET(_mask, event->attr.config);            \
> > +       }                                                               \
> > +
> > +RISCV_IOMMU_PMU_ATTR_EXTRACTOR(event, RISCV_IOMMU_IOHPMEVT_EVENTID);
> > +RISCV_IOMMU_PMU_ATTR_EXTRACTOR(partial_matching, RISCV_IOMMU_IOHPMEVT_DMASK);
> > +RISCV_IOMMU_PMU_ATTR_EXTRACTOR(pid_pscid, RISCV_IOMMU_IOHPMEVT_PID_PSCID);
> > +RISCV_IOMMU_PMU_ATTR_EXTRACTOR(did_gscid, RISCV_IOMMU_IOHPMEVT_DID_GSCID);
> > +RISCV_IOMMU_PMU_ATTR_EXTRACTOR(filter_pid_pscid, RISCV_IOMMU_IOHPMEVT_PV_PSCV);
> > +RISCV_IOMMU_PMU_ATTR_EXTRACTOR(filter_did_gscid, RISCV_IOMMU_IOHPMEVT_DV_GSCV);
> > +RISCV_IOMMU_PMU_ATTR_EXTRACTOR(filter_id_type, RISCV_IOMMU_IOHPMEVT_IDT);
> > +
> > +/* Formats */
> > +PMU_FORMAT_ATTR(event, "config:0-14");
> > +PMU_FORMAT_ATTR(partial_matching, "config:15");
> > +PMU_FORMAT_ATTR(pid_pscid, "config:16-35");
> > +PMU_FORMAT_ATTR(did_gscid, "config:36-59");
> > +PMU_FORMAT_ATTR(filter_pid_pscid, "config:60");
> > +PMU_FORMAT_ATTR(filter_did_gscid, "config:61");
> > +PMU_FORMAT_ATTR(filter_id_type, "config:62");
> > +
> > +static struct attribute *riscv_iommu_pmu_formats[] = {
> > +       &format_attr_event.attr,
> > +       &format_attr_partial_matching.attr,
> > +       &format_attr_pid_pscid.attr,
> > +       &format_attr_did_gscid.attr,
> > +       &format_attr_filter_pid_pscid.attr,
> > +       &format_attr_filter_did_gscid.attr,
> > +       &format_attr_filter_id_type.attr,
> > +       NULL,
> > +};
> > +
> > +static const struct attribute_group riscv_iommu_pmu_format_group = {
> > +       .name = "format",
> > +       .attrs = riscv_iommu_pmu_formats,
> > +};
> > +
> > +/* Events */
> > +static ssize_t riscv_iommu_pmu_event_show(struct device *dev,
> > +                                         struct device_attribute *attr,
> > +                                         char *page)
> > +{
> > +       struct perf_pmu_events_attr *pmu_attr;
> > +
> > +       pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
> > +
> > +       return sprintf(page, "event=0x%02llx\n", pmu_attr->id);
> > +}
> > +
> > +PMU_EVENT_ATTR(cycle, event_attr_cycle,
> > +              RISCV_IOMMU_HPMEVENT_CYCLE, riscv_iommu_pmu_event_show);
> > +PMU_EVENT_ATTR(dont_count, event_attr_dont_count,
> > +              RISCV_IOMMU_HPMEVENT_INVALID, riscv_iommu_pmu_event_show);
> > +PMU_EVENT_ATTR(untranslated_req, event_attr_untranslated_req,
> > +              RISCV_IOMMU_HPMEVENT_URQ, riscv_iommu_pmu_event_show);
> > +PMU_EVENT_ATTR(translated_req, event_attr_translated_req,
> > +              RISCV_IOMMU_HPMEVENT_TRQ, riscv_iommu_pmu_event_show);
> > +PMU_EVENT_ATTR(ats_trans_req, event_attr_ats_trans_req,
> > +              RISCV_IOMMU_HPMEVENT_ATS_RQ, riscv_iommu_pmu_event_show);
> > +PMU_EVENT_ATTR(tlb_miss, event_attr_tlb_miss,
> > +              RISCV_IOMMU_HPMEVENT_TLB_MISS, riscv_iommu_pmu_event_show);
> > +PMU_EVENT_ATTR(ddt_walks, event_attr_ddt_walks,
> > +              RISCV_IOMMU_HPMEVENT_DD_WALK, riscv_iommu_pmu_event_show);
> > +PMU_EVENT_ATTR(pdt_walks, event_attr_pdt_walks,
> > +              RISCV_IOMMU_HPMEVENT_PD_WALK, riscv_iommu_pmu_event_show);
> > +PMU_EVENT_ATTR(s_vs_pt_walks, event_attr_s_vs_pt_walks,
> > +              RISCV_IOMMU_HPMEVENT_S_VS_WALKS, riscv_iommu_pmu_event_show);
> > +PMU_EVENT_ATTR(g_pt_walks, event_attr_g_pt_walks,
> > +              RISCV_IOMMU_HPMEVENT_G_WALKS, riscv_iommu_pmu_event_show);
> > +
> > +static struct attribute *riscv_iommu_pmu_events[] = {
> > +       &event_attr_cycle.attr.attr,
> > +       &event_attr_dont_count.attr.attr,
> > +       &event_attr_untranslated_req.attr.attr,
> > +       &event_attr_translated_req.attr.attr,
> > +       &event_attr_ats_trans_req.attr.attr,
> > +       &event_attr_tlb_miss.attr.attr,
> > +       &event_attr_ddt_walks.attr.attr,
> > +       &event_attr_pdt_walks.attr.attr,
> > +       &event_attr_s_vs_pt_walks.attr.attr,
> > +       &event_attr_g_pt_walks.attr.attr,
> > +       NULL,
> > +};
> > +
> > +static const struct attribute_group riscv_iommu_pmu_events_group = {
> > +       .name = "events",
> > +       .attrs = riscv_iommu_pmu_events,
> > +};
> > +
> > +static const struct attribute_group *riscv_iommu_pmu_attr_grps[] = {
> > +       &riscv_iommu_pmu_format_group,
> > +       &riscv_iommu_pmu_events_group,
> > +       NULL,
> > +};
> > +
> > +/* PMU Operations */
> > +static void riscv_iommu_pmu_set_counter(struct riscv_iommu_pmu *pmu, u32 idx,
> > +                                       u64 value)
> > +{
> > +       void __iomem *addr = pmu->reg + RISCV_IOMMU_REG_IOHPMCYCLES;
> > +
> > +       if (WARN_ON_ONCE(idx < 0 || idx > pmu->num_counters))
> > +               return;
> > +
> > +       if (idx == 0)
> > +               value = (value & ~RISCV_IOMMU_IOHPMCYCLES_OF) |
> > +                        (readq(addr) & RISCV_IOMMU_IOHPMCYCLES_OF);
> > +
> > +       writeq(FIELD_PREP(RISCV_IOMMU_IOHPMCTR_COUNTER, value), addr + idx * 8);
> > +}
> > +
> > +static u64 riscv_iommu_pmu_get_counter(struct riscv_iommu_pmu *pmu, u32 idx)
> > +{
> > +       void __iomem *addr = pmu->reg + RISCV_IOMMU_REG_IOHPMCYCLES;
> > +       u64 value;
> > +
> > +       if (WARN_ON_ONCE(idx < 0 || idx > pmu->num_counters))
> > +               return -EINVAL;
> > +
> > +       value = readq(addr + idx * 8);
> > +
> > +       if (idx == 0)
> > +               return FIELD_GET(RISCV_IOMMU_IOHPMCYCLES_COUNTER, value);
> > +
> > +       return FIELD_GET(RISCV_IOMMU_IOHPMCTR_COUNTER, value);
> > +}
> > +
> > +static u64 riscv_iommu_pmu_get_event(struct riscv_iommu_pmu *pmu, u32 idx)
> > +{
> > +       void __iomem *addr = pmu->reg + RISCV_IOMMU_REG_IOHPMEVT_BASE;
> > +
> > +       if (WARN_ON_ONCE(idx < 0 || idx > pmu->num_counters))
> > +               return 0;
> > +
> > +       /* There is no associated IOHPMEVT0 for IOHPMCYCLES */
> > +       if (idx == 0)
> > +               return 0;
> > +
> > +       return readq(addr + (idx - 1) * 8);
> > +}
> > +
> > +static void riscv_iommu_pmu_set_event(struct riscv_iommu_pmu *pmu, u32 idx,
> > +                                     u64 value)
> > +{
> > +       void __iomem *addr = pmu->reg + RISCV_IOMMU_REG_IOHPMEVT_BASE;
> > +
> > +       if (WARN_ON_ONCE(idx < 0 || idx > pmu->num_counters))
> > +               return;
> > +
> > +       /* There is no associated IOHPMEVT0 for IOHPMCYCLES */
> > +       if (idx == 0)
> > +               return;
> > +
> > +       writeq(value, addr + (idx - 1) * 8);
> > +}
> > +
> > +static void riscv_iommu_pmu_enable_counter(struct riscv_iommu_pmu *pmu, u32 idx)
> > +{
> > +       void __iomem *addr = pmu->reg + RISCV_IOMMU_REG_IOCOUNTINH;
> > +       u32 value = readl(addr);
> > +
> > +       writel(value & ~BIT(idx), addr);
> > +}
> > +
> > +static void riscv_iommu_pmu_disable_counter(struct riscv_iommu_pmu *pmu, u32 idx)
> > +{
> > +       void __iomem *addr = pmu->reg + RISCV_IOMMU_REG_IOCOUNTINH;
> > +       u32 value = readl(addr);
> > +
> > +       writel(value | BIT(idx), addr);
> > +}
> > +
> > +static void riscv_iommu_pmu_enable_ovf_intr(struct riscv_iommu_pmu *pmu, u32 idx)
> > +{
> > +       u64 value;
> > +
> > +       if (get_event(pmu->events[idx]) == RISCV_IOMMU_HPMEVENT_CYCLE) {
> > +               value = riscv_iommu_pmu_get_counter(pmu, idx) & ~RISCV_IOMMU_IOHPMCYCLES_OF;
> > +               writeq(value, pmu->reg + RISCV_IOMMU_REG_IOHPMCYCLES);
> > +       } else {
> > +               value = riscv_iommu_pmu_get_event(pmu, idx) & ~RISCV_IOMMU_IOHPMEVT_OF;
> > +               writeq(value, pmu->reg + RISCV_IOMMU_REG_IOHPMEVT_BASE + (idx - 1) * 8);
> > +       }
> > +}
> > +
> > +static void riscv_iommu_pmu_disable_ovf_intr(struct riscv_iommu_pmu *pmu, u32 idx)
> > +{
> > +       u64 value;
> > +
> > +       if (get_event(pmu->events[idx]) == RISCV_IOMMU_HPMEVENT_CYCLE) {
> > +               value = riscv_iommu_pmu_get_counter(pmu, idx) | RISCV_IOMMU_IOHPMCYCLES_OF;
> > +               writeq(value, pmu->reg + RISCV_IOMMU_REG_IOHPMCYCLES);
> > +       } else {
> > +               value = riscv_iommu_pmu_get_event(pmu, idx) | RISCV_IOMMU_IOHPMEVT_OF;
> > +               writeq(value, pmu->reg + RISCV_IOMMU_REG_IOHPMEVT_BASE + (idx - 1) * 8);
> > +       }
> > +}
> > +
> > +static void riscv_iommu_pmu_start_all(struct riscv_iommu_pmu *pmu)
> > +{
> > +       int idx;
> > +
> > +       for_each_set_bit(idx, pmu->used_counters, pmu->num_counters) {
> > +               riscv_iommu_pmu_enable_ovf_intr(pmu, idx);
> > +               riscv_iommu_pmu_enable_counter(pmu, idx);
> > +       }
> > +}
> > +
> > +static void riscv_iommu_pmu_stop_all(struct riscv_iommu_pmu *pmu)
> > +{
> > +       writel(GENMASK_ULL(pmu->num_counters - 1, 0),
> > +              pmu->reg + RISCV_IOMMU_REG_IOCOUNTINH);
> > +}
> > +
> > +/* PMU APIs */
> > +static int riscv_iommu_pmu_set_period(struct perf_event *event)
> > +{
> > +       struct riscv_iommu_pmu *pmu = to_riscv_iommu_pmu(event->pmu);
> > +       struct hw_perf_event *hwc = &event->hw;
> > +       s64 left = local64_read(&hwc->period_left);
> > +       s64 period = hwc->sample_period;
> > +       u64 max_period = pmu->mask_counter;
> > +       int ret = 0;
> > +
> > +       if (unlikely(left <= -period)) {
> > +               left = period;
> > +               local64_set(&hwc->period_left, left);
> > +               hwc->last_period = period;
> > +               ret = 1;
> > +       }
> > +
> > +       if (unlikely(left <= 0)) {
> > +               left += period;
> > +               local64_set(&hwc->period_left, left);
> > +               hwc->last_period = period;
> > +               ret = 1;
> > +       }
> > +
> > +       /*
> > +        * Limit the maximum period to prevent the counter value
> > +        * from overtaking the one we are about to program. In
> > +        * effect we are reducing max_period to account for
> > +        * interrupt latency (and we are being very conservative).
> > +        */
> > +       if (left > (max_period >> 1))
> > +               left = (max_period >> 1);
> > +
> > +       local64_set(&hwc->prev_count, (u64)-left);
> > +       riscv_iommu_pmu_set_counter(pmu, hwc->idx, (u64)(-left) & max_period);
> > +       perf_event_update_userpage(event);
> > +
> > +       return ret;
> > +}
> > +
> > +static int riscv_iommu_pmu_event_init(struct perf_event *event)
> > +{
> > +       struct riscv_iommu_pmu *pmu = to_riscv_iommu_pmu(event->pmu);
> > +       struct hw_perf_event *hwc = &event->hw;
> > +
> > +       hwc->idx = -1;
> > +       hwc->config = event->attr.config;
> > +
> > +       if (!is_sampling_event(event)) {
> > +               /*
> > +                * For non-sampling runs, limit the sample_period to half
> > +                * of the counter width. That way, the new counter value
> > +                * is far less likely to overtake the previous one unless
> > +                * you have some serious IRQ latency issues.
> > +                */
> > +               hwc->sample_period = pmu->mask_counter >> 1;
> > +               hwc->last_period = hwc->sample_period;
> > +               local64_set(&hwc->period_left, hwc->sample_period);
> > +       }
> > +
> > +       return 0;
> > +}
> > +
> > +static void riscv_iommu_pmu_update(struct perf_event *event)
> > +{
> > +       struct hw_perf_event *hwc = &event->hw;
> > +       struct riscv_iommu_pmu *pmu = to_riscv_iommu_pmu(event->pmu);
> > +       u64 delta, prev, now;
> > +       u32 idx = hwc->idx;
> > +
> > +       do {
> > +               prev = local64_read(&hwc->prev_count);
> > +               now = riscv_iommu_pmu_get_counter(pmu, idx);
> > +       } while (local64_cmpxchg(&hwc->prev_count, prev, now) != prev);
> > +
> > +       delta = FIELD_GET(RISCV_IOMMU_IOHPMCTR_COUNTER, now - prev) & pmu->mask_counter;
> > +       local64_add(delta, &event->count);
> > +       local64_sub(delta, &hwc->period_left);
> > +}
> > +
> > +static void riscv_iommu_pmu_start(struct perf_event *event, int flags)
> > +{
> > +       struct riscv_iommu_pmu *pmu = to_riscv_iommu_pmu(event->pmu);
> > +       struct hw_perf_event *hwc = &event->hw;
> > +
> > +       if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
> > +               return;
> > +
> > +       if (flags & PERF_EF_RELOAD)
> > +               WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
> > +
> > +       hwc->state = 0;
> > +       riscv_iommu_pmu_set_period(event);
> > +       riscv_iommu_pmu_set_event(pmu, hwc->idx, hwc->config);
> > +       riscv_iommu_pmu_enable_ovf_intr(pmu, hwc->idx);
> > +       riscv_iommu_pmu_enable_counter(pmu, hwc->idx);
> > +
> > +       perf_event_update_userpage(event);
> > +}
> > +
> > +static void riscv_iommu_pmu_stop(struct perf_event *event, int flags)
> > +{
> > +       struct riscv_iommu_pmu *pmu = to_riscv_iommu_pmu(event->pmu);
> > +       struct hw_perf_event *hwc = &event->hw;
> > +
> > +       if (hwc->state & PERF_HES_STOPPED)
> > +               return;
> > +
> > +       riscv_iommu_pmu_set_event(pmu, hwc->idx, RISCV_IOMMU_HPMEVENT_INVALID);
> > +       riscv_iommu_pmu_disable_counter(pmu, hwc->idx);
> > +
> > +       if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE))
> > +               riscv_iommu_pmu_update(event);
> > +
> > +       hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
> > +}
> > +
> > +static int riscv_iommu_pmu_add(struct perf_event *event, int flags)
> > +{
> > +       struct hw_perf_event *hwc = &event->hw;
> > +       struct riscv_iommu_pmu *pmu = to_riscv_iommu_pmu(event->pmu);
> > +       unsigned int num_counters = pmu->num_counters;
> > +       int idx;
> > +
> > +       /* Reserve index zero for iohpmcycles */
> > +       if (get_event(event) == RISCV_IOMMU_HPMEVENT_CYCLE)
> > +               idx = 0;
> > +       else
> > +               idx = find_next_zero_bit(pmu->used_counters, num_counters, 1);
> > +
> > +       if (idx == num_counters)
> > +               return -EAGAIN;
> > +
> > +       set_bit(idx, pmu->used_counters);
> > +
> > +       pmu->events[idx] = event;
> > +       hwc->idx = idx;
> > +       hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
> > +
> > +       if (flags & PERF_EF_START)
> > +               riscv_iommu_pmu_start(event, flags);
> > +
> > +       /* Propagate changes to the userspace mapping. */
> > +       perf_event_update_userpage(event);
> > +
> > +       return 0;
> > +}
> > +
> > +static void riscv_iommu_pmu_read(struct perf_event *event)
> > +{
> > +       riscv_iommu_pmu_update(event);
> > +}
> > +
> > +static void riscv_iommu_pmu_del(struct perf_event *event, int flags)
> > +{
> > +       struct hw_perf_event *hwc = &event->hw;
> > +       struct riscv_iommu_pmu *pmu = to_riscv_iommu_pmu(event->pmu);
> > +       int idx = hwc->idx;
> > +
> > +       riscv_iommu_pmu_stop(event, PERF_EF_UPDATE);
> > +       pmu->events[idx] = NULL;
> > +       clear_bit(idx, pmu->used_counters);
> > +       perf_event_update_userpage(event);
> > +}
> > +
> > +irqreturn_t riscv_iommu_pmu_handle_irq(struct riscv_iommu_pmu *pmu)
> > +{
> > +       struct perf_sample_data data;
> > +       struct pt_regs *regs;
> > +       u32 ovf = readl(pmu->reg + RISCV_IOMMU_REG_IOCOUNTOVF);
> > +       int idx;
> > +
> > +       if (!ovf)
> > +               return IRQ_NONE;
> > +
> > +       riscv_iommu_pmu_stop_all(pmu);
> > +
> > +       regs = get_irq_regs();
> > +
> > +       for_each_set_bit(idx, (unsigned long *)&ovf, pmu->num_counters) {
> > +               struct perf_event *event = pmu->events[idx];
> > +               struct hw_perf_event *hwc;
> > +
> > +               if (WARN_ON_ONCE(!event) || !is_sampling_event(event))
> > +                       continue;
> > +
> > +               hwc = &event->hw;
> > +
> > +               riscv_iommu_pmu_update(event);
> > +               perf_sample_data_init(&data, 0, hwc->last_period);
> > +               if (!riscv_iommu_pmu_set_period(event))
> > +                       continue;
> > +
> > +               if (perf_event_overflow(event, &data, regs))
> > +                       riscv_iommu_pmu_stop(event, 0);
> > +       }
> > +
> > +       riscv_iommu_pmu_start_all(pmu);
> > +
> > +       return IRQ_HANDLED;
> > +}
> > +
> > +int riscv_iommu_pmu_init(struct riscv_iommu_pmu *pmu, void __iomem *reg,
> > +                        const char *dev_name)
> > +{
> > +       char *name;
> > +       int ret;
> > +
> > +       pmu->reg = reg;
> > +       pmu->num_counters = RISCV_IOMMU_HPM_COUNTER_NUM;
> > +       pmu->mask_counter = RISCV_IOMMU_IOHPMCTR_COUNTER;
> > +
> > +       pmu->pmu = (struct pmu) {
> > +               .task_ctx_nr    = perf_invalid_context,
> > +               .event_init     = riscv_iommu_pmu_event_init,
> > +               .add            = riscv_iommu_pmu_add,
> > +               .del            = riscv_iommu_pmu_del,
> > +               .start          = riscv_iommu_pmu_start,
> > +               .stop           = riscv_iommu_pmu_stop,
> > +               .read           = riscv_iommu_pmu_read,
> > +               .attr_groups    = riscv_iommu_pmu_attr_grps,
> > +               .capabilities   = PERF_PMU_CAP_NO_EXCLUDE,
> > +               .module         = THIS_MODULE,
> > +       };
> > +
> > +       name = kasprintf(GFP_KERNEL, "riscv_iommu_pmu_%s", dev_name);
>
> The dev_name of the RISC-V IOMMU is usually 'riscv,iommu'. If we compose
> the IOMMU PMU name from the IOMMU dev_name, the perf subsystem may not
> handle the PMU event name correctly because of the ',' in it.

I assume you are referring to the compatible string, since 'riscv,iommu'
looks like a compatible string. However, it seems to me that dev_name is
derived from the node name rather than the compatible string, so there
is no comma (',') in it; a dot ('.') is used instead. For example, if
the IOMMU node in the device tree is 'iommu@12345678', the dev_name
would be '12345678.iommu'. Please let me know if I missed anything. Thanks
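
That said, it is cheap to be defensive when building the name; a rough
sketch (also covering the missing allocation check):

	name = kasprintf(GFP_KERNEL, "riscv_iommu_pmu_%s", dev_name);
	if (!name)
		return -ENOMEM;

	/* Just in case: perf tooling cannot cope with ',' in a PMU name. */
	strreplace(name, ',', '_');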

>
> Best Regards,
>
> Xu Lu
>
> > +
> > +       ret = perf_pmu_register(&pmu->pmu, name, -1);
> > +       if (ret) {
> > +               pr_err("Failed to register riscv_iommu_pmu_%s: %d\n",
> > +                      dev_name, ret);
> > +               return ret;
> > +       }
> > +
> > +       /* Stop all counters and later start the counter with perf */
> > +       riscv_iommu_pmu_stop_all(pmu);
> > +
> > +       pr_info("riscv_iommu_pmu_%s: Registered with %d counters\n",
> > +               dev_name, pmu->num_counters);
> > +
> > +       return 0;
> > +}
> > +
> > +void riscv_iommu_pmu_uninit(struct riscv_iommu_pmu *pmu)
> > +{
> > +       int idx;
> > +
> > +       /* Disable interrupt and functions */
> > +       for_each_set_bit(idx, pmu->used_counters, pmu->num_counters) {
> > +               riscv_iommu_pmu_disable_counter(pmu, idx);
> > +               riscv_iommu_pmu_disable_ovf_intr(pmu, idx);
> > +       }
> > +
> > +       perf_pmu_unregister(&pmu->pmu);
> > +}
> > diff --git a/drivers/iommu/riscv/iommu.h b/drivers/iommu/riscv/iommu.h
> > index b1c4664542b4..92659a8a75ae 100644
> > --- a/drivers/iommu/riscv/iommu.h
> > +++ b/drivers/iommu/riscv/iommu.h
> > @@ -60,11 +60,19 @@ struct riscv_iommu_device {
> >         unsigned int ddt_mode;
> >         dma_addr_t ddt_phys;
> >         u64 *ddt_root;
> > +
> > +       /* hardware performance monitor */
> > +       struct riscv_iommu_pmu pmu;
> >  };
> >
> >  int riscv_iommu_init(struct riscv_iommu_device *iommu);
> >  void riscv_iommu_remove(struct riscv_iommu_device *iommu);
> >
> > +int riscv_iommu_pmu_init(struct riscv_iommu_pmu *pmu, void __iomem *reg,
> > +                        const char *name);
> > +void riscv_iommu_pmu_uninit(struct riscv_iommu_pmu *pmu);
> > +irqreturn_t riscv_iommu_pmu_handle_irq(struct riscv_iommu_pmu *pmu);
> > +
> >  #define riscv_iommu_readl(iommu, addr) \
> >         readl_relaxed((iommu)->reg + (addr))
> >
> > --
> > 2.17.1
> >
Re: [External] [PATCH 1/2] iommu/riscv: add RISC-V IOMMU PMU support
Posted by Xu Lu 11 months ago
On Wed, Jan 15, 2025 at 3:49 PM Zong Li <zong.li@sifive.com> wrote:
>
> On Wed, Jan 15, 2025 at 11:45 AM Xu Lu <luxu.kernel@bytedance.com> wrote:
> >
> > Hi Zong,
> >
> > On Wed, Jan 15, 2025 at 11:03 AM Zong Li <zong.li@sifive.com> wrote:
> > >
> > > Support for the RISC-V IOMMU hardware performance monitor includes
> > > both counting and sampling modes.
> > >
> > > The specification does not define an event ID for counting the
> > > number of clock cycles, meaning there is no associated `iohpmevt0`.
> > > However, we need an event for counting cycle, so we reserve the
> > > maximum event ID for this purpose.
> > >
> > > Signed-off-by: Zong Li <zong.li@sifive.com>
> > > Tested-by: Xu Lu <luxu.kernel@bytedance.com>
> > > ---
> > >  drivers/iommu/riscv/Makefile     |   2 +-
> > >  drivers/iommu/riscv/iommu-bits.h |  16 +
> > >  drivers/iommu/riscv/iommu-pmu.c  | 486 +++++++++++++++++++++++++++++++
> > >  drivers/iommu/riscv/iommu.h      |   8 +
> > >  4 files changed, 511 insertions(+), 1 deletion(-)
> > >  create mode 100644 drivers/iommu/riscv/iommu-pmu.c
> > >
> > > diff --git a/drivers/iommu/riscv/Makefile b/drivers/iommu/riscv/Makefile
> > > index f54c9ed17d41..d36625a1fd08 100644
> > > --- a/drivers/iommu/riscv/Makefile
> > > +++ b/drivers/iommu/riscv/Makefile
> > > @@ -1,3 +1,3 @@
> > >  # SPDX-License-Identifier: GPL-2.0-only
> > > -obj-$(CONFIG_RISCV_IOMMU) += iommu.o iommu-platform.o
> > > +obj-$(CONFIG_RISCV_IOMMU) += iommu.o iommu-platform.o iommu-pmu.o
> > >  obj-$(CONFIG_RISCV_IOMMU_PCI) += iommu-pci.o
> > > diff --git a/drivers/iommu/riscv/iommu-bits.h b/drivers/iommu/riscv/iommu-bits.h
> > > index 98daf0e1a306..60523449f016 100644
> > > --- a/drivers/iommu/riscv/iommu-bits.h
> > > +++ b/drivers/iommu/riscv/iommu-bits.h
> > > @@ -17,6 +17,7 @@
> > >  #include <linux/types.h>
> > >  #include <linux/bitfield.h>
> > >  #include <linux/bits.h>
> > > +#include <linux/perf_event.h>
> > >
> > >  /*
> > >   * Chapter 5: Memory Mapped register interface
> > > @@ -207,6 +208,7 @@ enum riscv_iommu_ddtp_modes {
> > >  /* 5.22 Performance monitoring event counters (31 * 64bits) */
> > >  #define RISCV_IOMMU_REG_IOHPMCTR_BASE  0x0068
> > >  #define RISCV_IOMMU_REG_IOHPMCTR(_n)   (RISCV_IOMMU_REG_IOHPMCTR_BASE + ((_n) * 0x8))
> > > +#define RISCV_IOMMU_IOHPMCTR_COUNTER   GENMASK_ULL(63, 0)
> > >
> > >  /* 5.23 Performance monitoring event selectors (31 * 64bits) */
> > >  #define RISCV_IOMMU_REG_IOHPMEVT_BASE  0x0160
> > > @@ -250,6 +252,20 @@ enum riscv_iommu_hpmevent_id {
> > >         RISCV_IOMMU_HPMEVENT_MAX        = 9
> > >  };
> > >
> > > +/* Use maximum event ID for cycle event */
> > > +#define RISCV_IOMMU_HPMEVENT_CYCLE     GENMASK_ULL(14, 0)
> > > +
> > > +#define RISCV_IOMMU_HPM_COUNTER_NUM    32
> > > +
> > > +struct riscv_iommu_pmu {
> > > +       struct pmu pmu;
> > > +       void __iomem *reg;
> > > +       int num_counters;
> > > +       u64 mask_counter;
> > > +       struct perf_event *events[RISCV_IOMMU_IOHPMEVT_CNT + 1];
> > > +       DECLARE_BITMAP(used_counters, RISCV_IOMMU_IOHPMEVT_CNT + 1);
> > > +};
> > > +
> > >  /* 5.24 Translation request IOVA (64bits) */
> > >  #define RISCV_IOMMU_REG_TR_REQ_IOVA     0x0258
> > >  #define RISCV_IOMMU_TR_REQ_IOVA_VPN    GENMASK_ULL(63, 12)
> > > diff --git a/drivers/iommu/riscv/iommu-pmu.c b/drivers/iommu/riscv/iommu-pmu.c
> > > new file mode 100644
> > > index 000000000000..74eb1525cd32
> > > --- /dev/null
> > > +++ b/drivers/iommu/riscv/iommu-pmu.c
> > > @@ -0,0 +1,486 @@
> > > +// SPDX-License-Identifier: GPL-2.0-only
> > > +/*
> > > + * Copyright (C) 2024 SiFive
> > > + *
> > > + * Authors
> > > + *     Zong Li <zong.li@sifive.com>
> > > + */
> > > +
> > > +#include <linux/io-64-nonatomic-hi-lo.h>
> > > +
> > > +#include "iommu.h"
> > > +#include "iommu-bits.h"
> > > +
> > > +#define to_riscv_iommu_pmu(p) (container_of(p, struct riscv_iommu_pmu, pmu))
> > > +
> > > +#define RISCV_IOMMU_PMU_ATTR_EXTRACTOR(_name, _mask)                   \
> > > +       static inline u32 get_##_name(struct perf_event *event)         \
> > > +       {                                                               \
> > > +               return FIELD_GET(_mask, event->attr.config);            \
> > > +       }                                                               \
> > > +
> > > +RISCV_IOMMU_PMU_ATTR_EXTRACTOR(event, RISCV_IOMMU_IOHPMEVT_EVENTID);
> > > +RISCV_IOMMU_PMU_ATTR_EXTRACTOR(partial_matching, RISCV_IOMMU_IOHPMEVT_DMASK);
> > > +RISCV_IOMMU_PMU_ATTR_EXTRACTOR(pid_pscid, RISCV_IOMMU_IOHPMEVT_PID_PSCID);
> > > +RISCV_IOMMU_PMU_ATTR_EXTRACTOR(did_gscid, RISCV_IOMMU_IOHPMEVT_DID_GSCID);
> > > +RISCV_IOMMU_PMU_ATTR_EXTRACTOR(filter_pid_pscid, RISCV_IOMMU_IOHPMEVT_PV_PSCV);
> > > +RISCV_IOMMU_PMU_ATTR_EXTRACTOR(filter_did_gscid, RISCV_IOMMU_IOHPMEVT_DV_GSCV);
> > > +RISCV_IOMMU_PMU_ATTR_EXTRACTOR(filter_id_type, RISCV_IOMMU_IOHPMEVT_IDT);
> > > +
> > > +/* Formats */
> > > +PMU_FORMAT_ATTR(event, "config:0-14");
> > > +PMU_FORMAT_ATTR(partial_matching, "config:15");
> > > +PMU_FORMAT_ATTR(pid_pscid, "config:16-35");
> > > +PMU_FORMAT_ATTR(did_gscid, "config:36-59");
> > > +PMU_FORMAT_ATTR(filter_pid_pscid, "config:60");
> > > +PMU_FORMAT_ATTR(filter_did_gscid, "config:61");
> > > +PMU_FORMAT_ATTR(filter_id_type, "config:62");
> > > +
> > > +static struct attribute *riscv_iommu_pmu_formats[] = {
> > > +       &format_attr_event.attr,
> > > +       &format_attr_partial_matching.attr,
> > > +       &format_attr_pid_pscid.attr,
> > > +       &format_attr_did_gscid.attr,
> > > +       &format_attr_filter_pid_pscid.attr,
> > > +       &format_attr_filter_did_gscid.attr,
> > > +       &format_attr_filter_id_type.attr,
> > > +       NULL,
> > > +};
> > > +
> > > +static const struct attribute_group riscv_iommu_pmu_format_group = {
> > > +       .name = "format",
> > > +       .attrs = riscv_iommu_pmu_formats,
> > > +};
> > > +
> > > +/* Events */
> > > +static ssize_t riscv_iommu_pmu_event_show(struct device *dev,
> > > +                                         struct device_attribute *attr,
> > > +                                         char *page)
> > > +{
> > > +       struct perf_pmu_events_attr *pmu_attr;
> > > +
> > > +       pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
> > > +
> > > +       return sprintf(page, "event=0x%02llx\n", pmu_attr->id);
> > > +}
> > > +
> > > +PMU_EVENT_ATTR(cycle, event_attr_cycle,
> > > +              RISCV_IOMMU_HPMEVENT_CYCLE, riscv_iommu_pmu_event_show);
> > > +PMU_EVENT_ATTR(dont_count, event_attr_dont_count,
> > > +              RISCV_IOMMU_HPMEVENT_INVALID, riscv_iommu_pmu_event_show);
> > > +PMU_EVENT_ATTR(untranslated_req, event_attr_untranslated_req,
> > > +              RISCV_IOMMU_HPMEVENT_URQ, riscv_iommu_pmu_event_show);
> > > +PMU_EVENT_ATTR(translated_req, event_attr_translated_req,
> > > +              RISCV_IOMMU_HPMEVENT_TRQ, riscv_iommu_pmu_event_show);
> > > +PMU_EVENT_ATTR(ats_trans_req, event_attr_ats_trans_req,
> > > +              RISCV_IOMMU_HPMEVENT_ATS_RQ, riscv_iommu_pmu_event_show);
> > > +PMU_EVENT_ATTR(tlb_miss, event_attr_tlb_miss,
> > > +              RISCV_IOMMU_HPMEVENT_TLB_MISS, riscv_iommu_pmu_event_show);
> > > +PMU_EVENT_ATTR(ddt_walks, event_attr_ddt_walks,
> > > +              RISCV_IOMMU_HPMEVENT_DD_WALK, riscv_iommu_pmu_event_show);
> > > +PMU_EVENT_ATTR(pdt_walks, event_attr_pdt_walks,
> > > +              RISCV_IOMMU_HPMEVENT_PD_WALK, riscv_iommu_pmu_event_show);
> > > +PMU_EVENT_ATTR(s_vs_pt_walks, event_attr_s_vs_pt_walks,
> > > +              RISCV_IOMMU_HPMEVENT_S_VS_WALKS, riscv_iommu_pmu_event_show);
> > > +PMU_EVENT_ATTR(g_pt_walks, event_attr_g_pt_walks,
> > > +              RISCV_IOMMU_HPMEVENT_G_WALKS, riscv_iommu_pmu_event_show);
> > > +
> > > +static struct attribute *riscv_iommu_pmu_events[] = {
> > > +       &event_attr_cycle.attr.attr,
> > > +       &event_attr_dont_count.attr.attr,
> > > +       &event_attr_untranslated_req.attr.attr,
> > > +       &event_attr_translated_req.attr.attr,
> > > +       &event_attr_ats_trans_req.attr.attr,
> > > +       &event_attr_tlb_miss.attr.attr,
> > > +       &event_attr_ddt_walks.attr.attr,
> > > +       &event_attr_pdt_walks.attr.attr,
> > > +       &event_attr_s_vs_pt_walks.attr.attr,
> > > +       &event_attr_g_pt_walks.attr.attr,
> > > +       NULL,
> > > +};
> > > +
> > > +static const struct attribute_group riscv_iommu_pmu_events_group = {
> > > +       .name = "events",
> > > +       .attrs = riscv_iommu_pmu_events,
> > > +};
> > > +
> > > +static const struct attribute_group *riscv_iommu_pmu_attr_grps[] = {
> > > +       &riscv_iommu_pmu_format_group,
> > > +       &riscv_iommu_pmu_events_group,
> > > +       NULL,
> > > +};
> > > +
> > > +/* PMU Operations */
> > > +static void riscv_iommu_pmu_set_counter(struct riscv_iommu_pmu *pmu, u32 idx,
> > > +                                       u64 value)
> > > +{
> > > +       void __iomem *addr = pmu->reg + RISCV_IOMMU_REG_IOHPMCYCLES;
> > > +
> > > +       if (WARN_ON_ONCE(idx >= pmu->num_counters))
> > > +               return;
> > > +
> > > +       if (idx == 0)
> > > +               value = (value & ~RISCV_IOMMU_IOHPMCYCLES_OF) |
> > > +                        (readq(addr) & RISCV_IOMMU_IOHPMCYCLES_OF);
> > > +
> > > +       writeq(FIELD_PREP(RISCV_IOMMU_IOHPMCTR_COUNTER, value), addr + idx * 8);
> > > +}
> > > +
> > > +static u64 riscv_iommu_pmu_get_counter(struct riscv_iommu_pmu *pmu, u32 idx)
> > > +{
> > > +       void __iomem *addr = pmu->reg + RISCV_IOMMU_REG_IOHPMCYCLES;
> > > +       u64 value;
> > > +
> > > +       if (WARN_ON_ONCE(idx >= pmu->num_counters))
> > > +               return -EINVAL;
> > > +
> > > +       value = readq(addr + idx * 8);
> > > +
> > > +       if (idx == 0)
> > > +               return FIELD_GET(RISCV_IOMMU_IOHPMCYCLES_COUNTER, value);
> > > +
> > > +       return FIELD_GET(RISCV_IOMMU_IOHPMCTR_COUNTER, value);
> > > +}
> > > +
> > > +static u64 riscv_iommu_pmu_get_event(struct riscv_iommu_pmu *pmu, u32 idx)
> > > +{
> > > +       void __iomem *addr = pmu->reg + RISCV_IOMMU_REG_IOHPMEVT_BASE;
> > > +
> > > +       if (WARN_ON_ONCE(idx >= pmu->num_counters))
> > > +               return 0;
> > > +
> > > +       /* There is no associated IOHPMEVT0 for IOHPMCYCLES */
> > > +       if (idx == 0)
> > > +               return 0;
> > > +
> > > +       return readq(addr + (idx - 1) * 8);
> > > +}
> > > +
> > > +static void riscv_iommu_pmu_set_event(struct riscv_iommu_pmu *pmu, u32 idx,
> > > +                                     u64 value)
> > > +{
> > > +       void __iomem *addr = pmu->reg + RISCV_IOMMU_REG_IOHPMEVT_BASE;
> > > +
> > > +       if (WARN_ON_ONCE(idx >= pmu->num_counters))
> > > +               return;
> > > +
> > > +       /* There is no associated IOHPMEVT0 for IOHPMCYCLES */
> > > +       if (idx == 0)
> > > +               return;
> > > +
> > > +       writeq(value, addr + (idx - 1) * 8);
> > > +}
> > > +
> > > +static void riscv_iommu_pmu_enable_counter(struct riscv_iommu_pmu *pmu, u32 idx)
> > > +{
> > > +       void __iomem *addr = pmu->reg + RISCV_IOMMU_REG_IOCOUNTINH;
> > > +       u32 value = readl(addr);
> > > +
> > > +       writel(value & ~BIT(idx), addr);
> > > +}
> > > +
> > > +static void riscv_iommu_pmu_disable_counter(struct riscv_iommu_pmu *pmu, u32 idx)
> > > +{
> > > +       void __iomem *addr = pmu->reg + RISCV_IOMMU_REG_IOCOUNTINH;
> > > +       u32 value = readl(addr);
> > > +
> > > +       writel(value | BIT(idx), addr);
> > > +}
> > > +
> > > +static void riscv_iommu_pmu_enable_ovf_intr(struct riscv_iommu_pmu *pmu, u32 idx)
> > > +{
> > > +       u64 value;
> > > +
> > > +       if (get_event(pmu->events[idx]) == RISCV_IOMMU_HPMEVENT_CYCLE) {
> > > +               value = riscv_iommu_pmu_get_counter(pmu, idx) & ~RISCV_IOMMU_IOHPMCYCLES_OF;
> > > +               writeq(value, pmu->reg + RISCV_IOMMU_REG_IOHPMCYCLES);
> > > +       } else {
> > > +               value = riscv_iommu_pmu_get_event(pmu, idx) & ~RISCV_IOMMU_IOHPMEVT_OF;
> > > +               writeq(value, pmu->reg + RISCV_IOMMU_REG_IOHPMEVT_BASE + (idx - 1) * 8);
> > > +       }
> > > +}
> > > +
> > > +static void riscv_iommu_pmu_disable_ovf_intr(struct riscv_iommu_pmu *pmu, u32 idx)
> > > +{
> > > +       u64 value;
> > > +
> > > +       if (get_event(pmu->events[idx]) == RISCV_IOMMU_HPMEVENT_CYCLE) {
> > > +               value = riscv_iommu_pmu_get_counter(pmu, idx) | RISCV_IOMMU_IOHPMCYCLES_OF;
> > > +               writeq(value, pmu->reg + RISCV_IOMMU_REG_IOHPMCYCLES);
> > > +       } else {
> > > +               value = riscv_iommu_pmu_get_event(pmu, idx) | RISCV_IOMMU_IOHPMEVT_OF;
> > > +               writeq(value, pmu->reg + RISCV_IOMMU_REG_IOHPMEVT_BASE + (idx - 1) * 8);
> > > +       }
> > > +}
> > > +
> > > +static void riscv_iommu_pmu_start_all(struct riscv_iommu_pmu *pmu)
> > > +{
> > > +       int idx;
> > > +
> > > +       for_each_set_bit(idx, pmu->used_counters, pmu->num_counters) {
> > > +               riscv_iommu_pmu_enable_ovf_intr(pmu, idx);
> > > +               riscv_iommu_pmu_enable_counter(pmu, idx);
> > > +       }
> > > +}
> > > +
> > > +static void riscv_iommu_pmu_stop_all(struct riscv_iommu_pmu *pmu)
> > > +{
> > > +       writel(GENMASK_ULL(pmu->num_counters - 1, 0),
> > > +              pmu->reg + RISCV_IOMMU_REG_IOCOUNTINH);
> > > +}
> > > +
> > > +/* PMU APIs */
> > > +static int riscv_iommu_pmu_set_period(struct perf_event *event)
> > > +{
> > > +       struct riscv_iommu_pmu *pmu = to_riscv_iommu_pmu(event->pmu);
> > > +       struct hw_perf_event *hwc = &event->hw;
> > > +       s64 left = local64_read(&hwc->period_left);
> > > +       s64 period = hwc->sample_period;
> > > +       u64 max_period = pmu->mask_counter;
> > > +       int ret = 0;
> > > +
> > > +       if (unlikely(left <= -period)) {
> > > +               left = period;
> > > +               local64_set(&hwc->period_left, left);
> > > +               hwc->last_period = period;
> > > +               ret = 1;
> > > +       }
> > > +
> > > +       if (unlikely(left <= 0)) {
> > > +               left += period;
> > > +               local64_set(&hwc->period_left, left);
> > > +               hwc->last_period = period;
> > > +               ret = 1;
> > > +       }
> > > +
> > > +       /*
> > > +        * Limit the maximum period to prevent the counter value
> > > +        * from overtaking the one we are about to program. In
> > > +        * effect we are reducing max_period to account for
> > > +        * interrupt latency (and we are being very conservative).
> > > +        */
> > > +       if (left > (max_period >> 1))
> > > +               left = (max_period >> 1);
> > > +
> > > +       local64_set(&hwc->prev_count, (u64)-left);
> > > +       riscv_iommu_pmu_set_counter(pmu, hwc->idx, (u64)(-left) & max_period);
> > > +       perf_event_update_userpage(event);
> > > +
> > > +       return ret;
> > > +}
> > > +
> > > +static int riscv_iommu_pmu_event_init(struct perf_event *event)
> > > +{
> > > +       struct riscv_iommu_pmu *pmu = to_riscv_iommu_pmu(event->pmu);
> > > +       struct hw_perf_event *hwc = &event->hw;
> > > +
> > > +       hwc->idx = -1;
> > > +       hwc->config = event->attr.config;
> > > +
> > > +       if (!is_sampling_event(event)) {
> > > +               /*
> > > +                * For non-sampling runs, limit the sample_period to half
> > > +                * of the counter width. That way, the new counter value
> > > +                * is far less likely to overtake the previous one unless
> > > +                * you have some serious IRQ latency issues.
> > > +                */
> > > +               hwc->sample_period = pmu->mask_counter >> 1;
> > > +               hwc->last_period = hwc->sample_period;
> > > +               local64_set(&hwc->period_left, hwc->sample_period);
> > > +       }
> > > +
> > > +       return 0;
> > > +}
> > > +
> > > +static void riscv_iommu_pmu_update(struct perf_event *event)
> > > +{
> > > +       struct hw_perf_event *hwc = &event->hw;
> > > +       struct riscv_iommu_pmu *pmu = to_riscv_iommu_pmu(event->pmu);
> > > +       u64 delta, prev, now;
> > > +       u32 idx = hwc->idx;
> > > +
> > > +       do {
> > > +               prev = local64_read(&hwc->prev_count);
> > > +               now = riscv_iommu_pmu_get_counter(pmu, idx);
> > > +       } while (local64_cmpxchg(&hwc->prev_count, prev, now) != prev);
> > > +
> > > +       delta = FIELD_GET(RISCV_IOMMU_IOHPMCTR_COUNTER, now - prev) & pmu->mask_counter;
> > > +       local64_add(delta, &event->count);
> > > +       local64_sub(delta, &hwc->period_left);
> > > +}
> > > +
> > > +static void riscv_iommu_pmu_start(struct perf_event *event, int flags)
> > > +{
> > > +       struct riscv_iommu_pmu *pmu = to_riscv_iommu_pmu(event->pmu);
> > > +       struct hw_perf_event *hwc = &event->hw;
> > > +
> > > +       if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
> > > +               return;
> > > +
> > > +       if (flags & PERF_EF_RELOAD)
> > > +               WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
> > > +
> > > +       hwc->state = 0;
> > > +       riscv_iommu_pmu_set_period(event);
> > > +       riscv_iommu_pmu_set_event(pmu, hwc->idx, hwc->config);
> > > +       riscv_iommu_pmu_enable_ovf_intr(pmu, hwc->idx);
> > > +       riscv_iommu_pmu_enable_counter(pmu, hwc->idx);
> > > +
> > > +       perf_event_update_userpage(event);
> > > +}
> > > +
> > > +static void riscv_iommu_pmu_stop(struct perf_event *event, int flags)
> > > +{
> > > +       struct riscv_iommu_pmu *pmu = to_riscv_iommu_pmu(event->pmu);
> > > +       struct hw_perf_event *hwc = &event->hw;
> > > +
> > > +       if (hwc->state & PERF_HES_STOPPED)
> > > +               return;
> > > +
> > > +       riscv_iommu_pmu_set_event(pmu, hwc->idx, RISCV_IOMMU_HPMEVENT_INVALID);
> > > +       riscv_iommu_pmu_disable_counter(pmu, hwc->idx);
> > > +
> > > +       if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE))
> > > +               riscv_iommu_pmu_update(event);
> > > +
> > > +       hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
> > > +}
> > > +
> > > +static int riscv_iommu_pmu_add(struct perf_event *event, int flags)
> > > +{
> > > +       struct hw_perf_event *hwc = &event->hw;
> > > +       struct riscv_iommu_pmu *pmu = to_riscv_iommu_pmu(event->pmu);
> > > +       unsigned int num_counters = pmu->num_counters;
> > > +       int idx;
> > > +
> > > +       /* Reserve index zero for iohpmcycles */
> > > +       if (get_event(event) == RISCV_IOMMU_HPMEVENT_CYCLE)
> > > +               idx = 0;
> > > +       else
> > > +               idx = find_next_zero_bit(pmu->used_counters, num_counters, 1);
> > > +
> > > +       if (idx == num_counters)
> > > +               return -EAGAIN;
> > > +
> > > +       set_bit(idx, pmu->used_counters);
> > > +
> > > +       pmu->events[idx] = event;
> > > +       hwc->idx = idx;
> > > +       hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
> > > +
> > > +       if (flags & PERF_EF_START)
> > > +               riscv_iommu_pmu_start(event, flags);
> > > +
> > > +       /* Propagate changes to the userspace mapping. */
> > > +       perf_event_update_userpage(event);
> > > +
> > > +       return 0;
> > > +}
> > > +
> > > +static void riscv_iommu_pmu_read(struct perf_event *event)
> > > +{
> > > +       riscv_iommu_pmu_update(event);
> > > +}
> > > +
> > > +static void riscv_iommu_pmu_del(struct perf_event *event, int flags)
> > > +{
> > > +       struct hw_perf_event *hwc = &event->hw;
> > > +       struct riscv_iommu_pmu *pmu = to_riscv_iommu_pmu(event->pmu);
> > > +       int idx = hwc->idx;
> > > +
> > > +       riscv_iommu_pmu_stop(event, PERF_EF_UPDATE);
> > > +       pmu->events[idx] = NULL;
> > > +       clear_bit(idx, pmu->used_counters);
> > > +       perf_event_update_userpage(event);
> > > +}
> > > +
> > > +irqreturn_t riscv_iommu_pmu_handle_irq(struct riscv_iommu_pmu *pmu)
> > > +{
> > > +       struct perf_sample_data data;
> > > +       struct pt_regs *regs;
> > > +       u32 ovf = readl(pmu->reg + RISCV_IOMMU_REG_IOCOUNTOVF);
> > > +       int idx;
> > > +
> > > +       if (!ovf)
> > > +               return IRQ_NONE;
> > > +
> > > +       riscv_iommu_pmu_stop_all(pmu);
> > > +
> > > +       regs = get_irq_regs();
> > > +
> > > +       for_each_set_bit(idx, (unsigned long *)&ovf, pmu->num_counters) {
> > > +               struct perf_event *event = pmu->events[idx];
> > > +               struct hw_perf_event *hwc;
> > > +
> > > +               if (WARN_ON_ONCE(!event) || !is_sampling_event(event))
> > > +                       continue;
> > > +
> > > +               hwc = &event->hw;
> > > +
> > > +               riscv_iommu_pmu_update(event);
> > > +               perf_sample_data_init(&data, 0, hwc->last_period);
> > > +               if (!riscv_iommu_pmu_set_period(event))
> > > +                       continue;
> > > +
> > > +               if (perf_event_overflow(event, &data, regs))
> > > +                       riscv_iommu_pmu_stop(event, 0);
> > > +       }
> > > +
> > > +       riscv_iommu_pmu_start_all(pmu);
> > > +
> > > +       return IRQ_HANDLED;
> > > +}
> > > +
> > > +int riscv_iommu_pmu_init(struct riscv_iommu_pmu *pmu, void __iomem *reg,
> > > +                        const char *dev_name)
> > > +{
> > > +       char *name;
> > > +       int ret;
> > > +
> > > +       pmu->reg = reg;
> > > +       pmu->num_counters = RISCV_IOMMU_HPM_COUNTER_NUM;
> > > +       pmu->mask_counter = RISCV_IOMMU_IOHPMCTR_COUNTER;
> > > +
> > > +       pmu->pmu = (struct pmu) {
> > > +               .task_ctx_nr    = perf_invalid_context,
> > > +               .event_init     = riscv_iommu_pmu_event_init,
> > > +               .add            = riscv_iommu_pmu_add,
> > > +               .del            = riscv_iommu_pmu_del,
> > > +               .start          = riscv_iommu_pmu_start,
> > > +               .stop           = riscv_iommu_pmu_stop,
> > > +               .read           = riscv_iommu_pmu_read,
> > > +               .attr_groups    = riscv_iommu_pmu_attr_grps,
> > > +               .capabilities   = PERF_PMU_CAP_NO_EXCLUDE,
> > > +               .module         = THIS_MODULE,
> > > +       };
> > > +
> > > +       name = kasprintf(GFP_KERNEL, "riscv_iommu_pmu_%s", dev_name);
> >
> > The dev_name of the RISC-V IOMMU is usually 'riscv,iommu'. If we compose
> > the IOMMU PMU name from the IOMMU dev_name, the perf subsystem may not
> > handle the PMU event name correctly because of the ',' in it.
>
> I assume you are referring to the compatible string, since 'riscv,iommu'
> appears to be the compatible string. However, it seems to me that the
> dev_name is derived from the node name rather than the compatible
> string. As a result, there is no comma (',') symbol; instead, a dot
> ('.') symbol is used. For example, if the IOMMU node in the device
> tree is 'iommu@12345678', the dev_name would be '12345678.iommu'.
> Please let me know if I missed anything. Thanks
>

Tested it on qemu. You are right. Please ignore me. Thanks!
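
For completeness, a quick sketch of the name that results for the example
node above (the unit address is only illustrative, taken from your example):

        /*
         * Device-tree node:   iommu@12345678
         * Platform dev_name:  "12345678.iommu"
         * Name the patch composes and passes to perf_pmu_register():
         */
        name = kasprintf(GFP_KERNEL, "riscv_iommu_pmu_%s", dev_name);
        /* -> "riscv_iommu_pmu_12345678.iommu" -- no ',' in the name */

so the counters should show up under
/sys/bus/event_source/devices/riscv_iommu_pmu_12345678.iommu/ with the
"events" and "format" attribute groups defined in the patch.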

> >
> > Best Regards,
> >
> > Xu Lu
> >
> > > +
> > > +       ret = perf_pmu_register(&pmu->pmu, name, -1);
> > > +       if (ret) {
> > > +               pr_err("Failed to register riscv_iommu_pmu_%s: %d\n",
> > > +                      dev_name, ret);
> > > +               return ret;
> > > +       }
> > > +
> > > +       /* Stop all counters; they will be started later via perf */
> > > +       riscv_iommu_pmu_stop_all(pmu);
> > > +
> > > +       pr_info("riscv_iommu_pmu_%s: Registered with %d counters\n",
> > > +               dev_name, pmu->num_counters);
> > > +
> > > +       return 0;
> > > +}
> > > +
> > > +void riscv_iommu_pmu_uninit(struct riscv_iommu_pmu *pmu)
> > > +{
> > > +       int idx;
> > > +
> > > +       /* Disable interrupt and functions */
> > > +       for_each_set_bit(idx, pmu->used_counters, pmu->num_counters) {
> > > +               riscv_iommu_pmu_disable_counter(pmu, idx);
> > > +               riscv_iommu_pmu_disable_ovf_intr(pmu, idx);
> > > +       }
> > > +
> > > +       perf_pmu_unregister(&pmu->pmu);
> > > +}
> > > diff --git a/drivers/iommu/riscv/iommu.h b/drivers/iommu/riscv/iommu.h
> > > index b1c4664542b4..92659a8a75ae 100644
> > > --- a/drivers/iommu/riscv/iommu.h
> > > +++ b/drivers/iommu/riscv/iommu.h
> > > @@ -60,11 +60,19 @@ struct riscv_iommu_device {
> > >         unsigned int ddt_mode;
> > >         dma_addr_t ddt_phys;
> > >         u64 *ddt_root;
> > > +
> > > +       /* hardware performance monitor */
> > > +       struct riscv_iommu_pmu pmu;
> > >  };
> > >
> > >  int riscv_iommu_init(struct riscv_iommu_device *iommu);
> > >  void riscv_iommu_remove(struct riscv_iommu_device *iommu);
> > >
> > > +int riscv_iommu_pmu_init(struct riscv_iommu_pmu *pmu, void __iomem *reg,
> > > +                        const char *name);
> > > +void riscv_iommu_pmu_uninit(struct riscv_iommu_pmu *pmu);
> > > +irqreturn_t riscv_iommu_pmu_handle_irq(struct riscv_iommu_pmu *pmu);
> > > +
> > >  #define riscv_iommu_readl(iommu, addr) \
> > >         readl_relaxed((iommu)->reg + (addr))
> > >
> > > --
> > > 2.17.1
> > >