[PATCH 4/8] iommu/vt-d: Add trace events for PASID entry sync updates

Lu Baolu posted 8 patches 1 month ago
[PATCH 4/8] iommu/vt-d: Add trace events for PASID entry sync updates
Posted by Lu Baolu 1 month ago
The entry_sync library introduces a more complex, multi-step update
process for PASID table entries to enable hitless transitions. Add a
set of trace events specifically for the Intel PASID sync plumbing.

The following trace events are added:

- entry_write_start / entry_write_complete: Capture the low 256 bits
  (four 64-bit words) of both the target and the current 512-bit PASID
  entry before and after the entry_sync library performs its update
  logic. This allows verification of the final output compared to the
  target.

- entry_get_used: Logs the current entry alongside the calculated "used
  bits" mask. This is critical for debugging the library's decision-making
  process regarding whether an update can be hitless or must be disruptive.

- entry_sync: Tracks the state transitions (was_present vs. is_present)
  within the entry_sync callback. This helps verify that the correct cache
  invalidations and IOTLB flushes are being triggered for specific
  transitions (e.g., P=1 to P=1 hitless vs. P=1 to P=0 disruptive).

Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
---
 drivers/iommu/intel/trace.h | 107 ++++++++++++++++++++++++++++++++++++
 drivers/iommu/intel/pasid.c |  11 +++-
 2 files changed, 117 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/intel/trace.h b/drivers/iommu/intel/trace.h
index 6311ba3f1691..b0ccda6f8dc5 100644
--- a/drivers/iommu/intel/trace.h
+++ b/drivers/iommu/intel/trace.h
@@ -181,6 +181,113 @@ DEFINE_EVENT(cache_tag_flush, cache_tag_flush_range_np,
 		 unsigned long addr, unsigned long pages, unsigned long mask),
 	TP_ARGS(tag, start, end, addr, pages, mask)
 );
+
+DECLARE_EVENT_CLASS(entry_write,	/* device name, PASID, target and current entry words */
+	TP_PROTO(struct device *dev, u32 pasid, u128 *target, u128 *curr),
+	TP_ARGS(dev, pasid, target, curr),
+
+	TP_STRUCT__entry(
+		__string(dev, dev_name(dev))
+		__field(u32, pasid)
+		__field(u64, t_w3)
+		__field(u64, t_w2)
+		__field(u64, t_w1)
+		__field(u64, t_w0)
+		__field(u64, c_w3)
+		__field(u64, c_w2)
+		__field(u64, c_w1)
+		__field(u64, c_w0)
+	),
+
+	TP_fast_assign(
+		__assign_str(dev);
+		__entry->pasid = pasid;
+		/* Target entry: target[0..1] as four 64-bit words, w0 lowest */
+		__entry->t_w0 = (u64)target[0];
+		__entry->t_w1 = (u64)(target[0] >> 64);
+		__entry->t_w2 = (u64)target[1];
+		__entry->t_w3 = (u64)(target[1] >> 64);
+		/* Current entry: curr[0..1], split the same way */
+		__entry->c_w0 = (u64)curr[0];
+		__entry->c_w1 = (u64)(curr[0] >> 64);
+		__entry->c_w2 = (u64)curr[1];
+		__entry->c_w3 = (u64)(curr[1] >> 64);
+	),
+
+	TP_printk("%s[%u] target %016llx:%016llx:%016llx:%016llx, current %016llx:%016llx:%016llx:%016llx",
+		  __get_str(dev), __entry->pasid,	/* words below print w3 first, w0 last */
+		  __entry->t_w3, __entry->t_w2, __entry->t_w1, __entry->t_w0,
+		  __entry->c_w3, __entry->c_w2, __entry->c_w1, __entry->c_w0
+	)
+);
+
+DEFINE_EVENT(entry_write, entry_write_start,	/* emitted in intel_pasid_write() before entry_sync_write128() */
+	TP_PROTO(struct device *dev, u32 pasid, u128 *target, u128 *curr),
+	TP_ARGS(dev, pasid, target, curr)
+);
+
+DEFINE_EVENT(entry_write, entry_write_complete,	/* emitted in intel_pasid_write() after entry_sync_write128() */
+	TP_PROTO(struct device *dev, u32 pasid, u128 *target, u128 *curr),
+	TP_ARGS(dev, pasid, target, curr)
+);
+
+TRACE_EVENT(entry_get_used,	/* logs an entry next to its computed "used" mask */
+	TP_PROTO(const u128 *pe, u128 *used),
+	TP_ARGS(pe, used),
+
+	TP_STRUCT__entry(
+		__field(u64, e_w3)
+		__field(u64, e_w2)
+		__field(u64, e_w1)
+		__field(u64, e_w0)
+		__field(u64, u_w3)
+		__field(u64, u_w2)
+		__field(u64, u_w1)
+		__field(u64, u_w0)
+	),
+
+	TP_fast_assign(	/* w0/w1: low/high half of element [0]; w2/w3: of element [1] */
+		__entry->e_w0 = (u64)pe[0];
+		__entry->e_w1 = (u64)(pe[0] >> 64);
+		__entry->e_w2 = (u64)pe[1];
+		__entry->e_w3 = (u64)(pe[1] >> 64);
+
+		__entry->u_w0 = (u64)used[0];
+		__entry->u_w1 = (u64)(used[0] >> 64);
+		__entry->u_w2 = (u64)used[1];
+		__entry->u_w3 = (u64)(used[1] >> 64);
+	),
+
+	TP_printk("entry %016llx:%016llx:%016llx:%016llx, used %016llx:%016llx:%016llx:%016llx",
+		  __entry->e_w3, __entry->e_w2, __entry->e_w1, __entry->e_w0,	/* w3 first */
+		  __entry->u_w3, __entry->u_w2, __entry->u_w1, __entry->u_w0
+	)
+);
+
+TRACE_EVENT(entry_sync,	/* logs the was_present/is_present flags passed by intel_pasid_sync() */
+	TP_PROTO(struct device *dev, u32 pasid, bool was_present, bool is_present),
+	TP_ARGS(dev, pasid, was_present, is_present),
+
+	TP_STRUCT__entry(
+		__string(dev, dev_name(dev))
+		__field(u32, pasid)
+		__field(bool, was_present)
+		__field(bool, is_present)
+	),
+
+	TP_fast_assign(
+		__assign_str(dev);
+		__entry->pasid = pasid;
+		__entry->was_present = was_present;
+		__entry->is_present = is_present;
+	),
+
+	TP_printk("%s[%u] was %s, is now %s",
+		  __get_str(dev), __entry->pasid,	/* flags are rendered as text, not 0/1 */
+		  __entry->was_present ? "present" : "non-present",
+		  __entry->is_present ? "present" : "non-present"
+	)
+);
 #endif /* _TRACE_INTEL_IOMMU_H */
 
 /* This part must be outside protection */
diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c
index 5b9eb5c8f42d..b7c8888afaef 100644
--- a/drivers/iommu/intel/pasid.c
+++ b/drivers/iommu/intel/pasid.c
@@ -20,6 +20,7 @@
 
 #include "iommu.h"
 #include "pasid.h"
+#include "trace.h"
 #include "../iommu-pages.h"
 #include "../entry_sync.h"
 
@@ -68,8 +69,10 @@ static void intel_pasid_get_used(const u128 *entry, u128 *used)
 	ue->val[0] |= PASID_PTE_PRESENT;
 
 	/* Nothing more for non-present entries. */
-	if (!(pe->val[0] & PASID_PTE_PRESENT))
+	if (!(pe->val[0] & PASID_PTE_PRESENT)) {
+		trace_entry_get_used(entry, used);
 		return;
+	}
 
 	pgtt = pasid_pte_get_pgtt(pe);
 	switch (pgtt) {
@@ -107,6 +110,8 @@ static void intel_pasid_get_used(const u128 *entry, u128 *used)
 	default:
 		WARN_ON(true);
 	}
+
+	trace_entry_get_used(entry, used);
 }
 
 static void intel_pasid_sync(struct entry_sync_writer128 *writer)
@@ -132,6 +137,8 @@ static void intel_pasid_sync(struct entry_sync_writer128 *writer)
 	if (!ecap_coherent(iommu->ecap))
 		clflush_cache_range(pte, sizeof(*pte));
 
+	trace_entry_sync(dev, pasid, was_present, is_present);
+
 	/* Sync for "P=0" to "P=1": */
 	if (!was_present) {
 		if (is_present)
@@ -195,7 +202,9 @@ static int __maybe_unused intel_pasid_write(struct intel_iommu *iommu,
 	 * 1. Checks if it can do a 1-quanta hitless flip.
 	 * 2. If not, it does a 3-step V=0 (disruptive) update.
 	 */
+	trace_entry_write_start(dev, pasid, target, (u128 *)pte);
 	entry_sync_write128(&p_writer.writer, (u128 *)pte, target, memory, sizeof(memory));
+	trace_entry_write_complete(dev, pasid, target, (u128 *)pte);
 
 	return 0;
 }
-- 
2.43.0