[RFC LINUX PATCH v1 3/3] perf kvm: implement Xen hypervisor stacktraces

Edwin Török posted 10 patches 4 months, 3 weeks ago
[RFC LINUX PATCH v1 3/3] perf kvm: implement Xen hypervisor stacktraces
Posted by Edwin Török 4 months, 3 weeks ago
Using the new VPMU 0.2 interface.
This is backwards compatible with VPMU 0.1:
the new `struct xen_pmu_hv_stacktrace` is stored at the end of the page,
and stacktrace_nr would be 0 on old hypervisors.

Signed-off-by: Edwin Török <edwin.torok@cloud.com>
---
 arch/x86/events/core.c |  4 ++-
 arch/x86/xen/pmu.c     | 73 ++++++++++++++++++++++++++++++++++++++----
 2 files changed, 70 insertions(+), 7 deletions(-)

diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index ad63bd408cd9..1fca4a77f353 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -2764,12 +2764,14 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *re
 	struct unwind_state state;
 	unsigned long addr;
 
+	perf_hypervisor_callchain(entry, regs);
+
 	if (perf_guest_state()) {
 		/* TODO: We don't support guest os callchain now */
 		return;
 	}
 
-	if (perf_callchain_store(entry, regs->ip))
+	if (!regs->ip || perf_callchain_store(entry, regs->ip))
 		return;
 
 	if (perf_hw_regs(regs))
diff --git a/arch/x86/xen/pmu.c b/arch/x86/xen/pmu.c
index b92dc739fdfb..4996b6904e0b 100644
--- a/arch/x86/xen/pmu.c
+++ b/arch/x86/xen/pmu.c
@@ -19,11 +19,13 @@
 struct xenpmu {
 	/* Shared page between hypervisor and domain */
 	struct xen_pmu_data *xenpmu_data;
+	const struct xen_pmu_hv_stacktrace *xenpmu_hv_stacktrace;
 
 	uint8_t flags;
 };
 static DEFINE_PER_CPU(struct xenpmu, xenpmu_shared);
 #define get_xenpmu_data()    (this_cpu_ptr(&xenpmu_shared)->xenpmu_data)
+#define get_xenpmu_hv_stacktrace()    (this_cpu_ptr(&xenpmu_shared)->xenpmu_hv_stacktrace)
 #define get_xenpmu_flags()   (this_cpu_ptr(&xenpmu_shared)->flags)
 
 /* Macro for computing address of a PMU MSR bank */
@@ -436,8 +438,19 @@ static unsigned int xen_guest_state(void)
 		return state;
 	}
 
-	if (!xen_initial_domain() || (xenpmu_data->domain_id >= DOMID_SELF))
-		return state;
+	if (!xen_initial_domain() || (xenpmu_data->domain_id >= DOMID_SELF)) {
+		if (xenpmu_data->domain_id == DOMID_XEN) {
+			/* when inside Xen we output the hypervisor stacktrace if available,
+			 * but only look at guest stacktrace if this is our domid
+			 */
+			const struct xen_pmu_hv_stacktrace *xenpmu_hv_stacktrace =
+				get_xenpmu_hv_stacktrace();
+			if (!xenpmu_hv_stacktrace ||
+			    xenpmu_hv_stacktrace->guest_domain_id == DOMID_SELF)
+				return state;
+		} else
+			return state;
+	}
 
 	state |= PERF_GUEST_ACTIVE;
 
@@ -463,10 +476,54 @@ static unsigned long xen_get_guest_ip(void)
 	return xenpmu_data->pmu.r.regs.ip;
 }
 
+static void xen_convert_regs(const struct xen_pmu_regs *xen_regs,
+			     struct pt_regs *regs, uint64_t pmu_flags);
+
+static void xen_hypervisor_callchain(struct perf_callchain_entry_ctx *entry,
+				     struct pt_regs *regs)
+{
+	const struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
+	const struct xen_pmu_hv_stacktrace *pmu_stack;
+	unsigned int stacktrace_nr;
+
+	if (!entry || !regs->ip)
+		return;
+
+	if (!xenpmu_data) {
+		pr_warn_once("%s: pmudata not initialized\n", __func__);
+		return;
+	}
+
+	if (xenpmu_data->domain_id != DOMID_XEN)
+		return;
+
+	if (perf_callchain_store(entry, regs->ip))
+		return;
+
+	pmu_stack = get_xenpmu_hv_stacktrace();
+	if (!pmu_stack)	/* shared page not (yet) mapped */
+		return;
+
+	stacktrace_nr = pmu_stack->stacktrace_nr;
+	if (stacktrace_nr > ARRAY_SIZE(pmu_stack->stacktrace)) {
+		pr_warn_once("%s: stacktrace_nr out of bounds: %u\n",
+			     __func__, stacktrace_nr);
+		return;
+	}
+
+	for (unsigned int i = 0; i < stacktrace_nr; i++) {
+		uint64_t addr = pmu_stack->stacktrace[PMU_MAX_STACKTRACE - 1 - i];
+		if (!addr || perf_callchain_store(entry, addr))
+			break;
+	}
+
+	xen_convert_regs(&pmu_stack->guest.r.regs, regs,
+			 xenpmu_data->pmu.pmu_flags);
+}
+
 static struct perf_guest_info_callbacks xen_guest_cbs = {
 	.state                  = xen_guest_state,
 	.get_ip			= xen_get_guest_ip,
-	.hypervisor_callchain   = NULL
+	.hypervisor_callchain   = xen_hypervisor_callchain
 };
 
 /* Convert registers from Xen's format to Linux' */
@@ -490,7 +547,6 @@ static void xen_convert_regs(const struct xen_pmu_regs *xen_regs,
 	}
 }
 
-
 irqreturn_t xen_pmu_irq_handler(int irq, void *dev_id)
 {
 	int err, ret = IRQ_NONE;
@@ -527,7 +583,7 @@ void xen_pmu_init(int cpu)
 {
 	int err;
 	struct xen_pmu_params xp;
-	unsigned long pfn;
+	unsigned long pfn, pmu_page;
 	struct xen_pmu_data *xenpmu_data;
 
 	BUILD_BUG_ON(sizeof(struct xen_pmu_data) > PAGE_SIZE);
@@ -535,7 +591,8 @@ void xen_pmu_init(int cpu)
 	if (xen_hvm_domain() || (cpu != 0 && !is_xen_pmu))
 		return;
 
-	xenpmu_data = (struct xen_pmu_data *)get_zeroed_page(GFP_KERNEL);
+	pmu_page = get_zeroed_page(GFP_KERNEL);
+	xenpmu_data = (struct xen_pmu_data *)pmu_page;
 	if (!xenpmu_data) {
 		pr_err("VPMU init: No memory\n");
 		return;
@@ -551,6 +608,10 @@ void xen_pmu_init(int cpu)
 		goto fail;
 
 	per_cpu(xenpmu_shared, cpu).xenpmu_data = xenpmu_data;
+	per_cpu(xenpmu_shared, cpu).xenpmu_hv_stacktrace =
+		(const struct xen_pmu_hv_stacktrace *)
+			    (pmu_page + PAGE_SIZE -
+			    sizeof(struct xen_pmu_hv_stacktrace));
 	per_cpu(xenpmu_shared, cpu).flags = 0;
 
 	if (!is_xen_pmu) {
-- 
2.47.1