usage: trace-sample=N
Allow to use sampling (every N instructions) for tracking the stack.
We implement a fast mode, where instrumentation is only per tb, and
simply dump current stack, and unwind new one, instead of tracking every
frame pointer change.
Signed-off-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
---
contrib/plugins/uftrace.c | 111 +++++++++++++++++++++++++++++++++++---
1 file changed, 105 insertions(+), 6 deletions(-)
diff --git a/contrib/plugins/uftrace.c b/contrib/plugins/uftrace.c
index d55eb32c5e8..1f3b8aaf828 100644
--- a/contrib/plugins/uftrace.c
+++ b/contrib/plugins/uftrace.c
@@ -43,6 +43,9 @@ typedef struct {
typedef struct Cpu {
uint64_t insn_count;
+ uint64_t sample_insn_count;
+ uint64_t sample_timestamp;
+ callstack *sample_cs;
trace *trace;
callstack *cs;
GArray *callstacks; /* callstack *callstacks[] */
@@ -69,6 +72,7 @@ enum uftrace_record_type {
};
static struct qemu_plugin_scoreboard *score;
+static uint64_t trace_sample;
static CpuOps arch_ops;
static void uftrace_write_map(bool system_emulation)
@@ -198,6 +202,13 @@ static callstack *callstack_new(void)
return cs;
}
+static callstack *callstack_clone(const callstack *cs)
+{
+ callstack *clone = g_malloc0(sizeof(callstack));
+ clone->s = g_array_copy(cs->s);
+ return clone;
+}
+
static void callstack_free(callstack *cs)
{
g_array_free(cs->s, true);
@@ -390,6 +401,26 @@ static void cpu_unwind_stack(Cpu *cpu, uint64_t frame_pointer, uint64_t pc)
}
}
+static void cpu_trace_last_sample(Cpu *cpu, uint64_t timestamp)
+{
+ if (!cpu->sample_cs) {
+ return;
+ }
+ uint64_t elapsed = timestamp - cpu->sample_timestamp;
+ uint64_t middle_timestamp = cpu->sample_timestamp + (elapsed / 2);
+ trace_exit_stack(cpu->trace, cpu->sample_cs, middle_timestamp);
+ callstack_free(cpu->sample_cs);
+ cpu->sample_cs = NULL;
+ trace_enter_stack(cpu->trace, cpu->cs, middle_timestamp);
+}
+
+static void cpu_set_new_sample(Cpu *cpu, uint64_t timestamp)
+{
+ cpu->sample_insn_count = 0;
+ cpu->sample_cs = callstack_clone(cpu->cs);
+ cpu->sample_timestamp = timestamp;
+}
+
static uint64_t cpu_get_timestamp(const Cpu *cpu)
{
return cpu->insn_count;
@@ -450,6 +481,13 @@ static void track_callstack(unsigned int cpu_index, void *udata)
callstack *cs = cpu->cs;
trace *t = cpu->trace;
+ if (trace_sample && cpu->sample_insn_count >= trace_sample) {
+ cpu_trace_last_sample(cpu, timestamp);
+ cpu_set_new_sample(cpu, timestamp);
+ }
+
+ bool trace_change = !trace_sample;
+
uint64_t fp = cpu->ops.get_frame_pointer(cpu);
if (!fp && callstack_empty(cs)) {
/*
@@ -458,7 +496,9 @@ static void track_callstack(unsigned int cpu_index, void *udata)
*/
callstack_push(cs, (callstack_entry){.frame_pointer = fp,
.pc = pc});
- trace_enter_function(t, timestamp, pc, callstack_depth(cs));
+ if (trace_change) {
+ trace_enter_function(t, timestamp, pc, callstack_depth(cs));
+ }
return;
}
@@ -472,7 +512,9 @@ static void track_callstack(unsigned int cpu_index, void *udata)
if (fp == caller.frame_pointer) {
/* return */
callstack_entry e = callstack_pop(cs);
- trace_exit_function(t, timestamp, e.pc, callstack_depth(cs));
+ if (trace_change) {
+ trace_exit_function(t, timestamp, e.pc, callstack_depth(cs));
+ }
return;
}
@@ -481,27 +523,65 @@ static void track_callstack(unsigned int cpu_index, void *udata)
/* call */
callstack_push(cs, (callstack_entry){.frame_pointer = fp,
.pc = pc});
- trace_enter_function(t, timestamp, pc, callstack_depth(cs));
+ if (trace_change) {
+ trace_enter_function(t, timestamp, pc, callstack_depth(cs));
+ }
return;
}
/* discontinuity, exit current stack and unwind new one */
- trace_exit_stack(t, cs, timestamp);
+ if (trace_change) {
+ trace_exit_stack(t, cs, timestamp);
+ }
callstack_clear(cs);
cpu_unwind_stack(cpu, fp, pc);
- trace_enter_stack(t, cs, timestamp);
+ if (trace_change) {
+ trace_enter_stack(t, cs, timestamp);
+ }
+}
+
+static void sample_callstack(unsigned int cpu_index, void *udata)
+{
+ uint64_t pc = (uintptr_t) udata;
+ Cpu *cpu = qemu_plugin_scoreboard_find(score, cpu_index);
+ uint64_t timestamp = cpu_get_timestamp(cpu);
+
+ trace_exit_stack(cpu->trace, cpu->cs, timestamp);
+ callstack_clear(cpu->cs);
+
+ cpu_unwind_stack(cpu, cpu->ops.get_frame_pointer(cpu), pc);
+ trace_enter_stack(cpu->trace, cpu->cs, timestamp);
+
+ /* reset counter */
+ cpu->sample_insn_count = 0;
}
static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
{
size_t n_insns = qemu_plugin_tb_n_insns(tb);
+ uintptr_t tb_pc = qemu_plugin_tb_vaddr(tb);
qemu_plugin_u64 insn_count = qemu_plugin_scoreboard_u64_in_struct(
score, Cpu, insn_count);
+ qemu_plugin_u64 sample_insn_count = qemu_plugin_scoreboard_u64_in_struct(
+ score, Cpu, sample_insn_count);
+
+ if (trace_sample) {
+ /* We can do a light instrumentation, per tb only */
+ qemu_plugin_register_vcpu_tb_exec_inline_per_vcpu(
+ tb, QEMU_PLUGIN_INLINE_ADD_U64, insn_count, n_insns);
+ qemu_plugin_register_vcpu_tb_exec_inline_per_vcpu(
+ tb, QEMU_PLUGIN_INLINE_ADD_U64, sample_insn_count, n_insns);
+ qemu_plugin_register_vcpu_tb_exec_cond_cb(
+ tb, sample_callstack, QEMU_PLUGIN_CB_R_REGS,
+ QEMU_PLUGIN_COND_GE, sample_insn_count, trace_sample,
+ (void *) tb_pc);
+ return;
+ }
/*
- * We instrument all instructions following one that might have updated
+ * We now instrument all instructions following one that might have updated
* the frame pointer. We always instrument first instruction in block, as
* last executed instruction, in previous tb, may have modified it.
*/
@@ -510,6 +590,8 @@ static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
struct qemu_plugin_insn *insn = qemu_plugin_tb_get_insn(tb, i);
qemu_plugin_register_vcpu_insn_exec_inline_per_vcpu(
insn, QEMU_PLUGIN_INLINE_ADD_U64, insn_count, 1);
+ qemu_plugin_register_vcpu_insn_exec_inline_per_vcpu(
+ insn, QEMU_PLUGIN_INLINE_ADD_U64, sample_insn_count, 1);
if (instrument_insn) {
uintptr_t pc = qemu_plugin_insn_vaddr(insn);
@@ -535,6 +617,7 @@ static void vcpu_init(qemu_plugin_id_t id, unsigned int vcpu_index)
cpu->buf = g_byte_array_new();
cpu->callstacks = g_array_new(0, 0, sizeof(callstack *));
cpu->traces = g_array_new(0, 0, sizeof(trace *));
+ cpu->sample_timestamp = cpu_get_timestamp(cpu);
g_assert(vcpu_index < 1000);
uint32_t trace_id = 1000 * 1000 + vcpu_index * 1000;
@@ -612,6 +695,22 @@ QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id,
const qemu_info_t *info,
int argc, char **argv)
{
+ for (int i = 0; i < argc; i++) {
+ char *opt = argv[i];
+ g_auto(GStrv) tokens = g_strsplit(opt, "=", 2);
+ if (g_strcmp0(tokens[0], "trace-sample") == 0) {
+ gint64 value = g_ascii_strtoll(tokens[1], NULL, 10);
+ if (value <= 0) {
+ fprintf(stderr, "bad trace-sample value: %s\n", tokens[1]);
+ return -1;
+ }
+ trace_sample = value;
+ } else {
+ fprintf(stderr, "option parsing failed: %s\n", opt);
+ return -1;
+ }
+ }
+
if (!strcmp(info->target_name, "aarch64")) {
arch_ops = aarch64_ops;
} else {
--
2.47.2
© 2016 - 2025 Red Hat, Inc.