We implement tracing, following uftrace format.
Trace is flushed every 32 MB, so file operations don't impact
performance at runtime.
A different trace is generated per cpu, and we ensure they have a unique
name, based on vcpu_index, while keeping room for privilege level coming
in next commit.
Uftrace format is not officially documented, but it can be found here:
https://github.com/namhyung/uftrace/blob/v0.18/libmcount/record.c#L909
Signed-off-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
---
contrib/plugins/uftrace.c | 150 +++++++++++++++++++++++++++++++++++++-
1 file changed, 149 insertions(+), 1 deletion(-)
diff --git a/contrib/plugins/uftrace.c b/contrib/plugins/uftrace.c
index bb775916270..830967c215b 100644
--- a/contrib/plugins/uftrace.c
+++ b/contrib/plugins/uftrace.c
@@ -12,6 +12,13 @@
#include <qemu-plugin.h>
#include <glib.h>
#include <stdio.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <time.h>
+#include <unistd.h>
+
+#define MiB (INT64_C(1) << 20)
+#define NANOSECONDS_PER_SECOND 1000000000LL
QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;
@@ -24,6 +31,13 @@ typedef struct {
uint64_t frame_pointer;
} CallstackEntry;
+typedef struct {
+ GArray *t;
+ GString *path;
+ GString *name;
+ uint32_t id;
+} Trace;
+
typedef struct Cpu Cpu;
typedef struct {
@@ -34,6 +48,7 @@ typedef struct {
} CpuOps;
typedef struct Cpu {
+ Trace *trace;
Callstack *cs;
GByteArray *buf;
CpuOps ops;
@@ -44,9 +59,41 @@ typedef struct {
struct qemu_plugin_register *reg_fp;
} Aarch64Cpu;
+typedef struct {
+ uint64_t timestamp;
+ uint64_t data;
+} UftraceEntry;
+
+typedef enum {
+ UFTRACE_ENTRY,
+ UFTRACE_EXIT,
+ UFTRACE_LOST,
+ UFTRACE_EVENT
+} UftraceRecordType;
+
static struct qemu_plugin_scoreboard *score;
static CpuOps arch_ops;
+static uint64_t gettime_ns(void)
+{
+#ifdef _WIN32
+ /*
+ * On Windows, timespec_get is available only with UCRT, but not with
+ * MinGW64 environment. Simplify by using only gettimeofday on this
+ * platform. This may result in a precision loss.
+ */
+ struct timeval tv;
+ gettimeofday(&tv, NULL);
+ uint64_t now_ns = tv.tv_sec * NANOSECONDS_PER_SECOND + tv.tv_usec * 1000;
+#else
+ /* We need nanosecond precision for short lived functions. */
+ struct timespec ts;
+ timespec_get(&ts, TIME_UTC);
+ uint64_t now_ns = ts.tv_sec * NANOSECONDS_PER_SECOND + ts.tv_nsec;
+#endif
+ return now_ns;
+}
+
static Callstack *callstack_new(void)
{
Callstack *cs = g_new0(Callstack, 1);
@@ -112,6 +159,86 @@ static CallstackEntry callstack_pop(Callstack *cs)
return e;
}
+static Trace *trace_new(uint32_t id, GString *name)
+{
+ Trace *t = g_new0(Trace, 1);
+ t->t = g_array_new(false, false, sizeof(UftraceEntry));
+ t->path = g_string_new(NULL);
+ g_string_append_printf(t->path, "./uftrace.data/%"PRIu32".dat", id);
+ t->name = g_string_new(name->str);
+ t->id = id;
+ return t;
+}
+
+static void trace_free(Trace *t)
+{
+ g_assert(t->t->len == 0);
+ g_array_free(t->t, true);
+ t->t = NULL;
+ g_string_free(t->path, true);
+ t->path = NULL;
+ g_string_free(t->name, true);
+ t->name = NULL;
+ g_free(t);
+}
+
+static void trace_flush(Trace *t, bool append)
+{
+ int create_dir = g_mkdir_with_parents("./uftrace.data",
+ S_IRWXU | S_IRWXG | S_IRWXO);
+ g_assert(create_dir == 0);
+ FILE *dat = fopen(t->path->str, append ? "a" : "w");
+ g_assert(dat);
+ GArray *data = t->t;
+ if (data->len) {
+ size_t wrote = fwrite(data->data, sizeof(UftraceEntry), data->len, dat);
+ g_assert(wrote == data->len);
+ }
+ fclose(dat);
+ g_array_set_size(data, 0);
+}
+
+static void trace_add_entry(Trace *t, uint64_t timestamp, uint64_t pc,
+ size_t depth, UftraceRecordType type)
+{
+ /* https://github.com/namhyung/uftrace/blob/v0.18/libmcount/record.c#L909 */
+ const uint64_t record_magic = 0x5;
+ uint64_t data = type | record_magic << 3;
+ data += depth << 6;
+ data += pc << 16;
+ UftraceEntry e = {.timestamp = timestamp, .data = data};
+ g_array_append_val(t->t, e);
+ if (t->t->len * sizeof(UftraceEntry) > 32 * MiB) {
+ trace_flush(t, true);
+ }
+}
+
+static void trace_enter_function(Trace *t, uint64_t timestamp,
+ uint64_t pc, size_t depth)
+{
+ trace_add_entry(t, timestamp, pc, depth, UFTRACE_ENTRY);
+}
+
+static void trace_exit_function(Trace *t, uint64_t timestamp,
+ uint64_t pc, size_t depth)
+{
+ trace_add_entry(t, timestamp, pc, depth, UFTRACE_EXIT);
+}
+
+static void trace_enter_stack(Trace *t, Callstack *cs, uint64_t timestamp)
+{
+ for (size_t depth = 1; depth <= callstack_depth(cs); ++depth) {
+ trace_enter_function(t, timestamp, callstack_at(cs, depth)->pc, depth);
+ }
+}
+
+static void trace_exit_stack(Trace *t, Callstack *cs, uint64_t timestamp)
+{
+ for (size_t depth = callstack_depth(cs); depth > 0; --depth) {
+ trace_exit_function(t, timestamp, callstack_at(cs, depth)->pc, depth);
+ }
+}
+
static uint64_t cpu_read_register64(Cpu *cpu, struct qemu_plugin_register *reg)
{
GByteArray *buf = cpu->buf;
@@ -223,7 +350,9 @@ static void track_callstack(unsigned int cpu_index, void *udata)
{
uint64_t pc = (uintptr_t) udata;
Cpu *cpu = qemu_plugin_scoreboard_find(score, cpu_index);
+ uint64_t timestamp = gettime_ns();
Callstack *cs = cpu->cs;
+ Trace *t = cpu->trace;
uint64_t fp = cpu->ops.get_frame_pointer(cpu);
if (!fp && callstack_empty(cs)) {
@@ -232,6 +361,7 @@ static void track_callstack(unsigned int cpu_index, void *udata)
* long as a proper call does not happen.
*/
callstack_push(cs, (CallstackEntry){.frame_pointer = fp, .pc = pc});
+ trace_enter_function(t, timestamp, pc, callstack_depth(cs));
return;
}
@@ -244,7 +374,8 @@ static void track_callstack(unsigned int cpu_index, void *udata)
CallstackEntry caller = callstack_caller(cs);
if (fp == caller.frame_pointer) {
/* return */
- callstack_pop(cs);
+ CallstackEntry e = callstack_pop(cs);
+ trace_exit_function(t, timestamp, e.pc, callstack_depth(cs));
return;
}
@@ -252,12 +383,16 @@ static void track_callstack(unsigned int cpu_index, void *udata)
if (caller_fp == top.frame_pointer) {
/* call */
callstack_push(cs, (CallstackEntry){.frame_pointer = fp, .pc = pc});
+ trace_enter_function(t, timestamp, pc, callstack_depth(cs));
return;
}
/* discontinuity, exit current stack and unwind new one */
+ trace_exit_stack(t, cs, timestamp);
callstack_clear(cs);
+
cpu_unwind_stack(cpu, fp, pc);
+ trace_enter_stack(t, cs, timestamp);
}
static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
@@ -297,6 +432,16 @@ static void vcpu_init(qemu_plugin_id_t id, unsigned int vcpu_index)
cpu->ops.init(cpu);
cpu->buf = g_byte_array_new();
+ g_assert(vcpu_index < UINT32_MAX / 100);
+ /* trace_id is: cpu_number * 100 */
+ uint32_t trace_id = (vcpu_index + 1) * 100;
+
+ g_autoptr(GString) trace_name = g_string_new(NULL);
+ g_string_append_printf(trace_name, "cpu%u", vcpu_index);
+ cpu->trace = trace_new(trace_id, trace_name);
+ /* create/truncate trace file */
+ trace_flush(cpu->trace, false);
+
cpu->cs = callstack_new();
}
@@ -305,6 +450,7 @@ static void vcpu_end(unsigned int vcpu_index)
Cpu *cpu = qemu_plugin_scoreboard_find(score, vcpu_index);
g_byte_array_free(cpu->buf, true);
+ trace_free(cpu->trace);
callstack_free(cpu->cs);
memset(cpu, 0, sizeof(Cpu));
}
@@ -312,6 +458,8 @@ static void vcpu_end(unsigned int vcpu_index)
static void at_exit(qemu_plugin_id_t id, void *data)
{
for (size_t i = 0; i < qemu_plugin_num_vcpus(); ++i) {
+ Cpu *cpu = qemu_plugin_scoreboard_find(score, i);
+ trace_flush(cpu->trace, true);
vcpu_end(i);
}
--
2.47.2
On 8/8/25 22:41, Pierrick Bouvier wrote:
> We implement tracing, following uftrace format.
> Trace is flushed every 32 MB, so file operations don't impact
> performance at runtime.
>
> A different trace is generated per cpu, and we ensure they have a unique
> name, based on vcpu_index, while keeping room for privilege level coming
> in next commit.
>
> Uftrace format is not officially documented, but it can be found here:
> https://github.com/namhyung/uftrace/blob/v0.18/libmcount/record.c#L909
>
> Signed-off-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
> ---
> contrib/plugins/uftrace.c | 150 +++++++++++++++++++++++++++++++++++++-
> 1 file changed, 149 insertions(+), 1 deletion(-)
>
> diff --git a/contrib/plugins/uftrace.c b/contrib/plugins/uftrace.c
> index bb775916270..830967c215b 100644
> --- a/contrib/plugins/uftrace.c
> +++ b/contrib/plugins/uftrace.c
> @@ -12,6 +12,13 @@
> #include <qemu-plugin.h>
> #include <glib.h>
> #include <stdio.h>
> +#include <sys/stat.h>
> +#include <sys/time.h>
> +#include <time.h>
> +#include <unistd.h>
> +
> +#define MiB (INT64_C(1) << 20)
> +#define NANOSECONDS_PER_SECOND 1000000000LL
#define TRACE_FLUSH_SIZE (32 * MiB)
#define TRACE_ID_SCALE 100
(and use)
> @@ -44,9 +59,41 @@ typedef struct {
> struct qemu_plugin_register *reg_fp;
> } Aarch64Cpu;
>
> +typedef struct {
> + uint64_t timestamp;
> + uint64_t data;
> +} UftraceEntry;
> +
> +typedef enum {
> + UFTRACE_ENTRY,
> + UFTRACE_EXIT,
> + UFTRACE_LOST,
> + UFTRACE_EVENT
UFTRACE_EVENT,
(in case more types appear)
> +} UftraceRecordType;
> +static void trace_add_entry(Trace *t, uint64_t timestamp, uint64_t pc,
> + size_t depth, UftraceRecordType type)
> +{
> + /* https://github.com/namhyung/uftrace/blob/v0.18/libmcount/record.c#L909 */
> + const uint64_t record_magic = 0x5;
> + uint64_t data = type | record_magic << 3;
uint64_t data = type | (record_magic << 3);
> + data += depth << 6;
> + data += pc << 16;
> + UftraceEntry e = {.timestamp = timestamp, .data = data};
> + g_array_append_val(t->t, e);
> + if (t->t->len * sizeof(UftraceEntry) > 32 * MiB) {
> + trace_flush(t, true);
> + }
> +}
> static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
> @@ -297,6 +432,16 @@ static void vcpu_init(qemu_plugin_id_t id, unsigned int vcpu_index)
> cpu->ops.init(cpu);
> cpu->buf = g_byte_array_new();
>
> + g_assert(vcpu_index < UINT32_MAX / 100);
> + /* trace_id is: cpu_number * 100 */
> + uint32_t trace_id = (vcpu_index + 1) * 100;
> +
> + g_autoptr(GString) trace_name = g_string_new(NULL);
> + g_string_append_printf(trace_name, "cpu%u", vcpu_index);
g_autofree char *trace_name = g_strdup_printf("cpu%u", vcpu_index);
and pass (const?) char * to trace_new().
> + cpu->trace = trace_new(trace_id, trace_name);
> + /* create/truncate trace file */
> + trace_flush(cpu->trace, false);
> +
> cpu->cs = callstack_new();
> }
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
On 8/8/25 2:45 PM, Philippe Mathieu-Daudé wrote:
> On 8/8/25 22:41, Pierrick Bouvier wrote:
>> We implement tracing, following uftrace format.
>> Trace is flushed every 32 MB, so file operations don't impact
>> performance at runtime.
>>
>> A different trace is generated per cpu, and we ensure they have a unique
>> name, based on vcpu_index, while keeping room for privilege level coming
>> in next commit.
>>
>> Uftrace format is not officially documented, but it can be found here:
>> https://github.com/namhyung/uftrace/blob/v0.18/libmcount/record.c#L909
>>
>> Signed-off-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
>> ---
>> contrib/plugins/uftrace.c | 150 +++++++++++++++++++++++++++++++++++++-
>> 1 file changed, 149 insertions(+), 1 deletion(-)
>>
>> diff --git a/contrib/plugins/uftrace.c b/contrib/plugins/uftrace.c
>> index bb775916270..830967c215b 100644
>> --- a/contrib/plugins/uftrace.c
>> +++ b/contrib/plugins/uftrace.c
>> @@ -12,6 +12,13 @@
>> #include <qemu-plugin.h>
>> #include <glib.h>
>> #include <stdio.h>
>> +#include <sys/stat.h>
>> +#include <sys/time.h>
>> +#include <time.h>
>> +#include <unistd.h>
>> +
>> +#define MiB (INT64_C(1) << 20)
>> +#define NANOSECONDS_PER_SECOND 1000000000LL
>
> #define TRACE_FLUSH_SIZE (32 * MiB)
>
> #define TRACE_ID_SCALE 100
>
> (and use)
>
Ok.
>> @@ -44,9 +59,41 @@ typedef struct {
>> struct qemu_plugin_register *reg_fp;
>> } Aarch64Cpu;
>>
>> +typedef struct {
>> + uint64_t timestamp;
>> + uint64_t data;
>> +} UftraceEntry;
>> +
>> +typedef enum {
>> + UFTRACE_ENTRY,
>> + UFTRACE_EXIT,
>> + UFTRACE_LOST,
>> + UFTRACE_EVENT
>
> UFTRACE_EVENT,
>
Ok.
> (in case more types appear)
>
>> +} UftraceRecordType;
>
>
>> +static void trace_add_entry(Trace *t, uint64_t timestamp, uint64_t pc,
>> + size_t depth, UftraceRecordType type)
>> +{
>> + /* https://github.com/namhyung/uftrace/blob/v0.18/libmcount/record.c#L909 */
>> + const uint64_t record_magic = 0x5;
>> + uint64_t data = type | record_magic << 3;
>
> uint64_t data = type | (record_magic << 3);
>
There is nothing ambiguous here (by operator precedence), but I'll add
it if it can help with readability.
>> + data += depth << 6;
>> + data += pc << 16;
>> + UftraceEntry e = {.timestamp = timestamp, .data = data};
>> + g_array_append_val(t->t, e);
>> + if (t->t->len * sizeof(UftraceEntry) > 32 * MiB) {
>> + trace_flush(t, true);
>> + }
>> +}
>
>
>> static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
>> @@ -297,6 +432,16 @@ static void vcpu_init(qemu_plugin_id_t id, unsigned int vcpu_index)
>> cpu->ops.init(cpu);
>> cpu->buf = g_byte_array_new();
>>
>> + g_assert(vcpu_index < UINT32_MAX / 100);
>> + /* trace_id is: cpu_number * 100 */
>> + uint32_t trace_id = (vcpu_index + 1) * 100;
>> +
>> + g_autoptr(GString) trace_name = g_string_new(NULL);
>> + g_string_append_printf(trace_name, "cpu%u", vcpu_index);
>
> g_autofree char *trace_name = g_strdup_printf("cpu%u", vcpu_index);
>
> and pass (const?) char * to trace_new().
>
I tried to do something clean with GString everywhere for
dynamic/mutable strings, and const char* for immutable ones, favoring
readability and clear memory ownership over C char pointer soup. It does
not optimize anything important in this context since it's called only
once during the program execution.
Thus, I would prefer to keep it as it is if that's acceptable for you.
>> + cpu->trace = trace_new(trace_id, trace_name);
>> + /* create/truncate trace file */
>> + trace_flush(cpu->trace, false);
>> +
>> cpu->cs = callstack_new();
>> }
>
> Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
>
On 9/8/25 00:05, Pierrick Bouvier wrote:
> On 8/8/25 2:45 PM, Philippe Mathieu-Daudé wrote:
>> On 8/8/25 22:41, Pierrick Bouvier wrote:
>>> We implement tracing, following uftrace format.
>>> Trace is flushed every 32 MB, so file operations don't impact
>>> performance at runtime.
>>>
>>> A different trace is generated per cpu, and we ensure they have a unique
>>> name, based on vcpu_index, while keeping room for privilege level coming
>>> in next commit.
>>>
>>> Uftrace format is not officially documented, but it can be found here:
>>> https://github.com/namhyung/uftrace/blob/v0.18/libmcount/record.c#L909
>>>
>>> Signed-off-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
>>> ---
>>> contrib/plugins/uftrace.c | 150 ++++++++++++++++++++++++++++++++++
>>> +++-
>>> 1 file changed, 149 insertions(+), 1 deletion(-)
>>> @@ -297,6 +432,16 @@ static void vcpu_init(qemu_plugin_id_t id,
>>> unsigned int vcpu_index)
>>> cpu->ops.init(cpu);
>>> cpu->buf = g_byte_array_new();
>>> + g_assert(vcpu_index < UINT32_MAX / 100);
>>> + /* trace_id is: cpu_number * 100 */
>>> + uint32_t trace_id = (vcpu_index + 1) * 100;
>>> +
>>> + g_autoptr(GString) trace_name = g_string_new(NULL);
>>> + g_string_append_printf(trace_name, "cpu%u", vcpu_index);
>>
>> g_autofree char *trace_name = g_strdup_printf("cpu%u", vcpu_index);
>>
>> and pass (const?) char * to trace_new().
>>
>
> I tried to do something clean with GString everywhere for dynamic/
> mutable strings, and const char* for immutable ones, favoring
> readability and clear memory ownership over C char pointer soup. It does
> not optimize anything important in this context since it's called only
> once during the program execution.
>
> Thus, I would prefer to keep it as it is if that's acceptable for you.
No problem.
>>> + cpu->trace = trace_new(trace_id, trace_name);
>>> + /* create/truncate trace file */
>>> + trace_flush(cpu->trace, false);
>>> +
>>> cpu->cs = callstack_new();
>>> }
>>
>> Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
>>
>
© 2016 - 2025 Red Hat, Inc.