From nobody Fri Feb 13 03:15:19 2026 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id EB82C143C54; Tue, 4 Jun 2024 14:42:14 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1717512135; cv=none; b=nmLAnwCBS31HdEs+IeNUZILFJ0xH9MBWQGCZVVl603uFUMWvt4ak92FSuJbex7dIKA+4QCzse2SKVEmdx3mDrHjQGjkIreiQ14RRuqcrYp/UIDLxi4zoUBXWjC/016Pvsnk+Q6omyYXzLFFIZXbz+RWc+h3+M67EzUhwG2mz7+g= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1717512135; c=relaxed/simple; bh=qU8xhCDt9RMOZCSgOY5aMaPJybEq2SOdmfs7FSgnd6I=; h=Message-ID:Date:From:To:Cc:Subject:References:MIME-Version: Content-Type; b=tcyk6SzbubqXda4sroTPZNfJmBQinKiHmgrC/v7o6KsMwMJKsU9RWO7ABtbnwATArZ2z4Jv+9HQUvaPYpYxmIlMLVOi4y1OD1OJSr6HyesuB0A+qQCLalFiQLEkc/q3n3tzgV8gyPsEg5IuY8Y8BVFVRV0Q1hyOh+1heoY2zBzw= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 Received: by smtp.kernel.org (Postfix) with ESMTPSA id 85950C2BBFC; Tue, 4 Jun 2024 14:42:14 +0000 (UTC) Received: from rostedt by gandalf with local (Exim 4.97) (envelope-from ) id 1sEVMc-00000000Yte-1AR1; Tue, 04 Jun 2024 10:42:14 -0400 Message-ID: <20240604144214.147056566@goodmis.org> User-Agent: quilt/0.68 Date: Tue, 04 Jun 2024 10:41:04 -0400 From: Steven Rostedt To: linux-kernel@vger.kernel.org Cc: Masami Hiramatsu , Mark Rutland , Mathieu Desnoyers , Andrew Morton , Alexei Starovoitov , Florent Revest , Martin KaFai Lau , bpf , Sven Schnelle , Alexei Starovoitov , Jiri Olsa , Arnaldo Carvalho de Melo , Daniel Borkmann , Alan Maguire , Peter Zijlstra , Thomas Gleixner , Guo Ren Subject: [for-next][PATCH 01/27] function_graph: Convert ret_stack to a series of longs References: 
<20240604144103.293353991@goodmis.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: "Steven Rostedt (VMware)" In order to make it possible to have multiple callbacks registered with the function_graph tracer, the retstack needs to be converted from an array of ftrace_ret_stack structures to an array of longs. This will allow to store the list of callbacks on the stack for the return side of the functions. Link: https://lore.kernel.org/linux-trace-kernel/171509092742.162236.442773= 7821399314856.stgit@devnote2 Link: https://lore.kernel.org/linux-trace-kernel/20240603190821.073111754@g= oodmis.org Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Alexei Starovoitov Cc: Florent Revest Cc: Martin KaFai Lau Cc: bpf Cc: Sven Schnelle Cc: Alexei Starovoitov Cc: Jiri Olsa Cc: Arnaldo Carvalho de Melo Cc: Daniel Borkmann Cc: Alan Maguire Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Guo Ren Reviewed-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- include/linux/sched.h | 2 +- kernel/trace/fgraph.c | 136 +++++++++++++++++++++++++----------------- 2 files changed, 83 insertions(+), 55 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 61591ac6eab6..352939dab3a5 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1402,7 +1402,7 @@ struct task_struct { int curr_ret_depth; =20 /* Stack of return addresses for return function tracing: */ - struct ftrace_ret_stack *ret_stack; + unsigned long *ret_stack; =20 /* Timestamp for last schedule: */ unsigned long long ftrace_timestamp; diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c index a130b2d898f7..c62e6db718a0 100644 --- a/kernel/trace/fgraph.c +++ b/kernel/trace/fgraph.c @@ -25,6 +25,30 @@ #define 
ASSIGN_OPS_HASH(opsname, val) #endif =20 +/* + * FGRAPH_FRAME_SIZE: Size in bytes of the meta data on the shadow stack + * FGRAPH_FRAME_OFFSET: Size in long words of the meta data frame + * SHADOW_STACK_SIZE: The size in bytes of the entire shadow stack + * SHADOW_STACK_OFFSET: The size in long words of the shadow stack + * SHADOW_STACK_MAX_OFFSET: The max offset of the stack for a new frame to= be added + */ +#define FGRAPH_FRAME_SIZE sizeof(struct ftrace_ret_stack) +#define FGRAPH_FRAME_OFFSET (ALIGN(FGRAPH_FRAME_SIZE, sizeof(long)) / size= of(long)) +#define SHADOW_STACK_SIZE (PAGE_SIZE) +#define SHADOW_STACK_OFFSET \ + (ALIGN(SHADOW_STACK_SIZE, sizeof(long)) / sizeof(long)) +/* Leave on a buffer at the end */ +#define SHADOW_STACK_MAX_INDEX (SHADOW_STACK_OFFSET - FGRAPH_FRAME_OFFSET) + +/* + * RET_STACK(): Return the frame from a given @offset from task @t + * RET_STACK_INC(): Reserve one frame size on the stack. + * RET_STACK_DEC(): Remove one frame size from the stack. + */ +#define RET_STACK(t, index) ((struct ftrace_ret_stack *)(&(t)->ret_stack[i= ndex])) +#define RET_STACK_INC(c) ({ c +=3D FGRAPH_FRAME_OFFSET; }) +#define RET_STACK_DEC(c) ({ c -=3D FGRAPH_FRAME_OFFSET; }) + DEFINE_STATIC_KEY_FALSE(kill_ftrace_graph); int ftrace_graph_active; =20 @@ -69,6 +93,7 @@ static int ftrace_push_return_trace(unsigned long ret, unsigned long func, unsigned long frame_pointer, unsigned long *retp) { + struct ftrace_ret_stack *ret_stack; unsigned long long calltime; int index; =20 @@ -85,23 +110,25 @@ ftrace_push_return_trace(unsigned long ret, unsigned l= ong func, smp_rmb(); =20 /* The return trace stack is full */ - if (current->curr_ret_stack =3D=3D FTRACE_RETFUNC_DEPTH - 1) { + if (current->curr_ret_stack >=3D SHADOW_STACK_MAX_INDEX) { atomic_inc(&current->trace_overrun); return -EBUSY; } =20 calltime =3D trace_clock_local(); =20 - index =3D ++current->curr_ret_stack; + index =3D current->curr_ret_stack; + RET_STACK_INC(current->curr_ret_stack); + ret_stack =3D
RET_STACK(current, index); barrier(); - current->ret_stack[index].ret =3D ret; - current->ret_stack[index].func =3D func; - current->ret_stack[index].calltime =3D calltime; + ret_stack->ret =3D ret; + ret_stack->func =3D func; + ret_stack->calltime =3D calltime; #ifdef HAVE_FUNCTION_GRAPH_FP_TEST - current->ret_stack[index].fp =3D frame_pointer; + ret_stack->fp =3D frame_pointer; #endif #ifdef HAVE_FUNCTION_GRAPH_RET_ADDR_PTR - current->ret_stack[index].retp =3D retp; + ret_stack->retp =3D retp; #endif return 0; } @@ -137,7 +164,7 @@ int function_graph_enter(unsigned long ret, unsigned lo= ng func, =20 return 0; out_ret: - current->curr_ret_stack--; + RET_STACK_DEC(current->curr_ret_stack); out: current->curr_ret_depth--; return -EBUSY; @@ -148,11 +175,13 @@ static void ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret, unsigned long frame_pointer) { + struct ftrace_ret_stack *ret_stack; int index; =20 index =3D current->curr_ret_stack; + RET_STACK_DEC(index); =20 - if (unlikely(index < 0 || index >=3D FTRACE_RETFUNC_DEPTH)) { + if (unlikely(index < 0 || index > SHADOW_STACK_MAX_INDEX)) { ftrace_graph_stop(); WARN_ON(1); /* Might as well panic, otherwise we have no where to go */ @@ -160,6 +189,7 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace,= unsigned long *ret, return; } =20 + ret_stack =3D RET_STACK(current, index); #ifdef HAVE_FUNCTION_GRAPH_FP_TEST /* * The arch may choose to record the frame pointer used @@ -175,22 +205,22 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trac= e, unsigned long *ret, * Note, -mfentry does not use frame pointers, and this test * is not needed if CC_USING_FENTRY is set. 
*/ - if (unlikely(current->ret_stack[index].fp !=3D frame_pointer)) { + if (unlikely(ret_stack->fp !=3D frame_pointer)) { ftrace_graph_stop(); WARN(1, "Bad frame pointer: expected %lx, received %lx\n" " from func %ps return to %lx\n", current->ret_stack[index].fp, frame_pointer, - (void *)current->ret_stack[index].func, - current->ret_stack[index].ret); + (void *)ret_stack->func, + ret_stack->ret); *ret =3D (unsigned long)panic; return; } #endif =20 - *ret =3D current->ret_stack[index].ret; - trace->func =3D current->ret_stack[index].func; - trace->calltime =3D current->ret_stack[index].calltime; + *ret =3D ret_stack->ret; + trace->func =3D ret_stack->func; + trace->calltime =3D ret_stack->calltime; trace->overrun =3D atomic_read(&current->trace_overrun); trace->depth =3D current->curr_ret_depth--; /* @@ -251,7 +281,7 @@ static unsigned long __ftrace_return_to_handler(struct = fgraph_ret_regs *ret_regs * curr_ret_stack is after that. */ barrier(); - current->curr_ret_stack--; + RET_STACK_DEC(current->curr_ret_stack); =20 if (unlikely(!ret)) { ftrace_graph_stop(); @@ -294,12 +324,13 @@ unsigned long ftrace_return_to_handler(unsigned long = frame_pointer) struct ftrace_ret_stack * ftrace_graph_get_ret_stack(struct task_struct *task, int idx) { - idx =3D task->curr_ret_stack - idx; + int index =3D task->curr_ret_stack; =20 - if (idx >=3D 0 && idx <=3D task->curr_ret_stack) - return &task->ret_stack[idx]; + index -=3D FGRAPH_FRAME_OFFSET * (idx + 1); + if (index < 0) + return NULL; =20 - return NULL; + return RET_STACK(task, index); } =20 /** @@ -321,18 +352,20 @@ ftrace_graph_get_ret_stack(struct task_struct *task, = int idx) unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx, unsigned long ret, unsigned long *retp) { + struct ftrace_ret_stack *ret_stack; int index =3D task->curr_ret_stack; int i; =20 if (ret !=3D (unsigned long)dereference_kernel_function_descriptor(return= _to_handler)) return ret; =20 - if (index < 0) - return ret; +
RET_STACK_DEC(index); =20 - for (i =3D 0; i <=3D index; i++) - if (task->ret_stack[i].retp =3D=3D retp) - return task->ret_stack[i].ret; + for (i =3D index; i >=3D 0; RET_STACK_DEC(i)) { + ret_stack =3D RET_STACK(task, i); + if (ret_stack->retp =3D=3D retp) + return ret_stack->ret; + } =20 return ret; } @@ -346,14 +379,15 @@ unsigned long ftrace_graph_ret_addr(struct task_struc= t *task, int *idx, return ret; =20 task_idx =3D task->curr_ret_stack; + RET_STACK_DEC(task_idx); =20 if (!task->ret_stack || task_idx < *idx) return ret; =20 task_idx -=3D *idx; - (*idx)++; + RET_STACK_INC(*idx); =20 - return task->ret_stack[task_idx].ret; + return RET_STACK(task, task_idx); } #endif /* HAVE_FUNCTION_GRAPH_RET_ADDR_PTR */ =20 @@ -391,7 +425,7 @@ trace_func_graph_ent_t ftrace_graph_entry =3D ftrace_gr= aph_entry_stub; static trace_func_graph_ent_t __ftrace_graph_entry =3D ftrace_graph_entry_= stub; =20 /* Try to assign a return stack array on FTRACE_RETSTACK_ALLOC_SIZE tasks.= */ -static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_lis= t) +static int alloc_retstack_tasklist(unsigned long **ret_stack_list) { int i; int ret =3D 0; @@ -399,10 +433,7 @@ static int alloc_retstack_tasklist(struct ftrace_ret_s= tack **ret_stack_list) struct task_struct *g, *t; =20 for (i =3D 0; i < FTRACE_RETSTACK_ALLOC_SIZE; i++) { - ret_stack_list[i] =3D - kmalloc_array(FTRACE_RETFUNC_DEPTH, - sizeof(struct ftrace_ret_stack), - GFP_KERNEL); + ret_stack_list[i] =3D kmalloc(SHADOW_STACK_SIZE, GFP_KERNEL); if (!ret_stack_list[i]) { start =3D 0; end =3D i; @@ -420,9 +451,9 @@ static int alloc_retstack_tasklist(struct ftrace_ret_st= ack **ret_stack_list) =20 if (t->ret_stack =3D=3D NULL) { atomic_set(&t->trace_overrun, 0); - t->curr_ret_stack =3D -1; + t->curr_ret_stack =3D 0; t->curr_ret_depth =3D -1; - /* Make sure the tasks see the -1 first: */ + /* Make sure the tasks see the 0 first: */ smp_wmb(); t->ret_stack =3D ret_stack_list[start++]; } @@ -442,6 +473,7 @@ 
ftrace_graph_probe_sched_switch(void *ignore, bool pree= mpt, struct task_struct *next, unsigned int prev_state) { + struct ftrace_ret_stack *ret_stack; unsigned long long timestamp; int index; =20 @@ -466,8 +498,11 @@ ftrace_graph_probe_sched_switch(void *ignore, bool pre= empt, */ timestamp -=3D next->ftrace_timestamp; =20 - for (index =3D next->curr_ret_stack; index >=3D 0; index--) - next->ret_stack[index].calltime +=3D timestamp; + for (index =3D next->curr_ret_stack - FGRAPH_FRAME_OFFSET; index >=3D 0; = ) { + ret_stack =3D RET_STACK(next, index); + ret_stack->calltime +=3D timestamp; + index -=3D FGRAPH_FRAME_OFFSET; + } } =20 static int ftrace_graph_entry_test(struct ftrace_graph_ent *trace) @@ -510,10 +545,10 @@ void update_function_graph_func(void) ftrace_graph_entry =3D __ftrace_graph_entry; } =20 -static DEFINE_PER_CPU(struct ftrace_ret_stack *, idle_ret_stack); +static DEFINE_PER_CPU(unsigned long *, idle_ret_stack); =20 static void -graph_init_task(struct task_struct *t, struct ftrace_ret_stack *ret_stack) +graph_init_task(struct task_struct *t, unsigned long *ret_stack) { atomic_set(&t->trace_overrun, 0); t->ftrace_timestamp =3D 0; @@ -528,7 +563,7 @@ graph_init_task(struct task_struct *t, struct ftrace_re= t_stack *ret_stack) */ void ftrace_graph_init_idle_task(struct task_struct *t, int cpu) { - t->curr_ret_stack =3D -1; + t->curr_ret_stack =3D 0; t->curr_ret_depth =3D -1; /* * The idle task has no parent, it either has its own @@ -538,14 +573,11 @@ void ftrace_graph_init_idle_task(struct task_struct *= t, int cpu) WARN_ON(t->ret_stack !=3D per_cpu(idle_ret_stack, cpu)); =20 if (ftrace_graph_active) { - struct ftrace_ret_stack *ret_stack; + unsigned long *ret_stack; =20 ret_stack =3D per_cpu(idle_ret_stack, cpu); if (!ret_stack) { - ret_stack =3D - kmalloc_array(FTRACE_RETFUNC_DEPTH, - sizeof(struct ftrace_ret_stack), - GFP_KERNEL); + ret_stack =3D kmalloc(SHADOW_STACK_SIZE, GFP_KERNEL); if (!ret_stack) return; per_cpu(idle_ret_stack, cpu) =3D 
ret_stack; @@ -559,15 +591,13 @@ void ftrace_graph_init_task(struct task_struct *t) { /* Make sure we do not use the parent ret_stack */ t->ret_stack =3D NULL; - t->curr_ret_stack =3D -1; + t->curr_ret_stack =3D 0; t->curr_ret_depth =3D -1; =20 if (ftrace_graph_active) { - struct ftrace_ret_stack *ret_stack; + unsigned long *ret_stack; =20 - ret_stack =3D kmalloc_array(FTRACE_RETFUNC_DEPTH, - sizeof(struct ftrace_ret_stack), - GFP_KERNEL); + ret_stack =3D kmalloc(SHADOW_STACK_SIZE, GFP_KERNEL); if (!ret_stack) return; graph_init_task(t, ret_stack); @@ -576,7 +606,7 @@ void ftrace_graph_init_task(struct task_struct *t) =20 void ftrace_graph_exit_task(struct task_struct *t) { - struct ftrace_ret_stack *ret_stack =3D t->ret_stack; + unsigned long *ret_stack =3D t->ret_stack; =20 t->ret_stack =3D NULL; /* NULL must become visible to IRQs before we free it: */ @@ -588,12 +618,10 @@ void ftrace_graph_exit_task(struct task_struct *t) /* Allocate a return stack for each task */ static int start_graph_tracing(void) { - struct ftrace_ret_stack **ret_stack_list; + unsigned long **ret_stack_list; int ret, cpu; =20 - ret_stack_list =3D kmalloc_array(FTRACE_RETSTACK_ALLOC_SIZE, - sizeof(struct ftrace_ret_stack *), - GFP_KERNEL); + ret_stack_list =3D kmalloc(SHADOW_STACK_SIZE, GFP_KERNEL); =20 if (!ret_stack_list) return -ENOMEM; --=20 2.43.0