[PATCH 3/5] tracing: kprobe-event: Allocate string buffers from heap

Masami Hiramatsu (Google) posted 5 patches 2 months, 2 weeks ago
There is a newer version of this series
[PATCH 3/5] tracing: kprobe-event: Allocate string buffers from heap
Posted by Masami Hiramatsu (Google) 2 months, 2 weeks ago
From: Masami Hiramatsu (Google) <mhiramat@kernel.org>

Allocate temporary string buffers for parsing kprobe-events
from heap instead of stack.

Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
---
 kernel/trace/trace_kprobe.c |   39 +++++++++++++++++++++++++--------------
 1 file changed, 25 insertions(+), 14 deletions(-)

diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 15d7a381a128..793af6000f16 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -861,20 +861,20 @@ static int trace_kprobe_create_internal(int argc, const char *argv[],
 	 *  FETCHARG:TYPE : use TYPE instead of unsigned long.
 	 */
 	struct trace_kprobe *tk __free(free_trace_kprobe) = NULL;
+	const char *event = NULL, *group = KPROBE_EVENT_SYSTEM;
+	const char **new_argv __free(kfree) = NULL;
 	int i, len, new_argc = 0, ret = 0;
-	bool is_return = false;
 	char *symbol __free(kfree) = NULL;
-	char *tmp = NULL;
-	const char **new_argv __free(kfree) = NULL;
-	const char *event = NULL, *group = KPROBE_EVENT_SYSTEM;
+	char *ebuf __free(kfree) = NULL;
+	char *gbuf __free(kfree) = NULL;
+	char *abuf __free(kfree) = NULL;
+	char *dbuf __free(kfree) = NULL;
 	enum probe_print_type ptype;
+	bool is_return = false;
 	int maxactive = 0;
-	long offset = 0;
 	void *addr = NULL;
-	char buf[MAX_EVENT_NAME_LEN];
-	char gbuf[MAX_EVENT_NAME_LEN];
-	char abuf[MAX_BTF_ARGS_LEN];
-	char *dbuf __free(kfree) = NULL;
+	char *tmp = NULL;
+	long offset = 0;
 
 	switch (argv[0][0]) {
 	case 'r':
@@ -893,6 +893,8 @@ static int trace_kprobe_create_internal(int argc, const char *argv[],
 		event++;
 
 	if (isdigit(argv[0][1])) {
+		char *buf __free(kfree) = NULL;
+
 		if (!is_return) {
 			trace_probe_log_err(1, BAD_MAXACT_TYPE);
 			return -EINVAL;
@@ -905,7 +907,7 @@ static int trace_kprobe_create_internal(int argc, const char *argv[],
 			trace_probe_log_err(1, BAD_MAXACT);
 			return -EINVAL;
 		}
-		memcpy(buf, &argv[0][1], len);
+		buf = kmemdup(&argv[0][1], len + 1, GFP_KERNEL);
 		buf[len] = '\0';
 		ret = kstrtouint(buf, 0, &maxactive);
 		if (ret || !maxactive) {
@@ -973,6 +975,9 @@ static int trace_kprobe_create_internal(int argc, const char *argv[],
 
 	trace_probe_log_set_index(0);
 	if (event) {
+		gbuf = kmalloc(MAX_EVENT_NAME_LEN, GFP_KERNEL);
+		if (!gbuf)
+			return -ENOMEM;
 		ret = traceprobe_parse_event_name(&event, &group, gbuf,
 						  event - argv[0]);
 		if (ret)
@@ -981,16 +986,22 @@ static int trace_kprobe_create_internal(int argc, const char *argv[],
 
 	if (!event) {
 		/* Make a new event name */
+		ebuf = kmalloc(MAX_EVENT_NAME_LEN, GFP_KERNEL);
+		if (!ebuf)
+			return -ENOMEM;
 		if (symbol)
-			snprintf(buf, MAX_EVENT_NAME_LEN, "%c_%s_%ld",
+			snprintf(ebuf, MAX_EVENT_NAME_LEN, "%c_%s_%ld",
 				 is_return ? 'r' : 'p', symbol, offset);
 		else
-			snprintf(buf, MAX_EVENT_NAME_LEN, "%c_0x%p",
+			snprintf(ebuf, MAX_EVENT_NAME_LEN, "%c_0x%p",
 				 is_return ? 'r' : 'p', addr);
-		sanitize_event_name(buf);
-		event = buf;
+		sanitize_event_name(ebuf);
+		event = ebuf;
 	}
 
+	abuf = kmalloc(MAX_BTF_ARGS_LEN, GFP_KERNEL);
+	if (!abuf)
+		return -ENOMEM;
 	argc -= 2; argv += 2;
 	ctx->funcname = symbol;
 	new_argv = traceprobe_expand_meta_args(argc, argv, &new_argc,
Re: [PATCH 3/5] tracing: kprobe-event: Allocate string buffers from heap
Posted by Steven Rostedt 2 months, 2 weeks ago
On Fri, 18 Jul 2025 20:34:29 +0900
"Masami Hiramatsu (Google)" <mhiramat@kernel.org> wrote:

> From: Masami Hiramatsu (Google) <mhiramat@kernel.org>
> 
> Allocate temporary string buffers for parsing kprobe-events
> from heap instead of stack.
> 
> Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
> ---
>  kernel/trace/trace_kprobe.c |   39 +++++++++++++++++++++++++--------------
>  1 file changed, 25 insertions(+), 14 deletions(-)
> 
> diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
> index 15d7a381a128..793af6000f16 100644
> --- a/kernel/trace/trace_kprobe.c
> +++ b/kernel/trace/trace_kprobe.c
> @@ -861,20 +861,20 @@ static int trace_kprobe_create_internal(int argc, const char *argv[],
>  	 *  FETCHARG:TYPE : use TYPE instead of unsigned long.
>  	 */
>  	struct trace_kprobe *tk __free(free_trace_kprobe) = NULL;
> +	const char *event = NULL, *group = KPROBE_EVENT_SYSTEM;
> +	const char **new_argv __free(kfree) = NULL;
>  	int i, len, new_argc = 0, ret = 0;
> -	bool is_return = false;
>  	char *symbol __free(kfree) = NULL;
> -	char *tmp = NULL;
> -	const char **new_argv __free(kfree) = NULL;
> -	const char *event = NULL, *group = KPROBE_EVENT_SYSTEM;
> +	char *ebuf __free(kfree) = NULL;
> +	char *gbuf __free(kfree) = NULL;
> +	char *abuf __free(kfree) = NULL;
> +	char *dbuf __free(kfree) = NULL;
>  	enum probe_print_type ptype;
> +	bool is_return = false;
>  	int maxactive = 0;
> -	long offset = 0;
>  	void *addr = NULL;
> -	char buf[MAX_EVENT_NAME_LEN];
> -	char gbuf[MAX_EVENT_NAME_LEN];
> -	char abuf[MAX_BTF_ARGS_LEN];
> -	char *dbuf __free(kfree) = NULL;
> +	char *tmp = NULL;
> +	long offset = 0;
>  
>  	switch (argv[0][0]) {
>  	case 'r':
> @@ -893,6 +893,8 @@ static int trace_kprobe_create_internal(int argc, const char *argv[],
>  		event++;
>  
>  	if (isdigit(argv[0][1])) {
> +		char *buf __free(kfree) = NULL;

So this gets freed when this if block ends, right?

> +
>  		if (!is_return) {
>  			trace_probe_log_err(1, BAD_MAXACT_TYPE);
>  			return -EINVAL;
> @@ -905,7 +907,7 @@ static int trace_kprobe_create_internal(int argc, const char *argv[],
>  			trace_probe_log_err(1, BAD_MAXACT);
>  			return -EINVAL;
>  		}
> -		memcpy(buf, &argv[0][1], len);
> +		buf = kmemdup(&argv[0][1], len + 1, GFP_KERNEL);
>  		buf[len] = '\0';
>  		ret = kstrtouint(buf, 0, &maxactive);
>  		if (ret || !maxactive) {
> @@ -973,6 +975,9 @@ static int trace_kprobe_create_internal(int argc, const char *argv[],
>  
>  	trace_probe_log_set_index(0);
>  	if (event) {
> +		gbuf = kmalloc(MAX_EVENT_NAME_LEN, GFP_KERNEL);
> +		if (!gbuf)
> +			return -ENOMEM;
>  		ret = traceprobe_parse_event_name(&event, &group, gbuf,
>  						  event - argv[0]);

And you can't use the same trick here because
traceprobe_parse_event_name() assigns "group" to gbuf and is used
outside this if block.

I notice there's no comment that states this. At the very minimum,
traceprobe_parse_event_name() should have a kerneldoc comment above its
definition and state this. But that's not an issue with this patch
series. Just an observation. Thus...

Reviewed-by: Steven Rostedt (Google) <rostedt@goodmis.org>

-- Steve


>  		if (ret)
> @@ -981,16 +986,22 @@ static int trace_kprobe_create_internal(int argc, const char *argv[],
>  
>  	if (!event) {
>  		/* Make a new event name */
> +		ebuf = kmalloc(MAX_EVENT_NAME_LEN, GFP_KERNEL);
> +		if (!ebuf)
> +			return -ENOMEM;
>  		if (symbol)
> -			snprintf(buf, MAX_EVENT_NAME_LEN, "%c_%s_%ld",
> +			snprintf(ebuf, MAX_EVENT_NAME_LEN, "%c_%s_%ld",
>  				 is_return ? 'r' : 'p', symbol, offset);
>  		else
> -			snprintf(buf, MAX_EVENT_NAME_LEN, "%c_0x%p",
> +			snprintf(ebuf, MAX_EVENT_NAME_LEN, "%c_0x%p",
>  				 is_return ? 'r' : 'p', addr);
> -		sanitize_event_name(buf);
> -		event = buf;
> +		sanitize_event_name(ebuf);
> +		event = ebuf;
>  	}
>  
> +	abuf = kmalloc(MAX_BTF_ARGS_LEN, GFP_KERNEL);
> +	if (!abuf)
> +		return -ENOMEM;
>  	argc -= 2; argv += 2;
>  	ctx->funcname = symbol;
>  	new_argv = traceprobe_expand_meta_args(argc, argv, &new_argc,
Re: [PATCH 3/5] tracing: kprobe-event: Allocate string buffers from heap
Posted by Masami Hiramatsu (Google) 2 months, 2 weeks ago
On Fri, 18 Jul 2025 13:46:27 -0400
Steven Rostedt <rostedt@goodmis.org> wrote:

> On Fri, 18 Jul 2025 20:34:29 +0900
> "Masami Hiramatsu (Google)" <mhiramat@kernel.org> wrote:
> 
> > From: Masami Hiramatsu (Google) <mhiramat@kernel.org>
> > 
> > Allocate temporary string buffers for parsing kprobe-events
> > from heap instead of stack.
> > 
> > Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
> > ---
> >  kernel/trace/trace_kprobe.c |   39 +++++++++++++++++++++++++--------------
> >  1 file changed, 25 insertions(+), 14 deletions(-)
> > 
> > diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
> > index 15d7a381a128..793af6000f16 100644
> > --- a/kernel/trace/trace_kprobe.c
> > +++ b/kernel/trace/trace_kprobe.c
> > @@ -861,20 +861,20 @@ static int trace_kprobe_create_internal(int argc, const char *argv[],
> >  	 *  FETCHARG:TYPE : use TYPE instead of unsigned long.
> >  	 */
> >  	struct trace_kprobe *tk __free(free_trace_kprobe) = NULL;
> > +	const char *event = NULL, *group = KPROBE_EVENT_SYSTEM;
> > +	const char **new_argv __free(kfree) = NULL;
> >  	int i, len, new_argc = 0, ret = 0;
> > -	bool is_return = false;
> >  	char *symbol __free(kfree) = NULL;
> > -	char *tmp = NULL;
> > -	const char **new_argv __free(kfree) = NULL;
> > -	const char *event = NULL, *group = KPROBE_EVENT_SYSTEM;
> > +	char *ebuf __free(kfree) = NULL;
> > +	char *gbuf __free(kfree) = NULL;
> > +	char *abuf __free(kfree) = NULL;
> > +	char *dbuf __free(kfree) = NULL;
> >  	enum probe_print_type ptype;
> > +	bool is_return = false;
> >  	int maxactive = 0;
> > -	long offset = 0;
> >  	void *addr = NULL;
> > -	char buf[MAX_EVENT_NAME_LEN];
> > -	char gbuf[MAX_EVENT_NAME_LEN];
> > -	char abuf[MAX_BTF_ARGS_LEN];
> > -	char *dbuf __free(kfree) = NULL;
> > +	char *tmp = NULL;
> > +	long offset = 0;
> >  
> >  	switch (argv[0][0]) {
> >  	case 'r':
> > @@ -893,6 +893,8 @@ static int trace_kprobe_create_internal(int argc, const char *argv[],
> >  		event++;
> >  
> >  	if (isdigit(argv[0][1])) {
> > +		char *buf __free(kfree) = NULL;
> 
> So this gets freed when this if block ends, right?

Yes, because "buf" is only used as a short-lived temporary buffer
within this block.

> 
> > +
> >  		if (!is_return) {
> >  			trace_probe_log_err(1, BAD_MAXACT_TYPE);
> >  			return -EINVAL;
> > @@ -905,7 +907,7 @@ static int trace_kprobe_create_internal(int argc, const char *argv[],
> >  			trace_probe_log_err(1, BAD_MAXACT);
> >  			return -EINVAL;
> >  		}
> > -		memcpy(buf, &argv[0][1], len);
> > +		buf = kmemdup(&argv[0][1], len + 1, GFP_KERNEL);
> >  		buf[len] = '\0';
> >  		ret = kstrtouint(buf, 0, &maxactive);
> >  		if (ret || !maxactive) {
> > @@ -973,6 +975,9 @@ static int trace_kprobe_create_internal(int argc, const char *argv[],
> >  
> >  	trace_probe_log_set_index(0);
> >  	if (event) {
> > +		gbuf = kmalloc(MAX_EVENT_NAME_LEN, GFP_KERNEL);
> > +		if (!gbuf)
> > +			return -ENOMEM;
> >  		ret = traceprobe_parse_event_name(&event, &group, gbuf,
> >  						  event - argv[0]);
> 
> And you can't use the same trick here because
> traceprobe_parse_event_name() assigns "group" to gbuf and is used
> outside this if block.

Yes, that buffer holds the group name, which stays in use until the probe is parsed.

> 
> I notice there's no comment that states this. At the very minimum,
> traceprobe_parse_event_name() should have a kerneldoc comment above its
> definition and state this.

Yeah, that function should be documented; it is a bit complicated.

> But that's not an issue with this patch
> series. Just an observation. Thus...
> 
> Reviewed-by: Steven Rostedt (Google) <rostedt@goodmis.org>

Thank you!

> 
> -- Steve
> 
> 
> >  		if (ret)
> > @@ -981,16 +986,22 @@ static int trace_kprobe_create_internal(int argc, const char *argv[],
> >  
> >  	if (!event) {
> >  		/* Make a new event name */
> > +		ebuf = kmalloc(MAX_EVENT_NAME_LEN, GFP_KERNEL);
> > +		if (!ebuf)
> > +			return -ENOMEM;
> >  		if (symbol)
> > -			snprintf(buf, MAX_EVENT_NAME_LEN, "%c_%s_%ld",
> > +			snprintf(ebuf, MAX_EVENT_NAME_LEN, "%c_%s_%ld",
> >  				 is_return ? 'r' : 'p', symbol, offset);
> >  		else
> > -			snprintf(buf, MAX_EVENT_NAME_LEN, "%c_0x%p",
> > +			snprintf(ebuf, MAX_EVENT_NAME_LEN, "%c_0x%p",
> >  				 is_return ? 'r' : 'p', addr);
> > -		sanitize_event_name(buf);
> > -		event = buf;
> > +		sanitize_event_name(ebuf);
> > +		event = ebuf;
> >  	}
> >  
> > +	abuf = kmalloc(MAX_BTF_ARGS_LEN, GFP_KERNEL);
> > +	if (!abuf)
> > +		return -ENOMEM;
> >  	argc -= 2; argv += 2;
> >  	ctx->funcname = symbol;
> >  	new_argv = traceprobe_expand_meta_args(argc, argv, &new_argc,
> 


-- 
Masami Hiramatsu (Google) <mhiramat@kernel.org>