[PATCH v6 2/3] treewide: Switch memcpy() users of 'task->comm' to a more safer implementation

Bhupesh posted 3 patches 2 months, 1 week ago
There is a newer version of this series
[PATCH v6 2/3] treewide: Switch memcpy() users of 'task->comm' to a more safer implementation
Posted by Bhupesh 2 months, 1 week ago
As Linus mentioned in [1], currently we have several memcpy() use-cases
which use 'current->comm' to copy the task name over to local copies.
For an example:

 ...
 char comm[TASK_COMM_LEN];
 memcpy(comm, current->comm, TASK_COMM_LEN);
 ...

These should be modified so that we can later implement approaches
to handle the task->comm's 16-byte length limitation (TASK_COMM_LEN)
in a more modular way (follow-up patch does the same):

 ...
 char comm[TASK_COMM_LEN];
 memcpy(comm, current->comm, TASK_COMM_LEN);
 comm[TASK_COMM_LEN - 1] = '\0';
 ...

The relevant 'memcpy()' users were identified using the following search
pattern:
 $ git grep 'memcpy.*->comm\>'

[1]. https://lore.kernel.org/all/CAHk-=wjAmmHUg6vho1KjzQi2=psR30+CogFd4aXrThr2gsiS4g@mail.gmail.com/

Signed-off-by: Bhupesh <bhupesh@igalia.com>
---
 include/linux/coredump.h       | 3 ++-
 include/trace/events/block.h   | 5 +++++
 include/trace/events/oom.h     | 1 +
 include/trace/events/osnoise.h | 1 +
 include/trace/events/signal.h  | 1 +
 include/trace/events/task.h    | 2 ++
 6 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/include/linux/coredump.h b/include/linux/coredump.h
index 68861da4cf7c..988b233dcc09 100644
--- a/include/linux/coredump.h
+++ b/include/linux/coredump.h
@@ -54,7 +54,8 @@ extern void vfs_coredump(const kernel_siginfo_t *siginfo);
 	do {	\
 		char comm[TASK_COMM_LEN];	\
 		/* This will always be NUL terminated. */ \
-		memcpy(comm, current->comm, sizeof(comm)); \
+		memcpy(comm, current->comm, TASK_COMM_LEN); \
+		comm[TASK_COMM_LEN - 1] = '\0'; \
 		printk_ratelimited(Level "coredump: %d(%*pE): " Format "\n",	\
 			task_tgid_vnr(current), (int)strlen(comm), comm, ##__VA_ARGS__);	\
 	} while (0)	\
diff --git a/include/trace/events/block.h b/include/trace/events/block.h
index 6aa79e2d799c..dfc20fbe389c 100644
--- a/include/trace/events/block.h
+++ b/include/trace/events/block.h
@@ -214,6 +214,7 @@ DECLARE_EVENT_CLASS(block_rq,
 		blk_fill_rwbs(__entry->rwbs, rq->cmd_flags);
 		__get_str(cmd)[0] = '\0';
 		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+		__entry->comm[TASK_COMM_LEN - 1] = '\0';
 	),
 
 	TP_printk("%d,%d %s %u (%s) %llu + %u %s,%u,%u [%s]",
@@ -352,6 +353,7 @@ DECLARE_EVENT_CLASS(block_bio,
 		__entry->nr_sector	= bio_sectors(bio);
 		blk_fill_rwbs(__entry->rwbs, bio->bi_opf);
 		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+		__entry->comm[TASK_COMM_LEN - 1] = '\0';
 	),
 
 	TP_printk("%d,%d %s %llu + %u [%s]",
@@ -435,6 +437,7 @@ TRACE_EVENT(block_plug,
 
 	TP_fast_assign(
 		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+		__entry->comm[TASK_COMM_LEN - 1] = '\0';
 	),
 
 	TP_printk("[%s]", __entry->comm)
@@ -454,6 +457,7 @@ DECLARE_EVENT_CLASS(block_unplug,
 	TP_fast_assign(
 		__entry->nr_rq = depth;
 		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+		__entry->comm[TASK_COMM_LEN - 1] = '\0';
 	),
 
 	TP_printk("[%s] %d", __entry->comm, __entry->nr_rq)
@@ -505,6 +509,7 @@ TRACE_EVENT(block_split,
 		__entry->new_sector	= new_sector;
 		blk_fill_rwbs(__entry->rwbs, bio->bi_opf);
 		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+		__entry->comm[TASK_COMM_LEN - 1] = '\0';
 	),
 
 	TP_printk("%d,%d %s %llu / %llu [%s]",
diff --git a/include/trace/events/oom.h b/include/trace/events/oom.h
index 9f0a5d1482c4..a5641ed4285f 100644
--- a/include/trace/events/oom.h
+++ b/include/trace/events/oom.h
@@ -24,6 +24,7 @@ TRACE_EVENT(oom_score_adj_update,
 	TP_fast_assign(
 		__entry->pid = task->pid;
 		memcpy(__entry->comm, task->comm, TASK_COMM_LEN);
+		__entry->comm[TASK_COMM_LEN - 1] = '\0';
 		__entry->oom_score_adj = task->signal->oom_score_adj;
 	),
 
diff --git a/include/trace/events/osnoise.h b/include/trace/events/osnoise.h
index 3f4273623801..0321b3f8d532 100644
--- a/include/trace/events/osnoise.h
+++ b/include/trace/events/osnoise.h
@@ -117,6 +117,7 @@ TRACE_EVENT(thread_noise,
 
 	TP_fast_assign(
 		memcpy(__entry->comm, t->comm, TASK_COMM_LEN);
+		__entry->comm[TASK_COMM_LEN - 1] = '\0';
 		__entry->pid = t->pid;
 		__entry->start = start;
 		__entry->duration = duration;
diff --git a/include/trace/events/signal.h b/include/trace/events/signal.h
index 1db7e4b07c01..7f490e553db5 100644
--- a/include/trace/events/signal.h
+++ b/include/trace/events/signal.h
@@ -68,6 +68,7 @@ TRACE_EVENT(signal_generate,
 		__entry->sig	= sig;
 		TP_STORE_SIGINFO(__entry, info);
 		memcpy(__entry->comm, task->comm, TASK_COMM_LEN);
+		__entry->comm[TASK_COMM_LEN - 1] = '\0';
 		__entry->pid	= task->pid;
 		__entry->group	= group;
 		__entry->result	= result;
diff --git a/include/trace/events/task.h b/include/trace/events/task.h
index af535b053033..4ddf21b69372 100644
--- a/include/trace/events/task.h
+++ b/include/trace/events/task.h
@@ -22,6 +22,7 @@ TRACE_EVENT(task_newtask,
 	TP_fast_assign(
 		__entry->pid = task->pid;
 		memcpy(__entry->comm, task->comm, TASK_COMM_LEN);
+		__entry->comm[TASK_COMM_LEN - 1] = '\0';
 		__entry->clone_flags = clone_flags;
 		__entry->oom_score_adj = task->signal->oom_score_adj;
 	),
@@ -45,6 +46,7 @@ TRACE_EVENT(task_rename,
 
 	TP_fast_assign(
 		memcpy(entry->oldcomm, task->comm, TASK_COMM_LEN);
+		entry->oldcomm[TASK_COMM_LEN - 1] = '\0';
 		strscpy(entry->newcomm, comm, TASK_COMM_LEN);
 		__entry->oom_score_adj = task->signal->oom_score_adj;
 	),
-- 
2.38.1
Re: [PATCH v6 2/3] treewide: Switch memcpy() users of 'task->comm' to a more safer implementation
Posted by Kees Cook 2 months, 1 week ago
On Thu, Jul 24, 2025 at 06:06:11PM +0530, Bhupesh wrote:
> As Linus mentioned in [1], currently we have several memcpy() use-cases
> which use 'current->comm' to copy the task name over to local copies.
> For an example:
> 
>  ...
>  char comm[TASK_COMM_LEN];
>  memcpy(comm, current->comm, TASK_COMM_LEN);
>  ...
> 
> These should be modified so that we can later implement approaches
> to handle the task->comm's 16-byte length limitation (TASK_COMM_LEN)
> in a more modular way (follow-up patch does the same):
> 
>  ...
>  char comm[TASK_COMM_LEN];
>  memcpy(comm, current->comm, TASK_COMM_LEN);
>  comm[TASK_COMM_LEN - 1] = '\0';
>  ...

Why not switch all of these to get_task_comm()? It will correctly handle
the size check and NUL termination.

In an earlier thread[1], I pointed out that since __set_task_comm() always
keeps a final NUL byte, we're always safe to use strscpy(). (We want to
block over-reads and over-writes but don't care about garbled reads.)

In the new case of copying into a smaller buffer, strscpy() will always
handle writing the final NUL byte.

The only special cases I can think of would be non-fixed-sized
destination buffers, which get_task_comm() doesn't like since it can't
validate how to safely terminate the buffer. The example you give above
isn't that, though.

-Kees

[1] https://lore.kernel.org/all/202411301244.381F2B8D17@keescook/

-- 
Kees Cook
Re: [PATCH v6 2/3] treewide: Switch memcpy() users of 'task->comm' to a more safer implementation
Posted by Linus Torvalds 2 months, 1 week ago
, but

On Thu, 24 Jul 2025 at 16:49, Kees Cook <kees@kernel.org> wrote:
>
> Why not switch all of these to get_task_comm()? It will correctly handle
> the size check and NUL termination.

I'd rather aim to get rid of get_task_comm() entirely.

I don't think it has ever made much sense, except in the "guarantee
NUL termination" sense, but since we have basically agreed to always
guarantee that natively in ->comm[] itself (by *never* writing non-NUL
characters to the last byte, rather than "write something, then
overwrite it") the whole concept is broken.

The alleged other reason for get_task_comm() is to get a stable
result, but since the source can be modified by users, there's no
"stable". If you get some half-way state, that *could* have been a
user just writing odd names.

So the other reason to use get_task_comm() is pretty much pointless too.

And several of the existing users are just pointless overhead, copying
the comm[] to a local copy only to print it out, and making it much
more complex than just using "%s" with tsk->comm.

End result: all get_task_comm() does now is to verify the size of the
result buffer, and that is *BAD*. We shouldn't care. If the result
buffer is smaller than the string, we should just have truncated it.
And if the buffer is bigger, we should zero-pad or not depending on
the use case.

And guess what? We *have* that function. It's called "strscpy()". It
already does the right thing, including passing in the size of a fixed
array and just dealing with it the RightWay(tm). Add '_pad()' if that
is the behavior you want, and now you *document* the fact that the
result is padded.

So I claim that get_task_comm() is bad, and we should feel bad about
ever having introduced it.

Now, the tracing code may actually care about *performance*, and what
it probably wants is that "just copy things and NUL-terminate it", but
I don't think we should use get_task_comm() for that because of all
the horrid bad history it has.

In other words, if "strscpy()" is too expensive (because it's being
careful and returns the size), I think we should look at maybe
optimizing strscpy() further. It already does word-at-a-time stuff,
but what strscpy() does *not* do is the "inline at call site for small
constant sizes".

We could inline sized_strscpy() for small constant sizes, but the real
problem is that it returns the length, and there's no way to do
"inline for small constant sizes when nobody cares about the result"
that I can think of. You can use _Generic() on the arguments, but not
on the "people don't look at the return value".

So we do probably want something for that "just copy comm to a
constant-sized array" case if people care about performance for it
(and tracing probably does), but I still think get_task_comm() has too
much baggage (and takes a size that it shouldn't take).

"get_task_array()" might be more palatable, and is certainly simple to
implement using something like

   static __always_inline void
       __cstr_array_copy(char *dst,
            const char *src, __kernel_size_t size)
   {
        memcpy(dst, src, size);
        dst[size] = 0;
   }

   #define get_task_array(a,b) \
      __cstr_array_copy(dst, src, __must_be_array(dst))

(Entirely untested, hasn't seen a compiler, is written for email, you
get the idea..).

But I think that should be a separate thing (and it should be
documented to be data-racy in the destination and *not* be the kind of
"stable NUL at the end" that strscpy and friends are.

               Linus
Re: [PATCH v6 2/3] treewide: Switch memcpy() users of 'task->comm' to a more safer implementation
Posted by Kees Cook 2 months, 1 week ago

On July 26, 2025 10:50:55 AM PDT, Linus Torvalds <torvalds@linux-foundation.org> wrote:
>, but
>
>On Thu, 24 Jul 2025 at 16:49, Kees Cook <kees@kernel.org> wrote:
>>
>> Why not switch all of these to get_task_comm()? It will correctly handle
>> the size check and NUL termination.
>
>I'd rather aim to get rid of get_task_comm() entirely.

That works for me! I just get twitchy around seeing memcpy used for strings. :) if we're gonna NUL after the memcpy, just use strscpy_pad().

>And guess what? We *have* that function. It's called "strscpy()". It
>already does the right thing, including passing in the size of a fixed
>array and just dealing with it the RightWay(tm). Add '_pad()' if that
>is the behavior you want, and now you *document* the fact that the
>result is padded.

Exactly. Let's see how much we can just replace with strscpy_pad(). It we have other use cases, we can handle those separately.

-Kees

-- 
Kees Cook
Re: [PATCH v6 2/3] treewide: Switch memcpy() users of 'task->comm' to a more safer implementation
Posted by Linus Torvalds 2 months, 1 week ago
On Sat, 26 Jul 2025 at 16:19, Kees Cook <kees@kernel.org> wrote:
>
> That works for me! I just get twitchy around seeing memcpy used for strings. :) if we're gonna NUL after the memcpy, just use strscpy_pad().

I do worry a tiny bit about performance.

Because 'memcpy+set last byte to NUL' really is just a couple of
instructions when we're talking small constant-sized arrays.

strscpy_pad() isn't horrible, but it's still at another level. And
most of the cost is that "return the length" which people often don't
care about.

Dang, I wish we had some compiler trick to say "if the value isn't
used, do X, if it _is_ used do Y".

It's such a trivial thing in the compiler itself, and the information
is there, but I don't think it is exposed in any useful way.

In fact, it *is* exposed in one way I can think of:

   __attribute__((__warn_unused_result__))

but not in a useful form for actually generating different code.

Some kind of "__builtin_if_used(x,y)" where it picks 'x' if the value
is used, and 'y' if it isn't would be lovely for this.

Then you could do things like

    #define my_helper(x) \
        __builtin_if_used( \
                full_semantics(x), \
                simpler_version(x))

when having a return value means extra work and most people don't care.

Maybe it exists in some form that I haven't thought of?

Any compiler people around?

                 Linus
Re: [PATCH v6 2/3] treewide: Switch memcpy() users of 'task->comm' to a more safer implementation
Posted by Bhupesh Sharma 2 months ago
On 7/27/25 5:07 AM, Linus Torvalds wrote:
> On Sat, 26 Jul 2025 at 16:19, Kees Cook <kees@kernel.org> wrote:
>> That works for me! I just get twitchy around seeing memcpy used for strings. :) if we're gonna NUL after the memcpy, just use strscpy_pad().
> I do worry a tiny bit about performance.
>
> Because 'memcpy+set last byte to NUL' really is just a couple of
> instructions when we're talking small constant-sized arrays.
>
> strscpy_pad() isn't horrible, but it's still at another level. And
> most of the cost is that "return the length" which people often don't
> care about.
>
> Dang, I wish we had some compiler trick to say "if the value isn't
> used, do X, if it _is_ used do Y".
>
> It's such a trivial thing in the compiler itself, and the information
> is there, but I don't think it is exposed in any useful way.
>
> In fact, it *is* exposed in one way I can think of:
>
>     __attribute__((__warn_unused_result__))
>
> but not in a useful form for actually generating different code.
>
> Some kind of "__builtin_if_used(x,y)" where it picks 'x' if the value
> is used, and 'y' if it isn't would be lovely for this.
>
> Then you could do things like
>
>      #define my_helper(x) \
>          __builtin_if_used( \
>                  full_semantics(x), \
>                  simpler_version(x))
>
> when having a return value means extra work and most people don't care.
>
> Maybe it exists in some form that I haven't thought of?
>
> Any compiler people around?
>

Sorry for the delay in reply, but I was checking with some *compiler* 
folks and unfortunately couldn't find an equivalent of the above 
*helper* support.
I am not a compiler expert though and relied mostly on my digging of the 
'gcc' code and advise from folks working in compiler world.

In case there are no new suggestions, I think we can go ahead with 
"strscpy_pad()" or "get_task_array()" in place of "get_task_comm()" 
which is implement in the following manner:

    static __always_inline void
        __cstr_array_copy(char *dst,
             const char *src, __kernel_size_t size)
    {
         memcpy(dst, src, size);
         dst[size] = 0;
    }

    #define get_task_array(a,b) \
       __cstr_array_copy(dst, src, __must_be_array(dst))

Please let me know.

Thanks,
Bhupesh