include/linux/overflow.h | 12 ++++++++++++ kernel/trace/trace.c | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-)
From: Steven Rostedt <rostedt@goodmis.org>
The trace_marker_raw file in tracefs takes a buffer from user space that
contains an id as well as a raw data string which is usually a binary
structure. The structure used has the following:
struct raw_data_entry {
struct trace_entry ent;
unsigned int id;
char buf[];
};
Since the passed in "cnt" variable is both the size of buf as well as the
size of id, the code to allocate the location on the ring buffer had:
size = struct_size(entry, buf, cnt - sizeof(entry->id));
Which is quite ugly and hard to understand. Instead, add a helper macro
called struct_offset() which then changes the above to a simple and easy
to understand:
size = struct_offset(entry, id) + cnt;
This will likely come in handy for other use cases too.
Link: https://lore.kernel.org/all/CAHk-=whYZVoEdfO1PmtbirPdBMTV9Nxt9f09CK0k6S+HJD3Zmg@mail.gmail.com/
Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
include/linux/overflow.h | 12 ++++++++++++
kernel/trace/trace.c | 2 +-
2 files changed, 13 insertions(+), 1 deletion(-)
diff --git a/include/linux/overflow.h b/include/linux/overflow.h
index 725f95f7e416..736f633b2d5f 100644
--- a/include/linux/overflow.h
+++ b/include/linux/overflow.h
@@ -458,6 +458,18 @@ static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend)
#define struct_size_t(type, member, count) \
struct_size((type *)NULL, member, count)
+/**
+ * struct_offset() - Calculate the offset of a member within a struct
+ * @p: Pointer to the struct
+ * @member: Name of the member to get the offset of
+ *
+ * Calculates the offset of a particular @member of the structure pointed
+ * to by @p.
+ *
+ * Return: number of bytes to the location of @member.
+ */
+#define struct_offset(p, member) (offsetof(typeof(*(p)), member))
+
/**
* __DEFINE_FLEX() - helper macro for DEFINE_FLEX() family.
* Enables caller macro to pass arbitrary trailing expressions
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index f59645ab5140..1f734e289368 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -7649,7 +7649,7 @@ static ssize_t write_raw_marker_to_buffer(struct trace_array *tr,
size_t size;
/* cnt includes both the entry->id and the data behind it. */
- size = struct_size(entry, buf, cnt - sizeof(entry->id));
+ size = struct_offset(entry, id) + cnt;
buffer = tr->array_buffer.buffer;
--
2.51.0
On Wed, Nov 26, 2025 at 02:52:49PM -0500, Steven Rostedt wrote:
> From: Steven Rostedt <rostedt@goodmis.org>
>
> The trace_marker_raw file in tracefs takes a buffer from user space that
> contains an id as well as a raw data string which is usually a binary
> structure. The structure used has the following:
>
> struct raw_data_entry {
> struct trace_entry ent;
> unsigned int id;
> char buf[];
> };
>
> Since the passed in "cnt" variable is both the size of buf as well as the
> size of id, the code to allocate the location on the ring buffer had:
>
> size = struct_size(entry, buf, cnt - sizeof(entry->id));
>
> Which is quite ugly and hard to understand. Instead, add a helper macro
> called struct_offset() which then changes the above to a simple and easy
> to understand:
>
> size = struct_offset(entry, id) + cnt;
>
> This will likely come in handy for other use cases too.
>
> Link: https://lore.kernel.org/all/CAHk-=whYZVoEdfO1PmtbirPdBMTV9Nxt9f09CK0k6S+HJD3Zmg@mail.gmail.com/
>
> Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
> Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Works for me!
Reviewed-by: Kees Cook <kees@kernel.org>
> ---
> include/linux/overflow.h | 12 ++++++++++++
> kernel/trace/trace.c | 2 +-
> 2 files changed, 13 insertions(+), 1 deletion(-)
>
> diff --git a/include/linux/overflow.h b/include/linux/overflow.h
> index 725f95f7e416..736f633b2d5f 100644
> --- a/include/linux/overflow.h
> +++ b/include/linux/overflow.h
> @@ -458,6 +458,18 @@ static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend)
> #define struct_size_t(type, member, count) \
> struct_size((type *)NULL, member, count)
>
> +/**
> + * struct_offset() - Calculate the offset of a member within a struct
> + * @p: Pointer to the struct
> + * @member: Name of the member to get the offset of
> + *
> + * Calculates the offset of a particular @member of the structure pointed
> + * to by @p.
> + *
> + * Return: number of bytes to the location of @member.
> + */
> +#define struct_offset(p, member) (offsetof(typeof(*(p)), member))
I wonder if the kerndoc for this and offsetof() should reference each
other? "For a type instead of a pointer, use offsetof()" etc...
--
Kees Cook
On Wed, 26 Nov 2025 23:58:01 -0800 Kees Cook <kees@kernel.org> wrote: > > +/** > > + * struct_offset() - Calculate the offset of a member within a struct > > + * @p: Pointer to the struct > > + * @member: Name of the member to get the offset of > > + * > > + * Calculates the offset of a particular @member of the structure pointed > > + * to by @p. > > + * > > + * Return: number of bytes to the location of @member. > > + */ > > +#define struct_offset(p, member) (offsetof(typeof(*(p)), member)) > > I wonder if the kerndoc for this and offsetof() should reference each > other? "For a type instead of a pointer, use offsetof()" etc... I know I pushed this to my for-next branch already, but it's the top patch. Looking at my code, I actually have a lot of places that use the offsetof() for a structure variable and not a pointer to a structure. Thus, I wonder if it is better to have this as: #define struct_offset(s, member) (offsetof(typeof(s), member)) And then for pointers we have: size = struct_offset(*entry, id) + cnt; If you forget the '*' it will error with a complaint that entry is not a structure type. Then I could make changes like this: diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 1244d2c5c384..55f1bdab4ffa 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -587,19 +587,19 @@ int ring_buffer_print_page_header(struct trace_buffer *buffer, struct trace_seq trace_seq_printf(s, "\tfield: local_t commit;\t" "offset:%u;\tsize:%u;\tsigned:%u;\n", - (unsigned int)offsetof(typeof(field), commit), + (unsigned int)struct_offset(field, commit), (unsigned int)sizeof(field.commit), (unsigned int)is_signed_type(long)); trace_seq_printf(s, "\tfield: int overwrite;\t" "offset:%u;\tsize:%u;\tsigned:%u;\n", - (unsigned int)offsetof(typeof(field), commit), + (unsigned int)struct_offset(field, commit), 1, (unsigned int)is_signed_type(long)); trace_seq_printf(s, "\tfield: char data;\t" "offset:%u;\tsize:%u;\tsigned:%u;\n", - (unsigned int)offsetof(typeof(field), data), + (unsigned int)struct_offset(field, data), (unsigned int)buffer->subbuf_size, (unsigned int)is_signed_type(char)); I would have a lot more places I can make this update then if struct_offset() took a pointer instead of the struct itself. As adding a '*' isn't ugly I think I like this way better. What are your thoughts? -- Steve
On November 27, 2025 5:43:42 PM PST, Steven Rostedt <rostedt@goodmis.org> wrote: >On Wed, 26 Nov 2025 23:58:01 -0800 >Kees Cook <kees@kernel.org> wrote: > >> > +/** >> > + * struct_offset() - Calculate the offset of a member within a struct >> > + * @p: Pointer to the struct >> > + * @member: Name of the member to get the offset of >> > + * >> > + * Calculates the offset of a particular @member of the structure pointed >> > + * to by @p. >> > + * >> > + * Return: number of bytes to the location of @member. >> > + */ >> > +#define struct_offset(p, member) (offsetof(typeof(*(p)), member)) >> >> I wonder if the kerndoc for this and offsetof() should reference each >> other? "For a type instead of a pointer, use offsetof()" etc... > >I know I pushed this to my for-next branch already, but it's the top >patch. Looking at my code, I actually have a lot of places that use the >offsetof() for a structure variable and not a pointer to a structure. > >Thus, I wonder if it is better to have this as: > >#define struct_offset(s, member) (offsetof(typeof(s), member)) I'd rather it keep the same API style as struct_size() if it's going to share the naming style. If you have an instance and not a pointer, just slap on a & :) -- Kees Cook
On Thu, 27 Nov 2025 18:00:01 -0800 Kees Cook <kees@kernel.org> wrote: > I'd rather it keep the same API style as struct_size() if it's going to share the naming style. Fair enough. Then I won't change it. > > If you have an instance and not a pointer, just slap on a & :) :-p I guess that would work. Although it will kinda look a bit strange. At least to me. -- Steve
On Thu, 27 Nov 2025 at 19:27, Steven Rostedt <rostedt@goodmis.org> wrote:
>
> On Thu, 27 Nov 2025 18:00:01 -0800
> Kees Cook <kees@kernel.org> wrote:
>
> > If you have an instance and not a pointer, just slap on a & :)
>
> :-p
>
> I guess that would work. Although it will kinda look a bit strange.
> At least to me.
It does look a tiny bit odd, and it's not always wonderful, but there
are advantages to conceptually passing in the pointer rather than the
actual instance.
The two main advantages are that
(a) a pointer is *usually* what you have. Not always, no, but it's
the common thing in the kernel. We have a lot fewer cases of 'struct
on stack' than we have 'pointer to struct somewhere'.
but even more so:
(b) unless the macro makes it very *very* obvious in the name that it
only looks at the *type* of the argument, dereferencing the pointer
can easily look absolutely horrendous.
We had an example of that (b) case recently [*] where the code looked like this:
typeof(VAR) *__obj_ptr = NULL;
....__can_set_flex_counter(__obj_ptr->FAM, __count) ...
and note that combination of "NULL pointer" and "dereference that pointer".
The code was *correct*, because that '__can_set_flex_counter()' macro
in that case only looked at the *type* of the first argument, so the
dereference didn't "really" happen as an actual access to any memory
behind the pointer, but it doesn't make it really look any better. We
really don't want to have those kinds of visual patterns, it just
makes people go "WHAA?!??".
Now, with proper naming, you can make that obvious. You can make it
really clear that "this is looking just at the type of that argument,
not the value of the expression it is given".
But honestly, I think that "really clear" comes close to just having
to have that "__typeof__()" pattern.
Can people learn that a name like "struct_offset()" always just takes
the type of the first argument, and get used to patterns like that?
Sure. Absolutely. But I'm not sure we really want people to learn that
pattern if there are alternatives.
So I do think that the "struct_size()" syntax - which takes the
*pointer* to the object and then does "sizeof(*(p))" internally - is
the better syntax. Not just because we *do* typically have the pointer
as the variable we are trying to get the size of, but exactly because
we do not want it to visually look like a dereference of a pointer
that may not have a valid value yet.
Put another way: yes, it's slightly conceptually odd to pass in the
pointer in order to actually get the size (or offset of a member, in
your case) of the actual *instance*, but I think "slightly
conceptually odd" is a small price to pay to not it look more regular.
And yes, I obviously do realize that 'sizeof()' and 'offsetof' and
'__alignof__' all take the actual type, not the pointer to the type.
But they are very special operations in C, and we should not make
other operations act like them unless they have some very clear and
obvious clue.
For example, we do have 'struct_size_t()' that takes the actual type,
not the pointer to the type. It _only_ takes an explicit type, and
cannot even take an instance of an object (although we could *make* it
take an instance by adding a '__typeof__()' into that macro). But at
least the '_t' part hopefully makes that obvious, and even if we did
make it take an instance, you'd never use it on the above kind of
pointer without a valid value, because then you'd use the non-_t()
version.
So that 'xyz_t()' version then gets used for things where you
explicitly state the type, and it all looks fairly obvious, eg:
len = struct_size_t(struct pid, numbers, level + 1);
doesn't get that "WHAA?!??!" kind of reaction.
[ And so I actually think it's good that it only takes an explicit
type - if you really have an instance, I think it's better to use just
"struct_size(&instance, ...)" even if we _could_ easily make syntax
like "struct_size_t(instance, ...)" work. ]
Linus
[*] https://lore.kernel.org/all/CAHk-=wiNnECns4B3qxRsCykkHwzovT+3wG738fUhq5E+3Lxxbg@mail.gmail.com/
On Thu, 27 Nov 2025 21:35:35 -0800 Linus Torvalds <torvalds@linux-foundation.org> wrote: > So that 'xyz_t()' version then gets used for things where you > explicitly state the type, and it all looks fairly obvious, eg: > > len = struct_size_t(struct pid, numbers, level + 1); > > doesn't get that "WHAA?!??!" kind of reaction. > > [ And so I actually think it's good that it only takes an explicit > type - if you really have an instance, I think it's better to use just > "struct_size(&instance, ...)" even if we _could_ easily make syntax > like "struct_size_t(instance, ...)" work. ] I was thinking about adding a struct_offset_t() but then I noticed that struct_size_t() requires adding the type as it is for just getting the size of the struct without using a variable. Whereas, I would have preferred the struct_offset_t() to use a variable that's not a pointer where typeof() is used. But for my use cases, I can just add a '&' to struct_offset(), as if struct_offset_t() were to take a type, it is no different than offsetof(). -- Steve
On Wed, 26 Nov 2025 14:52:49 -0500
Steven Rostedt <rostedt@goodmis.org> wrote:
> This will likely come in handy for other use cases too.
Just looking at kernel/trace, I can see the following use cases too:
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index e21176f396d5..b6a4c2050127 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -1482,7 +1482,7 @@ static void blk_trace_synthesize_old_trace(struct trace_iterator *iter)
{
struct trace_seq *s = &iter->seq;
struct blk_io_trace *t = (struct blk_io_trace *)iter->ent;
- const int offset = offsetof(struct blk_io_trace, sector);
+ const int offset = struct_offset(t, sector);
struct blk_io_trace old = {
.magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION,
.time = iter->ts,
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 17c75cf2348e..7ff2a23e23c9 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -955,7 +955,7 @@ print_graph_entry_leaf(struct trace_iterator *iter,
int cpu = iter->cpu;
int i;
- args_size = iter->ent_size - offsetof(struct ftrace_graph_ent_entry, args);
+ args_size = iter->ent_size - struct_offset(entry, args);
graph_ret = &ret_entry->ret;
call = &entry->graph_ent;
@@ -1048,7 +1048,7 @@ print_graph_entry_nested(struct trace_iterator *iter,
trace_seq_printf(s, "%ps", (void *)func);
- args_size = iter->ent_size - offsetof(struct ftrace_graph_ent_entry, args);
+ args_size = iter->ent_size - struct_offset(entry, args);
if (args_size >= FTRACE_REGS_MAX_ARGS * sizeof(long))
print_function_args(s, FGRAPH_ENTRY_ARGS(entry), func);
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index ebbab3e9622b..30113756d56f 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -1167,7 +1167,7 @@ static enum print_line_t trace_fn_trace(struct trace_iterator *iter, int flags,
trace_assign_type(field, iter->ent);
- args_size = iter->ent_size - offsetof(struct ftrace_entry, args);
+ args_size = iter->ent_size - struct_offset(field, args);
if (args_size >= FTRACE_REGS_MAX_ARGS * sizeof(long))
args = field->args;
else
@@ -1799,7 +1799,7 @@ static enum print_line_t trace_raw_data(struct trace_iterator *iter, int flags,
trace_seq_printf(&iter->seq, "# %x buf:", field->id);
- for (i = 0; i < iter->ent_size - offsetof(struct raw_data_entry, buf); i++)
+ for (i = 0; i < iter->ent_size - struct_offset(field, buf); i++)
trace_seq_printf(&iter->seq, " %02x",
(unsigned char)field->buf[i]);
It better associates the variable with its member offset.
-- Steve
© 2016 - 2025 Red Hat, Inc.