fs/readdir.c | 94 +++++++++++++++++++++------------------------------- fs/select.c | 35 ++++++++----------- 2 files changed, 51 insertions(+), 78 deletions(-)
Scoped user access reduces code complexity and seamlessly bring
masked user access on architectures that support it.
Replace user_access_begin/user_access_end blocks by
scoped user access.
Signed-off-by: Christophe Leroy (CS GROUP) <chleroy@kernel.org>
---
v2:
- Fix build failure with CONFIG_COMPAT
- Handled checkpatch.pl output
v3:
- Fix again build failure with CONFIG_COMPAT. I was obviously too tired when I sent out v2.
---
fs/readdir.c | 94 +++++++++++++++++++++-------------------------------
fs/select.c | 35 ++++++++-----------
2 files changed, 51 insertions(+), 78 deletions(-)
diff --git a/fs/readdir.c b/fs/readdir.c
index 73707b6816e9..644e2b69ae62 100644
--- a/fs/readdir.c
+++ b/fs/readdir.c
@@ -198,18 +198,14 @@ static bool fillonedir(struct dir_context *ctx, const char *name, int namlen,
}
buf->result++;
dirent = buf->dirent;
- if (!user_write_access_begin(dirent,
- (unsigned long)(dirent->d_name + namlen + 1) -
- (unsigned long)dirent))
- goto efault;
- unsafe_put_user(d_ino, &dirent->d_ino, efault_end);
- unsafe_put_user(offset, &dirent->d_offset, efault_end);
- unsafe_put_user(namlen, &dirent->d_namlen, efault_end);
- unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault_end);
- user_write_access_end();
+ scoped_user_write_access_size(dirent, (unsigned long)(dirent->d_name + namlen + 1) -
+ (unsigned long)dirent, efault) {
+ unsafe_put_user(d_ino, &dirent->d_ino, efault);
+ unsafe_put_user(offset, &dirent->d_offset, efault);
+ unsafe_put_user(namlen, &dirent->d_namlen, efault);
+ unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault);
+ }
return true;
-efault_end:
- user_write_access_end();
efault:
buf->result = -EFAULT;
return false;
@@ -287,23 +283,19 @@ static bool filldir(struct dir_context *ctx, const char *name, int namlen,
return false;
dirent = buf->current_dir;
prev = (void __user *) dirent - prev_reclen;
- if (!user_write_access_begin(prev, reclen + prev_reclen))
- goto efault;
-
- /* This might be 'dirent->d_off', but if so it will get overwritten */
- unsafe_put_user(offset, &prev->d_off, efault_end);
- unsafe_put_user(d_ino, &dirent->d_ino, efault_end);
- unsafe_put_user(reclen, &dirent->d_reclen, efault_end);
- unsafe_put_user(d_type, (char __user *) dirent + reclen - 1, efault_end);
- unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault_end);
- user_write_access_end();
+ scoped_user_write_access_size(prev, reclen + prev_reclen, efault) {
+ /* This might be 'dirent->d_off', but if so it will get overwritten */
+ unsafe_put_user(offset, &prev->d_off, efault);
+ unsafe_put_user(d_ino, &dirent->d_ino, efault);
+ unsafe_put_user(reclen, &dirent->d_reclen, efault);
+ unsafe_put_user(d_type, (char __user *)dirent + reclen - 1, efault);
+ unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault);
+ }
buf->current_dir = (void __user *)dirent + reclen;
buf->prev_reclen = reclen;
ctx->count -= reclen;
return true;
-efault_end:
- user_write_access_end();
efault:
buf->error = -EFAULT;
return false;
@@ -371,24 +363,20 @@ static bool filldir64(struct dir_context *ctx, const char *name, int namlen,
return false;
dirent = buf->current_dir;
prev = (void __user *)dirent - prev_reclen;
- if (!user_write_access_begin(prev, reclen + prev_reclen))
- goto efault;
-
- /* This might be 'dirent->d_off', but if so it will get overwritten */
- unsafe_put_user(offset, &prev->d_off, efault_end);
- unsafe_put_user(ino, &dirent->d_ino, efault_end);
- unsafe_put_user(reclen, &dirent->d_reclen, efault_end);
- unsafe_put_user(d_type, &dirent->d_type, efault_end);
- unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault_end);
- user_write_access_end();
+ scoped_user_write_access_size(prev, reclen + prev_reclen, efault) {
+ /* This might be 'dirent->d_off', but if so it will get overwritten */
+ unsafe_put_user(offset, &prev->d_off, efault);
+ unsafe_put_user(ino, &dirent->d_ino, efault);
+ unsafe_put_user(reclen, &dirent->d_reclen, efault);
+ unsafe_put_user(d_type, &dirent->d_type, efault);
+ unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault);
+ }
buf->prev_reclen = reclen;
buf->current_dir = (void __user *)dirent + reclen;
ctx->count -= reclen;
return true;
-efault_end:
- user_write_access_end();
efault:
buf->error = -EFAULT;
return false;
@@ -460,18 +448,14 @@ static bool compat_fillonedir(struct dir_context *ctx, const char *name,
}
buf->result++;
dirent = buf->dirent;
- if (!user_write_access_begin(dirent,
- (unsigned long)(dirent->d_name + namlen + 1) -
- (unsigned long)dirent))
- goto efault;
- unsafe_put_user(d_ino, &dirent->d_ino, efault_end);
- unsafe_put_user(offset, &dirent->d_offset, efault_end);
- unsafe_put_user(namlen, &dirent->d_namlen, efault_end);
- unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault_end);
- user_write_access_end();
+ scoped_user_write_access_size(dirent, (unsigned long)(dirent->d_name + namlen + 1) -
+ (unsigned long)dirent, efault) {
+ unsafe_put_user(d_ino, &dirent->d_ino, efault);
+ unsafe_put_user(offset, &dirent->d_offset, efault);
+ unsafe_put_user(namlen, &dirent->d_namlen, efault);
+ unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault);
+ }
return true;
-efault_end:
- user_write_access_end();
efault:
buf->result = -EFAULT;
return false;
@@ -543,22 +527,18 @@ static bool compat_filldir(struct dir_context *ctx, const char *name, int namlen
return false;
dirent = buf->current_dir;
prev = (void __user *) dirent - prev_reclen;
- if (!user_write_access_begin(prev, reclen + prev_reclen))
- goto efault;
-
- unsafe_put_user(offset, &prev->d_off, efault_end);
- unsafe_put_user(d_ino, &dirent->d_ino, efault_end);
- unsafe_put_user(reclen, &dirent->d_reclen, efault_end);
- unsafe_put_user(d_type, (char __user *) dirent + reclen - 1, efault_end);
- unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault_end);
- user_write_access_end();
+ scoped_user_write_access_size(prev, reclen + prev_reclen, efault) {
+ unsafe_put_user(offset, &prev->d_off, efault);
+ unsafe_put_user(d_ino, &dirent->d_ino, efault);
+ unsafe_put_user(reclen, &dirent->d_reclen, efault);
+ unsafe_put_user(d_type, (char __user *)dirent + reclen - 1, efault);
+ unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault);
+ }
buf->prev_reclen = reclen;
buf->current_dir = (void __user *)dirent + reclen;
ctx->count -= reclen;
return true;
-efault_end:
- user_write_access_end();
efault:
buf->error = -EFAULT;
return false;
diff --git a/fs/select.c b/fs/select.c
index e0244dbe4429..75978b18f48f 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -1004,17 +1004,17 @@ static int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds,
fdcount = do_poll(head, &table, end_time);
poll_freewait(&table);
- if (!user_write_access_begin(ufds, nfds * sizeof(*ufds)))
- goto out_fds;
+ scoped_user_write_access_size(ufds, nfds * sizeof(*ufds), out_fds) {
+ struct pollfd __user *_ufds = ufds;
- for (walk = head; walk; walk = walk->next) {
- struct pollfd *fds = walk->entries;
- unsigned int j;
+ for (walk = head; walk; walk = walk->next) {
+ struct pollfd *fds = walk->entries;
+ unsigned int j;
- for (j = walk->len; j; fds++, ufds++, j--)
- unsafe_put_user(fds->revents, &ufds->revents, Efault);
- }
- user_write_access_end();
+ for (j = walk->len; j; fds++, _ufds++, j--)
+ unsafe_put_user(fds->revents, &_ufds->revents, out_fds);
+ }
+ }
err = fdcount;
out_fds:
@@ -1026,11 +1026,6 @@ static int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds,
}
return err;
-
-Efault:
- user_write_access_end();
- err = -EFAULT;
- goto out_fds;
}
static long do_restart_poll(struct restart_block *restart_block)
@@ -1338,15 +1333,13 @@ static inline int get_compat_sigset_argpack(struct compat_sigset_argpack *to,
struct compat_sigset_argpack __user *from)
{
if (from) {
- if (!user_read_access_begin(from, sizeof(*from)))
- return -EFAULT;
- unsafe_get_user(to->p, &from->p, Efault);
- unsafe_get_user(to->size, &from->size, Efault);
- user_read_access_end();
+ scoped_user_read_access(from, efault) {
+ unsafe_get_user(to->p, &from->p, efault);
+ unsafe_get_user(to->size, &from->size, efault);
+ }
}
return 0;
-Efault:
- user_read_access_end();
+efault:
return -EFAULT;
}
--
2.49.0
On Mon, 16 Mar 2026 at 01:53, Christophe Leroy (CS GROUP)
<chleroy@kernel.org> wrote:
>
> - if (!user_write_access_begin(dirent,
> - (unsigned long)(dirent->d_name + namlen + 1) -
> - (unsigned long)dirent))
> - goto efault;
This was already pretty unreadable (my bad), but..
> + scoped_user_write_access_size(dirent, (unsigned long)(dirent->d_name + namlen + 1) -
> + (unsigned long)dirent, efault) {
.. in my opinion, this is even worse. It's a 90+ character long line
(or something), *and* it continues on the next line.
Yes, yes, the old code was disgusting too, but when changing it for
something that is supposed to be easier to read, please let's make it
*really* easier to read.
(And yes, there's another case of this same pattern a bit later).
Adding a helper inline function like dirent_size() might do it. And I
think it might as well be cleaned up while at it, and make it be
something like
static inline size_t dirent_size(int namelen)
{
return offsetof(struct linux_dirent, d_name) + namelen + 1;
}
[ Making things doubly sad, the size shouldn't even be *used* in sane
situations, because the user access validity is often checked by only
checking the beginning,
The x86-64 __access_ok() does actually do that optimization, but
only if we can statically see that the thing is smaller than one page,
which in this case it can't, because while namelen is range checked,
it is allowed to be up to PATH_MAX in size, so even if the compiler
does do the full value range analysis, we'd need to relax that
__access_ok() check a bit more ]
Hmm? Can you do at least that dirent_size() kind of cleanup?
Linus
Le 16/03/2026 à 18:12, Linus Torvalds a écrit :
> On Mon, 16 Mar 2026 at 01:53, Christophe Leroy (CS GROUP)
> <chleroy@kernel.org> wrote:
>>
>> - if (!user_write_access_begin(dirent,
>> - (unsigned long)(dirent->d_name + namlen + 1) -
>> - (unsigned long)dirent))
>> - goto efault;
>
> This was already pretty unreadable (my bad), but..
>
>> + scoped_user_write_access_size(dirent, (unsigned long)(dirent->d_name + namlen + 1) -
>> + (unsigned long)dirent, efault) {
>
> .. in my opinion, this is even worse. It's a 90+ character long line
> (or something), *and* it continues on the next line.
>
> Yes, yes, the old code was disgusting too, but when changing it for
> something that is supposed to be easier to read, please let's make it
> *really* easier to read.
>
> (And yes, there's another case of this same pattern a bit later).
The other pattern looks similar but is not exactly the same.
>
> Adding a helper inline function like dirent_size() might do it. And I
> think it might as well be cleaned up while at it, and make it be
> something like
>
> static inline size_t dirent_size(int namelen)
> {
> return offsetof(struct linux_dirent, d_name) + namelen + 1;
> }
>
...
>
> Hmm? Can you do at least that dirent_size() kind of cleanup?
Thanks for the suggestion.
I went for a local var alternative, see v4. A helper would be of no help
as the two patterns are not exactly the same:
size_t size = offsetof(struct old_linux_dirent, d_name) + namlen + 1;
size_t size = offsetof(struct compat_old_linux_dirent, d_name) + namlen
+ 1;
Christophe
On Wed, 18 Mar 2026 at 05:29, Christophe Leroy (CS GROUP)
<chleroy@kernel.org> wrote:
>
> I went for a local var alternative, see v4. A helper would be of no help
> as the two patterns are not exactly the same:
>
> size_t size = offsetof(struct old_linux_dirent, d_name) + namlen + 1;
>
> size_t size = offsetof(struct compat_old_linux_dirent, d_name) + namlen + 1;
Your v4 looks fine to me, but admittedly a helper in the form of a
macro could have worked just fine:
#define dirent_size(dirent, len) offsetof(typeof(dirent), d_name[len])
where I intentionally did *not* do the "+1", because some of our users
want to use "+2" because they put the d_type information in the byte
after the name (because they didn't have a 'd_type' field originally,
but did have the 'reclen' field).
Because we actually have that pattern more than two times, just in
different guises.
Something untested like the attached...
Linus
On Wed, 18 Mar 2026 at 08:49, Linus Torvalds
<torvalds@linux-foundation.org> wrote:
>
> #define dirent_size(dirent, len) offsetof(typeof(dirent), d_name[len])
That 'typeof(dirent)' needs to be 'typeof(*(dirent))' to be convenient.
It was correct in the patch I attached, but I'll just point it out anyway.
And we actually have a helper macro for that: struct_offset(). Which
wasn't what I used in that attached patch, but *should* have been.
IOW, the macro should look something like
#define dirent_size(dirent, len) struct_offset(dirent, d_name[len])
instead. That looks fairly clean, no?
Linus
Le 18/03/2026 à 16:53, Linus Torvalds a écrit :
> On Wed, 18 Mar 2026 at 08:49, Linus Torvalds
> <torvalds@linux-foundation.org> wrote:
>>
>> #define dirent_size(dirent, len) offsetof(typeof(dirent), d_name[len])
>
> That 'typeof(dirent)' needs to be 'typeof(*(dirent))' to be convenient.
>
> It was correct in the patch I attached, but I'll just point it out anyway.
>
> And we actually have a helper macro for that: struct_offset(). Which
> wasn't what I used in that attached patch, but *should* have been.
>
> IOW, the macro should look something like
>
> #define dirent_size(dirent, len) struct_offset(dirent, d_name[len])
>
Sending v5 (series of 2) with your suggested patch as patch 1.
Following feedback from David which I tend to agree with, I left it as
you did initialy.
struct_offset() has only one caller and I don't feel it has much added
value compared to offsetof(typeof(),) pattern which is more explicit and
already used 144 times in the kernel:
$ git grep "offsetof(typeof(" | wc -l
144
Christophe
On Wed, 18 Mar 2026 08:53:52 -0700 Linus Torvalds <torvalds@linux-foundation.org> wrote: > On Wed, 18 Mar 2026 at 08:49, Linus Torvalds > <torvalds@linux-foundation.org> wrote: > > > > #define dirent_size(dirent, len) offsetof(typeof(dirent), d_name[len]) > > That 'typeof(dirent)' needs to be 'typeof(*(dirent))' to be convenient. > > It was correct in the patch I attached, but I'll just point it out anyway. > > And we actually have a helper macro for that: struct_offset(). Which > wasn't what I used in that attached patch, but *should* have been. > > IOW, the macro should look something like > > #define dirent_size(dirent, len) struct_offset(dirent, d_name[len]) > > instead. That looks fairly clean, no? And unnecessary - the struct_offset() isn't that much longer. Actually, from a readability point of view even struct_offset() almost makes things worse. If you are being paranoid it is another definition to find and check. There isn't that much difference in the lengths: dirent_size(dirent, len) struct_offset(dirent, d_name[len]) offsetof(typeof(*dirent), d_name[len]) and the last one really does tell you what it being calculated. I do remember one compiler (and I thought it was gcc, but it might only have been msvc) that required that offsetof() always generate a constant. ISTR the C language requires that - which makes all the above invalid. > > Linus
On Mon, 16 Mar 2026 10:12:23 -0700
Linus Torvalds <torvalds@linux-foundation.org> wrote:
> On Mon, 16 Mar 2026 at 01:53, Christophe Leroy (CS GROUP)
> <chleroy@kernel.org> wrote:
> >
> > - if (!user_write_access_begin(dirent,
> > - (unsigned long)(dirent->d_name + namlen + 1) -
> > - (unsigned long)dirent))
> > - goto efault;
>
> This was already pretty unreadable (my bad), but..
>
> > + scoped_user_write_access_size(dirent, (unsigned long)(dirent->d_name + namlen + 1) -
> > + (unsigned long)dirent, efault) {
>
> .. in my opinion, this is even worse. It's a 90+ character long line
> (or something), *and* it continues on the next line.
>
> Yes, yes, the old code was disgusting too, but when changing it for
> something that is supposed to be easier to read, please let's make it
> *really* easier to read.
>
> (And yes, there's another case of this same pattern a bit later).
>
> Adding a helper inline function like dirent_size() might do it. And I
> think it might as well be cleaned up while at it, and make it be
> something like
>
> static inline size_t dirent_size(int namelen)
> {
> return offsetof(struct linux_dirent, d_name) + namelen + 1;
> }
That definition would fit one one line (possibly as a continuation line).
> [ Making things doubly sad, the size shouldn't even be *used* in sane
> situations, because the user access validity is often checked by only
> checking the beginning,
>
> The x86-64 __access_ok() does actually do that optimization, but
> only if we can statically see that the thing is smaller than one page,
> which in this case it can't, because while namelen is range checked,
> it is allowed to be up to PATH_MAX in size, so even if the compiler
> does do the full value range analysis, we'd need to relax that
> __access_ok() check a bit more ]
Except is doesn't matter for x86-64 because this is the 'masked' case
where the accesses are required to be 'reasonably sequential'.
Although requiring a guard page on all architectures would make life
simpler overall.
I'm not even sure that some of the reasons that x86-64 has to have
a guard page (to stop speculative execution and system call return
to non-canonical addresses) don't have potential counterparts on x86-32
and other architectures.
For x86-32 I believe that historically the user stack was right at the
top of the user address space (or the constant wouldn't be STACK_TOP).
So forcing a guard page would realign the stack.
But I've not got an x86-32 system setup (I've got plenty of hardware)
so see what actually happens or test any hacking.
I'm also not sure where the vdso ends up - I'd have thought it might
be right at the top?
David
>
> Hmm? Can you do at least that dirent_size() kind of cleanup?
>
> Linus
© 2016 - 2026 Red Hat, Inc.