[PATCH] stat: fix inconsistency between struct stat and struct compat_stat

Mikulas Patocka posted 1 patch 4 years ago
arch/x86/include/asm/compat.h |    6 ++----
fs/stat.c                     |   19 ++++++++++---------
2 files changed, 12 insertions(+), 13 deletions(-)
[PATCH] stat: fix inconsistency between struct stat and struct compat_stat
Posted by Mikulas Patocka 4 years ago


On Mon, 11 Apr 2022, Linus Torvalds wrote:

> On Mon, Apr 11, 2022 at 7:13 AM Mikulas Patocka <mpatocka@redhat.com> wrote:
> >
> > Should we perhaps hash the number, take 16 bits of the hash and hope
> > that the collision won't happen?
> 
> That would "work", but I think it would be incredibly annoying to
> users with basically random results.
> 
> I think the solution is to just put the bits in the high bits. Yes,
> they might be masked off if people use 'MAJOR()' to pick them out, but
> the common "compare st_dev and st_ino" model at least works. That's
> the one that wants unique numbers.
> 
> > For me, the failure happens in cp_compat_stat (I have a 64-bit kernel). In
> > struct compat_stat in arch/x86/include/asm/compat.h, st_dev and st_rdev
> > are compat_dev_t which is 16-bit. But they are followed by 16-bit
> > paddings, so they could be extended.
> 
> Ok, that actually looks like a bug.
> 
> The compat structure should match the native structure.  Those "u16
> __padX" fields seem to be just a symptom of the bug.
> 
> The only user of that compat_stat structure is the kernel, so that
> should just be fixed.
> 
> Of course, who knows what the libraries have done, so user space could
> still have screwed up.

Here I'm sending a patch that makes struct compat_stat match struct stat.



stat: fix inconsistency between struct stat and struct compat_stat

struct stat (defined in arch/x86/include/uapi/asm/stat.h) has 32-bit
st_dev and st_rdev; struct compat_stat (defined in
arch/x86/include/asm/compat.h) has 16-bit st_dev and st_rdev followed by a
16-bit padding. This patch fixes struct compat_stat to match struct stat.

Note that we can't change compat_dev_t because it is used by
compat_loop_info.

Also, if the st_dev and st_rdev values are 32-bit, we don't have to use
old_valid_dev to test if the value fits into them. This fixes -EOVERFLOW
on filesystems that are on NVMe because NVMe uses the major number 259.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>

---
 arch/x86/include/asm/compat.h |    6 ++----
 fs/stat.c                     |   19 ++++++++++---------
 2 files changed, 12 insertions(+), 13 deletions(-)

Index: linux-5.17.2/arch/x86/include/asm/compat.h
===================================================================
--- linux-5.17.2.orig/arch/x86/include/asm/compat.h	2022-01-21 10:29:12.000000000 +0100
+++ linux-5.17.2/arch/x86/include/asm/compat.h	2022-04-12 11:27:14.000000000 +0200
@@ -28,15 +28,13 @@ typedef u16		compat_ipc_pid_t;
 typedef __kernel_fsid_t	compat_fsid_t;
 
 struct compat_stat {
-	compat_dev_t	st_dev;
-	u16		__pad1;
+	u32		st_dev;
 	compat_ino_t	st_ino;
 	compat_mode_t	st_mode;
 	compat_nlink_t	st_nlink;
 	__compat_uid_t	st_uid;
 	__compat_gid_t	st_gid;
-	compat_dev_t	st_rdev;
-	u16		__pad2;
+	u32		st_rdev;
 	u32		st_size;
 	u32		st_blksize;
 	u32		st_blocks;
Index: linux-5.17.2/fs/stat.c
===================================================================
--- linux-5.17.2.orig/fs/stat.c	2022-04-12 10:39:46.000000000 +0200
+++ linux-5.17.2/fs/stat.c	2022-04-12 10:58:28.000000000 +0200
@@ -334,9 +334,6 @@ SYSCALL_DEFINE2(fstat, unsigned int, fd,
 #  define choose_32_64(a,b) b
 #endif
 
-#define valid_dev(x)  choose_32_64(old_valid_dev(x),true)
-#define encode_dev(x) choose_32_64(old_encode_dev,new_encode_dev)(x)
-
 #ifndef INIT_STRUCT_STAT_PADDING
 #  define INIT_STRUCT_STAT_PADDING(st) memset(&st, 0, sizeof(st))
 #endif
@@ -345,7 +342,9 @@ static int cp_new_stat(struct kstat *sta
 {
 	struct stat tmp;
 
-	if (!valid_dev(stat->dev) || !valid_dev(stat->rdev))
+	if (sizeof(tmp.st_dev) < 4 && !old_valid_dev(stat->dev))
+		return -EOVERFLOW;
+	if (sizeof(tmp.st_rdev) < 4 && !old_valid_dev(stat->rdev))
 		return -EOVERFLOW;
 #if BITS_PER_LONG == 32
 	if (stat->size > MAX_NON_LFS)
@@ -353,7 +352,7 @@ static int cp_new_stat(struct kstat *sta
 #endif
 
 	INIT_STRUCT_STAT_PADDING(tmp);
-	tmp.st_dev = encode_dev(stat->dev);
+	tmp.st_dev = new_encode_dev(stat->dev);
 	tmp.st_ino = stat->ino;
 	if (sizeof(tmp.st_ino) < sizeof(stat->ino) && tmp.st_ino != stat->ino)
 		return -EOVERFLOW;
@@ -363,7 +362,7 @@ static int cp_new_stat(struct kstat *sta
 		return -EOVERFLOW;
 	SET_UID(tmp.st_uid, from_kuid_munged(current_user_ns(), stat->uid));
 	SET_GID(tmp.st_gid, from_kgid_munged(current_user_ns(), stat->gid));
-	tmp.st_rdev = encode_dev(stat->rdev);
+	tmp.st_rdev = new_encode_dev(stat->rdev);
 	tmp.st_size = stat->size;
 	tmp.st_atime = stat->atime.tv_sec;
 	tmp.st_mtime = stat->mtime.tv_sec;
@@ -644,11 +643,13 @@ static int cp_compat_stat(struct kstat *
 {
 	struct compat_stat tmp;
 
-	if (!old_valid_dev(stat->dev) || !old_valid_dev(stat->rdev))
+	if (sizeof(tmp.st_dev) < 4 && !old_valid_dev(stat->dev))
+		return -EOVERFLOW;
+	if (sizeof(tmp.st_rdev) < 4 && !old_valid_dev(stat->rdev))
 		return -EOVERFLOW;
 
 	memset(&tmp, 0, sizeof(tmp));
-	tmp.st_dev = old_encode_dev(stat->dev);
+	tmp.st_dev = new_encode_dev(stat->dev);
 	tmp.st_ino = stat->ino;
 	if (sizeof(tmp.st_ino) < sizeof(stat->ino) && tmp.st_ino != stat->ino)
 		return -EOVERFLOW;
@@ -658,7 +659,7 @@ static int cp_compat_stat(struct kstat *
 		return -EOVERFLOW;
 	SET_UID(tmp.st_uid, from_kuid_munged(current_user_ns(), stat->uid));
 	SET_GID(tmp.st_gid, from_kgid_munged(current_user_ns(), stat->gid));
-	tmp.st_rdev = old_encode_dev(stat->rdev);
+	tmp.st_rdev = new_encode_dev(stat->rdev);
 	if ((u64) stat->size > MAX_NON_LFS)
 		return -EOVERFLOW;
 	tmp.st_size = stat->size;
Re: [PATCH] stat: fix inconsistency between struct stat and struct compat_stat
Posted by Linus Torvalds 4 years ago
On Mon, Apr 11, 2022 at 11:41 PM Mikulas Patocka <mpatocka@redhat.com> wrote:
>
> Also, if the st_dev and st_rdev values are 32-bit, we don't have to use
> old_valid_dev to test if the value fits into them. This fixes -EOVERFLOW
> on filesystems that are on NVMe because NVMe uses the major number 259.

The problem with this part of the patch is that this:

> @@ -353,7 +352,7 @@ static int cp_new_stat(struct kstat *sta
>  #endif
>
>         INIT_STRUCT_STAT_PADDING(tmp);
> -       tmp.st_dev = encode_dev(stat->dev);
> +       tmp.st_dev = new_encode_dev(stat->dev);

completely changes the format of that st_dev field.

For completely insane historical reasons, we have had the rule that

 - 32-bit architectures encode the device into a 16 bit value

 - 64-bit architectures encode the device number into a 32 bit value

and that has been true *despite* the fact that the actual "st_dev"
field has been 32-bit and 64-bit respectively since 2003!

And it doesn't help that to confuse things even more, the _naming_ of
those "encode_dev" functions is "old and new", so that logically you'd
think that "cp_new_stat()" would use "new_encode_dev()". Nope.

So on 32-bit architectures, cp_new_stat() uses "old_encode_dev()",
which historically put the minor number in bits 0..7, and the major
number in bits 8..15.

End result: on a 32-bit system (or in the compat syscall mode),
changing to new_encode_dev() would confuse anybody (like just "ls -l
/dev") that uses that old stat call and tries to print out major/minor
numbers.

Now, the good news is that

 (a) nobody should use that old stat call, since the new world order
is called "stat64" and has been for a loooong time (also since at
least 2003)

 (b) we could just hide the bits in upper bits instead.

So what I suggest we do is to make old_encode_dev() put the minor bits
in bits 0..7 _and_ 16..23, and the major bits in 8..15 _and_ 24..31.

And then the -EOVERFLOW should be something like

        unsigned int st_dev = encode_dev(stat->dev);
        tmp.st_dev = st_dev;
        if (st_dev != tmp.st_dev)
                return -EOVERFLOW;

for the case that tmp.st_dev is actually 16-bit (ie the compat case
for some architecture where the padding wasn't there?)

NOTE: That will still screw up 'ls -l' output, but only for the
devices that previously would have returned -EOVERFLOW.

And it will make anybody who does that "stat1->st_dev ==
stat2->st_dev && ino == ino2" thing for testing "same inode" work just
fine.

              Linus
Re: [PATCH] stat: fix inconsistency between struct stat and struct compat_stat
Posted by Mikulas Patocka 4 years ago

On Tue, 12 Apr 2022, Linus Torvalds wrote:

> On Mon, Apr 11, 2022 at 11:41 PM Mikulas Patocka <mpatocka@redhat.com> wrote:
> >
> > Also, if the st_dev and st_rdev values are 32-bit, we don't have to use
> > old_valid_dev to test if the value fits into them. This fixes -EOVERFLOW
> > on filesystems that are on NVMe because NVMe uses the major number 259.
> 
> The problem with this part of the patch is that this:
> 
> > @@ -353,7 +352,7 @@ static int cp_new_stat(struct kstat *sta
> >  #endif
> >
> >         INIT_STRUCT_STAT_PADDING(tmp);
> > -       tmp.st_dev = encode_dev(stat->dev);
> > +       tmp.st_dev = new_encode_dev(stat->dev);
> 
> completely changes the format of that st_dev field.

We have these definitions:

static __always_inline u16 old_encode_dev(dev_t dev)
{
        return (MAJOR(dev) << 8) | MINOR(dev);
}

static __always_inline u32 new_encode_dev(dev_t dev)
{
        unsigned major = MAJOR(dev);
        unsigned minor = MINOR(dev);
        return (minor & 0xff) | (major << 8) | ((minor & ~0xff) << 12);
}

As long as both major and minor numbers are less than 256, these functions 
return equivalent results. So, I think it's safe to replace old_encode_dev 
with new_encode_dev.

old_encode_dev shouldn't be called with minor >= 256, because it blends 
the upper minor bits into the major field - the kernel doesn't do this and 
checks the value with old_valid_dev before calling old_encode_dev. But 
when old_valid_dev returns true, it doesn't matter if you use 
old_encode_dev or new_encode_dev - both give equivalent results.

When I tested it, both gcc and openwatcom return st_dev 0x10301, which is 
the expected value (the NVMe device has major 259 and minor 1).

>  (b) we could just hide the bits in upper bits instead.
> 
> So what I suggest we do is to make old_encode_dev() put the minor bits
> > in bits 0..7 _and_ 16..23, and the major bits in 8..15 _and_ 24..31.

new_encode_dev puts the minor value into bits 0..7 and 20..31, and the major 
value into bits 8..19.

So, we can use this instead of inventing a new format.

Mikulas
Re: [PATCH] stat: fix inconsistency between struct stat and struct compat_stat
Posted by Linus Torvalds 4 years ago
On Tue, Apr 12, 2022 at 7:42 AM Mikulas Patocka <mpatocka@redhat.com> wrote:
>
> As long as both major and minor numbers are less than 256, these functions
> return equivalent results. So, I think it's safe to replace old_encode_dev
> with new_encode_dev.

You are of course 100% right, and I should have looked more closely at
the code rather than going by my (broken) assumptions based on old
memory of what we did when we did that "new" stat expansion.

I take back all my objections that were completely bogus.

             Linus