[PATCH RFC] ext4: don't treat fhandle lookup of ea_inode as FS corruption

Jann Horn posted 1 patch 5 months, 2 weeks ago
fs/ext4/inode.c | 68 ++++++++++++++++++++++++++++++++++++++++-----------------
1 file changed, 48 insertions(+), 20 deletions(-)
[PATCH RFC] ext4: don't treat fhandle lookup of ea_inode as FS corruption
Posted by Jann Horn 5 months, 2 weeks ago
A file handle that userspace provides to open_by_handle_at() can
legitimately contain an outdated inode number that has since been reused
for another purpose - that's why the file handle also contains a generation
number.

But if the inode number has been reused for an ea_inode, check_igot_inode()
will notice, __ext4_iget() will go through ext4_error_inode(), and if the
inode was newly created, it will also be marked as bad by iget_failed().
This all happens before the point where the inode generation is checked.

ext4_error_inode() is supposed to only be used on filesystem corruption; it
should not be used when userspace just got unlucky with a stale file
handle. So when this happens, let __ext4_iget() just return an error.

Fixes: b3e6bcb94590 ("ext4: add EA_INODE checking to ext4_iget()")
Signed-off-by: Jann Horn <jannh@google.com>
---
I'm sending this as an RFC patch because the patch I came up with is pretty
ugly; I think it would be ideal if someone else comes up with a nicer
patch that can be used instead of this one.
I'm also not sure whether it actually matters if we call iget_failed() when this happens with a new inode.

The following testcase demonstrates this issue, and shows that a filesystem
mounted with errors=remount-ro is remounted to read-only when hitting it.
Run this as root.

```
#define _GNU_SOURCE
#include <err.h>
#include <stdarg.h>
#include <stdio.h>
#include <sched.h>
#include <stddef.h>
#include <unistd.h>
#include <fcntl.h>
#include <string.h>
#include <stdlib.h>
#include <sys/mount.h>
#include <sys/mman.h>
#include <sys/xattr.h>
#include <sys/stat.h>

#define SYSCHK(x) ({          \
  typeof(x) __res = (x);      \
  if (__res == (typeof(x))-1) \
    err(1, "SYSCHK(" #x ")"); \
  __res;                      \
})

static int systemf(const char *cmd, ...) {
  char *full_cmd;
  va_list ap;
  va_start(ap, cmd);
  if (vasprintf(&full_cmd, cmd, ap) == -1)
    err(1, "vasprintf");
  int res = system(full_cmd);
  free(full_cmd);
  return res;
}

int main(void) {
  // avoid messing with the main mount hierarchy
  SYSCHK(unshare(CLONE_NEWNS));
  SYSCHK(mount(NULL, "/", NULL, MS_PRIVATE|MS_REC, NULL));

  // create and mount new ext4 fs
  int fs_fd = SYSCHK(memfd_create("ext4-image", 0));
  SYSCHK(ftruncate(fs_fd, 1024*1024));
  if (systemf("mkfs.ext4 -O ea_inode /proc/self/fd/%d", fs_fd))
    errx(1, "mkfs failed");
  if (systemf("mount -o errors=remount-ro -t ext4 /proc/self/fd/%d /mnt", fs_fd))
    errx(1, "mount failed");

  // create file with inode xattr
  char xattr_body[8192];
  memset(xattr_body, 'A', sizeof(xattr_body));
  int file_fd = SYSCHK(open("/mnt/file", O_RDWR|O_CREAT, 0600));
  SYSCHK(fsetxattr(file_fd, "user.foo", xattr_body, sizeof(xattr_body), XATTR_CREATE));
  struct stat st;
  SYSCHK(fstat(file_fd, &st));

  // trigger bug: do fhandle lookup on inode of file plus one (which will be
  // the xattr inode)
  struct handle {
    unsigned int handle_bytes;
    unsigned int handle_type;
    unsigned int ino, gen, parent_ino, parent_gen;
  } handle = {
    .handle_bytes = sizeof(handle) - offsetof(struct handle, ino),
    .handle_type = 1/*FILEID_INO32_GEN*/,
    .ino = st.st_ino+1,
    .gen = 0
  };
  // this should fail
  SYSCHK(open_by_handle_at(file_fd, (void*)&handle, O_RDONLY));
}
```

resulting dmesg:
```
EXT4-fs (loop0): mounted filesystem 13b7e98f-901a-41a4-ba59-4cc58d597798 r/w without journal. Quota mode: none.
EXT4-fs error (device loop0): ext4_nfs_get_inode:1545: inode #13: comm ext4-ea-inode-f: unexpected EA_INODE flag
EXT4-fs (loop0): Remounting filesystem read-only
EXT4-fs (loop0): unmounting filesystem 13b7e98f-901a-41a4-ba59-4cc58d597798.
```
---
 fs/ext4/inode.c | 68 ++++++++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 48 insertions(+), 20 deletions(-)

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 89aade6f45f62d9fd6300ef84c118c6b919cddc9..8a8cc29b211c875a1d22b943004dc3f10b9c4d79 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4705,22 +4705,43 @@ static inline void ext4_inode_set_iversion_queried(struct inode *inode, u64 val)
 		inode_set_iversion_queried(inode, val);
 }
 
-static const char *check_igot_inode(struct inode *inode, ext4_iget_flags flags)
-
+static int check_igot_inode(struct inode *inode, ext4_iget_flags flags,
+			    const char *function, unsigned int line)
 {
+	const char *err_str;
+
 	if (flags & EXT4_IGET_EA_INODE) {
-		if (!(EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL))
-			return "missing EA_INODE flag";
+		if (!(EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)) {
+			err_str = "missing EA_INODE flag";
+			goto error;
+		}
 		if (ext4_test_inode_state(inode, EXT4_STATE_XATTR) ||
-		    EXT4_I(inode)->i_file_acl)
-			return "ea_inode with extended attributes";
+		    EXT4_I(inode)->i_file_acl) {
+			err_str = "ea_inode with extended attributes";
+			goto error;
+		}
 	} else {
-		if ((EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL))
-			return "unexpected EA_INODE flag";
+		if ((EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)) {
+			/*
+			 * open_by_handle_at() could provide an old inode number
+			 * that has since been reused for an ea_inode; this does
+			 * not indicate filesystem corruption
+			 */
+			if (flags & EXT4_IGET_HANDLE)
+				return -ESTALE;
+			err_str = "unexpected EA_INODE flag";
+			goto error;
+		}
+	}
+	if (is_bad_inode(inode) && !(flags & EXT4_IGET_BAD)) {
+		err_str = "unexpected bad inode w/o EXT4_IGET_BAD";
+		goto error;
 	}
-	if (is_bad_inode(inode) && !(flags & EXT4_IGET_BAD))
-		return "unexpected bad inode w/o EXT4_IGET_BAD";
-	return NULL;
+	return 0;
+
+error:
+	ext4_error_inode(inode, function, line, 0, err_str);
+	return -EFSCORRUPTED;
 }
 
 struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
@@ -4732,7 +4753,6 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
 	struct ext4_inode_info *ei;
 	struct ext4_super_block *es = EXT4_SB(sb)->s_es;
 	struct inode *inode;
-	const char *err_str;
 	journal_t *journal = EXT4_SB(sb)->s_journal;
 	long ret;
 	loff_t size;
@@ -4761,10 +4781,10 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
 	if (!inode)
 		return ERR_PTR(-ENOMEM);
 	if (!(inode->i_state & I_NEW)) {
-		if ((err_str = check_igot_inode(inode, flags)) != NULL) {
-			ext4_error_inode(inode, function, line, 0, err_str);
+		ret = check_igot_inode(inode, flags, function, line);
+		if (ret) {
 			iput(inode);
-			return ERR_PTR(-EFSCORRUPTED);
+			return ERR_PTR(ret);
 		}
 		return inode;
 	}
@@ -5036,13 +5056,21 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
 		ret = -EFSCORRUPTED;
 		goto bad_inode;
 	}
-	if ((err_str = check_igot_inode(inode, flags)) != NULL) {
-		ext4_error_inode(inode, function, line, 0, err_str);
-		ret = -EFSCORRUPTED;
-		goto bad_inode;
+	ret = check_igot_inode(inode, flags, function, line);
+	/*
+	 * -ESTALE here means there is nothing inherently wrong with the inode,
+	 * it's just not an inode we can return for an fhandle lookup.
+	 */
+	if (ret == -ESTALE) {
+		brelse(iloc.bh);
+		unlock_new_inode(inode);
+		iput(inode);
+		return ERR_PTR(-ESTALE);
 	}
-
+	if (ret)
+		goto bad_inode;
 	brelse(iloc.bh);
+
 	unlock_new_inode(inode);
 	return inode;
 

---
base-commit: b86545e02e8c22fb89218f29d381fa8e8b91d815
change-id: 20241129-ext4-ignore-ea-fhandle-743d3723c5e9

-- 
Jann Horn <jannh@google.com>
Re: [PATCH RFC] ext4: don't treat fhandle lookup of ea_inode as FS corruption
Posted by Jan Kara 1 month, 3 weeks ago
Hi Ted!

was this fix missed?

On Fri 29-11-24 21:20:53, Jann Horn wrote:
> A file handle that userspace provides to open_by_handle_at() can
> legitimately contain an outdated inode number that has since been reused
> for another purpose - that's why the file handle also contains a generation
> number.
> 
> But if the inode number has been reused for an ea_inode, check_igot_inode()
> will notice, __ext4_iget() will go through ext4_error_inode(), and if the
> inode was newly created, it will also be marked as bad by iget_failed().
> This all happens before the point where the inode generation is checked.
> 
> ext4_error_inode() is supposed to only be used on filesystem corruption; it
> should not be used when userspace just got unlucky with a stale file
> handle. So when this happens, let __ext4_iget() just return an error.
> 
> Fixes: b3e6bcb94590 ("ext4: add EA_INODE checking to ext4_iget()")
> Signed-off-by: Jann Horn <jannh@google.com>

The patch looks good to me. Feel free to add:

Reviewed-by: Jan Kara <jack@suse.cz>

								Honza

> ---
> I'm sending this as an RFC patch because the patch I came up with is
> pretty ugly; I think it would be ideal if someone else comes up with a
> nicer patch that can be used instead of this one.  I'm also not sure
> whether it actually matters if we call iget_failed() when this happens
> with a new inode.
> 
> The following testcase demonstrates this issue, and shows that a filesystem
> mounted with errors=remount-ro is remounted to read-only when hitting it.
> Run this as root.
> 
> ```
> #define _GNU_SOURCE
> #include <err.h>
> #include <stdarg.h>
> #include <stdio.h>
> #include <sched.h>
> #include <stddef.h>
> #include <unistd.h>
> #include <fcntl.h>
> #include <string.h>
> #include <stdlib.h>
> #include <sys/mount.h>
> #include <sys/mman.h>
> #include <sys/xattr.h>
> #include <sys/stat.h>
> 
> #define SYSCHK(x) ({          \
>   typeof(x) __res = (x);      \
>   if (__res == (typeof(x))-1) \
>     err(1, "SYSCHK(" #x ")"); \
>   __res;                      \
> })
> 
> static int systemf(const char *cmd, ...) {
>   char *full_cmd;
>   va_list ap;
>   va_start(ap, cmd);
>   if (vasprintf(&full_cmd, cmd, ap) == -1)
>     err(1, "vasprintf");
>   int res = system(full_cmd);
>   free(full_cmd);
>   return res;
> }
> 
> int main(void) {
>   // avoid messing with the main mount hierarchy
>   SYSCHK(unshare(CLONE_NEWNS));
>   SYSCHK(mount(NULL, "/", NULL, MS_PRIVATE|MS_REC, NULL));
> 
>   // create and mount new ext4 fs
>   int fs_fd = SYSCHK(memfd_create("ext4-image", 0));
>   SYSCHK(ftruncate(fs_fd, 1024*1024));
>   if (systemf("mkfs.ext4 -O ea_inode /proc/self/fd/%d", fs_fd))
>     errx(1, "mkfs failed");
>   if (systemf("mount -o errors=remount-ro -t ext4 /proc/self/fd/%d /mnt", fs_fd))
>     errx(1, "mount failed");
> 
>   // create file with inode xattr
>   char xattr_body[8192];
>   memset(xattr_body, 'A', sizeof(xattr_body));
>   int file_fd = SYSCHK(open("/mnt/file", O_RDWR|O_CREAT, 0600));
>   SYSCHK(fsetxattr(file_fd, "user.foo", xattr_body, sizeof(xattr_body), XATTR_CREATE));
>   struct stat st;
>   SYSCHK(fstat(file_fd, &st));
> 
>   // trigger bug: do fhandle lookup on inode of file plus one (which will be
>   // the xattr inode)
>   struct handle {
>     unsigned int handle_bytes;
>     unsigned int handle_type;
>     unsigned int ino, gen, parent_ino, parent_gen;
>   } handle = {
>     .handle_bytes = sizeof(handle) - offsetof(struct handle, ino),
>     .handle_type = 1/*FILEID_INO32_GEN*/,
>     .ino = st.st_ino+1,
>     .gen = 0
>   };
>   // this should fail
>   SYSCHK(open_by_handle_at(file_fd, (void*)&handle, O_RDONLY));
> }
> ```
> 
> resulting dmesg:
> ```
> EXT4-fs (loop0): mounted filesystem 13b7e98f-901a-41a4-ba59-4cc58d597798 r/w without journal. Quota mode: none.
> EXT4-fs error (device loop0): ext4_nfs_get_inode:1545: inode #13: comm ext4-ea-inode-f: unexpected EA_INODE flag
> EXT4-fs (loop0): Remounting filesystem read-only
> EXT4-fs (loop0): unmounting filesystem 13b7e98f-901a-41a4-ba59-4cc58d597798.
> ```
> ---
>  fs/ext4/inode.c | 68 ++++++++++++++++++++++++++++++++++++++++-----------------
>  1 file changed, 48 insertions(+), 20 deletions(-)
> 
> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> index 89aade6f45f62d9fd6300ef84c118c6b919cddc9..8a8cc29b211c875a1d22b943004dc3f10b9c4d79 100644
> --- a/fs/ext4/inode.c
> +++ b/fs/ext4/inode.c
> @@ -4705,22 +4705,43 @@ static inline void ext4_inode_set_iversion_queried(struct inode *inode, u64 val)
>  		inode_set_iversion_queried(inode, val);
>  }
>  
> -static const char *check_igot_inode(struct inode *inode, ext4_iget_flags flags)
> -
> +static int check_igot_inode(struct inode *inode, ext4_iget_flags flags,
> +			    const char *function, unsigned int line)
>  {
> +	const char *err_str;
> +
>  	if (flags & EXT4_IGET_EA_INODE) {
> -		if (!(EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL))
> -			return "missing EA_INODE flag";
> +		if (!(EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)) {
> +			err_str = "missing EA_INODE flag";
> +			goto error;
> +		}
>  		if (ext4_test_inode_state(inode, EXT4_STATE_XATTR) ||
> -		    EXT4_I(inode)->i_file_acl)
> -			return "ea_inode with extended attributes";
> +		    EXT4_I(inode)->i_file_acl) {
> +			err_str = "ea_inode with extended attributes";
> +			goto error;
> +		}
>  	} else {
> -		if ((EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL))
> -			return "unexpected EA_INODE flag";
> +		if ((EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)) {
> +			/*
> +			 * open_by_handle_at() could provide an old inode number
> +			 * that has since been reused for an ea_inode; this does
> +			 * not indicate filesystem corruption
> +			 */
> +			if (flags & EXT4_IGET_HANDLE)
> +				return -ESTALE;
> +			err_str = "unexpected EA_INODE flag";
> +			goto error;
> +		}
> +	}
> +	if (is_bad_inode(inode) && !(flags & EXT4_IGET_BAD)) {
> +		err_str = "unexpected bad inode w/o EXT4_IGET_BAD";
> +		goto error;
>  	}
> -	if (is_bad_inode(inode) && !(flags & EXT4_IGET_BAD))
> -		return "unexpected bad inode w/o EXT4_IGET_BAD";
> -	return NULL;
> +	return 0;
> +
> +error:
> +	ext4_error_inode(inode, function, line, 0, err_str);
> +	return -EFSCORRUPTED;
>  }
>  
>  struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
> @@ -4732,7 +4753,6 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
>  	struct ext4_inode_info *ei;
>  	struct ext4_super_block *es = EXT4_SB(sb)->s_es;
>  	struct inode *inode;
> -	const char *err_str;
>  	journal_t *journal = EXT4_SB(sb)->s_journal;
>  	long ret;
>  	loff_t size;
> @@ -4761,10 +4781,10 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
>  	if (!inode)
>  		return ERR_PTR(-ENOMEM);
>  	if (!(inode->i_state & I_NEW)) {
> -		if ((err_str = check_igot_inode(inode, flags)) != NULL) {
> -			ext4_error_inode(inode, function, line, 0, err_str);
> +		ret = check_igot_inode(inode, flags, function, line);
> +		if (ret) {
>  			iput(inode);
> -			return ERR_PTR(-EFSCORRUPTED);
> +			return ERR_PTR(ret);
>  		}
>  		return inode;
>  	}
> @@ -5036,13 +5056,21 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
>  		ret = -EFSCORRUPTED;
>  		goto bad_inode;
>  	}
> -	if ((err_str = check_igot_inode(inode, flags)) != NULL) {
> -		ext4_error_inode(inode, function, line, 0, err_str);
> -		ret = -EFSCORRUPTED;
> -		goto bad_inode;
> +	ret = check_igot_inode(inode, flags, function, line);
> +	/*
> +	 * -ESTALE here means there is nothing inherently wrong with the inode,
> +	 * it's just not an inode we can return for an fhandle lookup.
> +	 */
> +	if (ret == -ESTALE) {
> +		brelse(iloc.bh);
> +		unlock_new_inode(inode);
> +		iput(inode);
> +		return ERR_PTR(-ESTALE);
>  	}
> -
> +	if (ret)
> +		goto bad_inode;
>  	brelse(iloc.bh);
> +
>  	unlock_new_inode(inode);
>  	return inode;
>  
> 
> ---
> base-commit: b86545e02e8c22fb89218f29d381fa8e8b91d815
> change-id: 20241129-ext4-ignore-ea-fhandle-743d3723c5e9
> 
> -- 
> Jann Horn <jannh@google.com>
> 
> 
-- 
Jan Kara <jack@suse.com>
SUSE Labs, CR
Re: [PATCH RFC] ext4: don't treat fhandle lookup of ea_inode as FS corruption
Posted by Theodore Ts'o 1 month, 1 week ago
On Thu, Mar 27, 2025 at 12:04:38PM +0100, Jan Kara wrote:
> Hi Ted!
> 
> was this fix missed?

Yes, oops.   I've applied it now.

					- Ted