fs/ext4/inode.c | 68 ++++++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 48 insertions(+), 20 deletions(-)
A file handle that userspace provides to open_by_handle_at() can
legitimately contain an outdated inode number that has since been reused
for another purpose - that's why the file handle also contains a generation
number.
But if the inode number has been reused for an ea_inode, check_igot_inode()
will notice, __ext4_iget() will go through ext4_error_inode(), and if the
inode was newly created, it will also be marked as bad by iget_failed().
This all happens before the point where the inode generation is checked.
ext4_error_inode() is supposed to only be used on filesystem corruption; it
should not be used when userspace just got unlucky with a stale file
handle. So when this happens, let __ext4_iget() just return an error.
Fixes: b3e6bcb94590 ("ext4: add EA_INODE checking to ext4_iget()")
Signed-off-by: Jann Horn <jannh@google.com>
---
I'm sending this as an RFC patch because the patch I came up with is pretty
ugly; I think it would be ideal if someone else comes up with a nicer
patch that can be used instead of this one.
I'm also not sure whether it actually matters if we call iget_failed() when this happens with a new inode.
The following testcase demonstrates this issue, and shows that a filesystem
mounted with errors=remount-ro is remounted to read-only when hitting it.
Run this as root.
```
#define _GNU_SOURCE
#include <err.h>
#include <stdarg.h>
#include <stdio.h>
#include <sched.h>
#include <stddef.h>
#include <unistd.h>
#include <fcntl.h>
#include <string.h>
#include <stdlib.h>
#include <sys/mount.h>
#include <sys/mman.h>
#include <sys/xattr.h>
#include <sys/stat.h>
#define SYSCHK(x) ({ \
typeof(x) __res = (x); \
if (__res == (typeof(x))-1) \
err(1, "SYSCHK(" #x ")"); \
__res; \
})
static int systemf(const char *cmd, ...) {
char *full_cmd;
va_list ap;
va_start(ap, cmd);
if (vasprintf(&full_cmd, cmd, ap) == -1)
err(1, "vasprintf");
int res = system(full_cmd);
free(full_cmd);
return res;
}
int main(void) {
// avoid messing with the main mount hierarchy
SYSCHK(unshare(CLONE_NEWNS));
SYSCHK(mount(NULL, "/", NULL, MS_PRIVATE|MS_REC, NULL));
// create and mount new ext4 fs
int fs_fd = SYSCHK(memfd_create("ext4-image", 0));
SYSCHK(ftruncate(fs_fd, 1024*1024));
if (systemf("mkfs.ext4 -O ea_inode /proc/self/fd/%d", fs_fd))
errx(1, "mkfs failed");
if (systemf("mount -o errors=remount-ro -t ext4 /proc/self/fd/%d /mnt", fs_fd))
errx(1, "mount failed");
// create file with inode xattr
char xattr_body[8192];
memset(xattr_body, 'A', sizeof(xattr_body));
int file_fd = SYSCHK(open("/mnt/file", O_RDWR|O_CREAT, 0600));
SYSCHK(fsetxattr(file_fd, "user.foo", xattr_body, sizeof(xattr_body), XATTR_CREATE));
struct stat st;
SYSCHK(fstat(file_fd, &st));
// trigger bug: do fhandle lookup on inode of file plus one (which will be
// the xattr inode)
struct handle {
unsigned int handle_bytes;
unsigned int handle_type;
unsigned int ino, gen, parent_ino, parent_gen;
} handle = {
.handle_bytes = sizeof(handle) - offsetof(struct handle, ino),
.handle_type = 1/*FILEID_INO32_GEN*/,
.ino = st.st_ino+1,
.gen = 0
};
// this should fail
SYSCHK(open_by_handle_at(file_fd, (void*)&handle, O_RDONLY));
}
```
resulting dmesg:
```
EXT4-fs (loop0): mounted filesystem 13b7e98f-901a-41a4-ba59-4cc58d597798 r/w without journal. Quota mode: none.
EXT4-fs error (device loop0): ext4_nfs_get_inode:1545: inode #13: comm ext4-ea-inode-f: unexpected EA_INODE flag
EXT4-fs (loop0): Remounting filesystem read-only
EXT4-fs (loop0): unmounting filesystem 13b7e98f-901a-41a4-ba59-4cc58d597798.
```
---
fs/ext4/inode.c | 68 ++++++++++++++++++++++++++++++++++++++++-----------------
1 file changed, 48 insertions(+), 20 deletions(-)
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 89aade6f45f62d9fd6300ef84c118c6b919cddc9..8a8cc29b211c875a1d22b943004dc3f10b9c4d79 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4705,22 +4705,43 @@ static inline void ext4_inode_set_iversion_queried(struct inode *inode, u64 val)
inode_set_iversion_queried(inode, val);
}
-static const char *check_igot_inode(struct inode *inode, ext4_iget_flags flags)
-
+static int check_igot_inode(struct inode *inode, ext4_iget_flags flags,
+ const char *function, unsigned int line)
{
+ const char *err_str;
+
if (flags & EXT4_IGET_EA_INODE) {
- if (!(EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL))
- return "missing EA_INODE flag";
+ if (!(EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)) {
+ err_str = "missing EA_INODE flag";
+ goto error;
+ }
if (ext4_test_inode_state(inode, EXT4_STATE_XATTR) ||
- EXT4_I(inode)->i_file_acl)
- return "ea_inode with extended attributes";
+ EXT4_I(inode)->i_file_acl) {
+ err_str = "ea_inode with extended attributes";
+ goto error;
+ }
} else {
- if ((EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL))
- return "unexpected EA_INODE flag";
+ if ((EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)) {
+ /*
+ * open_by_handle_at() could provide an old inode number
+ * that has since been reused for an ea_inode; this does
+ * not indicate filesystem corruption
+ */
+ if (flags & EXT4_IGET_HANDLE)
+ return -ESTALE;
+ err_str = "unexpected EA_INODE flag";
+ goto error;
+ }
+ }
+ if (is_bad_inode(inode) && !(flags & EXT4_IGET_BAD)) {
+ err_str = "unexpected bad inode w/o EXT4_IGET_BAD";
+ goto error;
}
- if (is_bad_inode(inode) && !(flags & EXT4_IGET_BAD))
- return "unexpected bad inode w/o EXT4_IGET_BAD";
- return NULL;
+ return 0;
+
+error:
+ ext4_error_inode(inode, function, line, 0, err_str);
+ return -EFSCORRUPTED;
}
struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
@@ -4732,7 +4753,6 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
struct ext4_inode_info *ei;
struct ext4_super_block *es = EXT4_SB(sb)->s_es;
struct inode *inode;
- const char *err_str;
journal_t *journal = EXT4_SB(sb)->s_journal;
long ret;
loff_t size;
@@ -4761,10 +4781,10 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
if (!inode)
return ERR_PTR(-ENOMEM);
if (!(inode->i_state & I_NEW)) {
- if ((err_str = check_igot_inode(inode, flags)) != NULL) {
- ext4_error_inode(inode, function, line, 0, err_str);
+ ret = check_igot_inode(inode, flags, function, line);
+ if (ret) {
iput(inode);
- return ERR_PTR(-EFSCORRUPTED);
+ return ERR_PTR(ret);
}
return inode;
}
@@ -5036,13 +5056,21 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
ret = -EFSCORRUPTED;
goto bad_inode;
}
- if ((err_str = check_igot_inode(inode, flags)) != NULL) {
- ext4_error_inode(inode, function, line, 0, err_str);
- ret = -EFSCORRUPTED;
- goto bad_inode;
+ ret = check_igot_inode(inode, flags, function, line);
+ /*
+ * -ESTALE here means there is nothing inherently wrong with the inode,
+ * it's just not an inode we can return for an fhandle lookup.
+ */
+ if (ret == -ESTALE) {
+ brelse(iloc.bh);
+ unlock_new_inode(inode);
+ iput(inode);
+ return ERR_PTR(-ESTALE);
}
-
+ if (ret)
+ goto bad_inode;
brelse(iloc.bh);
+
unlock_new_inode(inode);
return inode;
---
base-commit: b86545e02e8c22fb89218f29d381fa8e8b91d815
change-id: 20241129-ext4-ignore-ea-fhandle-743d3723c5e9
--
Jann Horn <jannh@google.com>
Hi Ted! was this fix missed? On Fri 29-11-24 21:20:53, Jann Horn wrote: > A file handle that userspace provides to open_by_handle_at() can > legitimately contain an outdated inode number that has since been reused > for another purpose - that's why the file handle also contains a generation > number. > > But if the inode number has been reused for an ea_inode, check_igot_inode() > will notice, __ext4_iget() will go through ext4_error_inode(), and if the > inode was newly created, it will also be marked as bad by iget_failed(). > This all happens before the point where the inode generation is checked. > > ext4_error_inode() is supposed to only be used on filesystem corruption; it > should not be used when userspace just got unlucky with a stale file > handle. So when this happens, let __ext4_iget() just return an error. > > Fixes: b3e6bcb94590 ("ext4: add EA_INODE checking to ext4_iget()") > Signed-off-by: Jann Horn <jannh@google.com> The patch looks good to me. Feel free to add: Reviewed-by: Jan Kara <jack@suse.cz> Honza > --- > I'm sending this as an RFC patch because the patch I came up with is > pretty ugly; I think it would be ideal if someone else comes up with a > nicer patch that can be used instead of this one. I'm also not sure > whether it actually matters if we call iget_failed() when this happens > with a new inode. > > The following testcase demonstrates this issue, and shows that a filesystem > mounted with errors=remount-ro is remounted to read-only when hitting it. > Run this as root. > > ``` > #define _GNU_SOURCE > #include <err.h> > #include <stdarg.h> > #include <stdio.h> > #include <sched.h> > #include <stddef.h> > #include <unistd.h> > #include <fcntl.h> > #include <string.h> > #include <stdlib.h> > #include <sys/mount.h> > #include <sys/mman.h> > #include <sys/xattr.h> > #include <sys/stat.h> > > #define SYSCHK(x) ({ \ > typeof(x) __res = (x); \ > if (__res == (typeof(x))-1) \ > err(1, "SYSCHK(" #x ")"); \ > __res; \ > }) > > static int systemf(const char *cmd, ...) { > char *full_cmd; > va_list ap; > va_start(ap, cmd); > if (vasprintf(&full_cmd, cmd, ap) == -1) > err(1, "vasprintf"); > int res = system(full_cmd); > free(full_cmd); > return res; > } > > int main(void) { > // avoid messing with the main mount hierarchy > SYSCHK(unshare(CLONE_NEWNS)); > SYSCHK(mount(NULL, "/", NULL, MS_PRIVATE|MS_REC, NULL)); > > // create and mount new ext4 fs > int fs_fd = SYSCHK(memfd_create("ext4-image", 0)); > SYSCHK(ftruncate(fs_fd, 1024*1024)); > if (systemf("mkfs.ext4 -O ea_inode /proc/self/fd/%d", fs_fd)) > errx(1, "mkfs failed"); > if (systemf("mount -o errors=remount-ro -t ext4 /proc/self/fd/%d /mnt", fs_fd)) > errx(1, "mount failed"); > > // create file with inode xattr > char xattr_body[8192]; > memset(xattr_body, 'A', sizeof(xattr_body)); > int file_fd = SYSCHK(open("/mnt/file", O_RDWR|O_CREAT, 0600)); > SYSCHK(fsetxattr(file_fd, "user.foo", xattr_body, sizeof(xattr_body), XATTR_CREATE)); > struct stat st; > SYSCHK(fstat(file_fd, &st)); > > // trigger bug: do fhandle lookup on inode of file plus one (which will be > // the xattr inode) > struct handle { > unsigned int handle_bytes; > unsigned int handle_type; > unsigned int ino, gen, parent_ino, parent_gen; > } handle = { > .handle_bytes = sizeof(handle) - offsetof(struct handle, ino), > .handle_type = 1/*FILEID_INO32_GEN*/, > .ino = st.st_ino+1, > .gen = 0 > }; > // this should fail > SYSCHK(open_by_handle_at(file_fd, (void*)&handle, O_RDONLY)); > } > ``` > > resulting dmesg: > ``` > EXT4-fs (loop0): mounted filesystem 13b7e98f-901a-41a4-ba59-4cc58d597798 r/w without journal. Quota mode: none. > EXT4-fs error (device loop0): ext4_nfs_get_inode:1545: inode #13: comm ext4-ea-inode-f: unexpected EA_INODE flag > EXT4-fs (loop0): Remounting filesystem read-only > EXT4-fs (loop0): unmounting filesystem 13b7e98f-901a-41a4-ba59-4cc58d597798. > ``` > --- > fs/ext4/inode.c | 68 ++++++++++++++++++++++++++++++++++++++++----------------- > 1 file changed, 48 insertions(+), 20 deletions(-) > > diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c > index 89aade6f45f62d9fd6300ef84c118c6b919cddc9..8a8cc29b211c875a1d22b943004dc3f10b9c4d79 100644 > --- a/fs/ext4/inode.c > +++ b/fs/ext4/inode.c > @@ -4705,22 +4705,43 @@ static inline void ext4_inode_set_iversion_queried(struct inode *inode, u64 val) > inode_set_iversion_queried(inode, val); > } > > -static const char *check_igot_inode(struct inode *inode, ext4_iget_flags flags) > - > +static int check_igot_inode(struct inode *inode, ext4_iget_flags flags, > + const char *function, unsigned int line) > { > + const char *err_str; > + > if (flags & EXT4_IGET_EA_INODE) { > - if (!(EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)) > - return "missing EA_INODE flag"; > + if (!(EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)) { > + err_str = "missing EA_INODE flag"; > + goto error; > + } > if (ext4_test_inode_state(inode, EXT4_STATE_XATTR) || > - EXT4_I(inode)->i_file_acl) > - return "ea_inode with extended attributes"; > + EXT4_I(inode)->i_file_acl) { > + err_str = "ea_inode with extended attributes"; > + goto error; > + } > } else { > - if ((EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)) > - return "unexpected EA_INODE flag"; > + if ((EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)) { > + /* > + * open_by_handle_at() could provide an old inode number > + * that has since been reused for an ea_inode; this does > + * not indicate filesystem corruption > + */ > + if (flags & EXT4_IGET_HANDLE) > + return -ESTALE; > + err_str = "unexpected EA_INODE flag"; > + goto error; > + } > + } > + if (is_bad_inode(inode) && !(flags & EXT4_IGET_BAD)) { > + err_str = "unexpected bad inode w/o EXT4_IGET_BAD"; > + goto error; > } > - if (is_bad_inode(inode) && !(flags & EXT4_IGET_BAD)) > - return "unexpected bad inode w/o EXT4_IGET_BAD"; > - return NULL; > + return 0; > + > +error: > + ext4_error_inode(inode, function, line, 0, err_str); > + return -EFSCORRUPTED; > } > > struct inode *__ext4_iget(struct super_block *sb, unsigned long ino, > @@ -4732,7 +4753,6 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino, > struct ext4_inode_info *ei; > struct ext4_super_block *es = EXT4_SB(sb)->s_es; > struct inode *inode; > - const char *err_str; > journal_t *journal = EXT4_SB(sb)->s_journal; > long ret; > loff_t size; > @@ -4761,10 +4781,10 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino, > if (!inode) > return ERR_PTR(-ENOMEM); > if (!(inode->i_state & I_NEW)) { > - if ((err_str = check_igot_inode(inode, flags)) != NULL) { > - ext4_error_inode(inode, function, line, 0, err_str); > + ret = check_igot_inode(inode, flags, function, line); > + if (ret) { > iput(inode); > - return ERR_PTR(-EFSCORRUPTED); > + return ERR_PTR(ret); > } > return inode; > } > @@ -5036,13 +5056,21 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino, > ret = -EFSCORRUPTED; > goto bad_inode; > } > - if ((err_str = check_igot_inode(inode, flags)) != NULL) { > - ext4_error_inode(inode, function, line, 0, err_str); > - ret = -EFSCORRUPTED; > - goto bad_inode; > + ret = check_igot_inode(inode, flags, function, line); > + /* > + * -ESTALE here means there is nothing inherently wrong with the inode, > + * it's just not an inode we can return for an fhandle lookup. > + */ > + if (ret == -ESTALE) { > + brelse(iloc.bh); > + unlock_new_inode(inode); > + iput(inode); > + return ERR_PTR(-ESTALE); > } > - > + if (ret) > + goto bad_inode; > brelse(iloc.bh); > + > unlock_new_inode(inode); > return inode; > > > --- > base-commit: b86545e02e8c22fb89218f29d381fa8e8b91d815 > change-id: 20241129-ext4-ignore-ea-fhandle-743d3723c5e9 > > -- > Jann Horn <jannh@google.com> > > -- Jan Kara <jack@suse.com> SUSE Labs, CR
© 2016 - 2025 Red Hat, Inc.