fs/inode.c | 7 +++++++ 1 file changed, 7 insertions(+)
Notably make sure the count is 0 after the return from ->drop_inode(),
provided we are going to drop.
Inspired by suspicious games played by f2fs.
Signed-off-by: Mateusz Guzik <mjguzik@gmail.com>
---
boots on ext4 without splats
fs/inode.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/fs/inode.c b/fs/inode.c
index ec9339024ac3..fa82cb810af4 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1879,6 +1879,7 @@ static void iput_final(struct inode *inode)
int drop;
WARN_ON(inode->i_state & I_NEW);
+ VFS_BUG_ON_INODE(atomic_read(&inode->i_count) != 0, inode);
if (op->drop_inode)
drop = op->drop_inode(inode);
@@ -1893,6 +1894,12 @@ static void iput_final(struct inode *inode)
return;
}
+ /*
+ * Re-check ->i_count in case the ->drop_inode() hooks played games.
+ * Note we only execute this if the verdict was to drop the inode.
+ */
+ VFS_BUG_ON_INODE(atomic_read(&inode->i_count) != 0, inode);
+
state = inode->i_state;
if (!drop) {
WRITE_ONCE(inode->i_state, state | I_WILL_FREE);
--
2.34.1
On Wed 01-10-25 03:00:10, Mateusz Guzik wrote: > Notably make sure the count is 0 after the return from ->drop_inode(), > provided we are going to drop. > > Inspired by suspicious games played by f2fs. Whoo, those are indeed interesting. > Signed-off-by: Mateusz Guzik <mjguzik@gmail.com> > --- > > boots on ext4 without splats > > fs/inode.c | 7 +++++++ > 1 file changed, 7 insertions(+) > > diff --git a/fs/inode.c b/fs/inode.c > index ec9339024ac3..fa82cb810af4 100644 > --- a/fs/inode.c > +++ b/fs/inode.c > @@ -1879,6 +1879,7 @@ static void iput_final(struct inode *inode) > int drop; > > WARN_ON(inode->i_state & I_NEW); > + VFS_BUG_ON_INODE(atomic_read(&inode->i_count) != 0, inode); This seems pointless given when iput_final() is called... > if (op->drop_inode) > drop = op->drop_inode(inode); > @@ -1893,6 +1894,12 @@ static void iput_final(struct inode *inode) > return; > } > > + /* > + * Re-check ->i_count in case the ->drop_inode() hooks played games. > + * Note we only execute this if the verdict was to drop the inode. > + */ > + VFS_BUG_ON_INODE(atomic_read(&inode->i_count) != 0, inode); > + I'm not sure this can catch much but OK... Honza -- Jan Kara <jack@suse.com> SUSE Labs, CR
On Wed, Oct 1, 2025 at 2:07 PM Jan Kara <jack@suse.cz> wrote: > > diff --git a/fs/inode.c b/fs/inode.c > > index ec9339024ac3..fa82cb810af4 100644 > > --- a/fs/inode.c > > +++ b/fs/inode.c > > @@ -1879,6 +1879,7 @@ static void iput_final(struct inode *inode) > > int drop; > > > > WARN_ON(inode->i_state & I_NEW); > > + VFS_BUG_ON_INODE(atomic_read(&inode->i_count) != 0, inode); > > This seems pointless given when iput_final() is called... > This and the other check explicitly "wrap" the ->drop_inode call. > > if (op->drop_inode) > > drop = op->drop_inode(inode); > > @@ -1893,6 +1894,12 @@ static void iput_final(struct inode *inode) > > return; > > } > > > > + /* > > + * Re-check ->i_count in case the ->drop_inode() hooks played games. > > + * Note we only execute this if the verdict was to drop the inode. > > + */ > > + VFS_BUG_ON_INODE(atomic_read(&inode->i_count) != 0, inode); > > + > > I'm not sure this can catch much but OK... > It can catch drop routines which bumped the ref but did not release it, or which indicated to continue with drop while someone else snatched the reference. Preferably the APIs would prevent that in the first place, but there is quite a bit of shit-shoveling before that happens.
On Wed 01-10-25 14:12:13, Mateusz Guzik wrote: > On Wed, Oct 1, 2025 at 2:07 PM Jan Kara <jack@suse.cz> wrote: > > > diff --git a/fs/inode.c b/fs/inode.c > > > index ec9339024ac3..fa82cb810af4 100644 > > > --- a/fs/inode.c > > > +++ b/fs/inode.c > > > @@ -1879,6 +1879,7 @@ static void iput_final(struct inode *inode) > > > int drop; > > > > > > WARN_ON(inode->i_state & I_NEW); > > > + VFS_BUG_ON_INODE(atomic_read(&inode->i_count) != 0, inode); > > > > This seems pointless given when iput_final() is called... > > > > This and the other check explicitly "wrap" the ->drop_inode call. I understand but given iput() has just decremented i_count to 0 before calling iput_final() this beginning of the "wrap" looks pretty pointless to me. > > > if (op->drop_inode) > > > drop = op->drop_inode(inode); > > > @@ -1893,6 +1894,12 @@ static void iput_final(struct inode *inode) > > > return; > > > } > > > > > > + /* > > > + * Re-check ->i_count in case the ->drop_inode() hooks played games. > > > + * Note we only execute this if the verdict was to drop the inode. > > > + */ > > > + VFS_BUG_ON_INODE(atomic_read(&inode->i_count) != 0, inode); > > > + > > > > I'm not sure this can catch much but OK... > > > > It can catch drop routines which bumped the ref but did not release > it, or which indicated to continue with drop while someone else > snatched the reference. Right. > Preferaby the APIs would prevent that in the first place, but there is > quite a bit of shit-shoveling before that happens. Agreed. Honza -- Jan Kara <jack@suse.com> SUSE Labs, CR
On Wed, Oct 1, 2025 at 3:08 PM Jan Kara <jack@suse.cz> wrote: > > On Wed 01-10-25 14:12:13, Mateusz Guzik wrote: > > On Wed, Oct 1, 2025 at 2:07 PM Jan Kara <jack@suse.cz> wrote: > > > > diff --git a/fs/inode.c b/fs/inode.c > > > > index ec9339024ac3..fa82cb810af4 100644 > > > > --- a/fs/inode.c > > > > +++ b/fs/inode.c > > > > @@ -1879,6 +1879,7 @@ static void iput_final(struct inode *inode) > > > > int drop; > > > > > > > > WARN_ON(inode->i_state & I_NEW); > > > > + VFS_BUG_ON_INODE(atomic_read(&inode->i_count) != 0, inode); > > > > > > This seems pointless given when iput_final() is called... > > > > > > > This and the other check explicitly "wrap" the ->drop_inode call. > > I understand but given iput() has just decremented i_count to 0 before > calling iput_final() this beginning of the "wrap" looks pretty pointless to > me. > To my understanding you are not NAKing the patch, you are merely not particularly fond of it. ;) Given that these asserts don't show up in production kernels, the layer should be moving towards always spelling out all assumptions at the entry point. Worst case does not hurt in production anyway, best case it will catch something. For iput_final specifically, at the moment there is only one consumer so this indeed may look overzealous. But for the sake of argument suppose someone noticed that dentry_unlink_inode() performs: spin_unlock(&inode->i_lock); if (!inode->i_nlink) fsnotify_inoderemove(inode); if (dentry->d_op && dentry->d_op->d_iput) dentry->d_op->d_iput(dentry, inode); else iput(inode); ... and that with some minor rototilling the inode lock can survive both fsnotify and custom d_iput in the common case. Should that happen, iput_locked() could be added to shave off a lock trip in the common case of whacking the inode. But then there are 2 consumers of iput_final. etc.
On Wed 01-10-25 16:28:15, Mateusz Guzik wrote: > On Wed, Oct 1, 2025 at 3:08 PM Jan Kara <jack@suse.cz> wrote: > > > > On Wed 01-10-25 14:12:13, Mateusz Guzik wrote: > > > On Wed, Oct 1, 2025 at 2:07 PM Jan Kara <jack@suse.cz> wrote: > > > > > diff --git a/fs/inode.c b/fs/inode.c > > > > > index ec9339024ac3..fa82cb810af4 100644 > > > > > --- a/fs/inode.c > > > > > +++ b/fs/inode.c > > > > > @@ -1879,6 +1879,7 @@ static void iput_final(struct inode *inode) > > > > > int drop; > > > > > > > > > > WARN_ON(inode->i_state & I_NEW); > > > > > + VFS_BUG_ON_INODE(atomic_read(&inode->i_count) != 0, inode); > > > > > > > > This seems pointless given when iput_final() is called... > > > > > > > > > > This and the other check explicitly "wrap" the ->drop_inode call. > > > > I understand but given iput() has just decremented i_count to 0 before > > calling iput_final() this beginning of the "wrap" looks pretty pointless to > > me. > > > > To my understanding you are not NAKing the patch, are merely not > particularly fond of it. ;) Yes, it isn't annoying me enough to nak it but I couldn't resist complaining :) > Given that these asserts don't show up in production kernels, the > layer should be moving towards always spelling out all assumptions at > the entry point. Worst case does not hurt in production anyway, best > case it will catch something. Well, I think that when we get too many asserts, the code is harder to read. > For iput_final specifically, at the moment there is only one consumer > so this indeed may look overzealous. > > But for the sake of argument suppose someone noticed that > dentry_unlink_inode() performs: > spin_unlock(&inode->i_lock); > if (!inode->i_nlink) > fsnotify_inoderemove(inode); > if (dentry->d_op && dentry->d_op->d_iput) > dentry->d_op->d_iput(dentry, inode); > else > iput(inode); > > ... and that with some minor rototoiling the inode lock can survive > both fsnotify and custom d_iput in the common case. 
Should that > happen, iput_locked() could be added to shave off a lock trip in the > common case of whacking the inode. But then there is 2 consumers of > iput_final. etc. Right. And when we grow second iput_final() caller, I'd withdraw my complaint about pointless assert ;). Honza -- Jan Kara <jack@suse.com> SUSE Labs, CR
© 2016 - 2025 Red Hat, Inc.