From: NeilBrown <neil@brown.name>
xfs, fuse, ipc/mqueue need variants of start_creating or start_removing
which do not check permissions.
This patch adds _noperm versions of these functions.
Note that do_mq_open() was only calling mntget() so it could call
path_put() - it didn't really need an extra reference on the mnt.
Now it doesn't call mntget() and uses end_creating() which does
the dput() half of path_put().
Also mq_unlink() previously passed
d_inode(dentry->d_parent)
as the dir inode to vfs_unlink(). This is after locking
d_inode(mnt->mnt_root)
These two inodes are the same, but normally calls use the textual
parent.
So I've changes the vfs_unlink() call to be given d_inode(mnt->mnt_root).
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: NeilBrown <neil@brown.name>
--
changes since v2:
- dir arg passed to vfs_unlink() in mq_unlink() changed to match
the dir passed to lookup_noperm()
- restore assignment to path->mnt even though the mntget() is removed.
---
fs/fuse/dir.c | 19 +++++++---------
fs/namei.c | 48 ++++++++++++++++++++++++++++++++++++++++
fs/xfs/scrub/orphanage.c | 11 ++++-----
include/linux/namei.h | 2 ++
ipc/mqueue.c | 32 ++++++++++-----------------
5 files changed, 74 insertions(+), 38 deletions(-)
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 316922d5dd13..a0d5b302bcc2 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -1397,27 +1397,25 @@ int fuse_reverse_inval_entry(struct fuse_conn *fc, u64 parent_nodeid,
if (!parent)
return -ENOENT;
- inode_lock_nested(parent, I_MUTEX_PARENT);
if (!S_ISDIR(parent->i_mode))
- goto unlock;
+ goto put_parent;
err = -ENOENT;
dir = d_find_alias(parent);
if (!dir)
- goto unlock;
+ goto put_parent;
- name->hash = full_name_hash(dir, name->name, name->len);
- entry = d_lookup(dir, name);
+ entry = start_removing_noperm(dir, name);
dput(dir);
- if (!entry)
- goto unlock;
+ if (IS_ERR(entry))
+ goto put_parent;
fuse_dir_changed(parent);
if (!(flags & FUSE_EXPIRE_ONLY))
d_invalidate(entry);
fuse_invalidate_entry_cache(entry);
- if (child_nodeid != 0 && d_really_is_positive(entry)) {
+ if (child_nodeid != 0) {
inode_lock(d_inode(entry));
if (get_node_id(d_inode(entry)) != child_nodeid) {
err = -ENOENT;
@@ -1445,10 +1443,9 @@ int fuse_reverse_inval_entry(struct fuse_conn *fc, u64 parent_nodeid,
} else {
err = 0;
}
- dput(entry);
- unlock:
- inode_unlock(parent);
+ end_removing(entry);
+ put_parent:
iput(parent);
return err;
}
diff --git a/fs/namei.c b/fs/namei.c
index 38dda29552f6..da01b828ede6 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3275,6 +3275,54 @@ struct dentry *start_removing(struct mnt_idmap *idmap, struct dentry *parent,
}
EXPORT_SYMBOL(start_removing);
+/**
+ * start_creating_noperm - prepare to create a given name without permission checking
+ * @parent: directory in which to prepare to create the name
+ * @name: the name to be created
+ *
+ * Locks are taken and a lookup in performed prior to creating
+ * an object in a directory.
+ *
+ * If the name already exists, a positive dentry is returned.
+ *
+ * Returns: a negative or positive dentry, or an error.
+ */
+struct dentry *start_creating_noperm(struct dentry *parent,
+ struct qstr *name)
+{
+ int err = lookup_noperm_common(name, parent);
+
+ if (err)
+ return ERR_PTR(err);
+ return start_dirop(parent, name, LOOKUP_CREATE);
+}
+EXPORT_SYMBOL(start_creating_noperm);
+
+/**
+ * start_removing_noperm - prepare to remove a given name without permission checking
+ * @parent: directory in which to find the name
+ * @name: the name to be removed
+ *
+ * Locks are taken and a lookup in performed prior to removing
+ * an object from a directory.
+ *
+ * If the name doesn't exist, an error is returned.
+ *
+ * end_removing() should be called when removal is complete, or aborted.
+ *
+ * Returns: a positive dentry, or an error.
+ */
+struct dentry *start_removing_noperm(struct dentry *parent,
+ struct qstr *name)
+{
+ int err = lookup_noperm_common(name, parent);
+
+ if (err)
+ return ERR_PTR(err);
+ return start_dirop(parent, name, 0);
+}
+EXPORT_SYMBOL(start_removing_noperm);
+
#ifdef CONFIG_UNIX98_PTYS
int path_pts(struct path *path)
{
diff --git a/fs/xfs/scrub/orphanage.c b/fs/xfs/scrub/orphanage.c
index 9c12cb844231..e732605924a1 100644
--- a/fs/xfs/scrub/orphanage.c
+++ b/fs/xfs/scrub/orphanage.c
@@ -152,11 +152,10 @@ xrep_orphanage_create(
}
/* Try to find the orphanage directory. */
- inode_lock_nested(root_inode, I_MUTEX_PARENT);
- orphanage_dentry = lookup_noperm(&QSTR(ORPHANAGE), root_dentry);
+ orphanage_dentry = start_creating_noperm(root_dentry, &QSTR(ORPHANAGE));
if (IS_ERR(orphanage_dentry)) {
error = PTR_ERR(orphanage_dentry);
- goto out_unlock_root;
+ goto out_dput_root;
}
/*
@@ -170,7 +169,7 @@ xrep_orphanage_create(
orphanage_dentry, 0750);
error = PTR_ERR(orphanage_dentry);
if (IS_ERR(orphanage_dentry))
- goto out_unlock_root;
+ goto out_dput_orphanage;
}
/* Not a directory? Bail out. */
@@ -200,9 +199,7 @@ xrep_orphanage_create(
sc->orphanage_ilock_flags = 0;
out_dput_orphanage:
- dput(orphanage_dentry);
-out_unlock_root:
- inode_unlock(VFS_I(sc->mp->m_rootip));
+ end_creating(orphanage_dentry, root_dentry);
out_dput_root:
dput(root_dentry);
out:
diff --git a/include/linux/namei.h b/include/linux/namei.h
index 6d1069f93ebf..0441f5921f87 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -93,6 +93,8 @@ struct dentry *start_creating(struct mnt_idmap *idmap, struct dentry *parent,
struct qstr *name);
struct dentry *start_removing(struct mnt_idmap *idmap, struct dentry *parent,
struct qstr *name);
+struct dentry *start_creating_noperm(struct dentry *parent, struct qstr *name);
+struct dentry *start_removing_noperm(struct dentry *parent, struct qstr *name);
/**
* end_creating - finish action started with start_creating
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index 093551fe66a7..6d7610310003 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -913,13 +913,12 @@ static int do_mq_open(const char __user *u_name, int oflag, umode_t mode,
goto out_putname;
ro = mnt_want_write(mnt); /* we'll drop it in any case */
- inode_lock(d_inode(root));
- path.dentry = lookup_noperm(&QSTR(name->name), root);
+ path.dentry = start_creating_noperm(root, &QSTR(name->name));
if (IS_ERR(path.dentry)) {
error = PTR_ERR(path.dentry);
goto out_putfd;
}
- path.mnt = mntget(mnt);
+ path.mnt = mnt;
error = prepare_open(path.dentry, oflag, ro, mode, name, attr);
if (!error) {
struct file *file = dentry_open(&path, oflag, current_cred());
@@ -928,13 +927,12 @@ static int do_mq_open(const char __user *u_name, int oflag, umode_t mode,
else
error = PTR_ERR(file);
}
- path_put(&path);
out_putfd:
if (error) {
put_unused_fd(fd);
fd = error;
}
- inode_unlock(d_inode(root));
+ end_creating(path.dentry, root);
if (!ro)
mnt_drop_write(mnt);
out_putname:
@@ -957,7 +955,7 @@ SYSCALL_DEFINE1(mq_unlink, const char __user *, u_name)
int err;
struct filename *name;
struct dentry *dentry;
- struct inode *inode = NULL;
+ struct inode *inode;
struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns;
struct vfsmount *mnt = ipc_ns->mq_mnt;
@@ -969,26 +967,20 @@ SYSCALL_DEFINE1(mq_unlink, const char __user *, u_name)
err = mnt_want_write(mnt);
if (err)
goto out_name;
- inode_lock_nested(d_inode(mnt->mnt_root), I_MUTEX_PARENT);
- dentry = lookup_noperm(&QSTR(name->name), mnt->mnt_root);
+ dentry = start_removing_noperm(mnt->mnt_root, &QSTR(name->name));
if (IS_ERR(dentry)) {
err = PTR_ERR(dentry);
- goto out_unlock;
+ goto out_drop_write;
}
inode = d_inode(dentry);
- if (!inode) {
- err = -ENOENT;
- } else {
- ihold(inode);
- err = vfs_unlink(&nop_mnt_idmap, d_inode(dentry->d_parent),
- dentry, NULL);
- }
- dput(dentry);
-
-out_unlock:
- inode_unlock(d_inode(mnt->mnt_root));
+ ihold(inode);
+ err = vfs_unlink(&nop_mnt_idmap, d_inode(mnt->mnt_root),
+ dentry, NULL);
+ end_removing(dentry);
iput(inode);
+
+out_drop_write:
mnt_drop_write(mnt);
out_name:
putname(name);
--
2.50.0.107.gf914562f5916.dirty
Hi, On 11/12/25 9:18 PM, NeilBrown wrote: > From: NeilBrown <neil@brown.name> > > xfs, fuse, ipc/mqueue need variants of start_creating or start_removing > which do not check permissions. > This patch adds _noperm versions of these functions. > [..] > diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c > index 316922d5dd13..a0d5b302bcc2 100644 > --- a/fs/fuse/dir.c > +++ b/fs/fuse/dir.c > @@ -1397,27 +1397,25 @@ int fuse_reverse_inval_entry(struct fuse_conn *fc, u64 parent_nodeid, > if (!parent) > return -ENOENT; > > - inode_lock_nested(parent, I_MUTEX_PARENT); > if (!S_ISDIR(parent->i_mode)) > - goto unlock; > + goto put_parent; > > err = -ENOENT; > dir = d_find_alias(parent); > if (!dir) > - goto unlock; > + goto put_parent; > > - name->hash = full_name_hash(dir, name->name, name->len); > - entry = d_lookup(dir, name); > + entry = start_removing_noperm(dir, name); > dput(dir); > - if (!entry) > - goto unlock; > + if (IS_ERR(entry)) > + goto put_parent; This broke xdg-document-portal (and potentially other FUSE filesystems) by introducing a massive deadlock. ❯ doas cat /proc/40751/stack # main thread [<0>] __fuse_simple_request+0x37c/0x5c0 [fuse] [<0>] fuse_lookup_name+0x12c/0x2a0 [fuse] [<0>] fuse_lookup+0x9c/0x1e8 [fuse] [<0>] lookup_one_qstr_excl+0xd4/0x160 [<0>] start_removing_noperm+0x5c/0x90 [<0>] fuse_reverse_inval_entry+0x64/0x1e0 [fuse] [<0>] fuse_dev_do_write+0x13a8/0x16a8 [fuse] [<0>] fuse_dev_write+0x64/0xa8 [fuse] [<0>] do_iter_readv_writev+0x170/0x1d0 [<0>] vfs_writev+0x100/0x2d0 [<0>] do_writev+0x88/0x130 d_lookup which was previously used here —from what I could understand by reading it— is cache-only and does not call into the FS's lookup at all. This new start_removing_noperm calls start_dirop which calls lookup_one_qstr_excl which according to its own comment is the "one and only case when ->lookup() gets called on non in-lookup dentries". Well, ->lookup() is the request back to the userspace FUSE server.. but the FUSE server is waiting for the write() to the FUSE device that invokes this operation to return! We cannot reenter the FUSE server from fuse_reverse_inval_entry. x-d-p issue link: https://github.com/flatpak/xdg-desktop-portal/issues/1871 Reverting the fuse/dir.c changes has fixed that for me. Thanks, ~val
On Sat, Nov 29, 2025 at 09:01:05PM -0300, Val Packett wrote: > > diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c > > index 316922d5dd13..a0d5b302bcc2 100644 > > --- a/fs/fuse/dir.c > > +++ b/fs/fuse/dir.c > > @@ -1397,27 +1397,25 @@ int fuse_reverse_inval_entry(struct fuse_conn *fc, u64 parent_nodeid, > > if (!parent) > > return -ENOENT; > > - inode_lock_nested(parent, I_MUTEX_PARENT); > > if (!S_ISDIR(parent->i_mode)) > > - goto unlock; > > + goto put_parent; > > err = -ENOENT; > > dir = d_find_alias(parent); > > if (!dir) > > - goto unlock; > > + goto put_parent; > > - name->hash = full_name_hash(dir, name->name, name->len); > > - entry = d_lookup(dir, name); > > + entry = start_removing_noperm(dir, name); > > dput(dir); > > - if (!entry) > > - goto unlock; > > + if (IS_ERR(entry)) > > + goto put_parent; > > This broke xdg-document-portal (and potentially other FUSE filesystems) by > introducing a massive deadlock. ACK. That chunk needs to be reverted - this is *not* "remove an object by parent and name", it's "invalidate stuff under that parent with this first name component" and I would like to understand what FUSE_EXPIRE_ONLY thing is about. Miklos, could you give some details on that thing? This chunk definitely needs to go, the question is what that code is trying to do other than d_invalidate()...
From: NeilBrown <neil@brown.name>
The recent conversion of fuse_reverse_inval_entry() to use
start_removing() was wrong.
As Val Packett points out the original code did not call ->lookup
while the new code does. This can lead to a deadlock.
Rather than using full_name_hash() and d_lookup() as the old code
did, we can use try_lookup_noperm() which combines these. Then
the result can be given to start_removing_dentry() to get the required
locks for removal. We then double check that the name hasn't
changed.
As 'dir' needs to be used several times now, we load the dput() until
the end, and initialise to NULL so dput() is always safe.
Reported-by: Val Packett <val@packett.cool>
Closes: https://lore.kernel.org/all/6713ea38-b583-4c86-b74a-bea55652851d@packett.cool
Fixes: c9ba789dad15 ("VFS: introduce start_creating_noperm() and start_removing_noperm()")
Signed-off-by: NeilBrown <neil@brown.name>
---
fs/fuse/dir.c | 23 ++++++++++++++++-------
1 file changed, 16 insertions(+), 7 deletions(-)
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index a0d5b302bcc2..8384fa96cf53 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -1390,8 +1390,8 @@ int fuse_reverse_inval_entry(struct fuse_conn *fc, u64 parent_nodeid,
{
int err = -ENOTDIR;
struct inode *parent;
- struct dentry *dir;
- struct dentry *entry;
+ struct dentry *dir = NULL;
+ struct dentry *entry = NULL;
parent = fuse_ilookup(fc, parent_nodeid, NULL);
if (!parent)
@@ -1404,11 +1404,19 @@ int fuse_reverse_inval_entry(struct fuse_conn *fc, u64 parent_nodeid,
dir = d_find_alias(parent);
if (!dir)
goto put_parent;
-
- entry = start_removing_noperm(dir, name);
- dput(dir);
- if (IS_ERR(entry))
- goto put_parent;
+ while (!entry) {
+ struct dentry *child = try_lookup_noperm(name, dir);
+ if (!child || IS_ERR(child))
+ goto put_parent;
+ entry = start_removing_dentry(dir, child);
+ dput(child);
+ if (IS_ERR(entry))
+ goto put_parent;
+ if (!d_same_name(entry, dir, name)) {
+ end_removing(entry);
+ entry = NULL;
+ }
+ }
fuse_dir_changed(parent);
if (!(flags & FUSE_EXPIRE_ONLY))
@@ -1446,6 +1454,7 @@ int fuse_reverse_inval_entry(struct fuse_conn *fc, u64 parent_nodeid,
end_removing(entry);
put_parent:
+ dput(dir);
iput(parent);
return err;
}
--
2.50.0.107.gf914562f5916.dirty
On Sun, Nov 30, 2025 at 11:06 PM NeilBrown <neilb@ownmail.net> wrote:
>
>
> From: NeilBrown <neil@brown.name>
>
> The recent conversion of fuse_reverse_inval_entry() to use
> start_removing() was wrong.
> As Val Packett points out the original code did not call ->lookup
> while the new code does. This can lead to a deadlock.
>
> Rather than using full_name_hash() and d_lookup() as the old code
> did, we can use try_lookup_noperm() which combines these. Then
> the result can be given to start_removing_dentry() to get the required
> locks for removal. We then double check that the name hasn't
> changed.
>
> As 'dir' needs to be used several times now, we load the dput() until
> the end, and initialise to NULL so dput() is always safe.
>
> Reported-by: Val Packett <val@packett.cool>
> Closes: https://lore.kernel.org/all/6713ea38-b583-4c86-b74a-bea55652851d@packett.cool
> Fixes: c9ba789dad15 ("VFS: introduce start_creating_noperm() and start_removing_noperm()")
> Signed-off-by: NeilBrown <neil@brown.name>
> ---
> fs/fuse/dir.c | 23 ++++++++++++++++-------
> 1 file changed, 16 insertions(+), 7 deletions(-)
>
> diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
> index a0d5b302bcc2..8384fa96cf53 100644
> --- a/fs/fuse/dir.c
> +++ b/fs/fuse/dir.c
> @@ -1390,8 +1390,8 @@ int fuse_reverse_inval_entry(struct fuse_conn *fc, u64 parent_nodeid,
> {
> int err = -ENOTDIR;
> struct inode *parent;
> - struct dentry *dir;
> - struct dentry *entry;
> + struct dentry *dir = NULL;
> + struct dentry *entry = NULL;
>
> parent = fuse_ilookup(fc, parent_nodeid, NULL);
> if (!parent)
> @@ -1404,11 +1404,19 @@ int fuse_reverse_inval_entry(struct fuse_conn *fc, u64 parent_nodeid,
> dir = d_find_alias(parent);
> if (!dir)
> goto put_parent;
> -
> - entry = start_removing_noperm(dir, name);
> - dput(dir);
> - if (IS_ERR(entry))
> - goto put_parent;
> + while (!entry) {
> + struct dentry *child = try_lookup_noperm(name, dir);
> + if (!child || IS_ERR(child))
> + goto put_parent;
> + entry = start_removing_dentry(dir, child);
> + dput(child);
> + if (IS_ERR(entry))
> + goto put_parent;
> + if (!d_same_name(entry, dir, name)) {
> + end_removing(entry);
> + entry = NULL;
> + }
> + }
Can you explain why it is so important to use
start_removing_dentry() around shrink_dcache_parent()?
Is there a problem with reverting the change in this function
instead of accomodating start_removing_dentry()?
I don't think there is a point in optimizing parallel dir operations
with FUSE server cache invalidation, but maybe I am missing
something.
Thanks,
Amir.
On Mon, Dec 01, 2025 at 09:22:54AM +0100, Amir Goldstein wrote: > I don't think there is a point in optimizing parallel dir operations > with FUSE server cache invalidation, but maybe I am missing > something. The interesting part is the expected semantics of operation; d_invalidate() side definitely doesn't need any of that cruft, but I would really like to understand what that function is supposed to do. Miklos, could you post a brain dump on that?
On Mon, 1 Dec 2025 at 09:33, Al Viro <viro@zeniv.linux.org.uk> wrote:
>
> On Mon, Dec 01, 2025 at 09:22:54AM +0100, Amir Goldstein wrote:
>
> > I don't think there is a point in optimizing parallel dir operations
> > with FUSE server cache invalidation, but maybe I am missing
> > something.
>
> The interesting part is the expected semantics of operation;
> d_invalidate() side definitely doesn't need any of that cruft,
> but I would really like to understand what that function
> is supposed to do.
>
> Miklos, could you post a brain dump on that?
This function is supposed to invalidate a dentry due to remote changes
(FUSE_NOTIFY_INVAL_ENTRY). Originally it was supplied a parent ID and
a name and called d_invalidate() on the looked up dentry.
Then it grew a variant (FUSE_NOTIFY_DELETE) that was also supplied a
child ID, which was matched against the looked up inode. This was
commit 451d0f599934 ("FUSE: Notifying the kernel of deletion."),
Apparently this worked around the fact that at that time
d_invalidate() returned -EBUSY if the target was still in use and
didn't unhash the dentry in that case.
That was later changed by commit bafc9b754f75 ("vfs: More precise
tests in d_invalidate") to unconditionally unhash the target, which
effectively made FUSE_NOTIFY_INVAL_ENTRY and FUSE_NOTIFY_DELETE
equivalent and the code in question unnecessary.
For the future, we could also introduce FUSE_NOTIFY_MOVE, that would
differentiate between a delete and a move, while
FUSE_NOTIFY_INVAL_ENTRY would continue to be the common (deleted or
moved) notification.
Attaching untested patch to remove this cruft.
Thanks,
Miklos
On Mon, Dec 01, 2025 at 03:03:08PM +0100, Miklos Szeredi wrote:
> On Mon, 1 Dec 2025 at 09:33, Al Viro <viro@zeniv.linux.org.uk> wrote:
> >
> > On Mon, Dec 01, 2025 at 09:22:54AM +0100, Amir Goldstein wrote:
> >
> > > I don't think there is a point in optimizing parallel dir operations
> > > with FUSE server cache invalidation, but maybe I am missing
> > > something.
> >
> > The interesting part is the expected semantics of operation;
> > d_invalidate() side definitely doesn't need any of that cruft,
> > but I would really like to understand what that function
> > is supposed to do.
> >
> > Miklos, could you post a brain dump on that?
>
> This function is supposed to invalidate a dentry due to remote changes
> (FUSE_NOTIFY_INVAL_ENTRY). Originally it was supplied a parent ID and
> a name and called d_invalidate() on the looked up dentry.
>
> Then it grew a variant (FUSE_NOTIFY_DELETE) that was also supplied a
> child ID, which was matched against the looked up inode. This was
> commit 451d0f599934 ("FUSE: Notifying the kernel of deletion."),
> Apparently this worked around the fact that at that time
> d_invalidate() returned -EBUSY if the target was still in use and
> didn't unhash the dentry in that case.
>
> That was later changed by commit bafc9b754f75 ("vfs: More precise
> tests in d_invalidate") to unconditionally unhash the target, which
> effectively made FUSE_NOTIFY_INVAL_ENTRY and FUSE_NOTIFY_DELETE
> equivalent and the code in question unnecessary.
>
> For the future, we could also introduce FUSE_NOTIFY_MOVE, that would
> differentiate between a delete and a move, while
> FUSE_NOTIFY_INVAL_ENTRY would continue to be the common (deleted or
> moved) notification.
>
> Attaching untested patch to remove this cruft.
Should we revert the fuse specific bits of c9ba789dad15 ("VFS: introduce
start_creating_noperm() and start_removing_noperm()") and then apply
your changes afterwards?
On Mon, Dec 01, 2025 at 03:03:08PM +0100, Miklos Szeredi wrote:
> On Mon, 1 Dec 2025 at 09:33, Al Viro <viro@zeniv.linux.org.uk> wrote:
> >
> > On Mon, Dec 01, 2025 at 09:22:54AM +0100, Amir Goldstein wrote:
> >
> > > I don't think there is a point in optimizing parallel dir operations
> > > with FUSE server cache invalidation, but maybe I am missing
> > > something.
> >
> > The interesting part is the expected semantics of operation;
> > d_invalidate() side definitely doesn't need any of that cruft,
> > but I would really like to understand what that function
> > is supposed to do.
> >
> > Miklos, could you post a brain dump on that?
>
> This function is supposed to invalidate a dentry due to remote changes
> (FUSE_NOTIFY_INVAL_ENTRY). Originally it was supplied a parent ID and
> a name and called d_invalidate() on the looked up dentry.
>
> Then it grew a variant (FUSE_NOTIFY_DELETE) that was also supplied a
> child ID, which was matched against the looked up inode. This was
> commit 451d0f599934 ("FUSE: Notifying the kernel of deletion."),
> Apparently this worked around the fact that at that time
> d_invalidate() returned -EBUSY if the target was still in use and
> didn't unhash the dentry in that case.
>
> That was later changed by commit bafc9b754f75 ("vfs: More precise
> tests in d_invalidate") to unconditionally unhash the target, which
> effectively made FUSE_NOTIFY_INVAL_ENTRY and FUSE_NOTIFY_DELETE
> equivalent and the code in question unnecessary.
>
> For the future, we could also introduce FUSE_NOTIFY_MOVE, that would
> differentiate between a delete and a move, while
> FUSE_NOTIFY_INVAL_ENTRY would continue to be the common (deleted or
> moved) notification.
Then as far as VFS is concerned, it's an equivalent of "we'd done
a dcache lookup and revalidate told us to bugger off", which does
*not* need locking the parent - the same sequence can very well
happen without touching any inode locks.
IOW, from the point of view of locking protocol changes that's not
a removal at all.
Or do you need them serialized for fuse-internal purposes?
On Mon, 1 Dec 2025 at 18:08, Al Viro <viro@zeniv.linux.org.uk> wrote: > Then as far as VFS is concerned, it's an equivalent of "we'd done > a dcache lookup and revalidate told us to bugger off", which does > *not* need locking the parent - the same sequence can very well > happen without touching any inode locks. Okay. > IOW, from the point of view of locking protocol changes that's not > a removal at all. > > Or do you need them serialized for fuse-internal purposes? Not as far as I can see. As to any fuse filesystem being reliant on this behavior, I think that's unlikely, though it's sort of documented in the libfuse APIs as: * To avoid a deadlock this function must not be called in the * execution path of a related filesystem operation or within any code * that could hold a lock that could be needed to execute such an * operation. As of kernel 4.18, a "related operation" is a lookup(), * symlink(), mknod(), mkdir(), unlink(), rename(), link() or create() * request for the parent, and a setattr(), unlink(), rmdir(), * rename(), setxattr(), removexattr(), readdir() or readdirplus() * request for the inode itself. Why the locking was added in the first place? Oversight, probably. Thanks, Miklos
© 2016 - 2025 Red Hat, Inc.