[v6] Create and use APIs to centralise locking for directory ops.

[PATCH v6 06/15] VFS: introduce start_creating_noperm() and start_removing_noperm()

Posted by NeilBrown 2 months, 4 weeks ago

From: NeilBrown <neil@brown.name>

xfs, fuse, ipc/mqueue need variants of start_creating or start_removing
which do not check permissions.
This patch adds _noperm versions of these functions.

Note that do_mq_open() was only calling mntget() so it could call
path_put() - it didn't really need an extra reference on the mnt.
Now it doesn't call mntget() and uses end_creating() which does
the dput() half of path_put().

Also mq_unlink() previously passed
   d_inode(dentry->d_parent)
as the dir inode to vfs_unlink().  This is after locking
   d_inode(mnt->mnt_root)
These two inodes are the same, but normally calls use the textual
parent.
So I've changes the vfs_unlink() call to be given d_inode(mnt->mnt_root).

Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: NeilBrown <neil@brown.name>

--
changes since v2:
 - dir arg passed to vfs_unlink() in mq_unlink() changed to match
   the dir passed to lookup_noperm()
 - restore assignment to path->mnt even though the mntget() is removed.
---
 fs/fuse/dir.c            | 19 +++++++---------
 fs/namei.c               | 48 ++++++++++++++++++++++++++++++++++++++++
 fs/xfs/scrub/orphanage.c | 11 ++++-----
 include/linux/namei.h    |  2 ++
 ipc/mqueue.c             | 32 ++++++++++-----------------
 5 files changed, 74 insertions(+), 38 deletions(-)

diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 316922d5dd13..a0d5b302bcc2 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -1397,27 +1397,25 @@ int fuse_reverse_inval_entry(struct fuse_conn *fc, u64 parent_nodeid,
 	if (!parent)
 		return -ENOENT;
 
-	inode_lock_nested(parent, I_MUTEX_PARENT);
 	if (!S_ISDIR(parent->i_mode))
-		goto unlock;
+		goto put_parent;
 
 	err = -ENOENT;
 	dir = d_find_alias(parent);
 	if (!dir)
-		goto unlock;
+		goto put_parent;
 
-	name->hash = full_name_hash(dir, name->name, name->len);
-	entry = d_lookup(dir, name);
+	entry = start_removing_noperm(dir, name);
 	dput(dir);
-	if (!entry)
-		goto unlock;
+	if (IS_ERR(entry))
+		goto put_parent;
 
 	fuse_dir_changed(parent);
 	if (!(flags & FUSE_EXPIRE_ONLY))
 		d_invalidate(entry);
 	fuse_invalidate_entry_cache(entry);
 
-	if (child_nodeid != 0 && d_really_is_positive(entry)) {
+	if (child_nodeid != 0) {
 		inode_lock(d_inode(entry));
 		if (get_node_id(d_inode(entry)) != child_nodeid) {
 			err = -ENOENT;
@@ -1445,10 +1443,9 @@ int fuse_reverse_inval_entry(struct fuse_conn *fc, u64 parent_nodeid,
 	} else {
 		err = 0;
 	}
-	dput(entry);
 
- unlock:
-	inode_unlock(parent);
+	end_removing(entry);
+ put_parent:
 	iput(parent);
 	return err;
 }
diff --git a/fs/namei.c b/fs/namei.c
index 38dda29552f6..da01b828ede6 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3275,6 +3275,54 @@ struct dentry *start_removing(struct mnt_idmap *idmap, struct dentry *parent,
 }
 EXPORT_SYMBOL(start_removing);
 
+/**
+ * start_creating_noperm - prepare to create a given name without permission checking
+ * @parent: directory in which to prepare to create the name
+ * @name:   the name to be created
+ *
+ * Locks are taken and a lookup in performed prior to creating
+ * an object in a directory.
+ *
+ * If the name already exists, a positive dentry is returned.
+ *
+ * Returns: a negative or positive dentry, or an error.
+ */
+struct dentry *start_creating_noperm(struct dentry *parent,
+				     struct qstr *name)
+{
+	int err = lookup_noperm_common(name, parent);
+
+	if (err)
+		return ERR_PTR(err);
+	return start_dirop(parent, name, LOOKUP_CREATE);
+}
+EXPORT_SYMBOL(start_creating_noperm);
+
+/**
+ * start_removing_noperm - prepare to remove a given name without permission checking
+ * @parent: directory in which to find the name
+ * @name:   the name to be removed
+ *
+ * Locks are taken and a lookup in performed prior to removing
+ * an object from a directory.
+ *
+ * If the name doesn't exist, an error is returned.
+ *
+ * end_removing() should be called when removal is complete, or aborted.
+ *
+ * Returns: a positive dentry, or an error.
+ */
+struct dentry *start_removing_noperm(struct dentry *parent,
+				     struct qstr *name)
+{
+	int err = lookup_noperm_common(name, parent);
+
+	if (err)
+		return ERR_PTR(err);
+	return start_dirop(parent, name, 0);
+}
+EXPORT_SYMBOL(start_removing_noperm);
+
 #ifdef CONFIG_UNIX98_PTYS
 int path_pts(struct path *path)
 {
diff --git a/fs/xfs/scrub/orphanage.c b/fs/xfs/scrub/orphanage.c
index 9c12cb844231..e732605924a1 100644
--- a/fs/xfs/scrub/orphanage.c
+++ b/fs/xfs/scrub/orphanage.c
@@ -152,11 +152,10 @@ xrep_orphanage_create(
 	}
 
 	/* Try to find the orphanage directory. */
-	inode_lock_nested(root_inode, I_MUTEX_PARENT);
-	orphanage_dentry = lookup_noperm(&QSTR(ORPHANAGE), root_dentry);
+	orphanage_dentry = start_creating_noperm(root_dentry, &QSTR(ORPHANAGE));
 	if (IS_ERR(orphanage_dentry)) {
 		error = PTR_ERR(orphanage_dentry);
-		goto out_unlock_root;
+		goto out_dput_root;
 	}
 
 	/*
@@ -170,7 +169,7 @@ xrep_orphanage_create(
 					     orphanage_dentry, 0750);
 		error = PTR_ERR(orphanage_dentry);
 		if (IS_ERR(orphanage_dentry))
-			goto out_unlock_root;
+			goto out_dput_orphanage;
 	}
 
 	/* Not a directory? Bail out. */
@@ -200,9 +199,7 @@ xrep_orphanage_create(
 	sc->orphanage_ilock_flags = 0;
 
 out_dput_orphanage:
-	dput(orphanage_dentry);
-out_unlock_root:
-	inode_unlock(VFS_I(sc->mp->m_rootip));
+	end_creating(orphanage_dentry, root_dentry);
 out_dput_root:
 	dput(root_dentry);
 out:
diff --git a/include/linux/namei.h b/include/linux/namei.h
index 6d1069f93ebf..0441f5921f87 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -93,6 +93,8 @@ struct dentry *start_creating(struct mnt_idmap *idmap, struct dentry *parent,
 			      struct qstr *name);
 struct dentry *start_removing(struct mnt_idmap *idmap, struct dentry *parent,
 			      struct qstr *name);
+struct dentry *start_creating_noperm(struct dentry *parent, struct qstr *name);
+struct dentry *start_removing_noperm(struct dentry *parent, struct qstr *name);
 
 /**
  * end_creating - finish action started with start_creating
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index 093551fe66a7..6d7610310003 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -913,13 +913,12 @@ static int do_mq_open(const char __user *u_name, int oflag, umode_t mode,
 		goto out_putname;
 
 	ro = mnt_want_write(mnt);	/* we'll drop it in any case */
-	inode_lock(d_inode(root));
-	path.dentry = lookup_noperm(&QSTR(name->name), root);
+	path.dentry = start_creating_noperm(root, &QSTR(name->name));
 	if (IS_ERR(path.dentry)) {
 		error = PTR_ERR(path.dentry);
 		goto out_putfd;
 	}
-	path.mnt = mntget(mnt);
+	path.mnt = mnt;
 	error = prepare_open(path.dentry, oflag, ro, mode, name, attr);
 	if (!error) {
 		struct file *file = dentry_open(&path, oflag, current_cred());
@@ -928,13 +927,12 @@ static int do_mq_open(const char __user *u_name, int oflag, umode_t mode,
 		else
 			error = PTR_ERR(file);
 	}
-	path_put(&path);
 out_putfd:
 	if (error) {
 		put_unused_fd(fd);
 		fd = error;
 	}
-	inode_unlock(d_inode(root));
+	end_creating(path.dentry, root);
 	if (!ro)
 		mnt_drop_write(mnt);
 out_putname:
@@ -957,7 +955,7 @@ SYSCALL_DEFINE1(mq_unlink, const char __user *, u_name)
 	int err;
 	struct filename *name;
 	struct dentry *dentry;
-	struct inode *inode = NULL;
+	struct inode *inode;
 	struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns;
 	struct vfsmount *mnt = ipc_ns->mq_mnt;
 
@@ -969,26 +967,20 @@ SYSCALL_DEFINE1(mq_unlink, const char __user *, u_name)
 	err = mnt_want_write(mnt);
 	if (err)
 		goto out_name;
-	inode_lock_nested(d_inode(mnt->mnt_root), I_MUTEX_PARENT);
-	dentry = lookup_noperm(&QSTR(name->name), mnt->mnt_root);
+	dentry = start_removing_noperm(mnt->mnt_root, &QSTR(name->name));
 	if (IS_ERR(dentry)) {
 		err = PTR_ERR(dentry);
-		goto out_unlock;
+		goto out_drop_write;
 	}
 
 	inode = d_inode(dentry);
-	if (!inode) {
-		err = -ENOENT;
-	} else {
-		ihold(inode);
-		err = vfs_unlink(&nop_mnt_idmap, d_inode(dentry->d_parent),
-				 dentry, NULL);
-	}
-	dput(dentry);
-
-out_unlock:
-	inode_unlock(d_inode(mnt->mnt_root));
+	ihold(inode);
+	err = vfs_unlink(&nop_mnt_idmap, d_inode(mnt->mnt_root),
+			 dentry, NULL);
+	end_removing(dentry);
 	iput(inode);
+
+out_drop_write:
 	mnt_drop_write(mnt);
 out_name:
 	putname(name);
-- 
2.50.0.107.gf914562f5916.dirty

Re: [PATCH v6 06/15] VFS: introduce start_creating_noperm() and start_removing_noperm()

Posted by Val Packett 2 months, 1 week ago

Hi,

On 11/12/25 9:18 PM, NeilBrown wrote:
> From: NeilBrown <neil@brown.name>
>
> xfs, fuse, ipc/mqueue need variants of start_creating or start_removing
> which do not check permissions.
> This patch adds _noperm versions of these functions.
> [..]
> diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
> index 316922d5dd13..a0d5b302bcc2 100644
> --- a/fs/fuse/dir.c
> +++ b/fs/fuse/dir.c
> @@ -1397,27 +1397,25 @@ int fuse_reverse_inval_entry(struct fuse_conn *fc, u64 parent_nodeid,
>   	if (!parent)
>   		return -ENOENT;
>   
> -	inode_lock_nested(parent, I_MUTEX_PARENT);
>   	if (!S_ISDIR(parent->i_mode))
> -		goto unlock;
> +		goto put_parent;
>   
>   	err = -ENOENT;
>   	dir = d_find_alias(parent);
>   	if (!dir)
> -		goto unlock;
> +		goto put_parent;
>   
> -	name->hash = full_name_hash(dir, name->name, name->len);
> -	entry = d_lookup(dir, name);
> +	entry = start_removing_noperm(dir, name);
>   	dput(dir);
> -	if (!entry)
> -		goto unlock;
> +	if (IS_ERR(entry))
> +		goto put_parent;

This broke xdg-document-portal (and potentially other FUSE filesystems) 
by introducing a massive deadlock.

❯ doas cat /proc/40751/stack # main thread
[<0>] __fuse_simple_request+0x37c/0x5c0 [fuse]
[<0>] fuse_lookup_name+0x12c/0x2a0 [fuse]
[<0>] fuse_lookup+0x9c/0x1e8 [fuse]
[<0>] lookup_one_qstr_excl+0xd4/0x160
[<0>] start_removing_noperm+0x5c/0x90
[<0>] fuse_reverse_inval_entry+0x64/0x1e0 [fuse]
[<0>] fuse_dev_do_write+0x13a8/0x16a8 [fuse]
[<0>] fuse_dev_write+0x64/0xa8 [fuse]
[<0>] do_iter_readv_writev+0x170/0x1d0
[<0>] vfs_writev+0x100/0x2d0
[<0>] do_writev+0x88/0x130

d_lookup which was previously used here —from what I could understand by 
reading it— is cache-only and does not call into the FS's lookup at all.

This new start_removing_noperm calls start_dirop which calls 
lookup_one_qstr_excl which according to its own comment is the "one and 
only case when ->lookup() gets called on non in-lookup dentries". Well, 
->lookup() is the request back to the userspace FUSE server.. but the 
FUSE server is waiting for the write() to the FUSE device that invokes 
this operation to return! We cannot reenter the FUSE server 
from fuse_reverse_inval_entry.

x-d-p issue link: https://github.com/flatpak/xdg-desktop-portal/issues/1871

Reverting the fuse/dir.c changes has fixed that for me.

Thanks,
~val

Re: [PATCH v6 06/15] VFS: introduce start_creating_noperm() and start_removing_noperm()

Posted by Al Viro 2 months, 1 week ago

On Sat, Nov 29, 2025 at 09:01:05PM -0300, Val Packett wrote:

> > diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
> > index 316922d5dd13..a0d5b302bcc2 100644
> > --- a/fs/fuse/dir.c
> > +++ b/fs/fuse/dir.c
> > @@ -1397,27 +1397,25 @@ int fuse_reverse_inval_entry(struct fuse_conn *fc, u64 parent_nodeid,
> >   	if (!parent)
> >   		return -ENOENT;
> > -	inode_lock_nested(parent, I_MUTEX_PARENT);
> >   	if (!S_ISDIR(parent->i_mode))
> > -		goto unlock;
> > +		goto put_parent;
> >   	err = -ENOENT;
> >   	dir = d_find_alias(parent);
> >   	if (!dir)
> > -		goto unlock;
> > +		goto put_parent;
> > -	name->hash = full_name_hash(dir, name->name, name->len);
> > -	entry = d_lookup(dir, name);
> > +	entry = start_removing_noperm(dir, name);
> >   	dput(dir);
> > -	if (!entry)
> > -		goto unlock;
> > +	if (IS_ERR(entry))
> > +		goto put_parent;
> 
> This broke xdg-document-portal (and potentially other FUSE filesystems) by
> introducing a massive deadlock.

ACK.  That chunk needs to be reverted - this is *not* "remove an object by
parent and name", it's "invalidate stuff under that parent with this
first name component" and I would like to understand what FUSE_EXPIRE_ONLY
thing is about.

Miklos, could you give some details on that thing?  This chunk definitely
needs to go, the question is what that code is trying to do other than
d_invalidate()...

[PATCH] fuse: fix conversion of fuse_reverse_inval_entry() to start_removing()

Posted by NeilBrown 2 months, 1 week ago


From: NeilBrown <neil@brown.name>

The recent conversion of fuse_reverse_inval_entry() to use
start_removing() was wrong.
As Val Packett points out the original code did not call ->lookup
while the new code does.  This can lead to a deadlock.

Rather than using full_name_hash() and d_lookup() as the old code
did, we can use try_lookup_noperm() which combines these.  Then
the result can be given to start_removing_dentry() to get the required
locks for removal.  We then double check that the name hasn't
changed.

As 'dir' needs to be used several times now, we load the dput() until
the end, and initialise to NULL so dput() is always safe.

Reported-by: Val Packett <val@packett.cool>
Closes: https://lore.kernel.org/all/6713ea38-b583-4c86-b74a-bea55652851d@packett.cool
Fixes: c9ba789dad15 ("VFS: introduce start_creating_noperm() and start_removing_noperm()")
Signed-off-by: NeilBrown <neil@brown.name>
---
 fs/fuse/dir.c | 23 ++++++++++++++++-------
 1 file changed, 16 insertions(+), 7 deletions(-)

diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index a0d5b302bcc2..8384fa96cf53 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -1390,8 +1390,8 @@ int fuse_reverse_inval_entry(struct fuse_conn *fc, u64 parent_nodeid,
 {
 	int err = -ENOTDIR;
 	struct inode *parent;
-	struct dentry *dir;
-	struct dentry *entry;
+	struct dentry *dir = NULL;
+	struct dentry *entry = NULL;
 
 	parent = fuse_ilookup(fc, parent_nodeid, NULL);
 	if (!parent)
@@ -1404,11 +1404,19 @@ int fuse_reverse_inval_entry(struct fuse_conn *fc, u64 parent_nodeid,
 	dir = d_find_alias(parent);
 	if (!dir)
 		goto put_parent;
-
-	entry = start_removing_noperm(dir, name);
-	dput(dir);
-	if (IS_ERR(entry))
-		goto put_parent;
+	while (!entry) {
+		struct dentry *child = try_lookup_noperm(name, dir);
+		if (!child || IS_ERR(child))
+			goto put_parent;
+		entry = start_removing_dentry(dir, child);
+		dput(child);
+		if (IS_ERR(entry))
+			goto put_parent;
+		if (!d_same_name(entry, dir, name)) {
+			end_removing(entry);
+			entry = NULL;
+		}
+	}
 
 	fuse_dir_changed(parent);
 	if (!(flags & FUSE_EXPIRE_ONLY))
@@ -1446,6 +1454,7 @@ int fuse_reverse_inval_entry(struct fuse_conn *fc, u64 parent_nodeid,
 
 	end_removing(entry);
  put_parent:
+	dput(dir);
 	iput(parent);
 	return err;
 }
-- 
2.50.0.107.gf914562f5916.dirty