[v5] Create and use APIs to centralise locking for directory ops.

[PATCH v5 05/14] VFS/nfsd/cachefiles/ovl: introduce start_removing() and end_removing()

Posted by NeilBrown 3 months ago

From: NeilBrown <neil@brown.name>

start_removing() is similar to start_creating() but will only return a
positive dentry with the expectation that it will be removed.  This is
used by nfsd, cachefiles, and overlayfs.  They are changed to also use
end_removing() to terminate the action begun by start_removing().  This
is a simple alias for end_dirop().

Apart from changes to the error paths, as we no longer need to unlock on
a lookup error, an effect on callers is that they don't need to test if
the found dentry is positive or negative - they can be sure it is
positive.

Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: NeilBrown <neil@brown.name>
---
 fs/cachefiles/namei.c    | 32 ++++++++++++++------------------
 fs/namei.c               | 27 +++++++++++++++++++++++++++
 fs/nfsd/nfs4recover.c    | 18 +++++-------------
 fs/nfsd/vfs.c            | 26 ++++++++++----------------
 fs/overlayfs/dir.c       | 15 +++++++--------
 fs/overlayfs/overlayfs.h |  8 ++++++++
 include/linux/namei.h    | 18 ++++++++++++++++++
 7 files changed, 89 insertions(+), 55 deletions(-)

diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index 0a136eb434da..c7f0c6ab9b88 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -260,6 +260,7 @@ static int cachefiles_unlink(struct cachefiles_cache *cache,
  * - File backed objects are unlinked
  * - Directory backed objects are stuffed into the graveyard for userspace to
  *   delete
+ * On entry dir must be locked.  It will be unlocked on exit.
  */
 int cachefiles_bury_object(struct cachefiles_cache *cache,
 			   struct cachefiles_object *object,
@@ -274,28 +275,30 @@ int cachefiles_bury_object(struct cachefiles_cache *cache,
 
 	_enter(",'%pd','%pd'", dir, rep);
 
+	/* end_removing() will dput() @rep but we need to keep
+	 * a ref, so take one now.  This also stops the dentry
+	 * being negated when unlinked which we need.
+	 */
+	dget(rep);
+
 	if (rep->d_parent != dir) {
-		inode_unlock(d_inode(dir));
+		end_removing(rep);
 		_leave(" = -ESTALE");
 		return -ESTALE;
 	}
 
 	/* non-directories can just be unlinked */
 	if (!d_is_dir(rep)) {
-		dget(rep); /* Stop the dentry being negated if it's only pinned
-			    * by a file struct.
-			    */
 		ret = cachefiles_unlink(cache, object, dir, rep, why);
-		dput(rep);
+		end_removing(rep);
 
-		inode_unlock(d_inode(dir));
 		_leave(" = %d", ret);
 		return ret;
 	}
 
 	/* directories have to be moved to the graveyard */
 	_debug("move stale object to graveyard");
-	inode_unlock(d_inode(dir));
+	end_removing(rep);
 
 try_again:
 	/* first step is to make up a grave dentry in the graveyard */
@@ -749,26 +752,20 @@ static struct dentry *cachefiles_lookup_for_cull(struct cachefiles_cache *cache,
 	struct dentry *victim;
 	int ret = -ENOENT;
 
-	inode_lock_nested(d_inode(dir), I_MUTEX_PARENT);
+	victim = start_removing(&nop_mnt_idmap, dir, &QSTR(filename));
 
-	victim = lookup_one(&nop_mnt_idmap, &QSTR(filename), dir);
 	if (IS_ERR(victim))
 		goto lookup_error;
-	if (d_is_negative(victim))
-		goto lookup_put;
 	if (d_inode(victim)->i_flags & S_KERNEL_FILE)
 		goto lookup_busy;
 	return victim;
 
 lookup_busy:
 	ret = -EBUSY;
-lookup_put:
-	inode_unlock(d_inode(dir));
-	dput(victim);
+	end_removing(victim);
 	return ERR_PTR(ret);
 
 lookup_error:
-	inode_unlock(d_inode(dir));
 	ret = PTR_ERR(victim);
 	if (ret == -ENOENT)
 		return ERR_PTR(-ESTALE); /* Probably got retired by the netfs */
@@ -816,18 +813,17 @@ int cachefiles_cull(struct cachefiles_cache *cache, struct dentry *dir,
 
 	ret = cachefiles_bury_object(cache, NULL, dir, victim,
 				     FSCACHE_OBJECT_WAS_CULLED);
+	dput(victim);
 	if (ret < 0)
 		goto error;
 
 	fscache_count_culled();
-	dput(victim);
 	_leave(" = 0");
 	return 0;
 
 error_unlock:
-	inode_unlock(d_inode(dir));
+	end_removing(victim);
 error:
-	dput(victim);
 	if (ret == -ENOENT)
 		return -ESTALE; /* Probably got retired by the netfs */
 
diff --git a/fs/namei.c b/fs/namei.c
index 8873ad0f05b0..38dda29552f6 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3248,6 +3248,33 @@ struct dentry *start_creating(struct mnt_idmap *idmap, struct dentry *parent,
 }
 EXPORT_SYMBOL(start_creating);
 
+/**
+ * start_removing - prepare to remove a given name with permission checking
+ * @idmap:  idmap of the mount
+ * @parent: directory in which to find the name
+ * @name:   the name to be removed
+ *
+ * Locks are taken and a lookup is performed prior to removing
+ * an object from a directory.  Permission checking (MAY_EXEC) is performed
+ * against @idmap.
+ *
+ * If the name doesn't exist, an error is returned.
+ *
+ * end_removing() should be called when removal is complete, or aborted.
+ *
+ * Returns: a positive dentry, or an error.
+ */
+struct dentry *start_removing(struct mnt_idmap *idmap, struct dentry *parent,
+			      struct qstr *name)
+{
+	int err = lookup_one_common(idmap, name, parent);
+
+	if (err)
+		return ERR_PTR(err);
+	return start_dirop(parent, name, 0);
+}
+EXPORT_SYMBOL(start_removing);
+
 #ifdef CONFIG_UNIX98_PTYS
 int path_pts(struct path *path)
 {
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index c247a7c3291c..3eefaa2202e3 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -324,20 +324,12 @@ nfsd4_unlink_clid_dir(char *name, struct nfsd_net *nn)
 	dprintk("NFSD: nfsd4_unlink_clid_dir. name %s\n", name);
 
 	dir = nn->rec_file->f_path.dentry;
-	inode_lock_nested(d_inode(dir), I_MUTEX_PARENT);
-	dentry = lookup_one(&nop_mnt_idmap, &QSTR(name), dir);
-	if (IS_ERR(dentry)) {
-		status = PTR_ERR(dentry);
-		goto out_unlock;
-	}
-	status = -ENOENT;
-	if (d_really_is_negative(dentry))
-		goto out;
+	dentry = start_removing(&nop_mnt_idmap, dir, &QSTR(name));
+	if (IS_ERR(dentry))
+		return PTR_ERR(dentry);
+
 	status = vfs_rmdir(&nop_mnt_idmap, d_inode(dir), dentry);
-out:
-	dput(dentry);
-out_unlock:
-	inode_unlock(d_inode(dir));
+	end_removing(dentry);
 	return status;
 }
 
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 24e501abad0e..6291c371caa7 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -2044,7 +2044,7 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
 {
 	struct dentry	*dentry, *rdentry;
 	struct inode	*dirp;
-	struct inode	*rinode;
+	struct inode	*rinode = NULL;
 	__be32		err;
 	int		host_err;
 
@@ -2063,24 +2063,21 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
 
 	dentry = fhp->fh_dentry;
 	dirp = d_inode(dentry);
-	inode_lock_nested(dirp, I_MUTEX_PARENT);
 
-	rdentry = lookup_one(&nop_mnt_idmap, &QSTR_LEN(fname, flen), dentry);
+	rdentry = start_removing(&nop_mnt_idmap, dentry, &QSTR_LEN(fname, flen));
+
 	host_err = PTR_ERR(rdentry);
 	if (IS_ERR(rdentry))
-		goto out_unlock;
+		goto out_drop_write;
 
-	if (d_really_is_negative(rdentry)) {
-		dput(rdentry);
-		host_err = -ENOENT;
-		goto out_unlock;
-	}
-	rinode = d_inode(rdentry);
 	err = fh_fill_pre_attrs(fhp);
 	if (err != nfs_ok)
 		goto out_unlock;
 
+	rinode = d_inode(rdentry);
+	/* Prevent truncation until after locks dropped */
 	ihold(rinode);
+
 	if (!type)
 		type = d_inode(rdentry)->i_mode & S_IFMT;
 
@@ -2102,10 +2099,10 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
 	}
 	fh_fill_post_attrs(fhp);
 
-	inode_unlock(dirp);
-	if (!host_err)
+out_unlock:
+	end_removing(rdentry);
+	if (!err && !host_err)
 		host_err = commit_metadata(fhp);
-	dput(rdentry);
 	iput(rinode);    /* truncate the inode here */
 
 out_drop_write:
@@ -2123,9 +2120,6 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
 	}
 out:
 	return err != nfs_ok ? err : nfserrno(host_err);
-out_unlock:
-	inode_unlock(dirp);
-	goto out_drop_write;
 }
 
 /*
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index b9160fefbd00..20682afdbd20 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -866,17 +866,17 @@ static int ovl_remove_upper(struct dentry *dentry, bool is_dir,
 			goto out;
 	}
 
-	inode_lock_nested(dir, I_MUTEX_PARENT);
-	upper = ovl_lookup_upper(ofs, dentry->d_name.name, upperdir,
-				 dentry->d_name.len);
+	upper = ovl_start_removing_upper(ofs, upperdir,
+					 &QSTR_LEN(dentry->d_name.name,
+						   dentry->d_name.len));
 	err = PTR_ERR(upper);
 	if (IS_ERR(upper))
-		goto out_unlock;
+		goto out_dput;
 
 	err = -ESTALE;
 	if ((opaquedir && upper != opaquedir) ||
 	    (!opaquedir && !ovl_matches_upper(dentry, upper)))
-		goto out_dput_upper;
+		goto out_unlock;
 
 	if (is_dir)
 		err = ovl_do_rmdir(ofs, dir, upper);
@@ -892,10 +892,9 @@ static int ovl_remove_upper(struct dentry *dentry, bool is_dir,
 	 */
 	if (!err)
 		d_drop(dentry);
-out_dput_upper:
-	dput(upper);
 out_unlock:
-	inode_unlock(dir);
+	end_removing(upper);
+out_dput:
 	dput(opaquedir);
 out:
 	return err;
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index beeba96cfcb2..49ad65f829dc 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -423,6 +423,14 @@ static inline struct dentry *ovl_start_creating_upper(struct ovl_fs *ofs,
 			      parent, name);
 }
 
+static inline struct dentry *ovl_start_removing_upper(struct ovl_fs *ofs,
+						      struct dentry *parent,
+						      struct qstr *name)
+{
+	return start_removing(ovl_upper_mnt_idmap(ofs),
+			      parent, name);
+}
+
 static inline bool ovl_open_flags_need_copy_up(int flags)
 {
 	if (!flags)
diff --git a/include/linux/namei.h b/include/linux/namei.h
index 37b72f4a64f0..6d1069f93ebf 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -91,6 +91,8 @@ struct dentry *lookup_one_positive_killable(struct mnt_idmap *idmap,
 
 struct dentry *start_creating(struct mnt_idmap *idmap, struct dentry *parent,
 			      struct qstr *name);
+struct dentry *start_removing(struct mnt_idmap *idmap, struct dentry *parent,
+			      struct qstr *name);
 
 /**
  * end_creating - finish action started with start_creating
@@ -122,6 +124,22 @@ static inline void end_creating(struct dentry *child, struct dentry *parent)
 		end_dirop(child);
 }
 
+/**
+ * end_removing - finish action started with start_removing
+ * @child:  dentry returned by start_removing()
+ * @parent: dentry given to start_removing()
+ *
+ * Unlock and release the child.
+ *
+ * This is identical to end_dirop().  It can be passed the result of
+ * start_removing() whether that was successful or not, but it is not needed
+ * if start_removing() failed.
+ */
+static inline void end_removing(struct dentry *child)
+{
+	end_dirop(child);
+}
+
 extern int follow_down_one(struct path *);
 extern int follow_down(struct path *path, unsigned int flags);
 extern int follow_up(struct path *);
-- 
2.50.0.107.gf914562f5916.dirty

Re: [PATCH v5 05/14] VFS/nfsd/cachefiles/ovl: introduce start_removing() and end_removing()

Posted by Jeff Layton 2 months, 4 weeks ago

On Thu, 2025-11-06 at 11:50 +1100, NeilBrown wrote:
> From: NeilBrown <neil@brown.name>
> 
> start_removing() is similar to start_creating() but will only return a
> positive dentry with the expectation that it will be removed.  This is
> used by nfsd, cachefiles, and overlayfs.  They are changed to also use
> end_removing() to terminate the action begun by start_removing().  This
> is a simple alias for end_dirop().
> 
> Apart from changes to the error paths, as we no longer need to unlock on
> a lookup error, an effect on callers is that they don't need to test if
> the found dentry is positive or negative - they can be sure it is
> positive.
> 
> Reviewed-by: Amir Goldstein <amir73il@gmail.com>
> Signed-off-by: NeilBrown <neil@brown.name>
> ---
>  fs/cachefiles/namei.c    | 32 ++++++++++++++------------------
>  fs/namei.c               | 27 +++++++++++++++++++++++++++
>  fs/nfsd/nfs4recover.c    | 18 +++++-------------
>  fs/nfsd/vfs.c            | 26 ++++++++++----------------
>  fs/overlayfs/dir.c       | 15 +++++++--------
>  fs/overlayfs/overlayfs.h |  8 ++++++++
>  include/linux/namei.h    | 18 ++++++++++++++++++
>  7 files changed, 89 insertions(+), 55 deletions(-)
> 
> diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
> index 0a136eb434da..c7f0c6ab9b88 100644
> --- a/fs/cachefiles/namei.c
> +++ b/fs/cachefiles/namei.c
> @@ -260,6 +260,7 @@ static int cachefiles_unlink(struct cachefiles_cache *cache,
>   * - File backed objects are unlinked
>   * - Directory backed objects are stuffed into the graveyard for userspace to
>   *   delete
> + * On entry dir must be locked.  It will be unlocked on exit.
>   */
>  int cachefiles_bury_object(struct cachefiles_cache *cache,
>  			   struct cachefiles_object *object,
> @@ -274,28 +275,30 @@ int cachefiles_bury_object(struct cachefiles_cache *cache,
>  
>  	_enter(",'%pd','%pd'", dir, rep);
>  
> +	/* end_removing() will dput() @rep but we need to keep
> +	 * a ref, so take one now.  This also stops the dentry
> +	 * being negated when unlinked which we need.
> +	 */
> +	dget(rep);
> +
>  	if (rep->d_parent != dir) {
> -		inode_unlock(d_inode(dir));
> +		end_removing(rep);
>  		_leave(" = -ESTALE");
>  		return -ESTALE;
>  	}
>  
>  	/* non-directories can just be unlinked */
>  	if (!d_is_dir(rep)) {
> -		dget(rep); /* Stop the dentry being negated if it's only pinned
> -			    * by a file struct.
> -			    */
>  		ret = cachefiles_unlink(cache, object, dir, rep, why);
> -		dput(rep);
> +		end_removing(rep);
>  
> -		inode_unlock(d_inode(dir));
>  		_leave(" = %d", ret);
>  		return ret;
>  	}
>  
>  	/* directories have to be moved to the graveyard */
>  	_debug("move stale object to graveyard");
> -	inode_unlock(d_inode(dir));
> +	end_removing(rep);
>  
>  try_again:
>  	/* first step is to make up a grave dentry in the graveyard */
> @@ -749,26 +752,20 @@ static struct dentry *cachefiles_lookup_for_cull(struct cachefiles_cache *cache,
>  	struct dentry *victim;
>  	int ret = -ENOENT;
>  
> -	inode_lock_nested(d_inode(dir), I_MUTEX_PARENT);
> +	victim = start_removing(&nop_mnt_idmap, dir, &QSTR(filename));
>  
> -	victim = lookup_one(&nop_mnt_idmap, &QSTR(filename), dir);
>  	if (IS_ERR(victim))
>  		goto lookup_error;
> -	if (d_is_negative(victim))
> -		goto lookup_put;
>  	if (d_inode(victim)->i_flags & S_KERNEL_FILE)
>  		goto lookup_busy;
>  	return victim;
>  
>  lookup_busy:
>  	ret = -EBUSY;
> -lookup_put:
> -	inode_unlock(d_inode(dir));
> -	dput(victim);
> +	end_removing(victim);
>  	return ERR_PTR(ret);
>  
>  lookup_error:
> -	inode_unlock(d_inode(dir));
>  	ret = PTR_ERR(victim);
>  	if (ret == -ENOENT)
>  		return ERR_PTR(-ESTALE); /* Probably got retired by the netfs */
> @@ -816,18 +813,17 @@ int cachefiles_cull(struct cachefiles_cache *cache, struct dentry *dir,
>  
>  	ret = cachefiles_bury_object(cache, NULL, dir, victim,
>  				     FSCACHE_OBJECT_WAS_CULLED);
> +	dput(victim);
>  	if (ret < 0)
>  		goto error;
>  
>  	fscache_count_culled();
> -	dput(victim);
>  	_leave(" = 0");
>  	return 0;
>  
>  error_unlock:
> -	inode_unlock(d_inode(dir));
> +	end_removing(victim);
>  error:
> -	dput(victim);
>  	if (ret == -ENOENT)
>  		return -ESTALE; /* Probably got retired by the netfs */
>  
> diff --git a/fs/namei.c b/fs/namei.c
> index 8873ad0f05b0..38dda29552f6 100644
> --- a/fs/namei.c
> +++ b/fs/namei.c
> @@ -3248,6 +3248,33 @@ struct dentry *start_creating(struct mnt_idmap *idmap, struct dentry *parent,
>  }
>  EXPORT_SYMBOL(start_creating);
>  
> +/**
> + * start_removing - prepare to remove a given name with permission checking
> + * @idmap:  idmap of the mount
> + * @parent: directory in which to find the name
> + * @name:   the name to be removed
> + *
> + * Locks are taken and a lookup in performed prior to removing
> + * an object from a directory.  Permission checking (MAY_EXEC) is performed
> + * against @idmap.
> + *
> + * If the name doesn't exist, an error is returned.
> + *
> + * end_removing() should be called when removal is complete, or aborted.
> + *
> + * Returns: a positive dentry, or an error.
> + */
> +struct dentry *start_removing(struct mnt_idmap *idmap, struct dentry *parent,
> +			      struct qstr *name)
> +{
> +	int err = lookup_one_common(idmap, name, parent);
> +
> +	if (err)
> +		return ERR_PTR(err);
> +	return start_dirop(parent, name, 0);
> +}
> +EXPORT_SYMBOL(start_removing);
> +
>  #ifdef CONFIG_UNIX98_PTYS
>  int path_pts(struct path *path)
>  {
> diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
> index c247a7c3291c..3eefaa2202e3 100644
> --- a/fs/nfsd/nfs4recover.c
> +++ b/fs/nfsd/nfs4recover.c
> @@ -324,20 +324,12 @@ nfsd4_unlink_clid_dir(char *name, struct nfsd_net *nn)
>  	dprintk("NFSD: nfsd4_unlink_clid_dir. name %s\n", name);
>  
>  	dir = nn->rec_file->f_path.dentry;
> -	inode_lock_nested(d_inode(dir), I_MUTEX_PARENT);
> -	dentry = lookup_one(&nop_mnt_idmap, &QSTR(name), dir);
> -	if (IS_ERR(dentry)) {
> -		status = PTR_ERR(dentry);
> -		goto out_unlock;
> -	}
> -	status = -ENOENT;
> -	if (d_really_is_negative(dentry))
> -		goto out;
> +	dentry = start_removing(&nop_mnt_idmap, dir, &QSTR(name));
> +	if (IS_ERR(dentry))
> +		return PTR_ERR(dentry);
> +
>  	status = vfs_rmdir(&nop_mnt_idmap, d_inode(dir), dentry);
> -out:
> -	dput(dentry);
> -out_unlock:
> -	inode_unlock(d_inode(dir));
> +	end_removing(dentry);
>  	return status;
>  }
>  
> diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
> index 24e501abad0e..6291c371caa7 100644
> --- a/fs/nfsd/vfs.c
> +++ b/fs/nfsd/vfs.c
> @@ -2044,7 +2044,7 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
>  {
>  	struct dentry	*dentry, *rdentry;
>  	struct inode	*dirp;
> -	struct inode	*rinode;
> +	struct inode	*rinode = NULL;
>  	__be32		err;
>  	int		host_err;
>  
> @@ -2063,24 +2063,21 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
>  
>  	dentry = fhp->fh_dentry;
>  	dirp = d_inode(dentry);
> -	inode_lock_nested(dirp, I_MUTEX_PARENT);
>  
> -	rdentry = lookup_one(&nop_mnt_idmap, &QSTR_LEN(fname, flen), dentry);
> +	rdentry = start_removing(&nop_mnt_idmap, dentry, &QSTR_LEN(fname, flen));
> +
>  	host_err = PTR_ERR(rdentry);
>  	if (IS_ERR(rdentry))
> -		goto out_unlock;
> +		goto out_drop_write;
>  
> -	if (d_really_is_negative(rdentry)) {
> -		dput(rdentry);
> -		host_err = -ENOENT;
> -		goto out_unlock;
> -	}
> -	rinode = d_inode(rdentry);
>  	err = fh_fill_pre_attrs(fhp);
>  	if (err != nfs_ok)
>  		goto out_unlock;
>  
> +	rinode = d_inode(rdentry);
> +	/* Prevent truncation until after locks dropped */
>  	ihold(rinode);
> +
>  	if (!type)
>  		type = d_inode(rdentry)->i_mode & S_IFMT;
>  
> @@ -2102,10 +2099,10 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
>  	}
>  	fh_fill_post_attrs(fhp);
>  
> -	inode_unlock(dirp);
> -	if (!host_err)
> +out_unlock:
> +	end_removing(rdentry);
> +	if (!err && !host_err)
>  		host_err = commit_metadata(fhp);
> -	dput(rdentry);
>  	iput(rinode);    /* truncate the inode here */
>  
>  out_drop_write:
> @@ -2123,9 +2120,6 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
>  	}
>  out:
>  	return err != nfs_ok ? err : nfserrno(host_err);
> -out_unlock:
> -	inode_unlock(dirp);
> -	goto out_drop_write;
>  }
>  
>  /*
> diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
> index b9160fefbd00..20682afdbd20 100644
> --- a/fs/overlayfs/dir.c
> +++ b/fs/overlayfs/dir.c
> @@ -866,17 +866,17 @@ static int ovl_remove_upper(struct dentry *dentry, bool is_dir,
>  			goto out;
>  	}
>  
> -	inode_lock_nested(dir, I_MUTEX_PARENT);
> -	upper = ovl_lookup_upper(ofs, dentry->d_name.name, upperdir,
> -				 dentry->d_name.len);
> +	upper = ovl_start_removing_upper(ofs, upperdir,
> +					 &QSTR_LEN(dentry->d_name.name,
> +						   dentry->d_name.len));
>  	err = PTR_ERR(upper);
>  	if (IS_ERR(upper))
> -		goto out_unlock;
> +		goto out_dput;
>  
>  	err = -ESTALE;
>  	if ((opaquedir && upper != opaquedir) ||
>  	    (!opaquedir && !ovl_matches_upper(dentry, upper)))
> -		goto out_dput_upper;
> +		goto out_unlock;
>  
>  	if (is_dir)
>  		err = ovl_do_rmdir(ofs, dir, upper);
> @@ -892,10 +892,9 @@ static int ovl_remove_upper(struct dentry *dentry, bool is_dir,
>  	 */
>  	if (!err)
>  		d_drop(dentry);
> -out_dput_upper:
> -	dput(upper);
>  out_unlock:
> -	inode_unlock(dir);
> +	end_removing(upper);
> +out_dput:
>  	dput(opaquedir);
>  out:
>  	return err;
> diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
> index beeba96cfcb2..49ad65f829dc 100644
> --- a/fs/overlayfs/overlayfs.h
> +++ b/fs/overlayfs/overlayfs.h
> @@ -423,6 +423,14 @@ static inline struct dentry *ovl_start_creating_upper(struct ovl_fs *ofs,
>  			      parent, name);
>  }
>  
> +static inline struct dentry *ovl_start_removing_upper(struct ovl_fs *ofs,
> +						      struct dentry *parent,
> +						      struct qstr *name)
> +{
> +	return start_removing(ovl_upper_mnt_idmap(ofs),
> +			      parent, name);
> +}
> +
>  static inline bool ovl_open_flags_need_copy_up(int flags)
>  {
>  	if (!flags)
> diff --git a/include/linux/namei.h b/include/linux/namei.h
> index 37b72f4a64f0..6d1069f93ebf 100644
> --- a/include/linux/namei.h
> +++ b/include/linux/namei.h
> @@ -91,6 +91,8 @@ struct dentry *lookup_one_positive_killable(struct mnt_idmap *idmap,
>  
>  struct dentry *start_creating(struct mnt_idmap *idmap, struct dentry *parent,
>  			      struct qstr *name);
> +struct dentry *start_removing(struct mnt_idmap *idmap, struct dentry *parent,
> +			      struct qstr *name);
>  
>  /**
>   * end_creating - finish action started with start_creating
> @@ -122,6 +124,22 @@ static inline void end_creating(struct dentry *child, struct dentry *parent)
>  		end_dirop(child);
>  }
>  
> +/**
> + * end_removing - finish action started with start_removing
> + * @child:  dentry returned by start_removing()
> + * @parent: dentry given to start_removing()
> + *
> + * Unlock and release the child.
> + *
> + * This is identical to end_dirop().  It can be passed the result of
> + * start_removing() whether that was successful or not, but it not needed
> + * if start_removing() failed.
> + */
> +static inline void end_removing(struct dentry *child)
> +{
> +	end_dirop(child);
> +}
> +
>  extern int follow_down_one(struct path *);
>  extern int follow_down(struct path *path, unsigned int flags);
>  extern int follow_up(struct path *);

This looks fine to me (particularly the knfsd parts), but doesn't ksmbd
need to be similarly converted?

Reviewed-by: Jeff Layton <jlayton@kernel.org>

Re: [PATCH v5 05/14] VFS/nfsd/cachefiles/ovl: introduce start_removing() and end_removing()

Posted by NeilBrown 2 months, 4 weeks ago

On Thu, 13 Nov 2025, Jeff Layton wrote:
> On Thu, 2025-11-06 at 11:50 +1100, NeilBrown wrote:
> > From: NeilBrown <neil@brown.name>
> > 
> > start_removing() is similar to start_creating() but will only return a
> > positive dentry with the expectation that it will be removed.  This is
> > used by nfsd, cachefiles, and overlayfs.  They are changed to also use
> > end_removing() to terminate the action begun by start_removing().  This
> > is a simple alias for end_dirop().
> > 
> > Apart from changes to the error paths, as we no longer need to unlock on
> > a lookup error, an effect on callers is that they don't need to test if
> > the found dentry is positive or negative - they can be sure it is
> > positive.
> > 
> > Reviewed-by: Amir Goldstein <amir73il@gmail.com>
> > Signed-off-by: NeilBrown <neil@brown.name>
> > ---
> >  fs/cachefiles/namei.c    | 32 ++++++++++++++------------------
> >  fs/namei.c               | 27 +++++++++++++++++++++++++++
> >  fs/nfsd/nfs4recover.c    | 18 +++++-------------
> >  fs/nfsd/vfs.c            | 26 ++++++++++----------------
> >  fs/overlayfs/dir.c       | 15 +++++++--------
> >  fs/overlayfs/overlayfs.h |  8 ++++++++
> >  include/linux/namei.h    | 18 ++++++++++++++++++
> >  7 files changed, 89 insertions(+), 55 deletions(-)
> > 
> > diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
> > index 0a136eb434da..c7f0c6ab9b88 100644
> > --- a/fs/cachefiles/namei.c
> > +++ b/fs/cachefiles/namei.c
> > @@ -260,6 +260,7 @@ static int cachefiles_unlink(struct cachefiles_cache *cache,
> >   * - File backed objects are unlinked
> >   * - Directory backed objects are stuffed into the graveyard for userspace to
> >   *   delete
> > + * On entry dir must be locked.  It will be unlocked on exit.
> >   */
> >  int cachefiles_bury_object(struct cachefiles_cache *cache,
> >  			   struct cachefiles_object *object,
> > @@ -274,28 +275,30 @@ int cachefiles_bury_object(struct cachefiles_cache *cache,
> >  
> >  	_enter(",'%pd','%pd'", dir, rep);
> >  
> > +	/* end_removing() will dput() @rep but we need to keep
> > +	 * a ref, so take one now.  This also stops the dentry
> > +	 * being negated when unlinked which we need.
> > +	 */
> > +	dget(rep);
> > +
> >  	if (rep->d_parent != dir) {
> > -		inode_unlock(d_inode(dir));
> > +		end_removing(rep);
> >  		_leave(" = -ESTALE");
> >  		return -ESTALE;
> >  	}
> >  
> >  	/* non-directories can just be unlinked */
> >  	if (!d_is_dir(rep)) {
> > -		dget(rep); /* Stop the dentry being negated if it's only pinned
> > -			    * by a file struct.
> > -			    */
> >  		ret = cachefiles_unlink(cache, object, dir, rep, why);
> > -		dput(rep);
> > +		end_removing(rep);
> >  
> > -		inode_unlock(d_inode(dir));
> >  		_leave(" = %d", ret);
> >  		return ret;
> >  	}
> >  
> >  	/* directories have to be moved to the graveyard */
> >  	_debug("move stale object to graveyard");
> > -	inode_unlock(d_inode(dir));
> > +	end_removing(rep);
> >  
> >  try_again:
> >  	/* first step is to make up a grave dentry in the graveyard */
> > @@ -749,26 +752,20 @@ static struct dentry *cachefiles_lookup_for_cull(struct cachefiles_cache *cache,
> >  	struct dentry *victim;
> >  	int ret = -ENOENT;
> >  
> > -	inode_lock_nested(d_inode(dir), I_MUTEX_PARENT);
> > +	victim = start_removing(&nop_mnt_idmap, dir, &QSTR(filename));
> >  
> > -	victim = lookup_one(&nop_mnt_idmap, &QSTR(filename), dir);
> >  	if (IS_ERR(victim))
> >  		goto lookup_error;
> > -	if (d_is_negative(victim))
> > -		goto lookup_put;
> >  	if (d_inode(victim)->i_flags & S_KERNEL_FILE)
> >  		goto lookup_busy;
> >  	return victim;
> >  
> >  lookup_busy:
> >  	ret = -EBUSY;
> > -lookup_put:
> > -	inode_unlock(d_inode(dir));
> > -	dput(victim);
> > +	end_removing(victim);
> >  	return ERR_PTR(ret);
> >  
> >  lookup_error:
> > -	inode_unlock(d_inode(dir));
> >  	ret = PTR_ERR(victim);
> >  	if (ret == -ENOENT)
> >  		return ERR_PTR(-ESTALE); /* Probably got retired by the netfs */
> > @@ -816,18 +813,17 @@ int cachefiles_cull(struct cachefiles_cache *cache, struct dentry *dir,
> >  
> >  	ret = cachefiles_bury_object(cache, NULL, dir, victim,
> >  				     FSCACHE_OBJECT_WAS_CULLED);
> > +	dput(victim);
> >  	if (ret < 0)
> >  		goto error;
> >  
> >  	fscache_count_culled();
> > -	dput(victim);
> >  	_leave(" = 0");
> >  	return 0;
> >  
> >  error_unlock:
> > -	inode_unlock(d_inode(dir));
> > +	end_removing(victim);
> >  error:
> > -	dput(victim);
> >  	if (ret == -ENOENT)
> >  		return -ESTALE; /* Probably got retired by the netfs */
> >  
> > diff --git a/fs/namei.c b/fs/namei.c
> > index 8873ad0f05b0..38dda29552f6 100644
> > --- a/fs/namei.c
> > +++ b/fs/namei.c
> > @@ -3248,6 +3248,33 @@ struct dentry *start_creating(struct mnt_idmap *idmap, struct dentry *parent,
> >  }
> >  EXPORT_SYMBOL(start_creating);
> >  
> > +/**
> > + * start_removing - prepare to remove a given name with permission checking
> > + * @idmap:  idmap of the mount
> > + * @parent: directory in which to find the name
> > + * @name:   the name to be removed
> > + *
> > + * Locks are taken and a lookup in performed prior to removing
> > + * an object from a directory.  Permission checking (MAY_EXEC) is performed
> > + * against @idmap.
> > + *
> > + * If the name doesn't exist, an error is returned.
> > + *
> > + * end_removing() should be called when removal is complete, or aborted.
> > + *
> > + * Returns: a positive dentry, or an error.
> > + */
> > +struct dentry *start_removing(struct mnt_idmap *idmap, struct dentry *parent,
> > +			      struct qstr *name)
> > +{
> > +	int err = lookup_one_common(idmap, name, parent);
> > +
> > +	if (err)
> > +		return ERR_PTR(err);
> > +	return start_dirop(parent, name, 0);
> > +}
> > +EXPORT_SYMBOL(start_removing);
> > +
> >  #ifdef CONFIG_UNIX98_PTYS
> >  int path_pts(struct path *path)
> >  {
> > diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
> > index c247a7c3291c..3eefaa2202e3 100644
> > --- a/fs/nfsd/nfs4recover.c
> > +++ b/fs/nfsd/nfs4recover.c
> > @@ -324,20 +324,12 @@ nfsd4_unlink_clid_dir(char *name, struct nfsd_net *nn)
> >  	dprintk("NFSD: nfsd4_unlink_clid_dir. name %s\n", name);
> >  
> >  	dir = nn->rec_file->f_path.dentry;
> > -	inode_lock_nested(d_inode(dir), I_MUTEX_PARENT);
> > -	dentry = lookup_one(&nop_mnt_idmap, &QSTR(name), dir);
> > -	if (IS_ERR(dentry)) {
> > -		status = PTR_ERR(dentry);
> > -		goto out_unlock;
> > -	}
> > -	status = -ENOENT;
> > -	if (d_really_is_negative(dentry))
> > -		goto out;
> > +	dentry = start_removing(&nop_mnt_idmap, dir, &QSTR(name));
> > +	if (IS_ERR(dentry))
> > +		return PTR_ERR(dentry);
> > +
> >  	status = vfs_rmdir(&nop_mnt_idmap, d_inode(dir), dentry);
> > -out:
> > -	dput(dentry);
> > -out_unlock:
> > -	inode_unlock(d_inode(dir));
> > +	end_removing(dentry);
> >  	return status;
> >  }
> >  
> > diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
> > index 24e501abad0e..6291c371caa7 100644
> > --- a/fs/nfsd/vfs.c
> > +++ b/fs/nfsd/vfs.c
> > @@ -2044,7 +2044,7 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
> >  {
> >  	struct dentry	*dentry, *rdentry;
> >  	struct inode	*dirp;
> > -	struct inode	*rinode;
> > +	struct inode	*rinode = NULL;
> >  	__be32		err;
> >  	int		host_err;
> >  
> > @@ -2063,24 +2063,21 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
> >  
> >  	dentry = fhp->fh_dentry;
> >  	dirp = d_inode(dentry);
> > -	inode_lock_nested(dirp, I_MUTEX_PARENT);
> >  
> > -	rdentry = lookup_one(&nop_mnt_idmap, &QSTR_LEN(fname, flen), dentry);
> > +	rdentry = start_removing(&nop_mnt_idmap, dentry, &QSTR_LEN(fname, flen));
> > +
> >  	host_err = PTR_ERR(rdentry);
> >  	if (IS_ERR(rdentry))
> > -		goto out_unlock;
> > +		goto out_drop_write;
> >  
> > -	if (d_really_is_negative(rdentry)) {
> > -		dput(rdentry);
> > -		host_err = -ENOENT;
> > -		goto out_unlock;
> > -	}
> > -	rinode = d_inode(rdentry);
> >  	err = fh_fill_pre_attrs(fhp);
> >  	if (err != nfs_ok)
> >  		goto out_unlock;
> >  
> > +	rinode = d_inode(rdentry);
> > +	/* Prevent truncation until after locks dropped */
> >  	ihold(rinode);
> > +
> >  	if (!type)
> >  		type = d_inode(rdentry)->i_mode & S_IFMT;
> >  
> > @@ -2102,10 +2099,10 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
> >  	}
> >  	fh_fill_post_attrs(fhp);
> >  
> > -	inode_unlock(dirp);
> > -	if (!host_err)
> > +out_unlock:
> > +	end_removing(rdentry);
> > +	if (!err && !host_err)
> >  		host_err = commit_metadata(fhp);
> > -	dput(rdentry);
> >  	iput(rinode);    /* truncate the inode here */
> >  
> >  out_drop_write:
> > @@ -2123,9 +2120,6 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
> >  	}
> >  out:
> >  	return err != nfs_ok ? err : nfserrno(host_err);
> > -out_unlock:
> > -	inode_unlock(dirp);
> > -	goto out_drop_write;
> >  }
> >  
> >  /*
> > diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
> > index b9160fefbd00..20682afdbd20 100644
> > --- a/fs/overlayfs/dir.c
> > +++ b/fs/overlayfs/dir.c
> > @@ -866,17 +866,17 @@ static int ovl_remove_upper(struct dentry *dentry, bool is_dir,
> >  			goto out;
> >  	}
> >  
> > -	inode_lock_nested(dir, I_MUTEX_PARENT);
> > -	upper = ovl_lookup_upper(ofs, dentry->d_name.name, upperdir,
> > -				 dentry->d_name.len);
> > +	upper = ovl_start_removing_upper(ofs, upperdir,
> > +					 &QSTR_LEN(dentry->d_name.name,
> > +						   dentry->d_name.len));
> >  	err = PTR_ERR(upper);
> >  	if (IS_ERR(upper))
> > -		goto out_unlock;
> > +		goto out_dput;
> >  
> >  	err = -ESTALE;
> >  	if ((opaquedir && upper != opaquedir) ||
> >  	    (!opaquedir && !ovl_matches_upper(dentry, upper)))
> > -		goto out_dput_upper;
> > +		goto out_unlock;
> >  
> >  	if (is_dir)
> >  		err = ovl_do_rmdir(ofs, dir, upper);
> > @@ -892,10 +892,9 @@ static int ovl_remove_upper(struct dentry *dentry, bool is_dir,
> >  	 */
> >  	if (!err)
> >  		d_drop(dentry);
> > -out_dput_upper:
> > -	dput(upper);
> >  out_unlock:
> > -	inode_unlock(dir);
> > +	end_removing(upper);
> > +out_dput:
> >  	dput(opaquedir);
> >  out:
> >  	return err;
> > diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
> > index beeba96cfcb2..49ad65f829dc 100644
> > --- a/fs/overlayfs/overlayfs.h
> > +++ b/fs/overlayfs/overlayfs.h
> > @@ -423,6 +423,14 @@ static inline struct dentry *ovl_start_creating_upper(struct ovl_fs *ofs,
> >  			      parent, name);
> >  }
> >  
> > +static inline struct dentry *ovl_start_removing_upper(struct ovl_fs *ofs,
> > +						      struct dentry *parent,
> > +						      struct qstr *name)
> > +{
> > +	return start_removing(ovl_upper_mnt_idmap(ofs),
> > +			      parent, name);
> > +}
> > +
> >  static inline bool ovl_open_flags_need_copy_up(int flags)
> >  {
> >  	if (!flags)
> > diff --git a/include/linux/namei.h b/include/linux/namei.h
> > index 37b72f4a64f0..6d1069f93ebf 100644
> > --- a/include/linux/namei.h
> > +++ b/include/linux/namei.h
> > @@ -91,6 +91,8 @@ struct dentry *lookup_one_positive_killable(struct mnt_idmap *idmap,
> >  
> >  struct dentry *start_creating(struct mnt_idmap *idmap, struct dentry *parent,
> >  			      struct qstr *name);
> > +struct dentry *start_removing(struct mnt_idmap *idmap, struct dentry *parent,
> > +			      struct qstr *name);
> >  
> >  /**
> >   * end_creating - finish action started with start_creating
> > @@ -122,6 +124,22 @@ static inline void end_creating(struct dentry *child, struct dentry *parent)
> >  		end_dirop(child);
> >  }
> >  
> > +/**
> > + * end_removing - finish action started with start_removing
> > + * @child:  dentry returned by start_removing()
> > + * @parent: dentry given to start_removing()
> > + *
> > + * Unlock and release the child.
> > + *
> > + * This is identical to end_dirop().  It can be passed the result of
> > + * start_removing() whether that was successful or not, but it not needed
> > + * if start_removing() failed.
> > + */
> > +static inline void end_removing(struct dentry *child)
> > +{
> > +	end_dirop(child);
> > +}
> > +
> >  extern int follow_down_one(struct path *);
> >  extern int follow_down(struct path *path, unsigned int flags);
> >  extern int follow_up(struct path *);
> 
> This looks fine to me (particularly the knfsd parts), but doesn't ksmbd
> need to be similarly converted?

ksmbd removes names in two places.

ksmbd_vfs_unlink() is changed to use start_removing_dentry() in the next
patch as you noticed when reviewing it.

ksmbd_vfs_remove_file() is called after ksmbd_vfs_kern_path_locked()
which is only used there.  It should possibly be renamed to
ksmbd_vfs_kern_path_start_removing() and then the "do_lock" flag to
__ksmbd_vfs_kern_path() and ksmbd_vfs_path_lookup() should become
"for_remove" and then the lock/lookup in the "if (do_lock)" branch
(which will become "if (for_remove)") should use start_removing.

I'll insert a patch to do this.

> 
> Reviewed-by: Jeff Layton <jlayton@kernel.org>

Thanks,
NeilBrown