From: NeilBrown <neil@brown.name>
start_removing() is similar to start_creating() but will only return a
positive dentry with the expectation that it will be removed. This is
used by nfsd, cachefiles, and overlayfs. They are changed to also use
end_removing() to terminate the action begun by start_removing(). This
is a simple alias for end_dirop().
Apart from changes to the error paths, as we no longer need to unlock on
a lookup error, an effect on callers is that they don't need to test if
the found dentry is positive or negative - they can be sure it is
positive.
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: NeilBrown <neil@brown.name>
---
fs/cachefiles/namei.c | 32 ++++++++++++++------------------
fs/namei.c | 27 +++++++++++++++++++++++++++
fs/nfsd/nfs4recover.c | 18 +++++-------------
fs/nfsd/vfs.c | 26 ++++++++++----------------
fs/overlayfs/dir.c | 15 +++++++--------
fs/overlayfs/overlayfs.h | 8 ++++++++
include/linux/namei.h | 18 ++++++++++++++++++
7 files changed, 89 insertions(+), 55 deletions(-)
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index 0a136eb434da..c7f0c6ab9b88 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -260,6 +260,7 @@ static int cachefiles_unlink(struct cachefiles_cache *cache,
* - File backed objects are unlinked
* - Directory backed objects are stuffed into the graveyard for userspace to
* delete
+ * On entry dir must be locked. It will be unlocked on exit.
*/
int cachefiles_bury_object(struct cachefiles_cache *cache,
struct cachefiles_object *object,
@@ -274,28 +275,30 @@ int cachefiles_bury_object(struct cachefiles_cache *cache,
_enter(",'%pd','%pd'", dir, rep);
+ /* end_removing() will dput() @rep but we need to keep
+ * a ref, so take one now. This also stops the dentry
+ * being negated when unlinked which we need.
+ */
+ dget(rep);
+
if (rep->d_parent != dir) {
- inode_unlock(d_inode(dir));
+ end_removing(rep);
_leave(" = -ESTALE");
return -ESTALE;
}
/* non-directories can just be unlinked */
if (!d_is_dir(rep)) {
- dget(rep); /* Stop the dentry being negated if it's only pinned
- * by a file struct.
- */
ret = cachefiles_unlink(cache, object, dir, rep, why);
- dput(rep);
+ end_removing(rep);
- inode_unlock(d_inode(dir));
_leave(" = %d", ret);
return ret;
}
/* directories have to be moved to the graveyard */
_debug("move stale object to graveyard");
- inode_unlock(d_inode(dir));
+ end_removing(rep);
try_again:
/* first step is to make up a grave dentry in the graveyard */
@@ -749,26 +752,20 @@ static struct dentry *cachefiles_lookup_for_cull(struct cachefiles_cache *cache,
struct dentry *victim;
int ret = -ENOENT;
- inode_lock_nested(d_inode(dir), I_MUTEX_PARENT);
+ victim = start_removing(&nop_mnt_idmap, dir, &QSTR(filename));
- victim = lookup_one(&nop_mnt_idmap, &QSTR(filename), dir);
if (IS_ERR(victim))
goto lookup_error;
- if (d_is_negative(victim))
- goto lookup_put;
if (d_inode(victim)->i_flags & S_KERNEL_FILE)
goto lookup_busy;
return victim;
lookup_busy:
ret = -EBUSY;
-lookup_put:
- inode_unlock(d_inode(dir));
- dput(victim);
+ end_removing(victim);
return ERR_PTR(ret);
lookup_error:
- inode_unlock(d_inode(dir));
ret = PTR_ERR(victim);
if (ret == -ENOENT)
return ERR_PTR(-ESTALE); /* Probably got retired by the netfs */
@@ -816,18 +813,17 @@ int cachefiles_cull(struct cachefiles_cache *cache, struct dentry *dir,
ret = cachefiles_bury_object(cache, NULL, dir, victim,
FSCACHE_OBJECT_WAS_CULLED);
+ dput(victim);
if (ret < 0)
goto error;
fscache_count_culled();
- dput(victim);
_leave(" = 0");
return 0;
error_unlock:
- inode_unlock(d_inode(dir));
+ end_removing(victim);
error:
- dput(victim);
if (ret == -ENOENT)
return -ESTALE; /* Probably got retired by the netfs */
diff --git a/fs/namei.c b/fs/namei.c
index 8873ad0f05b0..38dda29552f6 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3248,6 +3248,33 @@ struct dentry *start_creating(struct mnt_idmap *idmap, struct dentry *parent,
}
EXPORT_SYMBOL(start_creating);
+/**
+ * start_removing - prepare to remove a given name with permission checking
+ * @idmap: idmap of the mount
+ * @parent: directory in which to find the name
+ * @name: the name to be removed
+ *
+ * Locks are taken and a lookup is performed prior to removing
+ * an object from a directory. Permission checking (MAY_EXEC) is performed
+ * against @idmap.
+ *
+ * If the name doesn't exist, an error is returned.
+ *
+ * end_removing() should be called when removal is complete, or aborted.
+ *
+ * Returns: a positive dentry, or an error.
+ */
+struct dentry *start_removing(struct mnt_idmap *idmap, struct dentry *parent,
+ struct qstr *name)
+{
+ int err = lookup_one_common(idmap, name, parent);
+
+ if (err)
+ return ERR_PTR(err);
+ return start_dirop(parent, name, 0);
+}
+EXPORT_SYMBOL(start_removing);
+
#ifdef CONFIG_UNIX98_PTYS
int path_pts(struct path *path)
{
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index c247a7c3291c..3eefaa2202e3 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -324,20 +324,12 @@ nfsd4_unlink_clid_dir(char *name, struct nfsd_net *nn)
dprintk("NFSD: nfsd4_unlink_clid_dir. name %s\n", name);
dir = nn->rec_file->f_path.dentry;
- inode_lock_nested(d_inode(dir), I_MUTEX_PARENT);
- dentry = lookup_one(&nop_mnt_idmap, &QSTR(name), dir);
- if (IS_ERR(dentry)) {
- status = PTR_ERR(dentry);
- goto out_unlock;
- }
- status = -ENOENT;
- if (d_really_is_negative(dentry))
- goto out;
+ dentry = start_removing(&nop_mnt_idmap, dir, &QSTR(name));
+ if (IS_ERR(dentry))
+ return PTR_ERR(dentry);
+
status = vfs_rmdir(&nop_mnt_idmap, d_inode(dir), dentry);
-out:
- dput(dentry);
-out_unlock:
- inode_unlock(d_inode(dir));
+ end_removing(dentry);
return status;
}
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 24e501abad0e..6291c371caa7 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -2044,7 +2044,7 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
{
struct dentry *dentry, *rdentry;
struct inode *dirp;
- struct inode *rinode;
+ struct inode *rinode = NULL;
__be32 err;
int host_err;
@@ -2063,24 +2063,21 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
dentry = fhp->fh_dentry;
dirp = d_inode(dentry);
- inode_lock_nested(dirp, I_MUTEX_PARENT);
- rdentry = lookup_one(&nop_mnt_idmap, &QSTR_LEN(fname, flen), dentry);
+ rdentry = start_removing(&nop_mnt_idmap, dentry, &QSTR_LEN(fname, flen));
+
host_err = PTR_ERR(rdentry);
if (IS_ERR(rdentry))
- goto out_unlock;
+ goto out_drop_write;
- if (d_really_is_negative(rdentry)) {
- dput(rdentry);
- host_err = -ENOENT;
- goto out_unlock;
- }
- rinode = d_inode(rdentry);
err = fh_fill_pre_attrs(fhp);
if (err != nfs_ok)
goto out_unlock;
+ rinode = d_inode(rdentry);
+ /* Prevent truncation until after locks dropped */
ihold(rinode);
+
if (!type)
type = d_inode(rdentry)->i_mode & S_IFMT;
@@ -2102,10 +2099,10 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
}
fh_fill_post_attrs(fhp);
- inode_unlock(dirp);
- if (!host_err)
+out_unlock:
+ end_removing(rdentry);
+ if (!err && !host_err)
host_err = commit_metadata(fhp);
- dput(rdentry);
iput(rinode); /* truncate the inode here */
out_drop_write:
@@ -2123,9 +2120,6 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
}
out:
return err != nfs_ok ? err : nfserrno(host_err);
-out_unlock:
- inode_unlock(dirp);
- goto out_drop_write;
}
/*
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index b9160fefbd00..20682afdbd20 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -866,17 +866,17 @@ static int ovl_remove_upper(struct dentry *dentry, bool is_dir,
goto out;
}
- inode_lock_nested(dir, I_MUTEX_PARENT);
- upper = ovl_lookup_upper(ofs, dentry->d_name.name, upperdir,
- dentry->d_name.len);
+ upper = ovl_start_removing_upper(ofs, upperdir,
+ &QSTR_LEN(dentry->d_name.name,
+ dentry->d_name.len));
err = PTR_ERR(upper);
if (IS_ERR(upper))
- goto out_unlock;
+ goto out_dput;
err = -ESTALE;
if ((opaquedir && upper != opaquedir) ||
(!opaquedir && !ovl_matches_upper(dentry, upper)))
- goto out_dput_upper;
+ goto out_unlock;
if (is_dir)
err = ovl_do_rmdir(ofs, dir, upper);
@@ -892,10 +892,9 @@ static int ovl_remove_upper(struct dentry *dentry, bool is_dir,
*/
if (!err)
d_drop(dentry);
-out_dput_upper:
- dput(upper);
out_unlock:
- inode_unlock(dir);
+ end_removing(upper);
+out_dput:
dput(opaquedir);
out:
return err;
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index beeba96cfcb2..49ad65f829dc 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -423,6 +423,14 @@ static inline struct dentry *ovl_start_creating_upper(struct ovl_fs *ofs,
parent, name);
}
+static inline struct dentry *ovl_start_removing_upper(struct ovl_fs *ofs,
+ struct dentry *parent,
+ struct qstr *name)
+{
+ return start_removing(ovl_upper_mnt_idmap(ofs),
+ parent, name);
+}
+
static inline bool ovl_open_flags_need_copy_up(int flags)
{
if (!flags)
diff --git a/include/linux/namei.h b/include/linux/namei.h
index 37b72f4a64f0..6d1069f93ebf 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -91,6 +91,8 @@ struct dentry *lookup_one_positive_killable(struct mnt_idmap *idmap,
struct dentry *start_creating(struct mnt_idmap *idmap, struct dentry *parent,
struct qstr *name);
+struct dentry *start_removing(struct mnt_idmap *idmap, struct dentry *parent,
+ struct qstr *name);
/**
* end_creating - finish action started with start_creating
@@ -122,6 +124,22 @@ static inline void end_creating(struct dentry *child, struct dentry *parent)
end_dirop(child);
}
+/**
+ * end_removing - finish action started with start_removing
+ * @child: dentry returned by start_removing()
+ * @parent: dentry given to start_removing()
+ *
+ * Unlock and release the child.
+ *
+ * This is identical to end_dirop(). It can be passed the result of
+ * start_removing() whether that was successful or not, but it is not needed
+ * if start_removing() failed.
+ */
+static inline void end_removing(struct dentry *child)
+{
+ end_dirop(child);
+}
+
extern int follow_down_one(struct path *);
extern int follow_down(struct path *path, unsigned int flags);
extern int follow_up(struct path *);
--
2.50.0.107.gf914562f5916.dirty
On Thu, 2025-11-06 at 11:50 +1100, NeilBrown wrote:
> From: NeilBrown <neil@brown.name>
>
> start_removing() is similar to start_creating() but will only return a
> positive dentry with the expectation that it will be removed. This is
> used by nfsd, cachefiles, and overlayfs. They are changed to also use
> end_removing() to terminate the action begun by start_removing(). This
> is a simple alias for end_dirop().
>
> Apart from changes to the error paths, as we no longer need to unlock on
> a lookup error, an effect on callers is that they don't need to test if
> the found dentry is positive or negative - they can be sure it is
> positive.
>
> Reviewed-by: Amir Goldstein <amir73il@gmail.com>
> Signed-off-by: NeilBrown <neil@brown.name>
> ---
> fs/cachefiles/namei.c | 32 ++++++++++++++------------------
> fs/namei.c | 27 +++++++++++++++++++++++++++
> fs/nfsd/nfs4recover.c | 18 +++++-------------
> fs/nfsd/vfs.c | 26 ++++++++++----------------
> fs/overlayfs/dir.c | 15 +++++++--------
> fs/overlayfs/overlayfs.h | 8 ++++++++
> include/linux/namei.h | 18 ++++++++++++++++++
> 7 files changed, 89 insertions(+), 55 deletions(-)
>
> diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
> index 0a136eb434da..c7f0c6ab9b88 100644
> --- a/fs/cachefiles/namei.c
> +++ b/fs/cachefiles/namei.c
> @@ -260,6 +260,7 @@ static int cachefiles_unlink(struct cachefiles_cache *cache,
> * - File backed objects are unlinked
> * - Directory backed objects are stuffed into the graveyard for userspace to
> * delete
> + * On entry dir must be locked. It will be unlocked on exit.
> */
> int cachefiles_bury_object(struct cachefiles_cache *cache,
> struct cachefiles_object *object,
> @@ -274,28 +275,30 @@ int cachefiles_bury_object(struct cachefiles_cache *cache,
>
> _enter(",'%pd','%pd'", dir, rep);
>
> + /* end_removing() will dput() @rep but we need to keep
> + * a ref, so take one now. This also stops the dentry
> + * being negated when unlinked which we need.
> + */
> + dget(rep);
> +
> if (rep->d_parent != dir) {
> - inode_unlock(d_inode(dir));
> + end_removing(rep);
> _leave(" = -ESTALE");
> return -ESTALE;
> }
>
> /* non-directories can just be unlinked */
> if (!d_is_dir(rep)) {
> - dget(rep); /* Stop the dentry being negated if it's only pinned
> - * by a file struct.
> - */
> ret = cachefiles_unlink(cache, object, dir, rep, why);
> - dput(rep);
> + end_removing(rep);
>
> - inode_unlock(d_inode(dir));
> _leave(" = %d", ret);
> return ret;
> }
>
> /* directories have to be moved to the graveyard */
> _debug("move stale object to graveyard");
> - inode_unlock(d_inode(dir));
> + end_removing(rep);
>
> try_again:
> /* first step is to make up a grave dentry in the graveyard */
> @@ -749,26 +752,20 @@ static struct dentry *cachefiles_lookup_for_cull(struct cachefiles_cache *cache,
> struct dentry *victim;
> int ret = -ENOENT;
>
> - inode_lock_nested(d_inode(dir), I_MUTEX_PARENT);
> + victim = start_removing(&nop_mnt_idmap, dir, &QSTR(filename));
>
> - victim = lookup_one(&nop_mnt_idmap, &QSTR(filename), dir);
> if (IS_ERR(victim))
> goto lookup_error;
> - if (d_is_negative(victim))
> - goto lookup_put;
> if (d_inode(victim)->i_flags & S_KERNEL_FILE)
> goto lookup_busy;
> return victim;
>
> lookup_busy:
> ret = -EBUSY;
> -lookup_put:
> - inode_unlock(d_inode(dir));
> - dput(victim);
> + end_removing(victim);
> return ERR_PTR(ret);
>
> lookup_error:
> - inode_unlock(d_inode(dir));
> ret = PTR_ERR(victim);
> if (ret == -ENOENT)
> return ERR_PTR(-ESTALE); /* Probably got retired by the netfs */
> @@ -816,18 +813,17 @@ int cachefiles_cull(struct cachefiles_cache *cache, struct dentry *dir,
>
> ret = cachefiles_bury_object(cache, NULL, dir, victim,
> FSCACHE_OBJECT_WAS_CULLED);
> + dput(victim);
> if (ret < 0)
> goto error;
>
> fscache_count_culled();
> - dput(victim);
> _leave(" = 0");
> return 0;
>
> error_unlock:
> - inode_unlock(d_inode(dir));
> + end_removing(victim);
> error:
> - dput(victim);
> if (ret == -ENOENT)
> return -ESTALE; /* Probably got retired by the netfs */
>
> diff --git a/fs/namei.c b/fs/namei.c
> index 8873ad0f05b0..38dda29552f6 100644
> --- a/fs/namei.c
> +++ b/fs/namei.c
> @@ -3248,6 +3248,33 @@ struct dentry *start_creating(struct mnt_idmap *idmap, struct dentry *parent,
> }
> EXPORT_SYMBOL(start_creating);
>
> +/**
> + * start_removing - prepare to remove a given name with permission checking
> + * @idmap: idmap of the mount
> + * @parent: directory in which to find the name
> + * @name: the name to be removed
> + *
> + * Locks are taken and a lookup in performed prior to removing
> + * an object from a directory. Permission checking (MAY_EXEC) is performed
> + * against @idmap.
> + *
> + * If the name doesn't exist, an error is returned.
> + *
> + * end_removing() should be called when removal is complete, or aborted.
> + *
> + * Returns: a positive dentry, or an error.
> + */
> +struct dentry *start_removing(struct mnt_idmap *idmap, struct dentry *parent,
> + struct qstr *name)
> +{
> + int err = lookup_one_common(idmap, name, parent);
> +
> + if (err)
> + return ERR_PTR(err);
> + return start_dirop(parent, name, 0);
> +}
> +EXPORT_SYMBOL(start_removing);
> +
> #ifdef CONFIG_UNIX98_PTYS
> int path_pts(struct path *path)
> {
> diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
> index c247a7c3291c..3eefaa2202e3 100644
> --- a/fs/nfsd/nfs4recover.c
> +++ b/fs/nfsd/nfs4recover.c
> @@ -324,20 +324,12 @@ nfsd4_unlink_clid_dir(char *name, struct nfsd_net *nn)
> dprintk("NFSD: nfsd4_unlink_clid_dir. name %s\n", name);
>
> dir = nn->rec_file->f_path.dentry;
> - inode_lock_nested(d_inode(dir), I_MUTEX_PARENT);
> - dentry = lookup_one(&nop_mnt_idmap, &QSTR(name), dir);
> - if (IS_ERR(dentry)) {
> - status = PTR_ERR(dentry);
> - goto out_unlock;
> - }
> - status = -ENOENT;
> - if (d_really_is_negative(dentry))
> - goto out;
> + dentry = start_removing(&nop_mnt_idmap, dir, &QSTR(name));
> + if (IS_ERR(dentry))
> + return PTR_ERR(dentry);
> +
> status = vfs_rmdir(&nop_mnt_idmap, d_inode(dir), dentry);
> -out:
> - dput(dentry);
> -out_unlock:
> - inode_unlock(d_inode(dir));
> + end_removing(dentry);
> return status;
> }
>
> diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
> index 24e501abad0e..6291c371caa7 100644
> --- a/fs/nfsd/vfs.c
> +++ b/fs/nfsd/vfs.c
> @@ -2044,7 +2044,7 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
> {
> struct dentry *dentry, *rdentry;
> struct inode *dirp;
> - struct inode *rinode;
> + struct inode *rinode = NULL;
> __be32 err;
> int host_err;
>
> @@ -2063,24 +2063,21 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
>
> dentry = fhp->fh_dentry;
> dirp = d_inode(dentry);
> - inode_lock_nested(dirp, I_MUTEX_PARENT);
>
> - rdentry = lookup_one(&nop_mnt_idmap, &QSTR_LEN(fname, flen), dentry);
> + rdentry = start_removing(&nop_mnt_idmap, dentry, &QSTR_LEN(fname, flen));
> +
> host_err = PTR_ERR(rdentry);
> if (IS_ERR(rdentry))
> - goto out_unlock;
> + goto out_drop_write;
>
> - if (d_really_is_negative(rdentry)) {
> - dput(rdentry);
> - host_err = -ENOENT;
> - goto out_unlock;
> - }
> - rinode = d_inode(rdentry);
> err = fh_fill_pre_attrs(fhp);
> if (err != nfs_ok)
> goto out_unlock;
>
> + rinode = d_inode(rdentry);
> + /* Prevent truncation until after locks dropped */
> ihold(rinode);
> +
> if (!type)
> type = d_inode(rdentry)->i_mode & S_IFMT;
>
> @@ -2102,10 +2099,10 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
> }
> fh_fill_post_attrs(fhp);
>
> - inode_unlock(dirp);
> - if (!host_err)
> +out_unlock:
> + end_removing(rdentry);
> + if (!err && !host_err)
> host_err = commit_metadata(fhp);
> - dput(rdentry);
> iput(rinode); /* truncate the inode here */
>
> out_drop_write:
> @@ -2123,9 +2120,6 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
> }
> out:
> return err != nfs_ok ? err : nfserrno(host_err);
> -out_unlock:
> - inode_unlock(dirp);
> - goto out_drop_write;
> }
>
> /*
> diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
> index b9160fefbd00..20682afdbd20 100644
> --- a/fs/overlayfs/dir.c
> +++ b/fs/overlayfs/dir.c
> @@ -866,17 +866,17 @@ static int ovl_remove_upper(struct dentry *dentry, bool is_dir,
> goto out;
> }
>
> - inode_lock_nested(dir, I_MUTEX_PARENT);
> - upper = ovl_lookup_upper(ofs, dentry->d_name.name, upperdir,
> - dentry->d_name.len);
> + upper = ovl_start_removing_upper(ofs, upperdir,
> + &QSTR_LEN(dentry->d_name.name,
> + dentry->d_name.len));
> err = PTR_ERR(upper);
> if (IS_ERR(upper))
> - goto out_unlock;
> + goto out_dput;
>
> err = -ESTALE;
> if ((opaquedir && upper != opaquedir) ||
> (!opaquedir && !ovl_matches_upper(dentry, upper)))
> - goto out_dput_upper;
> + goto out_unlock;
>
> if (is_dir)
> err = ovl_do_rmdir(ofs, dir, upper);
> @@ -892,10 +892,9 @@ static int ovl_remove_upper(struct dentry *dentry, bool is_dir,
> */
> if (!err)
> d_drop(dentry);
> -out_dput_upper:
> - dput(upper);
> out_unlock:
> - inode_unlock(dir);
> + end_removing(upper);
> +out_dput:
> dput(opaquedir);
> out:
> return err;
> diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
> index beeba96cfcb2..49ad65f829dc 100644
> --- a/fs/overlayfs/overlayfs.h
> +++ b/fs/overlayfs/overlayfs.h
> @@ -423,6 +423,14 @@ static inline struct dentry *ovl_start_creating_upper(struct ovl_fs *ofs,
> parent, name);
> }
>
> +static inline struct dentry *ovl_start_removing_upper(struct ovl_fs *ofs,
> + struct dentry *parent,
> + struct qstr *name)
> +{
> + return start_removing(ovl_upper_mnt_idmap(ofs),
> + parent, name);
> +}
> +
> static inline bool ovl_open_flags_need_copy_up(int flags)
> {
> if (!flags)
> diff --git a/include/linux/namei.h b/include/linux/namei.h
> index 37b72f4a64f0..6d1069f93ebf 100644
> --- a/include/linux/namei.h
> +++ b/include/linux/namei.h
> @@ -91,6 +91,8 @@ struct dentry *lookup_one_positive_killable(struct mnt_idmap *idmap,
>
> struct dentry *start_creating(struct mnt_idmap *idmap, struct dentry *parent,
> struct qstr *name);
> +struct dentry *start_removing(struct mnt_idmap *idmap, struct dentry *parent,
> + struct qstr *name);
>
> /**
> * end_creating - finish action started with start_creating
> @@ -122,6 +124,22 @@ static inline void end_creating(struct dentry *child, struct dentry *parent)
> end_dirop(child);
> }
>
> +/**
> + * end_removing - finish action started with start_removing
> + * @child: dentry returned by start_removing()
> + * @parent: dentry given to start_removing()
> + *
> + * Unlock and release the child.
> + *
> + * This is identical to end_dirop(). It can be passed the result of
> + * start_removing() whether that was successful or not, but it not needed
> + * if start_removing() failed.
> + */
> +static inline void end_removing(struct dentry *child)
> +{
> + end_dirop(child);
> +}
> +
> extern int follow_down_one(struct path *);
> extern int follow_down(struct path *path, unsigned int flags);
> extern int follow_up(struct path *);
This looks fine to me (particularly the knfsd parts), but doesn't ksmbd
need to be similarly converted?
Reviewed-by: Jeff Layton <jlayton@kernel.org>
On Thu, 13 Nov 2025, Jeff Layton wrote:
> On Thu, 2025-11-06 at 11:50 +1100, NeilBrown wrote:
> > From: NeilBrown <neil@brown.name>
> >
> > start_removing() is similar to start_creating() but will only return a
> > positive dentry with the expectation that it will be removed. This is
> > used by nfsd, cachefiles, and overlayfs. They are changed to also use
> > end_removing() to terminate the action begun by start_removing(). This
> > is a simple alias for end_dirop().
> >
> > Apart from changes to the error paths, as we no longer need to unlock on
> > a lookup error, an effect on callers is that they don't need to test if
> > the found dentry is positive or negative - they can be sure it is
> > positive.
> >
> > Reviewed-by: Amir Goldstein <amir73il@gmail.com>
> > Signed-off-by: NeilBrown <neil@brown.name>
> > ---
> > fs/cachefiles/namei.c | 32 ++++++++++++++------------------
> > fs/namei.c | 27 +++++++++++++++++++++++++++
> > fs/nfsd/nfs4recover.c | 18 +++++-------------
> > fs/nfsd/vfs.c | 26 ++++++++++----------------
> > fs/overlayfs/dir.c | 15 +++++++--------
> > fs/overlayfs/overlayfs.h | 8 ++++++++
> > include/linux/namei.h | 18 ++++++++++++++++++
> > 7 files changed, 89 insertions(+), 55 deletions(-)
> >
> > diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
> > index 0a136eb434da..c7f0c6ab9b88 100644
> > --- a/fs/cachefiles/namei.c
> > +++ b/fs/cachefiles/namei.c
> > @@ -260,6 +260,7 @@ static int cachefiles_unlink(struct cachefiles_cache *cache,
> > * - File backed objects are unlinked
> > * - Directory backed objects are stuffed into the graveyard for userspace to
> > * delete
> > + * On entry dir must be locked. It will be unlocked on exit.
> > */
> > int cachefiles_bury_object(struct cachefiles_cache *cache,
> > struct cachefiles_object *object,
> > @@ -274,28 +275,30 @@ int cachefiles_bury_object(struct cachefiles_cache *cache,
> >
> > _enter(",'%pd','%pd'", dir, rep);
> >
> > + /* end_removing() will dput() @rep but we need to keep
> > + * a ref, so take one now. This also stops the dentry
> > + * being negated when unlinked which we need.
> > + */
> > + dget(rep);
> > +
> > if (rep->d_parent != dir) {
> > - inode_unlock(d_inode(dir));
> > + end_removing(rep);
> > _leave(" = -ESTALE");
> > return -ESTALE;
> > }
> >
> > /* non-directories can just be unlinked */
> > if (!d_is_dir(rep)) {
> > - dget(rep); /* Stop the dentry being negated if it's only pinned
> > - * by a file struct.
> > - */
> > ret = cachefiles_unlink(cache, object, dir, rep, why);
> > - dput(rep);
> > + end_removing(rep);
> >
> > - inode_unlock(d_inode(dir));
> > _leave(" = %d", ret);
> > return ret;
> > }
> >
> > /* directories have to be moved to the graveyard */
> > _debug("move stale object to graveyard");
> > - inode_unlock(d_inode(dir));
> > + end_removing(rep);
> >
> > try_again:
> > /* first step is to make up a grave dentry in the graveyard */
> > @@ -749,26 +752,20 @@ static struct dentry *cachefiles_lookup_for_cull(struct cachefiles_cache *cache,
> > struct dentry *victim;
> > int ret = -ENOENT;
> >
> > - inode_lock_nested(d_inode(dir), I_MUTEX_PARENT);
> > + victim = start_removing(&nop_mnt_idmap, dir, &QSTR(filename));
> >
> > - victim = lookup_one(&nop_mnt_idmap, &QSTR(filename), dir);
> > if (IS_ERR(victim))
> > goto lookup_error;
> > - if (d_is_negative(victim))
> > - goto lookup_put;
> > if (d_inode(victim)->i_flags & S_KERNEL_FILE)
> > goto lookup_busy;
> > return victim;
> >
> > lookup_busy:
> > ret = -EBUSY;
> > -lookup_put:
> > - inode_unlock(d_inode(dir));
> > - dput(victim);
> > + end_removing(victim);
> > return ERR_PTR(ret);
> >
> > lookup_error:
> > - inode_unlock(d_inode(dir));
> > ret = PTR_ERR(victim);
> > if (ret == -ENOENT)
> > return ERR_PTR(-ESTALE); /* Probably got retired by the netfs */
> > @@ -816,18 +813,17 @@ int cachefiles_cull(struct cachefiles_cache *cache, struct dentry *dir,
> >
> > ret = cachefiles_bury_object(cache, NULL, dir, victim,
> > FSCACHE_OBJECT_WAS_CULLED);
> > + dput(victim);
> > if (ret < 0)
> > goto error;
> >
> > fscache_count_culled();
> > - dput(victim);
> > _leave(" = 0");
> > return 0;
> >
> > error_unlock:
> > - inode_unlock(d_inode(dir));
> > + end_removing(victim);
> > error:
> > - dput(victim);
> > if (ret == -ENOENT)
> > return -ESTALE; /* Probably got retired by the netfs */
> >
> > diff --git a/fs/namei.c b/fs/namei.c
> > index 8873ad0f05b0..38dda29552f6 100644
> > --- a/fs/namei.c
> > +++ b/fs/namei.c
> > @@ -3248,6 +3248,33 @@ struct dentry *start_creating(struct mnt_idmap *idmap, struct dentry *parent,
> > }
> > EXPORT_SYMBOL(start_creating);
> >
> > +/**
> > + * start_removing - prepare to remove a given name with permission checking
> > + * @idmap: idmap of the mount
> > + * @parent: directory in which to find the name
> > + * @name: the name to be removed
> > + *
> > + * Locks are taken and a lookup in performed prior to removing
> > + * an object from a directory. Permission checking (MAY_EXEC) is performed
> > + * against @idmap.
> > + *
> > + * If the name doesn't exist, an error is returned.
> > + *
> > + * end_removing() should be called when removal is complete, or aborted.
> > + *
> > + * Returns: a positive dentry, or an error.
> > + */
> > +struct dentry *start_removing(struct mnt_idmap *idmap, struct dentry *parent,
> > + struct qstr *name)
> > +{
> > + int err = lookup_one_common(idmap, name, parent);
> > +
> > + if (err)
> > + return ERR_PTR(err);
> > + return start_dirop(parent, name, 0);
> > +}
> > +EXPORT_SYMBOL(start_removing);
> > +
> > #ifdef CONFIG_UNIX98_PTYS
> > int path_pts(struct path *path)
> > {
> > diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
> > index c247a7c3291c..3eefaa2202e3 100644
> > --- a/fs/nfsd/nfs4recover.c
> > +++ b/fs/nfsd/nfs4recover.c
> > @@ -324,20 +324,12 @@ nfsd4_unlink_clid_dir(char *name, struct nfsd_net *nn)
> > dprintk("NFSD: nfsd4_unlink_clid_dir. name %s\n", name);
> >
> > dir = nn->rec_file->f_path.dentry;
> > - inode_lock_nested(d_inode(dir), I_MUTEX_PARENT);
> > - dentry = lookup_one(&nop_mnt_idmap, &QSTR(name), dir);
> > - if (IS_ERR(dentry)) {
> > - status = PTR_ERR(dentry);
> > - goto out_unlock;
> > - }
> > - status = -ENOENT;
> > - if (d_really_is_negative(dentry))
> > - goto out;
> > + dentry = start_removing(&nop_mnt_idmap, dir, &QSTR(name));
> > + if (IS_ERR(dentry))
> > + return PTR_ERR(dentry);
> > +
> > status = vfs_rmdir(&nop_mnt_idmap, d_inode(dir), dentry);
> > -out:
> > - dput(dentry);
> > -out_unlock:
> > - inode_unlock(d_inode(dir));
> > + end_removing(dentry);
> > return status;
> > }
> >
> > diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
> > index 24e501abad0e..6291c371caa7 100644
> > --- a/fs/nfsd/vfs.c
> > +++ b/fs/nfsd/vfs.c
> > @@ -2044,7 +2044,7 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
> > {
> > struct dentry *dentry, *rdentry;
> > struct inode *dirp;
> > - struct inode *rinode;
> > + struct inode *rinode = NULL;
> > __be32 err;
> > int host_err;
> >
> > @@ -2063,24 +2063,21 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
> >
> > dentry = fhp->fh_dentry;
> > dirp = d_inode(dentry);
> > - inode_lock_nested(dirp, I_MUTEX_PARENT);
> >
> > - rdentry = lookup_one(&nop_mnt_idmap, &QSTR_LEN(fname, flen), dentry);
> > + rdentry = start_removing(&nop_mnt_idmap, dentry, &QSTR_LEN(fname, flen));
> > +
> > host_err = PTR_ERR(rdentry);
> > if (IS_ERR(rdentry))
> > - goto out_unlock;
> > + goto out_drop_write;
> >
> > - if (d_really_is_negative(rdentry)) {
> > - dput(rdentry);
> > - host_err = -ENOENT;
> > - goto out_unlock;
> > - }
> > - rinode = d_inode(rdentry);
> > err = fh_fill_pre_attrs(fhp);
> > if (err != nfs_ok)
> > goto out_unlock;
> >
> > + rinode = d_inode(rdentry);
> > + /* Prevent truncation until after locks dropped */
> > ihold(rinode);
> > +
> > if (!type)
> > type = d_inode(rdentry)->i_mode & S_IFMT;
> >
> > @@ -2102,10 +2099,10 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
> > }
> > fh_fill_post_attrs(fhp);
> >
> > - inode_unlock(dirp);
> > - if (!host_err)
> > +out_unlock:
> > + end_removing(rdentry);
> > + if (!err && !host_err)
> > host_err = commit_metadata(fhp);
> > - dput(rdentry);
> > iput(rinode); /* truncate the inode here */
> >
> > out_drop_write:
> > @@ -2123,9 +2120,6 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
> > }
> > out:
> > return err != nfs_ok ? err : nfserrno(host_err);
> > -out_unlock:
> > - inode_unlock(dirp);
> > - goto out_drop_write;
> > }
> >
> > /*
> > diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
> > index b9160fefbd00..20682afdbd20 100644
> > --- a/fs/overlayfs/dir.c
> > +++ b/fs/overlayfs/dir.c
> > @@ -866,17 +866,17 @@ static int ovl_remove_upper(struct dentry *dentry, bool is_dir,
> > goto out;
> > }
> >
> > - inode_lock_nested(dir, I_MUTEX_PARENT);
> > - upper = ovl_lookup_upper(ofs, dentry->d_name.name, upperdir,
> > - dentry->d_name.len);
> > + upper = ovl_start_removing_upper(ofs, upperdir,
> > + &QSTR_LEN(dentry->d_name.name,
> > + dentry->d_name.len));
> > err = PTR_ERR(upper);
> > if (IS_ERR(upper))
> > - goto out_unlock;
> > + goto out_dput;
> >
> > err = -ESTALE;
> > if ((opaquedir && upper != opaquedir) ||
> > (!opaquedir && !ovl_matches_upper(dentry, upper)))
> > - goto out_dput_upper;
> > + goto out_unlock;
> >
> > if (is_dir)
> > err = ovl_do_rmdir(ofs, dir, upper);
> > @@ -892,10 +892,9 @@ static int ovl_remove_upper(struct dentry *dentry, bool is_dir,
> > */
> > if (!err)
> > d_drop(dentry);
> > -out_dput_upper:
> > - dput(upper);
> > out_unlock:
> > - inode_unlock(dir);
> > + end_removing(upper);
> > +out_dput:
> > dput(opaquedir);
> > out:
> > return err;
> > diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
> > index beeba96cfcb2..49ad65f829dc 100644
> > --- a/fs/overlayfs/overlayfs.h
> > +++ b/fs/overlayfs/overlayfs.h
> > @@ -423,6 +423,14 @@ static inline struct dentry *ovl_start_creating_upper(struct ovl_fs *ofs,
> > parent, name);
> > }
> >
> > +static inline struct dentry *ovl_start_removing_upper(struct ovl_fs *ofs,
> > + struct dentry *parent,
> > + struct qstr *name)
> > +{
> > + return start_removing(ovl_upper_mnt_idmap(ofs),
> > + parent, name);
> > +}
> > +
> > static inline bool ovl_open_flags_need_copy_up(int flags)
> > {
> > if (!flags)
> > diff --git a/include/linux/namei.h b/include/linux/namei.h
> > index 37b72f4a64f0..6d1069f93ebf 100644
> > --- a/include/linux/namei.h
> > +++ b/include/linux/namei.h
> > @@ -91,6 +91,8 @@ struct dentry *lookup_one_positive_killable(struct mnt_idmap *idmap,
> >
> > struct dentry *start_creating(struct mnt_idmap *idmap, struct dentry *parent,
> > struct qstr *name);
> > +struct dentry *start_removing(struct mnt_idmap *idmap, struct dentry *parent,
> > + struct qstr *name);
> >
> > /**
> > * end_creating - finish action started with start_creating
> > @@ -122,6 +124,22 @@ static inline void end_creating(struct dentry *child, struct dentry *parent)
> > end_dirop(child);
> > }
> >
> > +/**
> > + * end_removing - finish action started with start_removing
> > + * @child: dentry returned by start_removing()
> > + * @parent: dentry given to start_removing()
> > + *
> > + * Unlock and release the child.
> > + *
> > + * This is identical to end_dirop(). It can be passed the result of
> > + * start_removing() whether that was successful or not, but it not needed
> > + * if start_removing() failed.
> > + */
> > +static inline void end_removing(struct dentry *child)
> > +{
> > + end_dirop(child);
> > +}
> > +
> > extern int follow_down_one(struct path *);
> > extern int follow_down(struct path *path, unsigned int flags);
> > extern int follow_up(struct path *);
>
> This looks fine to me (particularly the knfsd parts), but doesn't ksmbd
> need to be similarly converted?
ksmbd removes names in two places.
ksmbd_vfs_unlink() is changed to use start_removing_dentry() in the next
patch as you noticed when reviewing it.
ksmbd_vfs_remove_file() is called after ksmbd_vfs_kern_path_locked()
which is only used there. It should possibly be renamed to
ksmbd_vfs_kern_path_start_removing() and then the "do_lock" flag to
__ksmbd_vfs_kern_path() and ksmbd_vfs_path_lookup() should become
"for_remove" and then the lock/lookup in the "if (do_lock)" branch
(which will become "if (for_remove)") should use start_removing.
I'll insert a patch to do this.
>
> Reviewed-by: Jeff Layton <jlayton@kernel.org>
Thanks,
NeilBrown
© 2016 - 2025 Red Hat, Inc.