[PATCH v5 08/14] VFS: add start_creating_killable() and start_removing_killable()

NeilBrown posted 14 patches 1 month, 1 week ago
There is a newer version of this series
[PATCH v5 08/14] VFS: add start_creating_killable() and start_removing_killable()
Posted by NeilBrown 1 month, 1 week ago
From: NeilBrown <neil@brown.name>

These are similar to start_creating() and start_removing(), but allow a
fatal signal to abort waiting for the lock.

They are used in btrfs for subvol creation and removal.

btrfs_may_create() no longer needs IS_DEADDIR() as
start_creating_killable() includes that check.

Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: NeilBrown <neil@brown.name>
---
 fs/btrfs/ioctl.c      | 41 +++++++---------------
 fs/namei.c            | 80 +++++++++++++++++++++++++++++++++++++++++--
 include/linux/namei.h |  6 ++++
 3 files changed, 95 insertions(+), 32 deletions(-)

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 8cb7d5a462ef..d0c3bb0423bb 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -904,14 +904,9 @@ static noinline int btrfs_mksubvol(struct dentry *parent,
 	struct fscrypt_str name_str = FSTR_INIT((char *)qname->name, qname->len);
 	int ret;
 
-	ret = down_write_killable_nested(&dir->i_rwsem, I_MUTEX_PARENT);
-	if (ret == -EINTR)
-		return ret;
-
-	dentry = lookup_one(idmap, qname, parent);
-	ret = PTR_ERR(dentry);
+	dentry = start_creating_killable(idmap, parent, qname);
 	if (IS_ERR(dentry))
-		goto out_unlock;
+		return PTR_ERR(dentry);
 
 	ret = btrfs_may_create(idmap, dir, dentry);
 	if (ret)
@@ -940,9 +935,7 @@ static noinline int btrfs_mksubvol(struct dentry *parent,
 out_up_read:
 	up_read(&fs_info->subvol_sem);
 out_dput:
-	dput(dentry);
-out_unlock:
-	btrfs_inode_unlock(BTRFS_I(dir), 0);
+	end_creating(dentry, parent);
 	return ret;
 }
 
@@ -2417,18 +2410,10 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
 		goto free_subvol_name;
 	}
 
-	ret = down_write_killable_nested(&dir->i_rwsem, I_MUTEX_PARENT);
-	if (ret == -EINTR)
-		goto free_subvol_name;
-	dentry = lookup_one(idmap, &QSTR(subvol_name), parent);
+	dentry = start_removing_killable(idmap, parent, &QSTR(subvol_name));
 	if (IS_ERR(dentry)) {
 		ret = PTR_ERR(dentry);
-		goto out_unlock_dir;
-	}
-
-	if (d_really_is_negative(dentry)) {
-		ret = -ENOENT;
-		goto out_dput;
+		goto out_end_removing;
 	}
 
 	inode = d_inode(dentry);
@@ -2449,7 +2434,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
 		 */
 		ret = -EPERM;
 		if (!btrfs_test_opt(fs_info, USER_SUBVOL_RM_ALLOWED))
-			goto out_dput;
+			goto out_end_removing;
 
 		/*
 		 * Do not allow deletion if the parent dir is the same
@@ -2460,21 +2445,21 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
 		 */
 		ret = -EINVAL;
 		if (root == dest)
-			goto out_dput;
+			goto out_end_removing;
 
 		ret = inode_permission(idmap, inode, MAY_WRITE | MAY_EXEC);
 		if (ret)
-			goto out_dput;
+			goto out_end_removing;
 	}
 
 	/* check if subvolume may be deleted by a user */
 	ret = btrfs_may_delete(idmap, dir, dentry, 1);
 	if (ret)
-		goto out_dput;
+		goto out_end_removing;
 
 	if (btrfs_ino(BTRFS_I(inode)) != BTRFS_FIRST_FREE_OBJECTID) {
 		ret = -EINVAL;
-		goto out_dput;
+		goto out_end_removing;
 	}
 
 	btrfs_inode_lock(BTRFS_I(inode), 0);
@@ -2483,10 +2468,8 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
 	if (!ret)
 		d_delete_notify(dir, dentry);
 
-out_dput:
-	dput(dentry);
-out_unlock_dir:
-	btrfs_inode_unlock(BTRFS_I(dir), 0);
+out_end_removing:
+	end_removing(dentry);
 free_subvol_name:
 	kfree(subvol_name_ptr);
 free_parent:
diff --git a/fs/namei.c b/fs/namei.c
index 729b42fb143b..e70d056b9543 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2778,19 +2778,33 @@ static int filename_parentat(int dfd, struct filename *name,
  * Returns: a locked dentry, or an error.
  *
  */
-struct dentry *start_dirop(struct dentry *parent, struct qstr *name,
-			   unsigned int lookup_flags)
+static struct dentry *__start_dirop(struct dentry *parent, struct qstr *name,
+				    unsigned int lookup_flags,
+				    unsigned int state)
 {
 	struct dentry *dentry;
 	struct inode *dir = d_inode(parent);
 
-	inode_lock_nested(dir, I_MUTEX_PARENT);
+	if (state == TASK_KILLABLE) {
+		int ret = down_write_killable_nested(&dir->i_rwsem,
+						     I_MUTEX_PARENT);
+		if (ret)
+			return ERR_PTR(ret);
+	} else {
+		inode_lock_nested(dir, I_MUTEX_PARENT);
+	}
 	dentry = lookup_one_qstr_excl(name, parent, lookup_flags);
 	if (IS_ERR(dentry))
 		inode_unlock(dir);
 	return dentry;
 }
 
+struct dentry *start_dirop(struct dentry *parent, struct qstr *name,
+			   unsigned int lookup_flags)
+{
+	return __start_dirop(parent, name, lookup_flags, TASK_NORMAL);
+}
+
 /**
  * end_dirop - signal completion of a dirop
  * @de: the dentry which was returned by start_dirop or similar.
@@ -3275,6 +3289,66 @@ struct dentry *start_removing(struct mnt_idmap *idmap, struct dentry *parent,
 }
 EXPORT_SYMBOL(start_removing);
 
+/**
+ * start_creating_killable - prepare to create a given name with permission checking
+ * @idmap:  idmap of the mount
+ * @parent: directory in which to prepare to create the name
+ * @name:   the name to be created
+ *
+ * Locks are taken and a lookup is performed prior to creating
+ * an object in a directory.  Permission checking (MAY_EXEC) is performed
+ * against @idmap.
+ *
+ * If the name already exists, a positive dentry is returned.
+ *
+ * If a fatal signal is received or was already pending, the function aborts
+ * with -EINTR.
+ *
+ * Returns: a negative or positive dentry, or an error.
+ */
+struct dentry *start_creating_killable(struct mnt_idmap *idmap,
+				       struct dentry *parent,
+				       struct qstr *name)
+{
+	int err = lookup_one_common(idmap, name, parent);
+
+	if (err)
+		return ERR_PTR(err);
+	return __start_dirop(parent, name, LOOKUP_CREATE, TASK_KILLABLE);
+}
+EXPORT_SYMBOL(start_creating_killable);
+
+/**
+ * start_removing_killable - prepare to remove a given name with permission checking
+ * @idmap:  idmap of the mount
+ * @parent: directory in which to find the name
+ * @name:   the name to be removed
+ *
+ * Locks are taken and a lookup is performed prior to removing
+ * an object from a directory.  Permission checking (MAY_EXEC) is performed
+ * against @idmap.
+ *
+ * If the name doesn't exist, an error is returned.
+ *
+ * end_removing() should be called when removal is complete, or aborted.
+ *
+ * If a fatal signal is received or was already pending, the function aborts
+ * with -EINTR.
+ *
+ * Returns: a positive dentry, or an error.
+ */
+struct dentry *start_removing_killable(struct mnt_idmap *idmap,
+				       struct dentry *parent,
+				       struct qstr *name)
+{
+	int err = lookup_one_common(idmap, name, parent);
+
+	if (err)
+		return ERR_PTR(err);
+	return __start_dirop(parent, name, 0, TASK_KILLABLE);
+}
+EXPORT_SYMBOL(start_removing_killable);
+
 /**
  * start_creating_noperm - prepare to create a given name without permission checking
  * @parent: directory in which to prepare to create the name
diff --git a/include/linux/namei.h b/include/linux/namei.h
index d089e4e8fdd0..196c66156a8a 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -93,6 +93,12 @@ struct dentry *start_creating(struct mnt_idmap *idmap, struct dentry *parent,
 			      struct qstr *name);
 struct dentry *start_removing(struct mnt_idmap *idmap, struct dentry *parent,
 			      struct qstr *name);
+struct dentry *start_creating_killable(struct mnt_idmap *idmap,
+				       struct dentry *parent,
+				       struct qstr *name);
+struct dentry *start_removing_killable(struct mnt_idmap *idmap,
+				       struct dentry *parent,
+				       struct qstr *name);
 struct dentry *start_creating_noperm(struct dentry *parent, struct qstr *name);
 struct dentry *start_removing_noperm(struct dentry *parent, struct qstr *name);
 struct dentry *start_removing_dentry(struct dentry *parent,
-- 
2.50.0.107.gf914562f5916.dirty
Re: [PATCH v5 08/14] VFS: add start_creating_killable() and start_removing_killable()
Posted by Jeff Layton 1 month ago
On Thu, 2025-11-06 at 11:50 +1100, NeilBrown wrote:
> From: NeilBrown <neil@brown.name>
> 
> These are similar to start_creating() and start_removing(), but allow a
> fatal signal to abort waiting for the lock.
> 
> They are used in btrfs for subvol creation and removal.
> 
> btrfs_may_create() no longer needs IS_DEADDIR() and
> start_creating_killable() includes that check.
> 
> Reviewed-by: Amir Goldstein <amir73il@gmail.com>
> Signed-off-by: NeilBrown <neil@brown.name>
> ---
>  fs/btrfs/ioctl.c      | 41 +++++++---------------
>  fs/namei.c            | 80 +++++++++++++++++++++++++++++++++++++++++--
>  include/linux/namei.h |  6 ++++
>  3 files changed, 95 insertions(+), 32 deletions(-)
> 
> diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
> index 8cb7d5a462ef..d0c3bb0423bb 100644
> --- a/fs/btrfs/ioctl.c
> +++ b/fs/btrfs/ioctl.c
> @@ -904,14 +904,9 @@ static noinline int btrfs_mksubvol(struct dentry *parent,
>  	struct fscrypt_str name_str = FSTR_INIT((char *)qname->name, qname->len);
>  	int ret;
>  
> -	ret = down_write_killable_nested(&dir->i_rwsem, I_MUTEX_PARENT);
> -	if (ret == -EINTR)
> -		return ret;
> -
> -	dentry = lookup_one(idmap, qname, parent);
> -	ret = PTR_ERR(dentry);
> +	dentry = start_creating_killable(idmap, parent, qname);
>  	if (IS_ERR(dentry))
> -		goto out_unlock;
> +		return PTR_ERR(dentry);
>  
>  	ret = btrfs_may_create(idmap, dir, dentry);
>  	if (ret)
> @@ -940,9 +935,7 @@ static noinline int btrfs_mksubvol(struct dentry *parent,
>  out_up_read:
>  	up_read(&fs_info->subvol_sem);
>  out_dput:
> -	dput(dentry);
> -out_unlock:
> -	btrfs_inode_unlock(BTRFS_I(dir), 0);
> +	end_creating(dentry, parent);
>  	return ret;
>  }
>  
> @@ -2417,18 +2410,10 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
>  		goto free_subvol_name;
>  	}
>  
> -	ret = down_write_killable_nested(&dir->i_rwsem, I_MUTEX_PARENT);
> -	if (ret == -EINTR)
> -		goto free_subvol_name;
> -	dentry = lookup_one(idmap, &QSTR(subvol_name), parent);
> +	dentry = start_removing_killable(idmap, parent, &QSTR(subvol_name));
>  	if (IS_ERR(dentry)) {
>  		ret = PTR_ERR(dentry);
> -		goto out_unlock_dir;
> -	}
> -
> -	if (d_really_is_negative(dentry)) {
> -		ret = -ENOENT;
> -		goto out_dput;
> +		goto out_end_removing;
>  	}
>  
>  	inode = d_inode(dentry);
> @@ -2449,7 +2434,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
>  		 */
>  		ret = -EPERM;
>  		if (!btrfs_test_opt(fs_info, USER_SUBVOL_RM_ALLOWED))
> -			goto out_dput;
> +			goto out_end_removing;
>  
>  		/*
>  		 * Do not allow deletion if the parent dir is the same
> @@ -2460,21 +2445,21 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
>  		 */
>  		ret = -EINVAL;
>  		if (root == dest)
> -			goto out_dput;
> +			goto out_end_removing;
>  
>  		ret = inode_permission(idmap, inode, MAY_WRITE | MAY_EXEC);
>  		if (ret)
> -			goto out_dput;
> +			goto out_end_removing;
>  	}
>  
>  	/* check if subvolume may be deleted by a user */
>  	ret = btrfs_may_delete(idmap, dir, dentry, 1);
>  	if (ret)
> -		goto out_dput;
> +		goto out_end_removing;
>  
>  	if (btrfs_ino(BTRFS_I(inode)) != BTRFS_FIRST_FREE_OBJECTID) {
>  		ret = -EINVAL;
> -		goto out_dput;
> +		goto out_end_removing;
>  	}
>  
>  	btrfs_inode_lock(BTRFS_I(inode), 0);
> @@ -2483,10 +2468,8 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
>  	if (!ret)
>  		d_delete_notify(dir, dentry);
>  
> -out_dput:
> -	dput(dentry);
> -out_unlock_dir:
> -	btrfs_inode_unlock(BTRFS_I(dir), 0);
> +out_end_removing:
> +	end_removing(dentry);
>  free_subvol_name:
>  	kfree(subvol_name_ptr);
>  free_parent:
> diff --git a/fs/namei.c b/fs/namei.c
> index 729b42fb143b..e70d056b9543 100644
> --- a/fs/namei.c
> +++ b/fs/namei.c
> @@ -2778,19 +2778,33 @@ static int filename_parentat(int dfd, struct filename *name,
>   * Returns: a locked dentry, or an error.
>   *
>   */
> -struct dentry *start_dirop(struct dentry *parent, struct qstr *name,
> -			   unsigned int lookup_flags)
> +static struct dentry *__start_dirop(struct dentry *parent, struct qstr *name,
> +				    unsigned int lookup_flags,
> +				    unsigned int state)
>  {
>  	struct dentry *dentry;
>  	struct inode *dir = d_inode(parent);
>  
> -	inode_lock_nested(dir, I_MUTEX_PARENT);
> +	if (state == TASK_KILLABLE) {
> +		int ret = down_write_killable_nested(&dir->i_rwsem,
> +						     I_MUTEX_PARENT);
> +		if (ret)
> +			return ERR_PTR(ret);
> +	} else {
> +		inode_lock_nested(dir, I_MUTEX_PARENT);
> +	}
>  	dentry = lookup_one_qstr_excl(name, parent, lookup_flags);
>  	if (IS_ERR(dentry))
>  		inode_unlock(dir);
>  	return dentry;
>  }
>  
> +struct dentry *start_dirop(struct dentry *parent, struct qstr *name,
> +			   unsigned int lookup_flags)
> +{
> +	return __start_dirop(parent, name, lookup_flags, TASK_NORMAL);
> +}
> +
>  /**
>   * end_dirop - signal completion of a dirop
>   * @de: the dentry which was returned by start_dirop or similar.
> @@ -3275,6 +3289,66 @@ struct dentry *start_removing(struct mnt_idmap *idmap, struct dentry *parent,
>  }
>  EXPORT_SYMBOL(start_removing);
>  
> +/**
> + * start_creating_killable - prepare to create a given name with permission checking
> + * @idmap:  idmap of the mount
> + * @parent: directory in which to prepare to create the name
> + * @name:   the name to be created
> + *
> + * Locks are taken and a lookup in performed prior to creating
> + * an object in a directory.  Permission checking (MAY_EXEC) is performed
> + * against @idmap.
> + *
> + * If the name already exists, a positive dentry is returned.
> + *
> + * If a signal is received or was already pending, the function aborts
> + * with -EINTR;
> + *
> + * Returns: a negative or positive dentry, or an error.
> + */
> +struct dentry *start_creating_killable(struct mnt_idmap *idmap,
> +				       struct dentry *parent,
> +				       struct qstr *name)
> +{
> +	int err = lookup_one_common(idmap, name, parent);
> +
> +	if (err)
> +		return ERR_PTR(err);
> +	return __start_dirop(parent, name, LOOKUP_CREATE, TASK_KILLABLE);
> +}
> +EXPORT_SYMBOL(start_creating_killable);
> +
> +/**
> + * start_removing_killable - prepare to remove a given name with permission checking
> + * @idmap:  idmap of the mount
> + * @parent: directory in which to find the name
> + * @name:   the name to be removed
> + *
> + * Locks are taken and a lookup in performed prior to removing
> + * an object from a directory.  Permission checking (MAY_EXEC) is performed
> + * against @idmap.
> + *
> + * If the name doesn't exist, an error is returned.
> + *
> + * end_removing() should be called when removal is complete, or aborted.
> + *
> + * If a signal is received or was already pending, the function aborts
> + * with -EINTR;
> + *
> + * Returns: a positive dentry, or an error.
> + */
> +struct dentry *start_removing_killable(struct mnt_idmap *idmap,
> +				       struct dentry *parent,
> +				       struct qstr *name)
> +{
> +	int err = lookup_one_common(idmap, name, parent);
> +
> +	if (err)
> +		return ERR_PTR(err);
> +	return __start_dirop(parent, name, 0, TASK_KILLABLE);
> +}
> +EXPORT_SYMBOL(start_removing_killable);
> +
>  /**
>   * start_creating_noperm - prepare to create a given name without permission checking
>   * @parent: directory in which to prepare to create the name
> diff --git a/include/linux/namei.h b/include/linux/namei.h
> index d089e4e8fdd0..196c66156a8a 100644
> --- a/include/linux/namei.h
> +++ b/include/linux/namei.h
> @@ -93,6 +93,12 @@ struct dentry *start_creating(struct mnt_idmap *idmap, struct dentry *parent,
>  			      struct qstr *name);
>  struct dentry *start_removing(struct mnt_idmap *idmap, struct dentry *parent,
>  			      struct qstr *name);
> +struct dentry *start_creating_killable(struct mnt_idmap *idmap,
> +				       struct dentry *parent,
> +				       struct qstr *name);
> +struct dentry *start_removing_killable(struct mnt_idmap *idmap,
> +				       struct dentry *parent,
> +				       struct qstr *name);
>  struct dentry *start_creating_noperm(struct dentry *parent, struct qstr *name);
>  struct dentry *start_removing_noperm(struct dentry *parent, struct qstr *name);
>  struct dentry *start_removing_dentry(struct dentry *parent,

Nice. Maybe we can start using the killable versions in more places
this way!

Reviewed-by: Jeff Layton <jlayton@kernel.org>