From nobody Mon Oct 6 13:42:07 2025 Received: from neil.brown.name (neil.brown.name [103.29.64.221]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id B16512877E1; Mon, 21 Jul 2025 08:45:36 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=103.29.64.221 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1753087539; cv=none; b=rO9Zcnmpv/oN1tt9ZEs2cO+577svM5VxORjG4SAO5H3am0sA2PesIU4kAes7Pa/f5bCWT4wnlHbvAGTIrr3VPBIFTT4iXOSJb0XtPTLQNn8NpuDPwucQ6hL6g9/JRFx2LUJZD21sDz60pBpunastcyd+foV0FeDjSrkls53hPHM= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1753087539; c=relaxed/simple; bh=YItLhh9CIpvXNfKExgtCqj8sAANAfdZnArN08IzlHEo=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=t1nTuGZXt89jYqG75lue6ChorTHy8G5PPpUXmm6zl26uhsXhrHQR7j3yq3bDGKmDBcHBxsEMQKEtAPXDeXiSE8ZxV5TVQN8tN+ZlKQN5FN2lrX2KClgTnjbYFeZ5LrLtcBRJKN0iPuQDhdIpQMATlBFJSmm2n2G1sci6lOrXDiw= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=brown.name; spf=pass smtp.mailfrom=neil.brown.name; arc=none smtp.client-ip=103.29.64.221 Authentication-Results: smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=brown.name Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=neil.brown.name Received: from 196.186.233.220.static.exetel.com.au ([220.233.186.196] helo=home.neil.brown.name) by neil.brown.name with esmtp (Exim 4.95) (envelope-from ) id 1udm9F-002pfp-Rn; Mon, 21 Jul 2025 08:45:27 +0000 From: NeilBrown To: Linus Torvalds , Alexander Viro , Christian Brauner , Jan Kara Cc: linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org Subject: [PATCH 1/7] VFS: unify old_mnt_idmap and new_mnt_idmap in renamedata Date: Mon, 21 Jul 2025 17:59:57 +1000 Message-ID: <20250721084412.370258-2-neil@brown.name> X-Mailer: git-send-email 2.49.0 In-Reply-To: <20250721084412.370258-1-neil@brown.name> References: <20250721084412.370258-1-neil@brown.name> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" A rename can only rename with a single mount. Callers of vfs_rename() must and do ensure this is the case. So there is no point in having two mnt_idmaps in renamedata as they are always the same. Only of of them is passed to ->rename in any case. This patch replaces both with a single "mnt_idmap" and changes all callers. Signed-off-by: NeilBrown Reviewed-by: Jeff Layton --- fs/cachefiles/namei.c | 3 +-- fs/ecryptfs/inode.c | 3 +-- fs/namei.c | 17 ++++++++--------- fs/nfsd/vfs.c | 3 +-- fs/overlayfs/overlayfs.h | 3 +-- fs/smb/server/vfs.c | 3 +-- include/linux/fs.h | 6 ++---- 7 files changed, 15 insertions(+), 23 deletions(-) diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index 91dfd0231877..d1edb2ac3837 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c @@ -387,10 +387,9 @@ int cachefiles_bury_object(struct cachefiles_cache *ca= che, cachefiles_io_error(cache, "Rename security error %d", ret); } else { struct renamedata rd =3D { - .old_mnt_idmap =3D &nop_mnt_idmap, + .mnt_idmap =3D &nop_mnt_idmap, .old_parent =3D dir, .old_dentry =3D rep, - .new_mnt_idmap =3D &nop_mnt_idmap, .new_parent =3D cache->graveyard, .new_dentry =3D grave, }; diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 72fbe1316ab8..abd954c6a14e 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -634,10 +634,9 @@ ecryptfs_rename(struct mnt_idmap *idmap, struct inode = *old_dir, goto out_lock; } =20 - rd.old_mnt_idmap =3D &nop_mnt_idmap; + rd.mnt_idmap =3D &nop_mnt_idmap; rd.old_parent =3D lower_old_dir_dentry; rd.old_dentry =3D lower_old_dentry; - rd.new_mnt_idmap =3D &nop_mnt_idmap; rd.new_parent =3D lower_new_dir_dentry; rd.new_dentry =3D lower_new_dentry; rc =3D vfs_rename(&rd); diff --git a/fs/namei.c b/fs/namei.c index cd43ff89fbaa..1c80445693d4 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -5024,20 +5024,20 @@ int vfs_rename(struct renamedata *rd) if (source =3D=3D target) return 0; =20 - error =3D may_delete(rd->old_mnt_idmap, old_dir, old_dentry, is_dir); + error =3D may_delete(rd->mnt_idmap, old_dir, old_dentry, is_dir); if (error) return error; =20 if (!target) { - error =3D may_create(rd->new_mnt_idmap, new_dir, new_dentry); + error =3D may_create(rd->mnt_idmap, new_dir, new_dentry); } else { new_is_dir =3D d_is_dir(new_dentry); =20 if (!(flags & RENAME_EXCHANGE)) - error =3D may_delete(rd->new_mnt_idmap, new_dir, + error =3D may_delete(rd->mnt_idmap, new_dir, new_dentry, is_dir); else - error =3D may_delete(rd->new_mnt_idmap, new_dir, + error =3D may_delete(rd->mnt_idmap, new_dir, new_dentry, new_is_dir); } if (error) @@ -5052,13 +5052,13 @@ int vfs_rename(struct renamedata *rd) */ if (new_dir !=3D old_dir) { if (is_dir) { - error =3D inode_permission(rd->old_mnt_idmap, source, + error =3D inode_permission(rd->mnt_idmap, source, MAY_WRITE); if (error) return error; } if ((flags & RENAME_EXCHANGE) && new_is_dir) { - error =3D inode_permission(rd->new_mnt_idmap, target, + error =3D inode_permission(rd->mnt_idmap, target, MAY_WRITE); if (error) return error; @@ -5126,7 +5126,7 @@ int vfs_rename(struct renamedata *rd) if (error) goto out; } - error =3D old_dir->i_op->rename(rd->new_mnt_idmap, old_dir, old_dentry, + error =3D old_dir->i_op->rename(rd->mnt_idmap, old_dir, old_dentry, new_dir, new_dentry, flags); if (error) goto out; @@ -5269,10 +5269,9 @@ int do_renameat2(int olddfd, struct filename *from, = int newdfd, =20 rd.old_parent =3D old_path.dentry; rd.old_dentry =3D old_dentry; - rd.old_mnt_idmap =3D mnt_idmap(old_path.mnt); + rd.mnt_idmap =3D mnt_idmap(old_path.mnt); rd.new_parent =3D new_path.dentry; rd.new_dentry =3D new_dentry; - rd.new_mnt_idmap =3D mnt_idmap(new_path.mnt); rd.delegated_inode =3D &delegated_inode; rd.flags =3D flags; error =3D vfs_rename(&rd); diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 7d522e426b2d..a21940cadede 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1940,10 +1940,9 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *f= fhp, char *fname, int flen, goto out_dput_old; } else { struct renamedata rd =3D { - .old_mnt_idmap =3D &nop_mnt_idmap, + .mnt_idmap =3D &nop_mnt_idmap, .old_parent =3D fdentry, .old_dentry =3D odentry, - .new_mnt_idmap =3D &nop_mnt_idmap, .new_parent =3D tdentry, .new_dentry =3D ndentry, }; diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index bb0d7ded8e76..4f84abaa0d68 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -361,10 +361,9 @@ static inline int ovl_do_rename(struct ovl_fs *ofs, st= ruct dentry *olddir, { int err; struct renamedata rd =3D { - .old_mnt_idmap =3D ovl_upper_mnt_idmap(ofs), + .mnt_idmap =3D ovl_upper_mnt_idmap(ofs), .old_parent =3D olddir, .old_dentry =3D olddentry, - .new_mnt_idmap =3D ovl_upper_mnt_idmap(ofs), .new_parent =3D newdir, .new_dentry =3D newdentry, .flags =3D flags, diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c index 49e731dd0529..bfd62a21e75c 100644 --- a/fs/smb/server/vfs.c +++ b/fs/smb/server/vfs.c @@ -770,10 +770,9 @@ int ksmbd_vfs_rename(struct ksmbd_work *work, const st= ruct path *old_path, goto out4; } =20 - rd.old_mnt_idmap =3D mnt_idmap(old_path->mnt), + rd.mnt_idmap =3D mnt_idmap(old_path->mnt), rd.old_parent =3D old_parent, rd.old_dentry =3D old_child, - rd.new_mnt_idmap =3D mnt_idmap(new_path.mnt), rd.new_parent =3D new_path.dentry, rd.new_dentry =3D new_dentry, rd.flags =3D flags, diff --git a/include/linux/fs.h b/include/linux/fs.h index 1948b2c828d3..d3e27da8a6aa 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2005,20 +2005,18 @@ int vfs_unlink(struct mnt_idmap *, struct inode *, = struct dentry *, =20 /** * struct renamedata - contains all information required for renaming - * @old_mnt_idmap: idmap of the old mount the inode was found from + * @mnt_idmap: idmap of the mount in which the rename is happening. * @old_parent: parent of source * @old_dentry: source - * @new_mnt_idmap: idmap of the new mount the inode was found from * @new_parent: parent of destination * @new_dentry: destination * @delegated_inode: returns an inode needing a delegation break * @flags: rename flags */ struct renamedata { - struct mnt_idmap *old_mnt_idmap; + struct mnt_idmap *mnt_idmap; struct dentry *old_parent; struct dentry *old_dentry; - struct mnt_idmap *new_mnt_idmap; struct dentry *new_parent; struct dentry *new_dentry; struct inode **delegated_inode; --=20 2.49.0 From nobody Mon Oct 6 13:42:07 2025 Received: from neil.brown.name (neil.brown.name [103.29.64.221]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id B14EE2877DD; Mon, 21 Jul 2025 08:45:36 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=103.29.64.221 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1753087540; cv=none; b=FDmFLhc0jYExYUl4kGiURne8mnfbuREI3Gb1b6mYqt+Lb75zAoWc6TsEcVhRXSfibeYzlx+vCrhrKf30aDoeKqLTBouIF2K2Qiq+vnW9S4gv27UluoiH6wjZEMovezbtS9GrLveKE5IANZ5hact5sRK0kcVT1KNQsdUvUAPPhTg= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1753087540; c=relaxed/simple; bh=x12wTCgNX61cbCA8ZuNPRpQgNjL9DwU8174S+N2rU1Q=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=YRk0QTl4Ev5KkHYhvIk6GMiJVzJlXLcpgLKYg5TLGfegfIDX3GkWB8WPLTr1YA5tx0P8QEd1ZUlLN22rTgvr7MO++G0Py5GjKxHyA1/7Nl5coRU3fAQBDYG4jSNZw6cO3k5SNRzpdG4TwXxpyPbl2VJUaaBRWthv4doEjNmOpOY= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=brown.name; spf=pass smtp.mailfrom=neil.brown.name; arc=none smtp.client-ip=103.29.64.221 Authentication-Results: smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=brown.name Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=neil.brown.name Received: from 196.186.233.220.static.exetel.com.au ([220.233.186.196] helo=home.neil.brown.name) by neil.brown.name with esmtp (Exim 4.95) (envelope-from ) id 1udm9G-002pfr-C9; Mon, 21 Jul 2025 08:45:28 +0000 From: NeilBrown To: Linus Torvalds , Alexander Viro , Christian Brauner , Jan Kara Cc: linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org Subject: [PATCH 2/7] VFS: introduce done_dentry_lookup() Date: Mon, 21 Jul 2025 17:59:58 +1000 Message-ID: <20250721084412.370258-3-neil@brown.name> X-Mailer: git-send-email 2.49.0 In-Reply-To: <20250721084412.370258-1-neil@brown.name> References: <20250721084412.370258-1-neil@brown.name> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" done_dentry_lookup() is the first step in introducing a new API for locked operation on names in directories - those that create or remove names. Rename operations will also be part of this API but will use separate interfaces. The plan is to lock just the dentry (or dentries), not the whole directory. A "dentry_lookup()" operation will perform the locking and lookup with a corresponding "done_dentry_lookup()" releasing the resulting dentry and dropping any locks. This done_dentry_lookup() can immediately be used to complete updates started with kern_path_locked() (much as done_path_create() already completes operations started with kern_path_create()). So this patch adds done_dentry_lookup() and uses it where kern_path_locked() is used. It also adds done_dentry_lookup_return() which returns a reference to the dentry rather than dropping it. This is a less common need in existing code, but still worth its own interface. Signed-off-by: NeilBrown Reviewed-by: Jeff Layton --- drivers/base/devtmpfs.c | 7 ++----- fs/bcachefs/fs-ioctl.c | 3 +-- fs/namei.c | 38 ++++++++++++++++++++++++++++++++++++++ include/linux/namei.h | 3 +++ kernel/audit_fsnotify.c | 9 ++++----- kernel/audit_watch.c | 3 +-- 6 files changed, 49 insertions(+), 14 deletions(-) diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c index 31bfb3194b4c..47bee8166c8d 100644 --- a/drivers/base/devtmpfs.c +++ b/drivers/base/devtmpfs.c @@ -265,8 +265,7 @@ static int dev_rmdir(const char *name) else err =3D -EPERM; =20 - dput(dentry); - inode_unlock(d_inode(parent.dentry)); + done_dentry_lookup(dentry); path_put(&parent); return err; } @@ -349,9 +348,7 @@ static int handle_remove(const char *nodename, struct d= evice *dev) if (!err || err =3D=3D -ENOENT) deleted =3D 1; } - dput(dentry); - inode_unlock(d_inode(parent.dentry)); - + done_dentry_lookup(dentry); path_put(&parent); if (deleted && strchr(nodename, '/')) delete_path(nodename); diff --git a/fs/bcachefs/fs-ioctl.c b/fs/bcachefs/fs-ioctl.c index 4e72e654da96..8077ddf4ddc4 100644 --- a/fs/bcachefs/fs-ioctl.c +++ b/fs/bcachefs/fs-ioctl.c @@ -351,8 +351,7 @@ static long bch2_ioctl_subvolume_destroy(struct bch_fs = *c, struct file *filp, d_invalidate(victim); } err: - inode_unlock(dir); - dput(victim); + done_dentry_lookup(victim); path_put(&path); return ret; } diff --git a/fs/namei.c b/fs/namei.c index 1c80445693d4..da160a01e23d 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1714,6 +1714,44 @@ struct dentry *lookup_one_qstr_excl(const struct qst= r *name, } EXPORT_SYMBOL(lookup_one_qstr_excl); =20 +/** + * done_dentry_lookup - finish a lookup used for create/delete + * @dentry: the target dentry + * + * After locking the directory and lookup or validating a dentry + * an attempt can be made to create (including link) or remove (including + * rmdir) a dentry. After this, done_dentry_lookup() can be used to both + * unlock the parent directory and dput() the dentry. + * + * This interface allows a smooth transition from parent-dir based + * locking to dentry based locking. + */ +void done_dentry_lookup(struct dentry *dentry) +{ + inode_unlock(dentry->d_parent->d_inode); + dput(dentry); +} +EXPORT_SYMBOL(done_dentry_lookup); + +/** + * done_dentry_lookup_return - finish a lookup sequence, returning the den= try + * @dentry: the target dentry + * + * After locking the directory and lookup or validating a dentry + * an attempt can be made to create (including link) or remove (including + * rmdir) a dentry. After this, done_dentry_lookup_return() can be used to + * unlock the parent directory. The dentry is returned for further use. + * + * This interface allows a smooth transition from parent-dir based + * locking to dentry based locking. + */ +struct dentry *done_dentry_lookup_return(struct dentry *dentry) +{ + inode_unlock(dentry->d_parent->d_inode); + return dentry; +} +EXPORT_SYMBOL(done_dentry_lookup_return); + /** * lookup_fast - do fast lockless (but racy) lookup of a dentry * @nd: current nameidata diff --git a/include/linux/namei.h b/include/linux/namei.h index 5d085428e471..e097f11587c9 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -81,6 +81,9 @@ struct dentry *lookup_one_positive_unlocked(struct mnt_id= map *idmap, struct qstr *name, struct dentry *base); =20 +void done_dentry_lookup(struct dentry *dentry); +struct dentry *done_dentry_lookup_return(struct dentry *dentry); + extern int follow_down_one(struct path *); extern int follow_down(struct path *path, unsigned int flags); extern int follow_up(struct path *); diff --git a/kernel/audit_fsnotify.c b/kernel/audit_fsnotify.c index c565fbf66ac8..170836c3520f 100644 --- a/kernel/audit_fsnotify.c +++ b/kernel/audit_fsnotify.c @@ -85,8 +85,8 @@ struct audit_fsnotify_mark *audit_alloc_mark(struct audit= _krule *krule, char *pa dentry =3D kern_path_locked(pathname, &path); if (IS_ERR(dentry)) return ERR_CAST(dentry); /* returning an error */ - inode =3D path.dentry->d_inode; - inode_unlock(inode); + inode =3D igrab(dentry->d_inode); + done_dentry_lookup(dentry); =20 audit_mark =3D kzalloc(sizeof(*audit_mark), GFP_KERNEL); if (unlikely(!audit_mark)) { @@ -97,17 +97,16 @@ struct audit_fsnotify_mark *audit_alloc_mark(struct aud= it_krule *krule, char *pa fsnotify_init_mark(&audit_mark->mark, audit_fsnotify_group); audit_mark->mark.mask =3D AUDIT_FS_EVENTS; audit_mark->path =3D pathname; - audit_update_mark(audit_mark, dentry->d_inode); + audit_update_mark(audit_mark, inode); audit_mark->rule =3D krule; =20 - ret =3D fsnotify_add_inode_mark(&audit_mark->mark, inode, 0); + ret =3D fsnotify_add_inode_mark(&audit_mark->mark, path.dentry->d_inode, = 0); if (ret < 0) { audit_mark->path =3D NULL; fsnotify_put_mark(&audit_mark->mark); audit_mark =3D ERR_PTR(ret); } out: - dput(dentry); path_put(&path); return audit_mark; } diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c index 0ebbbe37a60f..f6ecac2109d4 100644 --- a/kernel/audit_watch.c +++ b/kernel/audit_watch.c @@ -359,8 +359,7 @@ static int audit_get_nd(struct audit_watch *watch, stru= ct path *parent) watch->ino =3D d_backing_inode(d)->i_ino; } =20 - inode_unlock(d_backing_inode(parent->dentry)); - dput(d); + done_dentry_lookup(d); return 0; } =20 --=20 2.49.0 From nobody Mon Oct 6 13:42:07 2025 Received: from neil.brown.name (neil.brown.name [103.29.64.221]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id B13CF287500; Mon, 21 Jul 2025 08:45:36 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=103.29.64.221 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1753087539; cv=none; b=tKQDDWTA7s/fZkBWG+gjrIpmA9wkPBrX6qdPbFv3afQfDNhIr/L0W1QwQW9r0SsrVIQsjy1oOJessNDrWDt2L6AN68j4byRDxhyTkI+LhuodIhgx5UVQQwDSIJg1roh+Z/fRt2sM1OwMrZ2XKMAjnvkNtinIvzzXl7MnDOupchY= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1753087539; c=relaxed/simple; bh=WsiREpDyD5e2M1t4P4tsfE0wG9bWYJ1PAILyyKiMj/k=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=gVp0AKGuncpk5U6B1LvGtrUjaBD7sRtADhjHurKWlrqaW5x0Zsms8/0A1PaD6oi5x32+N3XVV2+buenQvcazMKXyAVbHlWUJREbBnoQtoI66UJixwixsrcFshQ45Gsxn921IiS/kFWrbU1gY9FoXcJwnkrK9wISBxhH0z1iX6tg= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=brown.name; spf=pass smtp.mailfrom=neil.brown.name; arc=none smtp.client-ip=103.29.64.221 Authentication-Results: smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=brown.name Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=neil.brown.name Received: from 196.186.233.220.static.exetel.com.au ([220.233.186.196] helo=home.neil.brown.name) by neil.brown.name with esmtp (Exim 4.95) (envelope-from ) id 1udm9G-002pft-Tr; Mon, 21 Jul 2025 08:45:28 +0000 From: NeilBrown To: Linus Torvalds , Alexander Viro , Christian Brauner , Jan Kara Cc: linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org Subject: [PATCH 3/7] VFS: Change vfs_mkdir() to unlock on failure. Date: Mon, 21 Jul 2025 17:59:59 +1000 Message-ID: <20250721084412.370258-4-neil@brown.name> X-Mailer: git-send-email 2.49.0 In-Reply-To: <20250721084412.370258-1-neil@brown.name> References: <20250721084412.370258-1-neil@brown.name> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Proposed changes to directory-op locking will lock the dentry rather than the whole directory. So the dentry will need to be unlocked. vfs_mkdir() consumes the dentry on error, so there will be no dentry to be unlocked. So this patch changes vfs_mkdir() to unlock on error as well as releasing the dentry. This requires various other functions in various callers to also unlock on error. At present this results in some clumsy code. Once the transition to dentry locking is complete the clumsiness will be gone. Signed-off-by: NeilBrown --- fs/cachefiles/namei.c | 9 +++++---- fs/ecryptfs/inode.c | 3 ++- fs/namei.c | 24 ++++++++++++++++-------- fs/nfsd/nfs4recover.c | 12 +++++------- fs/nfsd/vfs.c | 12 ++++++++++-- fs/overlayfs/dir.c | 13 +++++++------ fs/overlayfs/overlayfs.h | 1 + fs/overlayfs/super.c | 5 +++-- fs/xfs/scrub/orphanage.c | 2 +- 9 files changed, 50 insertions(+), 31 deletions(-) diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index d1edb2ac3837..732d78911bed 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c @@ -131,8 +131,11 @@ struct dentry *cachefiles_get_directory(struct cachefi= les_cache *cache, ret =3D cachefiles_inject_write_error(); if (ret =3D=3D 0) subdir =3D vfs_mkdir(&nop_mnt_idmap, d_inode(dir), subdir, 0700); - else + else { + /* vfs_mkdir() unlocks on failure so we must too */ + inode_unlock(d_inode(dir)); subdir =3D ERR_PTR(ret); + } if (IS_ERR(subdir)) { trace_cachefiles_vfs_error(NULL, d_inode(dir), ret, cachefiles_trace_mkdir_error); @@ -196,9 +199,7 @@ struct dentry *cachefiles_get_directory(struct cachefil= es_cache *cache, return ERR_PTR(-EBUSY); =20 mkdir_error: - inode_unlock(d_inode(dir)); - if (!IS_ERR(subdir)) - dput(subdir); + done_dentry_lookup(subdir); pr_err("mkdir %s failed with error %d\n", dirname, ret); return ERR_PTR(ret); =20 diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index abd954c6a14e..5d8cb042aa57 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -520,7 +520,7 @@ static struct dentry *ecryptfs_mkdir(struct mnt_idmap *= idmap, struct inode *dir, lower_dentry, mode); rc =3D PTR_ERR(lower_dentry); if (IS_ERR(lower_dentry)) - goto out; + goto out_unlocked; rc =3D 0; if (d_unhashed(lower_dentry)) goto out; @@ -532,6 +532,7 @@ static struct dentry *ecryptfs_mkdir(struct mnt_idmap *= idmap, struct inode *dir, set_nlink(dir, lower_dir->i_nlink); out: inode_unlock(lower_dir); +out_unlocked: if (d_really_is_negative(dentry)) d_drop(dentry); return ERR_PTR(rc); diff --git a/fs/namei.c b/fs/namei.c index da160a01e23d..950a0d0d54da 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1723,13 +1723,18 @@ EXPORT_SYMBOL(lookup_one_qstr_excl); * rmdir) a dentry. After this, done_dentry_lookup() can be used to both * unlock the parent directory and dput() the dentry. * + * If the dentry is an error - as can happen after vfs_mkdir() - + * the unlock is skipped as unneeded. + * * This interface allows a smooth transition from parent-dir based * locking to dentry based locking. */ void done_dentry_lookup(struct dentry *dentry) { - inode_unlock(dentry->d_parent->d_inode); - dput(dentry); + if (!IS_ERR(dentry)) { + inode_unlock(dentry->d_parent->d_inode); + dput(dentry); + } } EXPORT_SYMBOL(done_dentry_lookup); =20 @@ -1742,12 +1747,16 @@ EXPORT_SYMBOL(done_dentry_lookup); * rmdir) a dentry. After this, done_dentry_lookup_return() can be used to * unlock the parent directory. The dentry is returned for further use. * + * If the dentry is an error - as can happen after vfs_mkdir() - + * the unlock is skipped as unneeded. + * * This interface allows a smooth transition from parent-dir based * locking to dentry based locking. */ struct dentry *done_dentry_lookup_return(struct dentry *dentry) { - inode_unlock(dentry->d_parent->d_inode); + if (!IS_ERR(dentry)) + inode_unlock(dentry->d_parent->d_inode); return dentry; } EXPORT_SYMBOL(done_dentry_lookup_return); @@ -4210,9 +4219,7 @@ EXPORT_SYMBOL(kern_path_create); =20 void done_path_create(struct path *path, struct dentry *dentry) { - if (!IS_ERR(dentry)) - dput(dentry); - inode_unlock(path->dentry->d_inode); + done_dentry_lookup(dentry); mnt_drop_write(path->mnt); path_put(path); } @@ -4375,7 +4382,8 @@ SYSCALL_DEFINE3(mknod, const char __user *, filename,= umode_t, mode, unsigned, d * negative or unhashes it and possibly splices a different one returning = it, * the original dentry is dput() and the alternate is returned. * - * In case of an error the dentry is dput() and an ERR_PTR() is returned. + * In case of an error the dentry is dput(), the parent is unlocked, and + * an ERR_PTR() is returned. */ struct dentry *vfs_mkdir(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode) @@ -4413,7 +4421,7 @@ struct dentry *vfs_mkdir(struct mnt_idmap *idmap, str= uct inode *dir, return dentry; =20 err: - dput(dentry); + done_dentry_lookup(dentry); return ERR_PTR(error); } EXPORT_SYMBOL(vfs_mkdir); diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 82785db730d9..693fa95fa678 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -222,7 +222,8 @@ nfsd4_create_clid_dir(struct nfs4_client *clp) dentry =3D lookup_one(&nop_mnt_idmap, &QSTR(dname), dir); if (IS_ERR(dentry)) { status =3D PTR_ERR(dentry); - goto out_unlock; + inode_unlock(d_inode(dir)); + goto out; } if (d_really_is_positive(dentry)) /* @@ -233,15 +234,12 @@ nfsd4_create_clid_dir(struct nfs4_client *clp) * In the 4.0 case, we should never get here; but we may * as well be forgiving and just succeed silently. */ - goto out_put; + goto out; dentry =3D vfs_mkdir(&nop_mnt_idmap, d_inode(dir), dentry, S_IRWXU); if (IS_ERR(dentry)) status =3D PTR_ERR(dentry); -out_put: - if (!status) - dput(dentry); -out_unlock: - inode_unlock(d_inode(dir)); +out: + done_dentry_lookup(dentry); if (status =3D=3D 0) { if (nn->in_grace) __nfsd4_create_reclaim_record_grace(clp, dname, diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index a21940cadede..e85195e858a2 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1489,7 +1489,9 @@ nfsd_check_ignore_resizing(struct iattr *iap) iap->ia_valid &=3D ~ATTR_SIZE; } =20 -/* The parent directory should already be locked: */ +/* The parent directory should already be locked. The lock + * will be dropped on error. + */ __be32 nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_attrs *attrs, @@ -1555,8 +1557,11 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct sv= c_fh *fhp, err =3D nfsd_create_setattr(rqstp, fhp, resfhp, attrs); =20 out: - if (!IS_ERR(dchild)) + if (!IS_ERR(dchild)) { + if (err) + inode_unlock(dirp); dput(dchild); + } return err; =20 out_nfserr: @@ -1613,6 +1618,9 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fh= p, if (err !=3D nfs_ok) goto out_unlock; err =3D nfsd_create_locked(rqstp, fhp, attrs, type, rdev, resfhp); + if (err) + /* lock will have been dropped */ + return err; fh_fill_post_attrs(fhp); out_unlock: inode_unlock(dentry->d_inode); diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index 30619777f0f6..74b52595ea0e 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -161,14 +161,17 @@ int ovl_cleanup_and_whiteout(struct ovl_fs *ofs, stru= ct dentry *dir, goto out; } =20 +/* dir will be unlocked on return */ struct dentry *ovl_create_real(struct ovl_fs *ofs, struct dentry *parent, struct dentry *newdentry, struct ovl_cattr *attr) { struct inode *dir =3D parent->d_inode; int err; =20 - if (IS_ERR(newdentry)) + if (IS_ERR(newdentry)) { + inode_unlock(dir); return newdentry; + } =20 err =3D -ESTALE; if (newdentry->d_inode) @@ -213,11 +216,11 @@ struct dentry *ovl_create_real(struct ovl_fs *ofs, st= ruct dentry *parent, } out: if (err) { - if (!IS_ERR(newdentry)) - dput(newdentry); + done_dentry_lookup(newdentry); return ERR_PTR(err); + } else { + return done_dentry_lookup_return(newdentry); } - return newdentry; } =20 struct dentry *ovl_create_temp(struct ovl_fs *ofs, struct dentry *workdir, @@ -227,7 +230,6 @@ struct dentry *ovl_create_temp(struct ovl_fs *ofs, stru= ct dentry *workdir, inode_lock(workdir->d_inode); ret =3D ovl_create_real(ofs, workdir, ovl_lookup_temp(ofs, workdir), attr); - inode_unlock(workdir->d_inode); return ret; } =20 @@ -335,7 +337,6 @@ static int ovl_create_upper(struct dentry *dentry, stru= ct inode *inode, ovl_lookup_upper(ofs, dentry->d_name.name, upperdir, dentry->d_name.len), attr); - inode_unlock(udir); if (IS_ERR(newdentry)) return PTR_ERR(newdentry); =20 diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 4f84abaa0d68..238c26142318 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -250,6 +250,7 @@ static inline struct dentry *ovl_do_mkdir(struct ovl_fs= *ofs, =20 ret =3D vfs_mkdir(ovl_upper_mnt_idmap(ofs), dir, dentry, mode); pr_debug("mkdir(%pd2, 0%o) =3D %i\n", dentry, mode, PTR_ERR_OR_ZERO(ret)); + /* Note: dir will have been unlocked on failure */ return ret; } =20 diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 4afa91882075..df99a6fa17ef 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -328,11 +328,11 @@ static struct dentry *ovl_workdir_create(struct ovl_f= s *ofs, } =20 work =3D ovl_do_mkdir(ofs, dir, work, attr.ia_mode); - inode_unlock(dir); err =3D PTR_ERR(work); if (IS_ERR(work)) goto out_err; =20 + done_dentry_lookup_return(work); /* Weird filesystem returning with hashed negative (kernfs)? */ err =3D -EINVAL; if (d_really_is_negative(work)) @@ -623,7 +623,8 @@ static struct dentry *ovl_lookup_or_create(struct ovl_f= s *ofs, child =3D ovl_lookup_upper(ofs, name, parent, len); if (!IS_ERR(child) && !child->d_inode) child =3D ovl_create_real(ofs, parent, child, OVL_CATTR(mode)); - inode_unlock(parent->d_inode); + else + inode_unlock(parent->d_inode); dput(parent); =20 return child; diff --git a/fs/xfs/scrub/orphanage.c b/fs/xfs/scrub/orphanage.c index 9c12cb844231..c95bded4e8a7 100644 --- a/fs/xfs/scrub/orphanage.c +++ b/fs/xfs/scrub/orphanage.c @@ -170,7 +170,7 @@ xrep_orphanage_create( orphanage_dentry, 0750); error =3D PTR_ERR(orphanage_dentry); if (IS_ERR(orphanage_dentry)) - goto out_unlock_root; + goto out_dput_root; } =20 /* Not a directory? Bail out. */ --=20 2.49.0 From nobody Mon Oct 6 13:42:07 2025 Received: from neil.brown.name (neil.brown.name [103.29.64.221]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id B14622877CE; Mon, 21 Jul 2025 08:45:36 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=103.29.64.221 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1753087541; cv=none; b=rvqT9OcVcN2JGprLgt0JCRkq0gCcy3a+sxzHVdEPERrZnxOsZQAYzuJGuIneRjFNP2k76e+Q09sw8sWaQNp9Ji9dMWwbL/sN7/NUO9MxaJPkzVwtIUJxBRkKQvp4J5KZfr6fQZpmm92GEnPvSL8p+OkhXdcrMCfKL3+4G8Kr2Ls= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1753087541; c=relaxed/simple; bh=kBTj7AdWspoKHj3j/tvxPQIpm3IgCWJH09T6ATNv6O4=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=l2Ol/S33yEelfmelqzfyRgN3NSKQjzusNmYZOIb9fpaK9qtOqVO3RuSlQnFhAm5ul1PWqFUqj0iB7uUaRMt5T6ORPCA8yqQpFZiDTVYvjBVWdlVa8xnz3/1tiuDLHOt+de0KWi435rAIiwAOBw61bhmNas8sgahcgANR5RJ0350= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=brown.name; spf=pass smtp.mailfrom=neil.brown.name; arc=none smtp.client-ip=103.29.64.221 Authentication-Results: smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=brown.name Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=neil.brown.name Received: from 196.186.233.220.static.exetel.com.au ([220.233.186.196] helo=home.neil.brown.name) by neil.brown.name with esmtp (Exim 4.95) (envelope-from ) id 1udm9H-002pfv-E4; Mon, 21 Jul 2025 08:45:29 +0000 From: NeilBrown To: Linus Torvalds , Alexander Viro , Christian Brauner , Jan Kara Cc: linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org Subject: [PATCH 4/7] VFS: introduce dentry_lookup() and friends Date: Mon, 21 Jul 2025 18:00:00 +1000 Message-ID: <20250721084412.370258-5-neil@brown.name> X-Mailer: git-send-email 2.49.0 In-Reply-To: <20250721084412.370258-1-neil@brown.name> References: <20250721084412.370258-1-neil@brown.name> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" dentry_lookup() combines locking the directory and performing a lookup prior to a change to the directory. Abstracting this prepares for changing the locking requirements. dentry_lookup_noperm() does the same without needing a mnt_idmap and without checking permissions. This is useful for internal filesystem management (e.g. creating virtual files in response to events) and in other cases similar to lookup_noperm(). dentry_lookup_hashed() also does no permissions checking and assumes that the hash of the name has already been stored in the qstr. This is useful following filename_parentat(). These are intended to be paired with done_dentry_lookup() which provides the inverse of putting the dentry and unlocking. Like lookup_one_qstr_excl(), dentry_lookup() returns -ENOENT if LOOKUP_CREATE was NOT given and the name cannot be found,, and returns -EEXIST if LOOKUP_EXCL WAS given and the name CAN be found. These functions replace all uses of lookup_one_qstr_excl() in namei.c except for those used for rename. Some of the variants should possibly be inlines in a header. Signed-off-by: NeilBrown --- fs/namei.c | 158 ++++++++++++++++++++++++++++++------------ include/linux/namei.h | 8 ++- 2 files changed, 119 insertions(+), 47 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index 950a0d0d54da..f292df61565a 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1714,17 +1714,98 @@ struct dentry *lookup_one_qstr_excl(const struct qs= tr *name, } EXPORT_SYMBOL(lookup_one_qstr_excl); =20 +/** + * dentry_lookup_hashed - lookup and lock a name prior to dir ops + * @last: the name in the given directory + * @base: the directory in which the name is to be found + * @lookup_flags: %LOOKUP_xxx flags + * + * The name is looked up and necessary locks are taken so that + * the name can be created or removed. + * The "necessary locks" are currently the inode node lock on @base. + * The name @last is expected to already have the hash calculated. + * No permission checks are performed. + * Returns: the dentry, suitably locked, or an ERR_PTR(). + */ +struct dentry *dentry_lookup_hashed(struct qstr *last, + struct dentry *base, + unsigned int lookup_flags) +{ + struct dentry *dentry; + + inode_lock_nested(base->d_inode, I_MUTEX_PARENT); + + dentry =3D lookup_one_qstr_excl(last, base, lookup_flags); + if (IS_ERR(dentry)) + inode_unlock(base->d_inode); + return dentry; +} +EXPORT_SYMBOL(dentry_lookup_hashed); + +static int lookup_noperm_common(struct qstr *qname, struct dentry *base); +static int lookup_one_common(struct mnt_idmap *idmap, + struct qstr *qname, struct dentry *base); + +/** + * dentry_lookup_noperm - lookup and lock a name prior to dir ops + * @last: the name in the given directory + * @base: the directory in which the name is to be found + * @lookup_flags: %LOOKUP_xxx flags + * + * The name is looked up and necessary locks are taken so that + * the name can be created or removed. + * The "necessary locks" are currently the inode node lock on @base. + * The name @last is NOT expected to have the hash calculated. + * No permission checks are performed. + * Returns: the dentry, suitably locked, or an ERR_PTR(). + */ +struct dentry *dentry_lookup_noperm(struct qstr *last, + struct dentry *base, + unsigned int lookup_flags) +{ + int err; + + err =3D lookup_noperm_common(last, base); + if (err < 0) + return ERR_PTR(err); + return dentry_lookup_hashed(last, base, lookup_flags); +} +EXPORT_SYMBOL(dentry_lookup_noperm); + +/** + * dentry_lookup - lookup and lock a name prior to dir ops + * @last: the name in the given directory + * @base: the directory in which the name is to be found + * @lookup_flags: %LOOKUP_xxx flags + * + * The name is looked up and necessary locks are taken so that + * the name can be created or removed. + * The "necessary locks" are currently the inode node lock on @base. + * The name @last is NOT expected to already have the hash calculated. + * Permission checks are performed to ensure %MAY_EXEC access to @base. + * Returns: the dentry, suitably locked, or an ERR_PTR(). + */ +struct dentry *dentry_lookup(struct mnt_idmap *idmap, + struct qstr *last, + struct dentry *base, + unsigned int lookup_flags) +{ + int err; + + err =3D lookup_one_common(idmap, last, base); + if (err < 0) + return ERR_PTR(err); + return dentry_lookup_hashed(last, base, lookup_flags); +} +EXPORT_SYMBOL(dentry_lookup); + /** * done_dentry_lookup - finish a lookup used for create/delete * @dentry: the target dentry * - * After locking the directory and lookup or validating a dentry - * an attempt can be made to create (including link) or remove (including - * rmdir) a dentry. After this, done_dentry_lookup() can be used to both - * unlock the parent directory and dput() the dentry. - * - * If the dentry is an error - as can happen after vfs_mkdir() - - * the unlock is skipped as unneeded. + * Reverse the effects of dentry_lookup() or similar. If the + * @dentry is not an error, the lock and the reference to the dentry + * are dropped. * * This interface allows a smooth transition from parent-dir based * locking to dentry based locking. @@ -1733,6 +1814,7 @@ void done_dentry_lookup(struct dentry *dentry) { if (!IS_ERR(dentry)) { inode_unlock(dentry->d_parent->d_inode); + d_lookup_done(dentry); dput(dentry); } } @@ -1742,16 +1824,16 @@ EXPORT_SYMBOL(done_dentry_lookup); * done_dentry_lookup_return - finish a lookup sequence, returning the den= try * @dentry: the target dentry * - * After locking the directory and lookup or validating a dentry - * an attempt can be made to create (including link) or remove (including - * rmdir) a dentry. After this, done_dentry_lookup_return() can be used to - * unlock the parent directory. The dentry is returned for further use. + * Reverse the effects of dentry_lookup() or similar, but keep the dentry. + * If @dentry is not an error, the lock is dropped. * * If the dentry is an error - as can happen after vfs_mkdir() - * the unlock is skipped as unneeded. * * This interface allows a smooth transition from parent-dir based * locking to dentry based locking. + * + * Returns: the dentry. */ struct dentry *done_dentry_lookup_return(struct dentry *dentry) { @@ -2803,12 +2885,9 @@ static struct dentry *__kern_path_locked(int dfd, st= ruct filename *name, struct return ERR_PTR(error); if (unlikely(type !=3D LAST_NORM)) return ERR_PTR(-EINVAL); - inode_lock_nested(parent_path.dentry->d_inode, I_MUTEX_PARENT); - d =3D lookup_one_qstr_excl(&last, parent_path.dentry, 0); - if (IS_ERR(d)) { - inode_unlock(parent_path.dentry->d_inode); + d =3D dentry_lookup_hashed(&last, parent_path.dentry, 0); + if (IS_ERR(d)) return d; - } path->dentry =3D no_free_ptr(parent_path.dentry); path->mnt =3D no_free_ptr(parent_path.mnt); return d; @@ -2827,12 +2906,9 @@ struct dentry *kern_path_locked_negative(const char = *name, struct path *path) return ERR_PTR(error); if (unlikely(type !=3D LAST_NORM)) return ERR_PTR(-EINVAL); - inode_lock_nested(parent_path.dentry->d_inode, I_MUTEX_PARENT); - d =3D lookup_one_qstr_excl(&last, parent_path.dentry, LOOKUP_CREATE); - if (IS_ERR(d)) { - inode_unlock(parent_path.dentry->d_inode); + d =3D dentry_lookup_hashed(&last, parent_path.dentry, LOOKUP_CREATE); + if (IS_ERR(d)) return d; - } path->dentry =3D no_free_ptr(parent_path.dentry); path->mnt =3D no_free_ptr(parent_path.mnt); return d; @@ -4161,7 +4237,6 @@ static struct dentry *filename_create(int dfd, struct= filename *name, unsigned int reval_flag =3D lookup_flags & LOOKUP_REVAL; unsigned int create_flags =3D LOOKUP_CREATE | LOOKUP_EXCL; int type; - int err2; int error; =20 error =3D filename_parentat(dfd, name, reval_flag, path, &last, &type); @@ -4173,35 +4248,30 @@ static struct dentry *filename_create(int dfd, stru= ct filename *name, * (foo/., foo/.., /////) */ if (unlikely(type !=3D LAST_NORM)) - goto out; + goto put; =20 /* don't fail immediately if it's r/o, at least try to report other error= s */ - err2 =3D mnt_want_write(path->mnt); + error =3D mnt_want_write(path->mnt); /* * Do the final lookup. Suppress 'create' if there is a trailing * '/', and a directory wasn't requested. */ if (last.name[last.len] && !want_dir) create_flags &=3D ~LOOKUP_CREATE; - inode_lock_nested(path->dentry->d_inode, I_MUTEX_PARENT); - dentry =3D lookup_one_qstr_excl(&last, path->dentry, - reval_flag | create_flags); + dentry =3D dentry_lookup_hashed(&last, path->dentry, reval_flag | create_= flags); if (IS_ERR(dentry)) - goto unlock; + goto drop; =20 - if (unlikely(err2)) { - error =3D err2; + if (unlikely(error)) goto fail; - } return dentry; fail: - dput(dentry); + done_dentry_lookup(dentry); dentry =3D ERR_PTR(error); -unlock: - inode_unlock(path->dentry->d_inode); - if (!err2) +drop: + if (!error) mnt_drop_write(path->mnt); -out: +put: path_put(path); return dentry; } @@ -4225,7 +4295,7 @@ void done_path_create(struct path *path, struct dentr= y *dentry) } EXPORT_SYMBOL(done_path_create); =20 -inline struct dentry *user_path_create(int dfd, const char __user *pathnam= e, +struct dentry *user_path_create(int dfd, const char __user *pathname, struct path *path, unsigned int lookup_flags) { struct filename *filename =3D getname(pathname); @@ -4551,19 +4621,18 @@ int do_rmdir(int dfd, struct filename *name) if (error) goto exit2; =20 - inode_lock_nested(path.dentry->d_inode, I_MUTEX_PARENT); - dentry =3D lookup_one_qstr_excl(&last, path.dentry, lookup_flags); + dentry =3D dentry_lookup_hashed(&last, path.dentry, lookup_flags); error =3D PTR_ERR(dentry); if (IS_ERR(dentry)) goto exit3; + error =3D security_path_rmdir(&path, dentry); if (error) goto exit4; error =3D vfs_rmdir(mnt_idmap(path.mnt), path.dentry->d_inode, dentry); exit4: - dput(dentry); + done_dentry_lookup(dentry); exit3: - inode_unlock(path.dentry->d_inode); mnt_drop_write(path.mnt); exit2: path_put(&path); @@ -4680,11 +4749,9 @@ int do_unlinkat(int dfd, struct filename *name) if (error) goto exit2; retry_deleg: - inode_lock_nested(path.dentry->d_inode, I_MUTEX_PARENT); - dentry =3D lookup_one_qstr_excl(&last, path.dentry, lookup_flags); + dentry =3D dentry_lookup_hashed(&last, path.dentry, lookup_flags); error =3D PTR_ERR(dentry); if (!IS_ERR(dentry)) { - /* Why not before? Because we want correct error value */ if (last.name[last.len]) goto slashes; @@ -4696,9 +4763,8 @@ int do_unlinkat(int dfd, struct filename *name) error =3D vfs_unlink(mnt_idmap(path.mnt), path.dentry->d_inode, dentry, &delegated_inode); exit3: - dput(dentry); + done_dentry_lookup(dentry); } - inode_unlock(path.dentry->d_inode); if (inode) iput(inode); /* truncate the inode here */ inode =3D NULL; diff --git a/include/linux/namei.h b/include/linux/namei.h index e097f11587c9..6d2cf4af5f16 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -80,7 +80,13 @@ struct dentry *lookup_one_unlocked(struct mnt_idmap *idm= ap, struct dentry *lookup_one_positive_unlocked(struct mnt_idmap *idmap, struct qstr *name, struct dentry *base); - +struct dentry *dentry_lookup(struct mnt_idmap *idmap, + struct qstr *last, struct dentry *base, + unsigned int lookup_flags); +struct dentry *dentry_lookup_noperm(struct qstr *name, struct dentry *base, + unsigned int lookup_flags); +struct dentry *dentry_lookup_hashed(struct qstr *last, struct dentry *base, + unsigned int lookup_flags); void done_dentry_lookup(struct dentry *dentry); struct dentry *done_dentry_lookup_return(struct dentry *dentry); =20 --=20 2.49.0 From nobody Mon Oct 6 13:42:07 2025 Received: from neil.brown.name (neil.brown.name [103.29.64.221]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 012C91798F; Mon, 21 Jul 2025 08:45:37 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=103.29.64.221 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1753087541; cv=none; b=RV0nplzQ5z2+Yjd2MaEe+zyGCnP3qvJsAI24nJ3CQMYtoewY/bq9AktUU4nEe+K3AAOgB7wzyPwQOX34MPfdJzRooRVDblRVSiG6gyy0avjFGkN78PUJ5rarXdo2T5VFIH1thqAd9FDguhKfFNvUeQFPljOBGdqvwT6QaVDmk3s= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1753087541; c=relaxed/simple; bh=tIuRYlykPAGszyzVHK9YDzKE+Xx+7unKoWAz9AwPBnM=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=ZEb++dc3eXEmf0eypI5UdrXYkXqbnwl/lYP+x+jXzMmtbqTWS6OA0N+P5vrGCCSayxH4QDP6VAQum2ATHox1NMxUQA9X9Y6ScsSI3dGTqp9JRHrz4VUREKYm/OlYQJkf4FKo0JfoBF11YtMxfofa/lXaZ3SaNsWMCz3Xe1RLPf0= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=brown.name; spf=pass smtp.mailfrom=neil.brown.name; arc=none smtp.client-ip=103.29.64.221 Authentication-Results: smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=brown.name Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=neil.brown.name Received: from 196.186.233.220.static.exetel.com.au ([220.233.186.196] helo=home.neil.brown.name) by neil.brown.name with esmtp (Exim 4.95) (envelope-from ) id 1udm9H-002pg7-UG; Mon, 21 Jul 2025 08:45:29 +0000 From: NeilBrown To: Linus Torvalds , Alexander Viro , Christian Brauner , Jan Kara Cc: linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org Subject: [PATCH 5/7] VFS: add dentry_lookup_killable() Date: Mon, 21 Jul 2025 18:00:01 +1000 Message-ID: <20250721084412.370258-6-neil@brown.name> X-Mailer: git-send-email 2.49.0 In-Reply-To: <20250721084412.370258-1-neil@brown.name> References: <20250721084412.370258-1-neil@brown.name> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" btrfs/ioctl.c uses a "killable" lock on the directory when creating an destroying subvols. overlayfs also does this. This patch adds dentry_lookup_killable() for these users. Possibly all dentry_lookup should be killable as there is no down-side, but that can come in a later patch. Signed-off-by: NeilBrown --- fs/namei.c | 37 +++++++++++++++++++++++++++++++++++++ include/linux/namei.h | 3 +++ 2 files changed, 40 insertions(+) diff --git a/fs/namei.c b/fs/namei.c index f292df61565a..46657736c7a0 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1799,6 +1799,43 @@ struct dentry *dentry_lookup(struct mnt_idmap *idmap, } EXPORT_SYMBOL(dentry_lookup); =20 +/** + * dentry_lookup_killable - lookup and lock a name prior to dir ops + * @last: the name in the given directory + * @base: the directory in which the name is to be found + * @lookup_flags: %LOOKUP_xxx flags + * + * The name is looked up and necessary locks are taken so that + * the name can be created or removed. + * The "necessary locks" are currently the inode node lock on @base. + * If a fatal signal arrives or is already pending the operation is aborte= d. + * The name @last is NOT expected to already have the hash calculated. + * Permission checks are performed to ensure %MAY_EXEC access to @base. + * Returns: the dentry, suitably locked, or an ERR_PTR(). + */ +struct dentry *dentry_lookup_killable(struct mnt_idmap *idmap, + struct qstr *last, + struct dentry *base, + unsigned int lookup_flags) +{ + struct dentry *dentry; + int err; + + err =3D lookup_one_common(idmap, last, base); + if (err < 0) + return ERR_PTR(err); + + err =3D down_write_killable_nested(&base->d_inode->i_rwsem, I_MUTEX_PAREN= T); + if (err) + return ERR_PTR(err); + + dentry =3D lookup_one_qstr_excl(last, base, lookup_flags); + if (IS_ERR(dentry)) + inode_unlock(base->d_inode); + return dentry; +} +EXPORT_SYMBOL(dentry_lookup_killable); + /** * done_dentry_lookup - finish a lookup used for create/delete * @dentry: the target dentry diff --git a/include/linux/namei.h b/include/linux/namei.h index 6d2cf4af5f16..8eade32623d8 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -83,6 +83,9 @@ struct dentry *lookup_one_positive_unlocked(struct mnt_id= map *idmap, struct dentry *dentry_lookup(struct mnt_idmap *idmap, struct qstr *last, struct dentry *base, unsigned int lookup_flags); +struct dentry *dentry_lookup_killable(struct mnt_idmap *idmap, + struct qstr *last, struct dentry *base, + unsigned int lookup_flags); struct dentry *dentry_lookup_noperm(struct qstr *name, struct dentry *base, unsigned int lookup_flags); struct dentry *dentry_lookup_hashed(struct qstr *last, struct dentry *base, --=20 2.49.0 From nobody Mon Oct 6 13:42:07 2025 Received: from neil.brown.name (neil.brown.name [103.29.64.221]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 01EE721C17D; Mon, 21 Jul 2025 08:45:37 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=103.29.64.221 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1753087541; cv=none; b=CV0YZBmR7rp/BcO4q0gKjlexWoEnOXuEpkDvNAe4H2uNM2sADmxdiVq/GVs2PaEypxtcgdCFJqq13sdo8AvKH95GSPJBLXmXRrgJEh3L8LJqqv7hzg9O6jhp2xk/WbuS2VaSEOJNVoGwQwT9bwbFSq0SzZ5IaiPOkzVybsSCfo8= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1753087541; c=relaxed/simple; bh=twIEAk2VcEqC3Z8nz59FTWIJoql8mNp4EZgcLKmUpXk=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=CpPbAMspN0V/ker7pwZqUQ252j5d9WnfEe4/aI3XNJxc3F8O0M3lq9jrFGTEzrdaVolXcIME37oJs+O/aer4ckXGCjzSR3cXwumJTt2HqRqDFCeBk8zdlTzL8/hH/p5ueu7YfmLTnOPtRbpCoJaj4GyHX5bGSz7YSHzxKOXk1Io= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=brown.name; spf=pass smtp.mailfrom=neil.brown.name; arc=none smtp.client-ip=103.29.64.221 Authentication-Results: smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=brown.name Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=neil.brown.name Received: from 196.186.233.220.static.exetel.com.au ([220.233.186.196] helo=home.neil.brown.name) by neil.brown.name with esmtp (Exim 4.95) (envelope-from ) id 1udm9I-002pgB-Dj; Mon, 21 Jul 2025 08:45:30 +0000 From: NeilBrown To: Linus Torvalds , Alexander Viro , Christian Brauner , Jan Kara Cc: linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org Subject: [PATCH 6/7] VFS: add rename_lookup() Date: Mon, 21 Jul 2025 18:00:02 +1000 Message-ID: <20250721084412.370258-7-neil@brown.name> X-Mailer: git-send-email 2.49.0 In-Reply-To: <20250721084412.370258-1-neil@brown.name> References: <20250721084412.370258-1-neil@brown.name> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" rename_lookup() combines lookup and locking for a rename. Two names - new_last and old_last - are added to struct renamedata so it can be passed to rename_lookup() to have the old and new dentries filled in. rename_lookup_hashed() assumes that the names are already hashed and skips permission checking. This is appropriate for use after filename_parentat(). rename_lookup_noperm() does hash the name but avoids permission checking. This will be used by debugfs. If either old_dentry or new_dentry are not NULL, the corresponding "last" is ignored and the dentry is used as-is. After locks are obtained we check that the parent is still correct. If old_parent was not given, then it is set to the parent of old_dentry which was locked. new_parent must never be NULL. done_rename_lookup() unlocks and drops any dentry references taken. Signed-off-by: NeilBrown --- fs/namei.c | 316 ++++++++++++++++++++++++++++++++++-------- include/linux/fs.h | 4 + include/linux/namei.h | 4 + 3 files changed, 263 insertions(+), 61 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index 46657736c7a0..ae8079916ac6 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3490,6 +3490,233 @@ void unlock_rename(struct dentry *p1, struct dentry= *p2) } EXPORT_SYMBOL(unlock_rename); =20 +/** + * rename_lookup_hashed - lookup and lock names for rename + * @rd: rename data containing relevant details + * @lookup_flags: extra flags to pass to ->lookup (e.g. LOOKUP_REVAL, + * LOOKUP_NO_SYMLINKS etc). + * + * Optionally look up two names and ensure locks are in place for + * rename. + * Normally @rd.old_dentry and @rd.new_dentry are %NULL and the + * old and new directories and last names are given in @rd. In this + * case the names are looked up with appropriate locking and the + * results stored in @rd.old_dentry and @rd.new_dentry. + * + * If either are not NULL, then the corresponding lookup is avoided + * but the required locks are still taken. In this case @rd.old_parent + * may be %NULL, otherwise @rd.old_dentry must have that as its d_parent + * pointer after the locks are obtained. @rd.new_parent must always + * be non-NULL, and must always be the correct parent after locking. + * + * On success a reference is held on @rd.old_dentry, @rd.new_dentry, + * and @rd.old_parent whether they were originally %NULL or not. These + * references are dropped by done_rename_lookup(). @rd.new_parent + * must always be non-NULL and no extra reference is taken. + * + * The passed in qstrs must have the hash calculated, and no permission + * checking is performed. + * + * Returns: zero or an error. + */ +int +rename_lookup_hashed(struct renamedata *rd, int lookup_flags) +{ + struct dentry *p; + struct dentry *d1, *d2; + int target_flags =3D LOOKUP_RENAME_TARGET | LOOKUP_CREATE; + int err; + + if (rd->flags & RENAME_EXCHANGE) + target_flags =3D 0; + if (rd->flags & RENAME_NOREPLACE) + target_flags |=3D LOOKUP_EXCL; + + if (rd->old_dentry) { + /* Already have the dentry - need to be sure to lock the correct parent = */ + p =3D lock_rename_child(rd->old_dentry, rd->new_parent); + if (IS_ERR(p)) + return PTR_ERR(p); + if (d_unhashed(rd->old_dentry) || + (rd->old_parent && rd->old_parent !=3D rd->old_dentry->d_parent)) { + /* dentry was removed, or moved and explicit parent requested */ + unlock_rename(rd->old_dentry->d_parent, rd->new_parent); + return -EINVAL; + } + rd->old_parent =3D dget(rd->old_dentry->d_parent); + d1 =3D dget(rd->old_dentry); + } else { + p =3D lock_rename(rd->old_parent, rd->new_parent); + if (IS_ERR(p)) + return PTR_ERR(p); + dget(rd->old_parent); + + d1 =3D lookup_one_qstr_excl(&rd->old_last, rd->old_parent, + lookup_flags); + if (IS_ERR(d1)) + goto out_unlock_1; + } + if (rd->new_dentry) { + if (d_unhashed(rd->new_dentry) || + rd->new_dentry->d_parent !=3D rd->new_parent) { + /* new_dentry was moved or removed! */ + goto out_unlock_2; + } + d2 =3D dget(rd->new_dentry); + } else { + d2 =3D lookup_one_qstr_excl(&rd->new_last, rd->new_parent, + lookup_flags | target_flags); + if (IS_ERR(d2)) + goto out_unlock_2; + } + + if (d1 =3D=3D p) { + /* source is an ancestor of target */ + err =3D -EINVAL; + goto out_unlock_3; + } + + if (d2 =3D=3D p) { + /* target is an ancestor of source */ + if (rd->flags & RENAME_EXCHANGE) + err =3D -EINVAL; + else + err =3D -ENOTEMPTY; + goto out_unlock_3; + } + + rd->old_dentry =3D d1; + rd->new_dentry =3D d2; + return 0; + +out_unlock_3: + d_lookup_done(d2); + dput(d2); + d2 =3D ERR_PTR(err); +out_unlock_2: + d_lookup_done(d1); + dput(d1); + d1 =3D d2; +out_unlock_1: + unlock_rename(rd->old_parent, rd->new_parent); + dput(rd->old_parent); + return PTR_ERR(d1); +} +EXPORT_SYMBOL(rename_lookup_hashed); + +/** + * rename_lookup_noperm - lookup and lock names for rename + * @rd: rename data containing relevant details + * @lookup_flags: optional LOOKUP_REVAL to pass to ->lookup + * + * Optionally look up two names and ensure locks are in place for + * rename. + * Normally @rd.old_dentry and @rd.new_dentry are %NULL and the + * old and new directories and last names are given in @rd. In this + * case the names are looked up with appropriate locking and the + * results stored in @rd.old_dentry and @rd.new_dentry. + * + * If either are not NULL, then the corresponding lookup is avoided + * but the required locks are still taken. In this case @rd.old_parent + * may be %NULL, otherwise @rd.old_dentry must have that as its d_parent + * pointer after the locks are obtained. @rd.new_parent must always + * be non-NULL, and must always be the correct parent after locking. + * + * On success a reference is held on @rd.old_dentry, @rd.new_dentry, + * and @rd.old_parent whether they were originally %NULL or not. These + * references are dropped by done_rename_lookup(). @rd.new_parent + * must always be non-NULL and no extra reference is taken. + * + * The passed in qstrs need not have the hash calculated, and no + * permission checking is performed. + * + * Returns: zero or an error. + */ +int rename_lookup_noperm(struct renamedata *rd, int lookup_flags) +{ + int err; + + if (!rd->old_dentry) { + err =3D lookup_noperm_common(&rd->old_last, rd->old_parent); + if (err) + return err; + } + if (!rd->new_dentry) { + err =3D lookup_noperm_common(&rd->new_last, rd->new_parent); + if (err) + return err; + } + return rename_lookup_hashed(rd, lookup_flags); +} +EXPORT_SYMBOL(rename_lookup_noperm); + +/** + * rename_lookup - lookup and lock names for rename + * @rd: rename data containing relevant details + * @lookup_flags: optional LOOKUP_REVAL to pass to ->lookup + * + * Optionally look up two names and ensure locks are in place for + * rename. + * Normally @rd.old_dentry and @rd.new_dentry are %NULL and the + * old and new directories and last names are given in @rd. In this + * case the names are looked up with appropriate locking and the + * results stored in @rd.old_dentry and @rd.new_dentry. + * + * If either are not NULL, then the corresponding lookup is avoided + * but the required locks are still taken. In this case @rd.old_parent + * may be %NULL, otherwise @rd.old_dentry must have that as its d_parent + * pointer after the locks are obtained. @rd.new_parent must always + * be non-NULL, and must always be the correct parent after locking. + * + * On success a reference is held on @rd.old_dentry, @rd.new_dentry, + * and @rd.old_parent whether they were originally %NULL or not. These + * references are dropped by done_rename_lookup(). @rd.new_parent + * must always be non-NULL and no extra reference is taken. + * + * The passed in qstrs need not have the hash calculated, and normal + * permission checking for MAY_EXEC is performed. + * + * Returns: zero or an error. + */ +int rename_lookup(struct renamedata *rd, int lookup_flags) +{ + int err; + + if (!rd->old_dentry) { + err =3D lookup_one_common(rd->mnt_idmap, &rd->old_last, + rd->old_parent); + if (err) + return err; + } + if (!rd->new_dentry) { + err =3D lookup_one_common(rd->mnt_idmap, &rd->new_last, + rd->new_parent); + if (err) + return err; + } + return rename_lookup_hashed(rd, lookup_flags); +} +EXPORT_SYMBOL(rename_lookup); + +/** + * done_rename_lookup - unlock dentries after rename + * @rd: the struct renamedata that was passed to rename_lookup() + * + * After a successful rename_lookup() (or similar) call, and after + * any required renaming is performed, done_rename_rename() must be called + * to drop any locks and references that were obtained by the earlier func= tion. + */ +void done_rename_lookup(struct renamedata *rd) +{ + d_lookup_done(rd->old_dentry); + d_lookup_done(rd->new_dentry); + unlock_rename(rd->old_parent, rd->new_parent); + dput(rd->old_parent); + dput(rd->old_dentry); + dput(rd->new_dentry); +} +EXPORT_SYMBOL(done_rename_lookup); + /** * vfs_prepare_mode - prepare the mode to be used for a new inode * @idmap: idmap of the mount the inode was found from @@ -5318,14 +5545,10 @@ int do_renameat2(int olddfd, struct filename *from,= int newdfd, struct filename *to, unsigned int flags) { struct renamedata rd; - struct dentry *old_dentry, *new_dentry; - struct dentry *trap; struct path old_path, new_path; - struct qstr old_last, new_last; int old_type, new_type; struct inode *delegated_inode =3D NULL; - unsigned int lookup_flags =3D 0, target_flags =3D - LOOKUP_RENAME_TARGET | LOOKUP_CREATE; + unsigned int lookup_flags =3D 0; bool should_retry =3D false; int error =3D -EINVAL; =20 @@ -5336,19 +5559,14 @@ int do_renameat2(int olddfd, struct filename *from,= int newdfd, (flags & RENAME_EXCHANGE)) goto put_names; =20 - if (flags & RENAME_EXCHANGE) - target_flags =3D 0; - if (flags & RENAME_NOREPLACE) - target_flags |=3D LOOKUP_EXCL; - retry: error =3D filename_parentat(olddfd, from, lookup_flags, &old_path, - &old_last, &old_type); + &rd.old_last, &old_type); if (error) goto put_names; =20 - error =3D filename_parentat(newdfd, to, lookup_flags, &new_path, &new_las= t, - &new_type); + error =3D filename_parentat(newdfd, to, lookup_flags, &new_path, + &rd.new_last, &new_type); if (error) goto exit1; =20 @@ -5370,66 +5588,42 @@ int do_renameat2(int olddfd, struct filename *from,= int newdfd, goto exit2; =20 retry_deleg: - trap =3D lock_rename(new_path.dentry, old_path.dentry); - if (IS_ERR(trap)) { - error =3D PTR_ERR(trap); + rd.old_parent =3D old_path.dentry; + rd.mnt_idmap =3D mnt_idmap(old_path.mnt); + rd.old_dentry =3D NULL; + rd.new_parent =3D new_path.dentry; + rd.new_dentry =3D NULL; + rd.delegated_inode =3D &delegated_inode; + rd.flags =3D flags; + + error =3D rename_lookup_hashed(&rd, lookup_flags); + if (error) goto exit_lock_rename; - } =20 - old_dentry =3D lookup_one_qstr_excl(&old_last, old_path.dentry, - lookup_flags); - error =3D PTR_ERR(old_dentry); - if (IS_ERR(old_dentry)) - goto exit3; - new_dentry =3D lookup_one_qstr_excl(&new_last, new_path.dentry, - lookup_flags | target_flags); - error =3D PTR_ERR(new_dentry); - if (IS_ERR(new_dentry)) - goto exit4; if (flags & RENAME_EXCHANGE) { - if (!d_is_dir(new_dentry)) { + if (!d_is_dir(rd.new_dentry)) { error =3D -ENOTDIR; - if (new_last.name[new_last.len]) - goto exit5; + if (rd.new_last.name[rd.new_last.len]) + goto exit_unlock; } } /* unless the source is a directory trailing slashes give -ENOTDIR */ - if (!d_is_dir(old_dentry)) { + if (!d_is_dir(rd.old_dentry)) { error =3D -ENOTDIR; - if (old_last.name[old_last.len]) - goto exit5; - if (!(flags & RENAME_EXCHANGE) && new_last.name[new_last.len]) - goto exit5; - } - /* source should not be ancestor of target */ - error =3D -EINVAL; - if (old_dentry =3D=3D trap) - goto exit5; - /* target should not be an ancestor of source */ - if (!(flags & RENAME_EXCHANGE)) - error =3D -ENOTEMPTY; - if (new_dentry =3D=3D trap) - goto exit5; + if (rd.old_last.name[rd.old_last.len]) + goto exit_unlock; + if (!(flags & RENAME_EXCHANGE) && rd.new_last.name[rd.new_last.len]) + goto exit_unlock; + } =20 - error =3D security_path_rename(&old_path, old_dentry, - &new_path, new_dentry, flags); + error =3D security_path_rename(&old_path, rd.old_dentry, + &new_path, rd.new_dentry, flags); if (error) - goto exit5; + goto exit_unlock; =20 - rd.old_parent =3D old_path.dentry; - rd.old_dentry =3D old_dentry; - rd.mnt_idmap =3D mnt_idmap(old_path.mnt); - rd.new_parent =3D new_path.dentry; - rd.new_dentry =3D new_dentry; - rd.delegated_inode =3D &delegated_inode; - rd.flags =3D flags; error =3D vfs_rename(&rd); -exit5: - dput(new_dentry); -exit4: - dput(old_dentry); -exit3: - unlock_rename(new_path.dentry, old_path.dentry); +exit_unlock: + done_rename_lookup(&rd); exit_lock_rename: if (delegated_inode) { error =3D break_deleg_wait(&delegated_inode); diff --git a/include/linux/fs.h b/include/linux/fs.h index d3e27da8a6aa..1ae7328749a0 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2008,8 +2008,10 @@ int vfs_unlink(struct mnt_idmap *, struct inode *, s= truct dentry *, * @mnt_idmap: idmap of the mount in which the rename is happening. * @old_parent: parent of source * @old_dentry: source + * @old_last: name for old_dentry in old_dir, if old_dentry not g= iven * @new_parent: parent of destination * @new_dentry: destination + * @new_last: name for new_dentry in new_dir, if new_dentry not g= iven * @delegated_inode: returns an inode needing a delegation break * @flags: rename flags */ @@ -2017,8 +2019,10 @@ struct renamedata { struct mnt_idmap *mnt_idmap; struct dentry *old_parent; struct dentry *old_dentry; + struct qstr old_last; struct dentry *new_parent; struct dentry *new_dentry; + struct qstr new_last; struct inode **delegated_inode; unsigned int flags; } __randomize_layout; diff --git a/include/linux/namei.h b/include/linux/namei.h index 8eade32623d8..c86d9683563c 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -100,6 +100,10 @@ extern int follow_up(struct path *); extern struct dentry *lock_rename(struct dentry *, struct dentry *); extern struct dentry *lock_rename_child(struct dentry *, struct dentry *); extern void unlock_rename(struct dentry *, struct dentry *); +int rename_lookup(struct renamedata *rd, int lookup_flags); +int rename_lookup_noperm(struct renamedata *rd, int lookup_flags); +int rename_lookup_hashed(struct renamedata *rd, int lookup_flags); +void done_rename_lookup(struct renamedata *rd); =20 /** * mode_strip_umask - handle vfs umask stripping --=20 2.49.0 From nobody Mon Oct 6 13:42:07 2025 Received: from neil.brown.name (neil.brown.name [103.29.64.221]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 01FA8288C0D; Mon, 21 Jul 2025 08:45:38 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=103.29.64.221 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1753087541; cv=none; b=T+S5huWXdkjulJKhSbfIT7tnh0bsthK9elpUT4QBoPUQqjv9axikRj9X/5pEfXh/JxIt7sGbLP4bB4Rnh6Cc6FNBsiJ/rb2/Yh0LaYwGDTYrNXkFSJk7oqQGvuQQ3VRWP/Sg4TnoeGjw5In+B68y/zLJtZJ4JGgfdhyjTNdaihA= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1753087541; c=relaxed/simple; bh=5G8ua6KcjgR3pcmj+D6mNI13dMJbQyQEVfel7UfMrRM=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=Cw9Nkz5Rs7Rwkm3YElJxxS12UriTQ/Xk8ViLiu048+Yt5n4vQX+rRSA+Uy3zkqPtwwXWJYh8ChLRjq3hXhDzEAJMeIWAK7UezWOGu4qauXeTiMxm6CZ++WoDzxgIqM2Hgh7/t4oklGYnXxjOfvhLkXmCGr1DJ3zBUH4h77+IPWw= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=brown.name; spf=pass smtp.mailfrom=neil.brown.name; arc=none smtp.client-ip=103.29.64.221 Authentication-Results: smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=brown.name Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=neil.brown.name Received: from 196.186.233.220.static.exetel.com.au ([220.233.186.196] helo=home.neil.brown.name) by neil.brown.name with esmtp (Exim 4.95) (envelope-from ) id 1udm9I-002pgF-TG; Mon, 21 Jul 2025 08:45:30 +0000 From: NeilBrown To: Linus Torvalds , Alexander Viro , Christian Brauner , Jan Kara Cc: linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org Subject: [PATCH 7/7] VFS: introduce dentry_lock_in() Date: Mon, 21 Jul 2025 18:00:03 +1000 Message-ID: <20250721084412.370258-8-neil@brown.name> X-Mailer: git-send-email 2.49.0 In-Reply-To: <20250721084412.370258-1-neil@brown.name> References: <20250721084412.370258-1-neil@brown.name> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" A few callers operate on a dentry which they already have - unlike the normal case where a lookup proceeds an operation. For these callers dentry_lock_in() is provided where other callers would use dentry_lookup(). The call will fail if, after the lock was gained, the child is no longer a child of the given parent. When the operation completes done_dentry_lookup() must be called. An extra reference is taken when the dentry_lock_in() call succeeds and will be dropped by done_dentry_lookup(). This will be used in smb/server, ecryptfs, and overlayfs, each of which have their own lock_parent() or parent_lock() or similar; and a few other places which lock the parent but don't check if the parent is still correct (often because rename isn't supported so parent cannot be incorrect). Signed-off-by: NeilBrown --- fs/namei.c | 26 ++++++++++++++++++++++++++ include/linux/namei.h | 1 + 2 files changed, 27 insertions(+) diff --git a/fs/namei.c b/fs/namei.c index ae8079916ac6..ed656a1e458c 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1836,6 +1836,32 @@ struct dentry *dentry_lookup_killable(struct mnt_idm= ap *idmap, } EXPORT_SYMBOL(dentry_lookup_killable); =20 +/** + * dentry_lock_in: lock a dentry in given parent prior to dir ops + * @child: the dentry to lock + * @parent: the dentry of the assumed parent + * + * The child is locked - currently by taking i_rwsem on the parent - to + * prepare for create/remove operations. If the given parent is no longer + * the parent of the dentry after the lock is gained, the lock is released + * and the call fails (returns %false). + * + * A reference is taken to the child on success. The lock and reference + * must both be dropped by done_dentry_lookup() after the operation comple= tes. + */ +bool dentry_lock_in(struct dentry *child, struct dentry *parent) +{ + inode_lock_nested(d_inode(parent), I_MUTEX_PARENT); + if (child->d_parent =3D=3D parent) { + /* get the child to balance with done_dentry_lookup() which puts it. */ + dget(child); + return true; + } + inode_unlock(d_inode(parent)); + return false; +} +EXPORT_SYMBOL(dentry_lock_in); + /** * done_dentry_lookup - finish a lookup used for create/delete * @dentry: the target dentry diff --git a/include/linux/namei.h b/include/linux/namei.h index c86d9683563c..61ab251237e4 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -104,6 +104,7 @@ int rename_lookup(struct renamedata *rd, int lookup_fla= gs); int rename_lookup_noperm(struct renamedata *rd, int lookup_flags); int rename_lookup_hashed(struct renamedata *rd, int lookup_flags); void done_rename_lookup(struct renamedata *rd); +bool dentry_lock_in(struct dentry *child, struct dentry *parent); =20 /** * mode_strip_umask - handle vfs umask stripping --=20 2.49.0