[PATCH 32/53] ext4: move dcache modifying code out of __ext4_link()

NeilBrown posted 53 patches 3 weeks, 4 days ago
[PATCH 32/53] ext4: move dcache modifying code out of __ext4_link()
Posted by NeilBrown 3 weeks, 4 days ago
From: NeilBrown <neil@brown.name>

__ext4_link() is separate from ext4_link() so that it can be used for
replaying a "fast_commit" which records a link operation.
Replaying the fast_commit does not require any interaction with the
dcache - it is purely ext4-local - but it uses a dentry to simplify code
reuse.

An interface it uses - d_alloc() - is not generally useful and will soon
be removed.  This patch prepares ext4 for that removal.  Specifically it
removes all dcache-modification code from __ext4_link().  This will mean
that __ext4_link() treats the dentry as read-only so fast_commit reply
can simply provide an on-stack dentry.

Various "const" markers are sprinkled around to confirm that the dentry
is treated read-only.

This patch only rearranges code and slightly re-orders it, so those
changes can be reviewed separately.  The next patch will remove the use
of d_alloc().

Signed-off-by: NeilBrown <neil@brown.name>
---
 fs/dcache.c                 |  2 +-
 fs/ext4/ext4.h              |  4 ++--
 fs/ext4/fast_commit.c       | 14 +++++++++++---
 fs/ext4/namei.c             | 23 +++++++++++++----------
 include/linux/dcache.h      |  2 +-
 include/trace/events/ext4.h |  4 ++--
 6 files changed, 30 insertions(+), 19 deletions(-)

diff --git a/fs/dcache.c b/fs/dcache.c
index a1219b446b74..c48337d95f9a 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -358,7 +358,7 @@ static inline int dname_external(const struct dentry *dentry)
 	return dentry->d_name.name != dentry->d_shortname.string;
 }
 
-void take_dentry_name_snapshot(struct name_snapshot *name, struct dentry *dentry)
+void take_dentry_name_snapshot(struct name_snapshot *name, const struct dentry *dentry)
 {
 	unsigned seq;
 	const unsigned char *s;
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 293f698b7042..1794407652ff 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2972,7 +2972,7 @@ void ext4_fc_track_range(handle_t *handle, struct inode *inode, ext4_lblk_t star
 void __ext4_fc_track_unlink(handle_t *handle, struct inode *inode,
 	struct dentry *dentry);
 void __ext4_fc_track_link(handle_t *handle, struct inode *inode,
-	struct dentry *dentry);
+	const struct dentry *dentry);
 void ext4_fc_track_unlink(handle_t *handle, struct dentry *dentry);
 void ext4_fc_track_link(handle_t *handle, struct dentry *dentry);
 void __ext4_fc_track_create(handle_t *handle, struct inode *inode,
@@ -3719,7 +3719,7 @@ extern int ext4_handle_dirty_dirblock(handle_t *handle, struct inode *inode,
 extern int __ext4_unlink(struct inode *dir, const struct qstr *d_name,
 			 struct inode *inode, struct dentry *dentry);
 extern int __ext4_link(struct inode *dir, struct inode *inode,
-		       struct dentry *dentry);
+		       const struct dentry *dentry);
 
 #define S_SHIFT 12
 static const unsigned char ext4_type_by_mode[(S_IFMT >> S_SHIFT) + 1] = {
diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c
index f575751f1cae..2a5daf1d9667 100644
--- a/fs/ext4/fast_commit.c
+++ b/fs/ext4/fast_commit.c
@@ -388,7 +388,7 @@ static int ext4_fc_track_template(
 }
 
 struct __track_dentry_update_args {
-	struct dentry *dentry;
+	const struct dentry *dentry;
 	int op;
 };
 
@@ -400,7 +400,7 @@ static int __track_dentry_update(handle_t *handle, struct inode *inode,
 	struct ext4_inode_info *ei = EXT4_I(inode);
 	struct __track_dentry_update_args *dentry_update =
 		(struct __track_dentry_update_args *)arg;
-	struct dentry *dentry = dentry_update->dentry;
+	const struct dentry *dentry = dentry_update->dentry;
 	struct inode *dir = dentry->d_parent->d_inode;
 	struct super_block *sb = inode->i_sb;
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -483,7 +483,7 @@ void ext4_fc_track_unlink(handle_t *handle, struct dentry *dentry)
 }
 
 void __ext4_fc_track_link(handle_t *handle,
-	struct inode *inode, struct dentry *dentry)
+	struct inode *inode, const struct dentry *dentry)
 {
 	struct __track_dentry_update_args args;
 	int ret;
@@ -1471,7 +1471,15 @@ static int ext4_fc_replay_link_internal(struct super_block *sb,
 		goto out;
 	}
 
+	ihold(inode);
+	inc_nlink(inode);
 	ret = __ext4_link(dir, inode, dentry_inode);
+	if (ret) {
+		drop_nlink(inode);
+		iput(inode);
+	} else {
+		d_instantiate(dentry_inode, inode);
+	}
 	/*
 	 * It's possible that link already existed since data blocks
 	 * for the dir in question got persisted before we crashed OR
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index c4b5e252af0e..80e1051cab44 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -2353,7 +2353,7 @@ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname,
  * may not sleep between calling this and putting something into
  * the entry, as someone else might have used it while you slept.
  */
-static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
+static int ext4_add_entry(handle_t *handle, const struct dentry *dentry,
 			  struct inode *inode)
 {
 	struct inode *dir = d_inode(dentry->d_parent);
@@ -3445,7 +3445,7 @@ static int ext4_symlink(struct mnt_idmap *idmap, struct inode *dir,
 	return err;
 }
 
-int __ext4_link(struct inode *dir, struct inode *inode, struct dentry *dentry)
+int __ext4_link(struct inode *dir, struct inode *inode, const struct dentry *dentry)
 {
 	handle_t *handle;
 	int err, retries = 0;
@@ -3460,8 +3460,6 @@ int __ext4_link(struct inode *dir, struct inode *inode, struct dentry *dentry)
 		ext4_handle_sync(handle);
 
 	inode_set_ctime_current(inode);
-	ext4_inc_count(inode);
-	ihold(inode);
 
 	err = ext4_add_entry(handle, dentry, inode);
 	if (!err) {
@@ -3471,11 +3469,7 @@ int __ext4_link(struct inode *dir, struct inode *inode, struct dentry *dentry)
 		 */
 		if (inode->i_nlink == 1)
 			ext4_orphan_del(handle, inode);
-		d_instantiate(dentry, inode);
-		ext4_fc_track_link(handle, dentry);
-	} else {
-		drop_nlink(inode);
-		iput(inode);
+		__ext4_fc_track_link(handle, inode, dentry);
 	}
 	ext4_journal_stop(handle);
 	if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
@@ -3504,7 +3498,16 @@ static int ext4_link(struct dentry *old_dentry,
 	err = dquot_initialize(dir);
 	if (err)
 		return err;
-	return __ext4_link(dir, inode, dentry);
+	ihold(inode);
+	ext4_inc_count(inode);
+	err = __ext4_link(dir, inode, dentry);
+	if (err) {
+		drop_nlink(inode);
+		iput(inode);
+	} else {
+		d_instantiate(dentry, inode);
+	}
+	return err;
 }
 
 /*
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index a97eb151d9db..3b12577ddfbb 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -600,7 +600,7 @@ struct name_snapshot {
 	struct qstr name;
 	union shortname_store inline_name;
 };
-void take_dentry_name_snapshot(struct name_snapshot *, struct dentry *);
+void take_dentry_name_snapshot(struct name_snapshot *, const struct dentry *);
 void release_dentry_name_snapshot(struct name_snapshot *);
 
 static inline struct dentry *d_first_child(const struct dentry *dentry)
diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
index a3e8fe414df8..efcf1018c208 100644
--- a/include/trace/events/ext4.h
+++ b/include/trace/events/ext4.h
@@ -2870,7 +2870,7 @@ TRACE_EVENT(ext4_fc_stats,
 DECLARE_EVENT_CLASS(ext4_fc_track_dentry,
 
 	TP_PROTO(handle_t *handle, struct inode *inode,
-		 struct dentry *dentry, int ret),
+		 const struct dentry *dentry, int ret),
 
 	TP_ARGS(handle, inode, dentry, ret),
 
@@ -2902,7 +2902,7 @@ DECLARE_EVENT_CLASS(ext4_fc_track_dentry,
 #define DEFINE_EVENT_CLASS_DENTRY(__type)				\
 DEFINE_EVENT(ext4_fc_track_dentry, ext4_fc_track_##__type,		\
 	TP_PROTO(handle_t *handle, struct inode *inode,			\
-		 struct dentry *dentry, int ret),			\
+		 const struct dentry *dentry, int ret),			\
 	TP_ARGS(handle, inode, dentry, ret)				\
 )
 
-- 
2.50.0.107.gf914562f5916.dirty
Re: [PATCH 32/53] ext4: move dcache modifying code out of __ext4_link()
Posted by Jan Kara 3 weeks ago
On Fri 13-03-26 08:12:19, NeilBrown wrote:
...
> diff --git a/fs/dcache.c b/fs/dcache.c
> index a1219b446b74..c48337d95f9a 100644
> --- a/fs/dcache.c
> +++ b/fs/dcache.c
> @@ -358,7 +358,7 @@ static inline int dname_external(const struct dentry *dentry)
>  	return dentry->d_name.name != dentry->d_shortname.string;
>  }
>  
> -void take_dentry_name_snapshot(struct name_snapshot *name, struct dentry *dentry)
> +void take_dentry_name_snapshot(struct name_snapshot *name, const struct dentry *dentry)
>  {
>  	unsigned seq;
>  	const unsigned char *s;

The constification of take_dentry_name_snapshot() should probably be a
separate patch? Also I'd note that this constification (and the
constification of __ext4_fc_track_link()) isn't really needed here because
ext4_fc_track_link() will immediately bail through ext4_fc_disabled() when
fast commit replay is happening so __ext4_fc_track_link() never gets called
in that case - more about that below.

> @@ -1471,7 +1471,15 @@ static int ext4_fc_replay_link_internal(struct super_block *sb,
>  		goto out;
>  	}
>  
> +	ihold(inode);
> +	inc_nlink(inode);
>  	ret = __ext4_link(dir, inode, dentry_inode);
> +	if (ret) {
> +		drop_nlink(inode);
> +		iput(inode);
> +	} else {
> +		d_instantiate(dentry_inode, inode);
> +	}
>  	/*
>  	 * It's possible that link already existed since data blocks
>  	 * for the dir in question got persisted before we crashed OR
...
> @@ -3460,8 +3460,6 @@ int __ext4_link(struct inode *dir, struct inode *inode, struct dentry *dentry)
>  		ext4_handle_sync(handle);
>  
>  	inode_set_ctime_current(inode);
> -	ext4_inc_count(inode);
> -	ihold(inode);
>  
>  	err = ext4_add_entry(handle, dentry, inode);
>  	if (!err) {
> @@ -3471,11 +3469,7 @@ int __ext4_link(struct inode *dir, struct inode *inode, struct dentry *dentry)
>  		 */
>  		if (inode->i_nlink == 1)
>  			ext4_orphan_del(handle, inode);
> -		d_instantiate(dentry, inode);
> -		ext4_fc_track_link(handle, dentry);
> -	} else {
> -		drop_nlink(inode);
> -		iput(inode);
> +		__ext4_fc_track_link(handle, inode, dentry);

This looks wrong. If fastcommit replay is running, we must skip calling
__ext4_fc_track_link(). Similarly if the filesystem is currently
inelligible for fastcommit (due to some complex unsupported operations
running in parallel). Why did you change ext4_fc_track_link() to
__ext4_fc_track_link()?

> @@ -3504,7 +3498,16 @@ static int ext4_link(struct dentry *old_dentry,
>  	err = dquot_initialize(dir);
>  	if (err)
>  		return err;
> -	return __ext4_link(dir, inode, dentry);
> +	ihold(inode);
> +	ext4_inc_count(inode);

I'd put inc_nlink() here instead. We are guaranteed to have a regular file
anyway and it matches what we do in ext4_fc_replay_link_internal().
Alternatively we could consistently use ext4_inc_count() &
ext4_dec_count() in these functions.

> +	err = __ext4_link(dir, inode, dentry);
> +	if (err) {
> +		drop_nlink(inode);
> +		iput(inode);
> +	} else {
> +		d_instantiate(dentry, inode);
> +	}
> +	return err;
>  }

								Honza
-- 
Jan Kara <jack@suse.com>
SUSE Labs, CR
Re: [PATCH 32/53f] ext4: move dcache modifying code out of __ext4_link()
Posted by NeilBrown 2 weeks, 6 days ago
(cc trimmed...)

On Tue, 17 Mar 2026, Jan Kara wrote:
> On Fri 13-03-26 08:12:19, NeilBrown wrote:
> ...
> > diff --git a/fs/dcache.c b/fs/dcache.c
> > index a1219b446b74..c48337d95f9a 100644
> > --- a/fs/dcache.c
> > +++ b/fs/dcache.c
> > @@ -358,7 +358,7 @@ static inline int dname_external(const struct dentry *dentry)
> >  	return dentry->d_name.name != dentry->d_shortname.string;
> >  }
> >  
> > -void take_dentry_name_snapshot(struct name_snapshot *name, struct dentry *dentry)
> > +void take_dentry_name_snapshot(struct name_snapshot *name, const struct dentry *dentry)
> >  {
> >  	unsigned seq;
> >  	const unsigned char *s;
> 
> The constification of take_dentry_name_snapshot() should probably be a
> separate patch? Also I'd note that this constification (and the
> constification of __ext4_fc_track_link()) isn't really needed here because
> ext4_fc_track_link() will immediately bail through ext4_fc_disabled() when
> fast commit replay is happening so __ext4_fc_track_link() never gets called
> in that case - more about that below.

I thought I might have overdone the constantification, but I didn't want
to under-do it, at least for my compile tests.


> 
> > @@ -1471,7 +1471,15 @@ static int ext4_fc_replay_link_internal(struct super_block *sb,
> >  		goto out;
> >  	}
> >  
> > +	ihold(inode);
> > +	inc_nlink(inode);
> >  	ret = __ext4_link(dir, inode, dentry_inode);
> > +	if (ret) {
> > +		drop_nlink(inode);
> > +		iput(inode);
> > +	} else {
> > +		d_instantiate(dentry_inode, inode);
> > +	}
> >  	/*
> >  	 * It's possible that link already existed since data blocks
> >  	 * for the dir in question got persisted before we crashed OR
> ...
> > @@ -3460,8 +3460,6 @@ int __ext4_link(struct inode *dir, struct inode *inode, struct dentry *dentry)
> >  		ext4_handle_sync(handle);
> >  
> >  	inode_set_ctime_current(inode);
> > -	ext4_inc_count(inode);
> > -	ihold(inode);
> >  
> >  	err = ext4_add_entry(handle, dentry, inode);
> >  	if (!err) {
> > @@ -3471,11 +3469,7 @@ int __ext4_link(struct inode *dir, struct inode *inode, struct dentry *dentry)
> >  		 */
> >  		if (inode->i_nlink == 1)
> >  			ext4_orphan_del(handle, inode);
> > -		d_instantiate(dentry, inode);
> > -		ext4_fc_track_link(handle, dentry);
> > -	} else {
> > -		drop_nlink(inode);
> > -		iput(inode);
> > +		__ext4_fc_track_link(handle, inode, dentry);
> 
> This looks wrong. If fastcommit replay is running, we must skip calling
> __ext4_fc_track_link(). Similarly if the filesystem is currently
> inelligible for fastcommit (due to some complex unsupported operations
> running in parallel). Why did you change ext4_fc_track_link() to
> __ext4_fc_track_link()?

I changed to __ext4_fc_track_link() because I needed something that
accepted the inode separately from the dentry.  As you point out, that
means we lose some important code which makes the decision misguided.

I'm wondering about taking a different approach - not using a dentry at
all and not constifying anything.
I could split __ext4_add_entry() out of ext4_add_entry() and instead of
passing the dentry-to-add I could pass the dir inode qstr name, and
d_flags.

Then ext4_link could be passed the same plus a "do fast commit" flag.

The result would be more verbose, but also hopefully more clear.

> 
> > @@ -3504,7 +3498,16 @@ static int ext4_link(struct dentry *old_dentry,
> >  	err = dquot_initialize(dir);
> >  	if (err)
> >  		return err;
> > -	return __ext4_link(dir, inode, dentry);
> > +	ihold(inode);
> > +	ext4_inc_count(inode);
> 
> I'd put inc_nlink() here instead. We are guaranteed to have a regular file
> anyway and it matches what we do in ext4_fc_replay_link_internal().
> Alternatively we could consistently use ext4_inc_count() &
> ext4_dec_count() in these functions.

Current __ext4_link() has ext4_inc_count().
But it is only usable in namei.c.  So I didn't use it in
ext4_fc_replay_link_internal() as that is in a different file.

I'll revise and resend these patches just to the ext4 team.

Thanks,
NeilBrown


> 
> > +	err = __ext4_link(dir, inode, dentry);
> > +	if (err) {
> > +		drop_nlink(inode);
> > +		iput(inode);
> > +	} else {
> > +		d_instantiate(dentry, inode);
> > +	}
> > +	return err;
> >  }
> 
> 								Honza
> -- 
> Jan Kara <jack@suse.com>
> SUSE Labs, CR
> 
Re: [PATCH 32/53f] ext4: move dcache modifying code out of __ext4_link()
Posted by Jan Kara 2 weeks, 5 days ago
On Wed 18-03-26 07:27:37, NeilBrown wrote:
> > > @@ -3460,8 +3460,6 @@ int __ext4_link(struct inode *dir, struct inode *inode, struct dentry *dentry)
> > >  		ext4_handle_sync(handle);
> > >  
> > >  	inode_set_ctime_current(inode);
> > > -	ext4_inc_count(inode);
> > > -	ihold(inode);
> > >  
> > >  	err = ext4_add_entry(handle, dentry, inode);
> > >  	if (!err) {
> > > @@ -3471,11 +3469,7 @@ int __ext4_link(struct inode *dir, struct inode *inode, struct dentry *dentry)
> > >  		 */
> > >  		if (inode->i_nlink == 1)
> > >  			ext4_orphan_del(handle, inode);
> > > -		d_instantiate(dentry, inode);
> > > -		ext4_fc_track_link(handle, dentry);
> > > -	} else {
> > > -		drop_nlink(inode);
> > > -		iput(inode);
> > > +		__ext4_fc_track_link(handle, inode, dentry);
> > 
> > This looks wrong. If fastcommit replay is running, we must skip calling
> > __ext4_fc_track_link(). Similarly if the filesystem is currently
> > inelligible for fastcommit (due to some complex unsupported operations
> > running in parallel). Why did you change ext4_fc_track_link() to
> > __ext4_fc_track_link()?
> 
> I changed to __ext4_fc_track_link() because I needed something that
> accepted the inode separately from the dentry.  As you point out, that
> means we lose some important code which makes the decision misguided.
> 
> I'm wondering about taking a different approach - not using a dentry at
> all and not constifying anything.
> I could split __ext4_add_entry() out of ext4_add_entry() and instead of
> passing the dentry-to-add I could pass the dir inode qstr name, and
> d_flags.
> 
> Then ext4_link could be passed the same plus a "do fast commit" flag.
> 
> The result would be more verbose, but also hopefully more clear.

Yeah, I was considering that option as well when looking at this. Let's see
how the code will look like.

								Honza

-- 
Jan Kara <jack@suse.com>
SUSE Labs, CR