Commit-time fast commit snapshots run under jbd2_journal_lock_updates(),
so it is useful to quantify the time spent with updates locked and to
understand why snapshotting can fail.
Add a new tracepoint, ext4_fc_lock_updates, reporting the time spent in
the updates-locked window along with the number of snapshotted inodes
and ranges. Record the first snapshot failure reason in a stable snap_err
field for tooling.
Signed-off-by: Li Chen <me@linux.beauty>
---
fs/ext4/fast_commit.c | 86 ++++++++++++++++++++++++++++++-------
include/trace/events/ext4.h | 33 ++++++++++++++
2 files changed, 104 insertions(+), 15 deletions(-)
diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c
index d1eefee60912..d266eb2a4219 100644
--- a/fs/ext4/fast_commit.c
+++ b/fs/ext4/fast_commit.c
@@ -193,6 +193,27 @@ static struct kmem_cache *ext4_fc_range_cachep;
#define EXT4_FC_SNAPSHOT_MAX_INODES 1024
#define EXT4_FC_SNAPSHOT_MAX_RANGES 2048
+/*
+ * Snapshot failure reasons for ext4_fc_lock_updates tracepoint.
+ * Keep these stable for tooling.
+ */
+enum ext4_fc_snap_err {
+ EXT4_FC_SNAP_ERR_NONE = 0,
+ EXT4_FC_SNAP_ERR_ES_MISS,
+ EXT4_FC_SNAP_ERR_ES_DELAYED,
+ EXT4_FC_SNAP_ERR_ES_OTHER,
+ EXT4_FC_SNAP_ERR_INODES_CAP,
+ EXT4_FC_SNAP_ERR_RANGES_CAP,
+ EXT4_FC_SNAP_ERR_NOMEM,
+ EXT4_FC_SNAP_ERR_INODE_LOC,
+};
+
+static inline void ext4_fc_set_snap_err(int *snap_err, int err)
+{
+ if (snap_err && *snap_err == EXT4_FC_SNAP_ERR_NONE)
+ *snap_err = err;
+}
+
static void ext4_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
{
BUFFER_TRACE(bh, "");
@@ -983,11 +1004,12 @@ static void ext4_fc_free_inode_snap(struct inode *inode)
static int ext4_fc_snapshot_inode_data(struct inode *inode,
struct list_head *ranges,
unsigned int nr_ranges_total,
- unsigned int *nr_rangesp)
+ unsigned int *nr_rangesp,
+ int *snap_err)
{
struct ext4_inode_info *ei = EXT4_I(inode);
- unsigned int nr_ranges = 0;
ext4_lblk_t start_lblk, end_lblk, cur_lblk;
+ unsigned int nr_ranges = 0;
spin_lock(&ei->i_fc_lock);
if (ei->i_fc_lblk_len == 0) {
@@ -1010,11 +1032,16 @@ static int ext4_fc_snapshot_inode_data(struct inode *inode,
struct ext4_fc_range *range;
ext4_lblk_t len;
- if (!ext4_es_lookup_extent(inode, cur_lblk, NULL, &es, NULL))
+ if (!ext4_es_lookup_extent(inode, cur_lblk, NULL, &es, NULL)) {
+ ext4_fc_set_snap_err(snap_err, EXT4_FC_SNAP_ERR_ES_MISS);
return -EAGAIN;
+ }
- if (ext4_es_is_delayed(&es))
+ if (ext4_es_is_delayed(&es)) {
+ ext4_fc_set_snap_err(snap_err,
+ EXT4_FC_SNAP_ERR_ES_DELAYED);
return -EAGAIN;
+ }
len = es.es_len - (cur_lblk - es.es_lblk);
if (len > end_lblk - cur_lblk + 1)
@@ -1024,12 +1051,17 @@ static int ext4_fc_snapshot_inode_data(struct inode *inode,
continue;
}
- if (nr_ranges_total + nr_ranges >= EXT4_FC_SNAPSHOT_MAX_RANGES)
+ if (nr_ranges_total + nr_ranges >= EXT4_FC_SNAPSHOT_MAX_RANGES) {
+ ext4_fc_set_snap_err(snap_err,
+ EXT4_FC_SNAP_ERR_RANGES_CAP);
return -E2BIG;
+ }
range = kmem_cache_alloc(ext4_fc_range_cachep, GFP_NOFS);
- if (!range)
+ if (!range) {
+ ext4_fc_set_snap_err(snap_err, EXT4_FC_SNAP_ERR_NOMEM);
return -ENOMEM;
+ }
nr_ranges++;
range->lblk = cur_lblk;
@@ -1054,6 +1086,7 @@ static int ext4_fc_snapshot_inode_data(struct inode *inode,
range->len = max;
} else {
kmem_cache_free(ext4_fc_range_cachep, range);
+ ext4_fc_set_snap_err(snap_err, EXT4_FC_SNAP_ERR_ES_OTHER);
return -EAGAIN;
}
@@ -1070,7 +1103,7 @@ static int ext4_fc_snapshot_inode_data(struct inode *inode,
static int ext4_fc_snapshot_inode(struct inode *inode,
unsigned int nr_ranges_total,
- unsigned int *nr_rangesp)
+ unsigned int *nr_rangesp, int *snap_err)
{
struct ext4_inode_info *ei = EXT4_I(inode);
struct ext4_fc_inode_snap *snap;
@@ -1082,8 +1115,10 @@ static int ext4_fc_snapshot_inode(struct inode *inode,
int alloc_ctx;
ret = ext4_get_inode_loc_noio(inode, &iloc);
- if (ret)
+ if (ret) {
+ ext4_fc_set_snap_err(snap_err, EXT4_FC_SNAP_ERR_INODE_LOC);
return ret;
+ }
if (ext4_test_inode_flag(inode, EXT4_INODE_INLINE_DATA))
inode_len = EXT4_INODE_SIZE(inode->i_sb);
@@ -1092,6 +1127,7 @@ static int ext4_fc_snapshot_inode(struct inode *inode,
snap = kmalloc(struct_size(snap, inode_buf, inode_len), GFP_NOFS);
if (!snap) {
+ ext4_fc_set_snap_err(snap_err, EXT4_FC_SNAP_ERR_NOMEM);
brelse(iloc.bh);
return -ENOMEM;
}
@@ -1102,7 +1138,7 @@ static int ext4_fc_snapshot_inode(struct inode *inode,
brelse(iloc.bh);
ret = ext4_fc_snapshot_inode_data(inode, &ranges, nr_ranges_total,
- &nr_ranges);
+ &nr_ranges, snap_err);
if (ret) {
kfree(snap);
ext4_fc_free_ranges(&ranges);
@@ -1203,7 +1239,10 @@ static int ext4_fc_alloc_snapshot_inodes(struct super_block *sb,
unsigned int *nr_inodesp);
static int ext4_fc_snapshot_inodes(journal_t *journal, struct inode **inodes,
- unsigned int inodes_size)
+ unsigned int inodes_size,
+ unsigned int *nr_inodesp,
+ unsigned int *nr_rangesp,
+ int *snap_err)
{
struct super_block *sb = journal->j_private;
struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -1221,6 +1260,8 @@ static int ext4_fc_snapshot_inodes(journal_t *journal, struct inode **inodes,
alloc_ctx = ext4_fc_lock(sb);
list_for_each_entry(iter, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) {
if (i >= inodes_size) {
+ ext4_fc_set_snap_err(snap_err,
+ EXT4_FC_SNAP_ERR_INODES_CAP);
ret = -E2BIG;
goto unlock;
}
@@ -1244,6 +1285,8 @@ static int ext4_fc_snapshot_inodes(journal_t *journal, struct inode **inodes,
continue;
if (i >= inodes_size) {
+ ext4_fc_set_snap_err(snap_err,
+ EXT4_FC_SNAP_ERR_INODES_CAP);
ret = -E2BIG;
goto unlock;
}
@@ -1268,16 +1311,20 @@ static int ext4_fc_snapshot_inodes(journal_t *journal, struct inode **inodes,
unsigned int inode_ranges = 0;
ret = ext4_fc_snapshot_inode(inodes[idx], nr_ranges,
- &inode_ranges);
+ &inode_ranges, snap_err);
if (ret)
break;
nr_ranges += inode_ranges;
}
+ if (nr_inodesp)
+ *nr_inodesp = i;
+ if (nr_rangesp)
+ *nr_rangesp = nr_ranges;
return ret;
}
-static int ext4_fc_perform_commit(journal_t *journal)
+static int ext4_fc_perform_commit(journal_t *journal, tid_t commit_tid)
{
struct super_block *sb = journal->j_private;
struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -1286,10 +1333,15 @@ static int ext4_fc_perform_commit(journal_t *journal)
struct inode *inode;
struct inode **inodes;
unsigned int inodes_size;
+ unsigned int snap_inodes = 0;
+ unsigned int snap_ranges = 0;
+ int snap_err = EXT4_FC_SNAP_ERR_NONE;
struct blk_plug plug;
int ret = 0;
u32 crc = 0;
int alloc_ctx;
+ ktime_t lock_start;
+ u64 locked_ns;
/*
* Step 1: Mark all inodes on s_fc_q[MAIN] with
@@ -1337,13 +1389,13 @@ static int ext4_fc_perform_commit(journal_t *journal)
if (ret)
return ret;
-
ret = ext4_fc_alloc_snapshot_inodes(sb, &inodes, &inodes_size);
if (ret)
return ret;
/* Step 4: Mark all inodes as being committed. */
jbd2_journal_lock_updates(journal);
+ lock_start = ktime_get();
/*
* The journal is now locked. No more handles can start and all the
* previous handles are now drained. Snapshotting happens in this
@@ -1357,8 +1409,12 @@ static int ext4_fc_perform_commit(journal_t *journal)
}
ext4_fc_unlock(sb, alloc_ctx);
- ret = ext4_fc_snapshot_inodes(journal, inodes, inodes_size);
+ ret = ext4_fc_snapshot_inodes(journal, inodes, inodes_size,
+ &snap_inodes, &snap_ranges, &snap_err);
jbd2_journal_unlock_updates(journal);
+ locked_ns = ktime_to_ns(ktime_sub(ktime_get(), lock_start));
+ trace_ext4_fc_lock_updates(sb, commit_tid, locked_ns, snap_inodes,
+ snap_ranges, ret, snap_err);
kvfree(inodes);
if (ret)
return ret;
@@ -1563,7 +1619,7 @@ int ext4_fc_commit(journal_t *journal, tid_t commit_tid)
journal_ioprio = EXT4_DEF_JOURNAL_IOPRIO;
set_task_ioprio(current, journal_ioprio);
fc_bufs_before = (sbi->s_fc_bytes + bsize - 1) / bsize;
- ret = ext4_fc_perform_commit(journal);
+ ret = ext4_fc_perform_commit(journal, commit_tid);
if (ret < 0) {
if (ret == -EAGAIN || ret == -E2BIG || ret == -ECANCELED)
status = EXT4_FC_STATUS_INELIGIBLE;
diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
index fd76d14c2776..a1493971821d 100644
--- a/include/trace/events/ext4.h
+++ b/include/trace/events/ext4.h
@@ -2812,6 +2812,39 @@ TRACE_EVENT(ext4_fc_commit_stop,
__entry->num_fc_ineligible, __entry->nblks_agg, __entry->tid)
);
+TRACE_EVENT(ext4_fc_lock_updates,
+ TP_PROTO(struct super_block *sb, tid_t commit_tid, u64 locked_ns,
+ unsigned int nr_inodes, unsigned int nr_ranges, int err,
+ int snap_err),
+
+ TP_ARGS(sb, commit_tid, locked_ns, nr_inodes, nr_ranges, err, snap_err),
+
+ TP_STRUCT__entry(/* entry */
+ __field(dev_t, dev)
+ __field(tid_t, tid)
+ __field(u64, locked_ns)
+ __field(unsigned int, nr_inodes)
+ __field(unsigned int, nr_ranges)
+ __field(int, err)
+ __field(int, snap_err)
+ ),
+
+ TP_fast_assign(/* assign */
+ __entry->dev = sb->s_dev;
+ __entry->tid = commit_tid;
+ __entry->locked_ns = locked_ns;
+ __entry->nr_inodes = nr_inodes;
+ __entry->nr_ranges = nr_ranges;
+ __entry->err = err;
+ __entry->snap_err = snap_err;
+ ),
+
+ TP_printk("dev %d,%d tid %u locked_ns %llu nr_inodes %u nr_ranges %u err %d snap_err %d",
+ MAJOR(__entry->dev), MINOR(__entry->dev), __entry->tid,
+ __entry->locked_ns, __entry->nr_inodes, __entry->nr_ranges,
+ __entry->err, __entry->snap_err)
+);
+
#define FC_REASON_NAME_STAT(reason) \
show_fc_reason(reason), \
__entry->fc_ineligible_rc[reason]
--
2.52.0
On Tue, 20 Jan 2026 19:25:35 +0800
Li Chen <me@linux.beauty> wrote:
> Commit-time fast commit snapshots run under jbd2_journal_lock_updates(),
> so it is useful to quantify the time spent with updates locked and to
> understand why snapshotting can fail.
>
> Add a new tracepoint, ext4_fc_lock_updates, reporting the time spent in
> the updates-locked window along with the number of snapshotted inodes
> and ranges. Record the first snapshot failure reason in a stable snap_err
> field for tooling.
>
> Signed-off-by: Li Chen <me@linux.beauty>
> ---
> fs/ext4/fast_commit.c | 86 ++++++++++++++++++++++++++++++-------
> include/trace/events/ext4.h | 33 ++++++++++++++
> 2 files changed, 104 insertions(+), 15 deletions(-)
>
> diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c
> index d1eefee60912..d266eb2a4219 100644
> --- a/fs/ext4/fast_commit.c
> +++ b/fs/ext4/fast_commit.c
> @@ -193,6 +193,27 @@ static struct kmem_cache *ext4_fc_range_cachep;
> #define EXT4_FC_SNAPSHOT_MAX_INODES 1024
> #define EXT4_FC_SNAPSHOT_MAX_RANGES 2048
>
> +/*
> + * Snapshot failure reasons for ext4_fc_lock_updates tracepoint.
> + * Keep these stable for tooling.
> + */
> +enum ext4_fc_snap_err {
> + EXT4_FC_SNAP_ERR_NONE = 0,
> + EXT4_FC_SNAP_ERR_ES_MISS,
> + EXT4_FC_SNAP_ERR_ES_DELAYED,
> + EXT4_FC_SNAP_ERR_ES_OTHER,
> + EXT4_FC_SNAP_ERR_INODES_CAP,
> + EXT4_FC_SNAP_ERR_RANGES_CAP,
> + EXT4_FC_SNAP_ERR_NOMEM,
> + EXT4_FC_SNAP_ERR_INODE_LOC,
> +};
> +
> +static inline void ext4_fc_set_snap_err(int *snap_err, int err)
> +{
> + if (snap_err && *snap_err == EXT4_FC_SNAP_ERR_NONE)
> + *snap_err = err;
> +}
> +
> diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
> index fd76d14c2776..a1493971821d 100644
> --- a/include/trace/events/ext4.h
> +++ b/include/trace/events/ext4.h
> @@ -2812,6 +2812,39 @@ TRACE_EVENT(ext4_fc_commit_stop,
> __entry->num_fc_ineligible, __entry->nblks_agg, __entry->tid)
> );
>
Why not print snap_err in a human-readable format?
/*
 * Symbolic names for enum ext4_fc_snap_err. EM() is used for every entry
 * except the last, which uses EMe() so that the final expansion can omit
 * the trailing comma. The list is expanded twice with different EM()/EMe()
 * definitions below.
 */
#define TRACE_SNAP_ERR				\
	EM(NONE)				\
	EM(ES_MISS)				\
	EM(ES_DELAYED)				\
	EM(ES_OTHER)				\
	EM(INODES_CAP)				\
	EM(RANGES_CAP)				\
	EM(NOMEM)				\
	EMe(INODE_LOC)

#undef EM
#undef EMe

/* First expansion: register each enum value via TRACE_DEFINE_ENUM(). */
#define EM(a)	TRACE_DEFINE_ENUM(EXT4_FC_SNAP_ERR_##a);
#define EMe(a)	TRACE_DEFINE_ENUM(EXT4_FC_SNAP_ERR_##a);

TRACE_SNAP_ERR

#undef EM
#undef EMe

/*
 * Second expansion: TRACE_SNAP_ERR now yields the { value, "name" } list
 * consumed by __print_symbolic(). EMe() must be defined here (not a second
 * EM()) and must not emit a trailing comma.
 */
#define EM(a)	{ EXT4_FC_SNAP_ERR_##a, #a },
#define EMe(a)	{ EXT4_FC_SNAP_ERR_##a, #a }
> +TRACE_EVENT(ext4_fc_lock_updates,
> + TP_PROTO(struct super_block *sb, tid_t commit_tid, u64 locked_ns,
> + unsigned int nr_inodes, unsigned int nr_ranges, int err,
> + int snap_err),
> +
> + TP_ARGS(sb, commit_tid, locked_ns, nr_inodes, nr_ranges, err, snap_err),
> +
> + TP_STRUCT__entry(/* entry */
> + __field(dev_t, dev)
> + __field(tid_t, tid)
> + __field(u64, locked_ns)
> + __field(unsigned int, nr_inodes)
> + __field(unsigned int, nr_ranges)
> + __field(int, err)
> + __field(int, snap_err)
> + ),
> +
> + TP_fast_assign(/* assign */
> + __entry->dev = sb->s_dev;
> + __entry->tid = commit_tid;
> + __entry->locked_ns = locked_ns;
> + __entry->nr_inodes = nr_inodes;
> + __entry->nr_ranges = nr_ranges;
> + __entry->err = err;
> + __entry->snap_err = snap_err;
> + ),
> +
> + TP_printk("dev %d,%d tid %u locked_ns %llu nr_inodes %u nr_ranges %u err %d snap_err %d",
> + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->tid,
> + __entry->locked_ns, __entry->nr_inodes, __entry->nr_ranges,
> + __entry->err, __entry->snap_err)
And instead of printing the raw value of __entry->snap_err, change its format specifier in the TP_printk() string from %d to %s and use:
__entry->err, __print_symbolic(__entry->snap_err, TRACE_SNAP_ERR))
-- Steve
> +);
> +
> #define FC_REASON_NAME_STAT(reason) \
> show_fc_reason(reason), \
> __entry->fc_ineligible_rc[reason]
1
Hi Steven,
> On Tue, 20 Jan 2026 19:25:35 +0800
> Li Chen <me@linux.beauty> wrote:
>
> > Commit-time fast commit snapshots run under jbd2_journal_lock_updates(),
> > so it is useful to quantify the time spent with updates locked and to
> > understand why snapshotting can fail.
> >
> > Add a new tracepoint, ext4_fc_lock_updates, reporting the time spent in
> > the updates-locked window along with the number of snapshotted inodes
> > and ranges. Record the first snapshot failure reason in a stable snap_err
> > field for tooling.
> >
> > Signed-off-by: Li Chen <me@linux.beauty>
> > ---
> > fs/ext4/fast_commit.c | 86 ++++++++++++++++++++++++++++++-------
> > include/trace/events/ext4.h | 33 ++++++++++++++
> > 2 files changed, 104 insertions(+), 15 deletions(-)
> >
> > diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c
> > index d1eefee60912..d266eb2a4219 100644
> > --- a/fs/ext4/fast_commit.c
> > +++ b/fs/ext4/fast_commit.c
> > @@ -193,6 +193,27 @@ static struct kmem_cache *ext4_fc_range_cachep;
> > #define EXT4_FC_SNAPSHOT_MAX_INODES 1024
> > #define EXT4_FC_SNAPSHOT_MAX_RANGES 2048
> >
> > +/*
> > + * Snapshot failure reasons for ext4_fc_lock_updates tracepoint.
> > + * Keep these stable for tooling.
> > + */
> > +enum ext4_fc_snap_err {
> > + EXT4_FC_SNAP_ERR_NONE = 0,
> > + EXT4_FC_SNAP_ERR_ES_MISS,
> > + EXT4_FC_SNAP_ERR_ES_DELAYED,
> > + EXT4_FC_SNAP_ERR_ES_OTHER,
> > + EXT4_FC_SNAP_ERR_INODES_CAP,
> > + EXT4_FC_SNAP_ERR_RANGES_CAP,
> > + EXT4_FC_SNAP_ERR_NOMEM,
> > + EXT4_FC_SNAP_ERR_INODE_LOC,
> > +};
> > +
> > +static inline void ext4_fc_set_snap_err(int *snap_err, int err)
> > +{
> > + if (snap_err && *snap_err == EXT4_FC_SNAP_ERR_NONE)
> > + *snap_err = err;
> > +}
> > +
>
>
>
> > diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
> > index fd76d14c2776..a1493971821d 100644
> > --- a/include/trace/events/ext4.h
> > +++ b/include/trace/events/ext4.h
> > @@ -2812,6 +2812,39 @@ TRACE_EVENT(ext4_fc_commit_stop,
> > __entry->num_fc_ineligible, __entry->nblks_agg, __entry->tid)
> > );
> >
>
> Why not make the snap_err into a human readable format?
>
> #define TRACE_SNAP_ERR \
> EM(NONE) \
> EM(ES_MISS) \
> EM(ES_DELAYED) \
> EM(ES_OTHER) \
> EM(INODES_CAP) \
> EM(RANGES_CAP) \
> EM(NOMEM) \
> EMe(INODE_LOC) \
>
> #undef EM
> #undef EMe
>
> #define EM(a) TRACE_DEFINE_ENUM(EXT4_FC_SNAP_ERR_##a);
> #define EMe(a) TRACE_DEFINE_ENUM(EXT4_FC_SNAP_ERR_##a);
>
> TRACE_SNAP_ERR
>
> #undef EM
> #undef EMe
>
> #define EM(a) { EXT4_FC_SNAP_ERR_##a, #a },
> #define EM(a) { EXT4_FC_SNAP_ERR_##a, #a }
>
>
> > +TRACE_EVENT(ext4_fc_lock_updates,
> > + TP_PROTO(struct super_block *sb, tid_t commit_tid, u64 locked_ns,
> > + unsigned int nr_inodes, unsigned int nr_ranges, int err,
> > + int snap_err),
> > +
> > + TP_ARGS(sb, commit_tid, locked_ns, nr_inodes, nr_ranges, err, snap_err),
> > +
> > + TP_STRUCT__entry(/* entry */
> > + __field(dev_t, dev)
> > + __field(tid_t, tid)
> > + __field(u64, locked_ns)
> > + __field(unsigned int, nr_inodes)
> > + __field(unsigned int, nr_ranges)
> > + __field(int, err)
> > + __field(int, snap_err)
> > + ),
> > +
> > + TP_fast_assign(/* assign */
> > + __entry->dev = sb->s_dev;
> > + __entry->tid = commit_tid;
> > + __entry->locked_ns = locked_ns;
> > + __entry->nr_inodes = nr_inodes;
> > + __entry->nr_ranges = nr_ranges;
> > + __entry->err = err;
> > + __entry->snap_err = snap_err;
> > + ),
> > +
> > + TP_printk("dev %d,%d tid %u locked_ns %llu nr_inodes %u nr_ranges %u err %d snap_err %d",
> > + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->tid,
> > + __entry->locked_ns, __entry->nr_inodes, __entry->nr_ranges,
> > + __entry->err, __entry->snap_err)
>
> And instead of having the raw value of __entry->snap_err, use:
>
> __entry->err, __print_symbolic(__entry->snap_err, TRACE_SNAP_ERR))
Good point. I'll switch snap_err to __print_symbolic() in v5.
Regards,
Li
© 2016 - 2026 Red Hat, Inc.