Add /sys/kernel/debug/bdi/xxx/wb_stats to show per group writeback stats
of bdi.
Signed-off-by: Kemeng Shi <shikemeng@huaweicloud.com>
---
include/linux/writeback.h | 1 +
mm/backing-dev.c | 66 +++++++++++++++++++++++++++++++++++++++
mm/page-writeback.c | 19 +++++++++++
3 files changed, 86 insertions(+)
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 9845cb62e40b..bb1ce1123b52 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -355,6 +355,7 @@ int dirtytime_interval_handler(struct ctl_table *table, int write,
void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty);
unsigned long wb_calc_thresh(struct bdi_writeback *wb, unsigned long thresh);
+unsigned long wb_calc_cg_thresh(struct bdi_writeback *wb);
void wb_update_bandwidth(struct bdi_writeback *wb);
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 788702b6c5dd..bfc4079dc7fe 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -95,12 +95,77 @@ static void bdi_collect_stats(struct backing_dev_info *bdi,
collect_wb_stats(stats, wb);
mutex_unlock(&bdi->cgwb_release_mutex);
}
+static void wb_stats_show(struct seq_file *m, struct bdi_writeback *wb,
+ struct wb_stats *stats)
+{
+
+ seq_printf(m,
+ "WbCgIno: %10lu\n"
+ "WbWriteback: %10lu kB\n"
+ "WbReclaimable: %10lu kB\n"
+ "WbDirtyThresh: %10lu kB\n"
+ "WbDirtied: %10lu kB\n"
+ "WbWritten: %10lu kB\n"
+ "WbWriteBandwidth: %10lu kBps\n"
+ "b_dirty: %10lu\n"
+ "b_io: %10lu\n"
+ "b_more_io: %10lu\n"
+ "b_dirty_time: %10lu\n"
+ "state: %10lx\n",
+ cgroup_ino(wb->memcg_css->cgroup),
+ K(stats->nr_writeback),
+ K(stats->nr_reclaimable),
+ K(stats->wb_thresh),
+ K(stats->nr_dirtied),
+ K(stats->nr_written),
+ K(wb->avg_write_bandwidth),
+ stats->nr_dirty,
+ stats->nr_io,
+ stats->nr_more_io,
+ stats->nr_dirty_time,
+ wb->state);
+}
+
+static int cgwb_debug_stats_show(struct seq_file *m, void *v)
+{
+ struct backing_dev_info *bdi = m->private;
+ unsigned long background_thresh;
+ unsigned long dirty_thresh;
+ struct bdi_writeback *wb;
+ struct wb_stats stats;
+
+ global_dirty_limits(&background_thresh, &dirty_thresh);
+
+ mutex_lock(&bdi->cgwb_release_mutex);
+ list_for_each_entry(wb, &bdi->wb_list, bdi_node) {
+ memset(&stats, 0, sizeof(stats));
+ stats.dirty_thresh = dirty_thresh;
+ collect_wb_stats(&stats, wb);
+
+ if (mem_cgroup_wb_domain(wb) != NULL)
+ stats.wb_thresh = min(stats.wb_thresh, wb_calc_cg_thresh(wb));
+
+ wb_stats_show(m, wb, &stats);
+ }
+ mutex_unlock(&bdi->cgwb_release_mutex);
+
+ return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(cgwb_debug_stats);
+
+static void cgwb_debug_register(struct backing_dev_info *bdi)
+{
+ debugfs_create_file("wb_stats", 0444, bdi->debug_dir, bdi,
+ &cgwb_debug_stats_fops);
+}
#else
static void bdi_collect_stats(struct backing_dev_info *bdi,
struct wb_stats *stats)
{
collect_wb_stats(stats, &bdi->wb);
}
+
+static inline void cgwb_debug_register(struct backing_dev_info *bdi) { }
#endif
static int bdi_debug_stats_show(struct seq_file *m, void *v)
@@ -158,6 +223,7 @@ static void bdi_debug_register(struct backing_dev_info *bdi, const char *name)
debugfs_create_file("stats", 0444, bdi->debug_dir, bdi,
&bdi_debug_stats_fops);
+ cgwb_debug_register(bdi);
}
static void bdi_debug_unregister(struct backing_dev_info *bdi)
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 0e20467367fe..ba1b6b5ae5d6 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -893,6 +893,25 @@ unsigned long wb_calc_thresh(struct bdi_writeback *wb, unsigned long thresh)
return __wb_calc_thresh(&gdtc, thresh);
}
+unsigned long wb_calc_cg_thresh(struct bdi_writeback *wb)
+{
+ struct dirty_throttle_control gdtc = { GDTC_INIT_NO_WB };
+ struct dirty_throttle_control mdtc = { MDTC_INIT(wb, &gdtc) };
+ unsigned long filepages, headroom, writeback;
+
+ gdtc.avail = global_dirtyable_memory();
+ gdtc.dirty = global_node_page_state(NR_FILE_DIRTY) +
+ global_node_page_state(NR_WRITEBACK);
+
+ mem_cgroup_wb_stats(wb, &filepages, &headroom,
+ &mdtc.dirty, &writeback);
+ mdtc.dirty += writeback;
+ mdtc_calc_avail(&mdtc, filepages, headroom);
+ domain_dirty_limits(&mdtc);
+
+ return __wb_calc_thresh(&mdtc, mdtc.thresh);
+}
+
/*
* setpoint - dirty 3
* f(dirty) := 1.0 + (----------------)
--
2.30.0
On Wed 20-03-24 19:02:18, Kemeng Shi wrote:
> Add /sys/kernel/debug/bdi/xxx/wb_stats to show per group writeback stats
> of bdi.
>
> Signed-off-by: Kemeng Shi <shikemeng@huaweicloud.com>
...
> +unsigned long wb_calc_cg_thresh(struct bdi_writeback *wb)
> +{
> + struct dirty_throttle_control gdtc = { GDTC_INIT_NO_WB };
> + struct dirty_throttle_control mdtc = { MDTC_INIT(wb, &gdtc) };
> + unsigned long filepages, headroom, writeback;
> +
> + gdtc.avail = global_dirtyable_memory();
> + gdtc.dirty = global_node_page_state(NR_FILE_DIRTY) +
> + global_node_page_state(NR_WRITEBACK);
> +
> + mem_cgroup_wb_stats(wb, &filepages, &headroom,
> + &mdtc.dirty, &writeback);
> + mdtc.dirty += writeback;
> + mdtc_calc_avail(&mdtc, filepages, headroom);
> + domain_dirty_limits(&mdtc);
> +
> + return __wb_calc_thresh(&mdtc, mdtc.thresh);
> +}
I kind of wish we didn't replicate this logic from balance_dirty_pages()
and wb_over_bg_thresh() into yet another place. But the refactoring would
be kind of difficult. So OK.
Honza
--
Jan Kara <jack@suse.com>
SUSE Labs, CR
on 3/26/2024 8:24 PM, Jan Kara wrote:
> On Wed 20-03-24 19:02:18, Kemeng Shi wrote:
>> Add /sys/kernel/debug/bdi/xxx/wb_stats to show per group writeback stats
>> of bdi.
>>
>> Signed-off-by: Kemeng Shi <shikemeng@huaweicloud.com>
>
> ...
>
>> +unsigned long wb_calc_cg_thresh(struct bdi_writeback *wb)
>> +{
>> + struct dirty_throttle_control gdtc = { GDTC_INIT_NO_WB };
>> + struct dirty_throttle_control mdtc = { MDTC_INIT(wb, &gdtc) };
>> + unsigned long filepages, headroom, writeback;
>> +
>> + gdtc.avail = global_dirtyable_memory();
>> + gdtc.dirty = global_node_page_state(NR_FILE_DIRTY) +
>> + global_node_page_state(NR_WRITEBACK);
>> +
>> + mem_cgroup_wb_stats(wb, &filepages, &headroom,
>> + &mdtc.dirty, &writeback);
>> + mdtc.dirty += writeback;
>> + mdtc_calc_avail(&mdtc, filepages, headroom);
>> + domain_dirty_limits(&mdtc);
>> +
>> + return __wb_calc_thresh(&mdtc, mdtc.thresh);
>> +}
>
> I kind of wish we didn't replicate this logic from balance_dirty_pages()
> and wb_over_bg_thresh() into yet another place. But the refactoring would
> be kind of difficult. So OK.
Thanks for reviewing the code.
I have considered this. It's difficult and would introduce a lot of
changes to non-debug code, which would make this series harder to review.
I will submit another patch for the refactoring if I can find a way to
clean up the code.
Kemeng
>
> Honza
>
On Wed, Mar 20, 2024 at 07:02:18PM +0800, Kemeng Shi wrote: > diff --git a/include/linux/writeback.h b/include/linux/writeback.h > index 9845cb62e40b..bb1ce1123b52 100644 > --- a/include/linux/writeback.h > +++ b/include/linux/writeback.h > @@ -355,6 +355,7 @@ int dirtytime_interval_handler(struct ctl_table *table, int write, > > void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty); > unsigned long wb_calc_thresh(struct bdi_writeback *wb, unsigned long thresh); > +unsigned long wb_calc_cg_thresh(struct bdi_writeback *wb); Would cgwb_calc_thresh() be an easier name? Thanks. -- tejun
on 3/20/2024 11:01 PM, Tejun Heo wrote: > On Wed, Mar 20, 2024 at 07:02:18PM +0800, Kemeng Shi wrote: >> diff --git a/include/linux/writeback.h b/include/linux/writeback.h >> index 9845cb62e40b..bb1ce1123b52 100644 >> --- a/include/linux/writeback.h >> +++ b/include/linux/writeback.h >> @@ -355,6 +355,7 @@ int dirtytime_interval_handler(struct ctl_table *table, int write, >> >> void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty); >> unsigned long wb_calc_thresh(struct bdi_writeback *wb, unsigned long thresh); >> +unsigned long wb_calc_cg_thresh(struct bdi_writeback *wb); > > Would cgwb_calc_thresh() be an easier name? > Sure, will rename it in the next version. Thanks! > Thanks. >
© 2016 - 2026 Red Hat, Inc.