Once a pcluster is fully decompressed and there are no attached cached
pages, its corresponding struct z_erofs_pcluster will be freed. This
will significantly reduce the frequency of calls to erofs_shrink_scan()
and the memory allocated for struct z_erofs_pcluster.
The tables below show approximately a 95% reduction in calls to
erofs_shrink_scan() and in the memory allocated for struct
z_erofs_pcluster after applying this patch. The results were obtained by
copying a 2.1 GB partition on ARM64 Android devices running the 5.15
kernel with an 8-core CPU and 8 GB of memory.
1. The reduction in calls to erofs_shrink_scan():
+-----------------+-----------+----------+---------+
| | w/o patch | w/ patch | diff |
+-----------------+-----------+----------+---------+
| Average (times) | 3152 | 160 | -94.92% |
+-----------------+-----------+----------+---------+
2. The reduction in memory released by erofs_shrink_scan():
+-----------------+-----------+----------+---------+
| | w/o patch | w/ patch | diff |
+-----------------+-----------+----------+---------+
| Average (Byte) | 44503200 | 2293760 | -94.84% |
+-----------------+-----------+----------+---------+
Signed-off-by: Chunhai Guo <guochunhai@vivo.com>
---
fs/erofs/internal.h | 3 ++-
fs/erofs/zdata.c | 14 ++++++++---
fs/erofs/zutil.c | 58 +++++++++++++++++++++++++++++----------------
3 files changed, 51 insertions(+), 24 deletions(-)
diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
index 4efd578d7c62..17b04bfd743f 100644
--- a/fs/erofs/internal.h
+++ b/fs/erofs/internal.h
@@ -456,7 +456,8 @@ static inline void erofs_pagepool_add(struct page **pagepool, struct page *page)
void erofs_release_pages(struct page **pagepool);
#ifdef CONFIG_EROFS_FS_ZIP
-void erofs_workgroup_put(struct erofs_workgroup *grp);
+void erofs_workgroup_put(struct erofs_sb_info *sbi, struct erofs_workgroup *grp,
+			 bool try_free);
struct erofs_workgroup *erofs_find_workgroup(struct super_block *sb,
pgoff_t index);
struct erofs_workgroup *erofs_insert_workgroup(struct super_block *sb,
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index 8936790618c6..656fd65aec33 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -888,7 +888,7 @@ static void z_erofs_pcluster_end(struct z_erofs_decompress_frontend *fe)
* any longer if the pcluster isn't hosted by ourselves.
*/
if (fe->mode < Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE)
- erofs_workgroup_put(&pcl->obj);
+ erofs_workgroup_put(EROFS_I_SB(fe->inode), &pcl->obj, false);
fe->pcl = NULL;
}
@@ -1046,6 +1046,9 @@ struct z_erofs_decompress_backend {
struct list_head decompressed_secondary_bvecs;
struct page **pagepool;
unsigned int onstack_used, nr_pages;
+
+ /* whether the pcluster can be released after its decompression */
+ bool try_free;
};
struct z_erofs_bvec_item {
@@ -1244,12 +1247,15 @@ static int z_erofs_decompress_pcluster(struct z_erofs_decompress_backend *be,
WRITE_ONCE(pcl->compressed_bvecs[0].page, NULL);
put_page(page);
} else {
+ be->try_free = true;
/* managed folios are still left in compressed_bvecs[] */
for (i = 0; i < pclusterpages; ++i) {
page = be->compressed_pages[i];
if (!page ||
- erofs_folio_is_managed(sbi, page_folio(page)))
+ erofs_folio_is_managed(sbi, page_folio(page))) {
+ be->try_free = false;
continue;
+ }
(void)z_erofs_put_shortlivedpage(be->pagepool, page);
WRITE_ONCE(pcl->compressed_bvecs[i].page, NULL);
}
@@ -1285,6 +1291,7 @@ static int z_erofs_decompress_pcluster(struct z_erofs_decompress_backend *be,
if (be->decompressed_pages != be->onstack_pages)
kvfree(be->decompressed_pages);
+ be->try_free = be->try_free && !pcl->partial;
pcl->length = 0;
pcl->partial = true;
pcl->multibases = false;
@@ -1320,7 +1327,8 @@ static int z_erofs_decompress_queue(const struct z_erofs_decompressqueue *io,
if (z_erofs_is_inline_pcluster(be.pcl))
z_erofs_free_pcluster(be.pcl);
else
- erofs_workgroup_put(&be.pcl->obj);
+ erofs_workgroup_put(EROFS_SB(io->sb), &be.pcl->obj,
+ be.try_free);
}
return err;
}
diff --git a/fs/erofs/zutil.c b/fs/erofs/zutil.c
index 37afe2024840..cf59ba6a2322 100644
--- a/fs/erofs/zutil.c
+++ b/fs/erofs/zutil.c
@@ -285,26 +285,11 @@ static void __erofs_workgroup_free(struct erofs_workgroup *grp)
erofs_workgroup_free_rcu(grp);
}
-void erofs_workgroup_put(struct erofs_workgroup *grp)
-{
- if (lockref_put_or_lock(&grp->lockref))
- return;
-
- DBG_BUGON(__lockref_is_dead(&grp->lockref));
- if (grp->lockref.count == 1)
- atomic_long_inc(&erofs_global_shrink_cnt);
- --grp->lockref.count;
- spin_unlock(&grp->lockref.lock);
-}
-
-static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
+static bool erofs_prepare_to_release_workgroup(struct erofs_sb_info *sbi,
struct erofs_workgroup *grp)
{
- int free = false;
-
- spin_lock(&grp->lockref.lock);
if (grp->lockref.count)
- goto out;
+ return false;
/*
* Note that all cached pages should be detached before deleted from
@@ -312,7 +297,7 @@ static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
* the orphan old workgroup when the new one is available in the tree.
*/
if (erofs_try_to_free_all_cached_folios(sbi, grp))
- goto out;
+ return false;
/*
* It's impossible to fail after the workgroup is freezed,
@@ -322,14 +307,47 @@ static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
DBG_BUGON(__xa_erase(&sbi->managed_pslots, grp->index) != grp);
lockref_mark_dead(&grp->lockref);
- free = true;
-out:
+ return true;
+}
+
+static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
+ struct erofs_workgroup *grp)
+{
+ bool free = false;
+
+ /* Using trylock to avoid deadlock with erofs_workgroup_put() */
+ if (!spin_trylock(&grp->lockref.lock))
+ return free;
+ free = erofs_prepare_to_release_workgroup(sbi, grp);
spin_unlock(&grp->lockref.lock);
if (free)
__erofs_workgroup_free(grp);
return free;
}
+void erofs_workgroup_put(struct erofs_sb_info *sbi, struct erofs_workgroup *grp,
+ bool try_free)
+{
+ bool free = false;
+
+ if (lockref_put_or_lock(&grp->lockref))
+ return;
+
+ DBG_BUGON(__lockref_is_dead(&grp->lockref));
+ if (--grp->lockref.count == 0) {
+ atomic_long_inc(&erofs_global_shrink_cnt);
+
+ if (try_free) {
+ xa_lock(&sbi->managed_pslots);
+ free = erofs_prepare_to_release_workgroup(sbi, grp);
+ xa_unlock(&sbi->managed_pslots);
+ }
+ }
+ spin_unlock(&grp->lockref.lock);
+ if (free)
+ __erofs_workgroup_free(grp);
+}
+
static unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi,
unsigned long nr_shrink)
{
--
2.25.1
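
For anyone skimming the diff, the behavioral core of the change can be
modeled in a few lines of plain userspace C. All names below are
illustrative stand-ins (no locking, no xarray), not the erofs API itself;
the point is only that the final put frees the object right away when no
cached pages are attached, instead of leaving it for erofs_shrink_scan():

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

/* stand-in for struct z_erofs_pcluster / struct erofs_workgroup */
struct pcluster {
	int refcount;           /* models grp->lockref.count */
	bool has_cached_pages;  /* models managed folios left in compressed_bvecs[] */
};

static long shrinkable_objects; /* models erofs_global_shrink_cnt */

/* models the new erofs_workgroup_put(sbi, grp, try_free) behavior */
static void pcluster_put(struct pcluster *pcl, bool try_free)
{
	if (--pcl->refcount > 0)
		return;

	if (try_free && !pcl->has_cached_pages) {
		free(pcl);              /* freed right after decompression */
		return;
	}
	shrinkable_objects++;           /* left for the shrinker, as before */
}

int main(void)
{
	struct pcluster *pcl = calloc(1, sizeof(*pcl));

	if (!pcl)
		return 1;
	pcl->refcount = 1;
	pcluster_put(pcl, true);        /* frees pcl, shrinker never involved */
	printf("objects left for the shrinker: %ld\n", shrinkable_objects);
	return 0;
}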
Hi Chunhai,

Thanks for the work! Please rebase this work on my "sunset
z_erofs_workgroup" series.

On 2024/9/30 22:04, Chunhai Guo wrote:
> Once a pcluster is fully decompressed and there are no attached cached
> pages, its corresponding struct z_erofs_pcluster will be freed. This

Subject: free pclusters if no cached folio attached

cached folios, its corresponding `struct z_erofs_pcluster`...

> will significantly reduce the frequency of calls to erofs_shrink_scan()
> and the memory allocated for struct z_erofs_pcluster.
>
> The tables below show approximately a 95% reduction in calls to
> erofs_shrink_scan() and in the memory allocated for struct

for `struct z_erofs_pcluster`

> z_erofs_pcluster after applying this patch. The results were obtained by
> copying a 2.1 GB partition on ARM64 Android devices running the 5.15
> kernel with an 8-core CPU and 8 GB of memory.

I guess you could try to use more recent kernels for testing instead?

[...]

> @@ -1285,6 +1291,7 @@ static int z_erofs_decompress_pcluster(struct z_erofs_decompress_backend *be,
>  	if (be->decompressed_pages != be->onstack_pages)
>  		kvfree(be->decompressed_pages);
>
> +	be->try_free = be->try_free && !pcl->partial;

I think no need to check `pcl->partial`.

>  	pcl->length = 0;
>  	pcl->partial = true;
>  	pcl->multibases = false;
> @@ -1320,7 +1327,8 @@ static int z_erofs_decompress_queue(const struct z_erofs_decompressqueue *io,
>  		if (z_erofs_is_inline_pcluster(be.pcl))
>  			z_erofs_free_pcluster(be.pcl);
>  		else
> -			erofs_workgroup_put(&be.pcl->obj);
> +			erofs_workgroup_put(EROFS_SB(io->sb), &be.pcl->obj,
> +					    be.try_free);

We could just move

	if (z_erofs_is_inline_pcluster(be.pcl))
		z_erofs_free_pcluster(be.pcl);
	else
		z_erofs_put_pcluster(be.pcl);

into the end of z_erofs_decompress_pcluster() and get rid of
`be->try_free`.

[...]

> +	DBG_BUGON(__lockref_is_dead(&grp->lockref));
> +	if (--grp->lockref.count == 0) {
> +		atomic_long_inc(&erofs_global_shrink_cnt);
> +
> +		if (try_free) {
> +			xa_lock(&sbi->managed_pslots);
> +			free = erofs_prepare_to_release_workgroup(sbi, grp);
> +			xa_unlock(&sbi->managed_pslots);
> +		}
> +	}
> +	spin_unlock(&grp->lockref.lock);
> +	if (free)
> +		__erofs_workgroup_free(grp);

need to wait for a RCU grace period.

Thanks,
Gao Xiang
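
A rough model of the restructuring suggested above, again with purely
illustrative names (z_erofs_put_pcluster() belongs to the "sunset
z_erofs_workgroup" series and is only mimicked here): the final put moves
into the end of the per-pcluster function itself, so no try_free flag has
to travel through the backend struct:

#include <stdbool.h>
#include <stdlib.h>

struct pcluster {
	int refcount;
	bool inlined;   /* models z_erofs_is_inline_pcluster() */
};

/* stand-in for z_erofs_put_pcluster(): may free eagerly on the last ref */
static void put_pcluster(struct pcluster *pcl)
{
	if (--pcl->refcount == 0)
		free(pcl);
}

/* models z_erofs_decompress_pcluster() after the suggested change */
static void decompress_pcluster(struct pcluster *pcl)
{
	/* ... decompression and page release elided ... */

	/* drop the reference here, so the queue loop needs no try_free flag */
	if (pcl->inlined)
		free(pcl);
	else
		put_pcluster(pcl);
}

int main(void)
{
	struct pcluster *pcl = calloc(1, sizeof(*pcl));

	if (!pcl)
		return 1;
	pcl->refcount = 1;
	decompress_pcluster(pcl);
	return 0;
}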