zswap_entry_free() performs four types of cleanups before freeing a
zswap_entry:
- Deletes the entry from the LRU.
- Frees compressed memory.
- Puts the pool reference.
- Uncharges the compressed memory and puts the objcg.
zswap_entry_free() always expects a fully initialized entry. Allow
zswap_entry_free() to handle partially initialized entries by making it
possible to identify what cleanups are needed as follows:
- Allocate entries with __GFP_ZERO and initialize zswap_entry.lru when
  the entry is allocated. Pointers are NULL and the length is zero upon
  initialization (see the struct sketch below).
- Use zswap_entry.length to identify if there is compressed memory to
free. This is possible now that zero-filled pages are handled
separately, so a length of zero means we did not successfully compress
the page.
- Only initialize entry->objcg after the memory is charged in
zswap_store().
With this in place, use zswap_entry_free() in the failure path of
zswap_store() to clean up partially initialized entries. This simplifies
the cleanup code in zswap_store(). While we are at it, rename the
remaining cleanup labels to more meaningful names.
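
For context, here is a simplified sketch of the zswap_entry fields this
scheme relies on (abridged; the exact definition in mm/zswap.c may
differ). With zeroed allocation, each field doubles as a marker for how
far initialization got:

	struct zswap_entry {
		swp_entry_t swpentry;		/* set last, when the entry is published */
		unsigned int length;		/* zero until compression succeeds */
		struct zswap_pool *pool;	/* NULL until a pool reference is taken */
		unsigned long handle;		/* meaningful only when length != 0 */
		struct obj_cgroup *objcg;	/* set only after charging succeeds */
		struct list_head lru;		/* initialized empty at allocation */
	};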
Signed-off-by: Yosry Ahmed <yosryahmed@google.com>
---
mm/zswap.c | 62 ++++++++++++++++++++++++++----------------------------
1 file changed, 30 insertions(+), 32 deletions(-)
diff --git a/mm/zswap.c b/mm/zswap.c
index 9357328d940af..c50f9df230ca3 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -774,12 +774,13 @@ void zswap_memcg_offline_cleanup(struct mem_cgroup *memcg)
**********************************/
static struct kmem_cache *zswap_entry_cache;
-static struct zswap_entry *zswap_entry_cache_alloc(gfp_t gfp, int nid)
+static struct zswap_entry *zswap_entry_cache_alloc(int nid)
{
struct zswap_entry *entry;
- entry = kmem_cache_alloc_node(zswap_entry_cache, gfp, nid);
- if (!entry)
- return NULL;
+ entry = kmem_cache_alloc_node(zswap_entry_cache,
+ GFP_KERNEL | __GFP_ZERO, nid);
+ if (entry)
+ INIT_LIST_HEAD(&entry->lru);
return entry;
}
@@ -795,9 +796,12 @@ static struct zpool *zswap_find_zpool(struct zswap_entry *entry)
static void zswap_entry_free(struct zswap_entry *entry)
{
- zswap_lru_del(&zswap_list_lru, entry);
- zpool_free(zswap_find_zpool(entry), entry->handle);
- zswap_pool_put(entry->pool);
+ if (!list_empty(&entry->lru))
+ zswap_lru_del(&zswap_list_lru, entry);
+ if (entry->length)
+ zpool_free(zswap_find_zpool(entry), entry->handle);
+ if (entry->pool)
+ zswap_pool_put(entry->pool);
if (entry->objcg) {
obj_cgroup_uncharge_zswap(entry->objcg, entry->length);
obj_cgroup_put(entry->objcg);
@@ -1447,7 +1451,7 @@ bool zswap_store(struct folio *folio)
return false;
if (!zswap_enabled)
- goto check_old;
+ goto erase_old;
/* Check cgroup limits */
objcg = get_obj_cgroup_from_folio(folio);
@@ -1455,54 +1459,52 @@ bool zswap_store(struct folio *folio)
memcg = get_mem_cgroup_from_objcg(objcg);
if (shrink_memcg(memcg)) {
mem_cgroup_put(memcg);
- goto reject;
+ goto put_objcg;
}
mem_cgroup_put(memcg);
}
if (zswap_is_folio_zero_filled(folio)) {
if (zswap_store_zero_filled(tree, offset, objcg))
- goto reject;
+ goto put_objcg;
goto stored;
}
if (!zswap_non_zero_filled_pages_enabled)
- goto reject;
+ goto put_objcg;
if (!zswap_check_limit())
- goto reject;
+ goto put_objcg;
- entry = zswap_entry_cache_alloc(GFP_KERNEL, folio_nid(folio));
+ entry = zswap_entry_cache_alloc(folio_nid(folio));
if (!entry) {
zswap_reject_kmemcache_fail++;
- goto reject;
+ goto put_objcg;
}
- /* if entry is successfully added, it keeps the reference */
entry->pool = zswap_pool_current_get();
if (!entry->pool)
- goto freepage;
+ goto free_entry;
if (objcg) {
memcg = get_mem_cgroup_from_objcg(objcg);
if (memcg_list_lru_alloc(memcg, &zswap_list_lru, GFP_KERNEL)) {
mem_cgroup_put(memcg);
- goto put_pool;
+ goto free_entry;
}
mem_cgroup_put(memcg);
}
if (!zswap_compress(folio, entry))
- goto put_pool;
-
- entry->swpentry = swp;
- entry->objcg = objcg;
+ goto free_entry;
if (zswap_tree_store(tree, offset, entry))
- goto store_failed;
+ goto free_entry;
- if (objcg)
+ if (objcg) {
obj_cgroup_charge_zswap(objcg, entry->length);
+ entry->objcg = objcg;
+ }
/*
* We finish initializing the entry while it's already in xarray.
@@ -1514,7 +1516,7 @@ bool zswap_store(struct folio *folio)
* The publishing order matters to prevent writeback from seeing
* an incoherent entry.
*/
- INIT_LIST_HEAD(&entry->lru);
+ entry->swpentry = swp;
zswap_lru_add(&zswap_list_lru, entry);
stored:
@@ -1525,17 +1527,13 @@ bool zswap_store(struct folio *folio)
return true;
-store_failed:
- zpool_free(zswap_find_zpool(entry), entry->handle);
-put_pool:
- zswap_pool_put(entry->pool);
-freepage:
- zswap_entry_cache_free(entry);
-reject:
+free_entry:
+ zswap_entry_free(entry);
+put_objcg:
obj_cgroup_put(objcg);
if (zswap_pool_reached_full)
queue_work(shrink_wq, &zswap_shrink_work);
-check_old:
+erase_old:
/*
* If the zswap store fails or zswap is disabled, we must invalidate the
* possibly stale entry which was previously stored at this offset.
--
2.44.0.396.g6e790dbe36-goog
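
Taken together, the reworked zswap_entry_free() reads as one guard per
initialization stage. This is an annotated restatement of the hunk
above, not new code; the tail of the function is omitted:

	static void zswap_entry_free(struct zswap_entry *entry)
	{
		if (!list_empty(&entry->lru))	/* empty until the entry is published */
			zswap_lru_del(&zswap_list_lru, entry);
		if (entry->length)		/* zero until zswap_compress() succeeds */
			zpool_free(zswap_find_zpool(entry), entry->handle);
		if (entry->pool)		/* NULL until zswap_pool_current_get() */
			zswap_pool_put(entry->pool);
		if (entry->objcg) {		/* set only after obj_cgroup_charge_zswap() */
			obj_cgroup_uncharge_zswap(entry->objcg, entry->length);
			obj_cgroup_put(entry->objcg);
		}
		/* ... rest of the function (freeing the entry itself) unchanged ... */
	}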
On 2024/3/26 07:50, Yosry Ahmed wrote:
> [...]
> With this in place, use zswap_entry_free() in the failure path of
> zswap_store() to clean up partially initialized entries. This simplifies
> the cleanup code in zswap_store(). While we are at it, rename the
> remaining cleanup labels to more meaningful names.
Not sure if it's worthwhile to clean up the failure path when the normal
path gets a little more verbose.
On Thu, Mar 28, 2024 at 1:31 AM Chengming Zhou <chengming.zhou@linux.dev> wrote:
>
> On 2024/3/26 07:50, Yosry Ahmed wrote:
> > [...]
>
> Not sure if it's worthwhile to clean up the failure path when the normal
> path gets a little more verbose.
I was on the fence about this, so I thought I would just send it and
see what others think.
On one hand, it makes the initialization more robust because the
zswap_entry is always in a clean, identifiable state; on the other
hand, it adds churn to the normal path, as you noticed. Also, after the
handling of zero-length entries was removed from the failure path, the
cleanup code isn't that bad without this patch anyway.
So if no one else thinks this is useful, I will drop the patch in the
next version.
Thanks!
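
To make the tradeoff concrete, here are both shapes of the failure
path, taken directly from the diff above:

	/* Before: each failure point unwinds exactly what was set up so far. */
	store_failed:
		zpool_free(zswap_find_zpool(entry), entry->handle);
	put_pool:
		zswap_pool_put(entry->pool);
	freepage:
		zswap_entry_cache_free(entry);
	reject:
		obj_cgroup_put(objcg);

	/* After: every failure past allocation funnels into one label. */
	free_entry:
		zswap_entry_free(entry);
	put_objcg:
		obj_cgroup_put(objcg);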