[v4] page_io: zswap: do not crash the kernel on decompression failure

[PATCH v4] page_io: zswap: do not crash the kernel on decompression failure

Posted by Nhat Pham 11 months, 1 week ago

Currently, we crash the kernel when a decompression failure occurs in
zswap (either because of memory corruption, or a bug in the compression
algorithm). This is overkill. We should only SIGBUS the unfortunate
process asking for the zswap entry on zswap load, and skip the corrupted
entry in zswap writeback.

See [1] for a recent upstream discussion about this.

The zswap writeback case is relatively straightforward to fix. For the
zswap_load() case, we change the return behavior:

* Return 0 on success.
* Return -ENOENT (with the folio locked) if zswap does not own the
  swapped out content.
* Return -EIO if zswap owns the swapped out content, but encounters a
  decompression failure for some reasons. The folio will be unlocked,
  but not be marked up-to-date, which will eventually cause the process
  requesting the page to SIGBUS (see the handling of not-up-to-date
  folio in do_swap_page() in mm/memory.c), without crashing the kernel.
* Return -EINVAL if we encounter a large folio, as large folio should
  not be swapped in while zswap is being used. Similar to the -EIO case,
  we also unlock the folio but do not mark it as up-to-date to SIGBUS
  the faulting process.

As a side effect, we require one extra zswap tree traversal in the load
and writeback paths. Quick benchmarking on a kernel build test shows no
performance difference:

With the new scheme:
real: mean: 125.1s, stdev: 0.12s
user: mean: 3265.23s, stdev: 9.62s
sys: mean: 2156.41s, stdev: 13.98s

The old scheme:
real: mean: 125.78s, stdev: 0.45s
user: mean: 3287.18s, stdev: 5.95s
sys: mean: 2177.08s, stdev: 26.52s

[1]: https://lore.kernel.org/all/ZsiLElTykamcYZ6J@casper.infradead.org/

Suggested-by: Matthew Wilcox <willy@infradead.org>
Suggested-by: Yosry Ahmed <yosry.ahmed@linux.dev>
Suggested-by: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Nhat Pham <nphamcs@gmail.com>
---
 include/linux/zswap.h |   6 +--
 mm/page_io.c          |   6 +--
 mm/zswap.c            | 118 +++++++++++++++++++++++++++++-------------
 3 files changed, 87 insertions(+), 43 deletions(-)

diff --git a/include/linux/zswap.h b/include/linux/zswap.h
index d961ead91bf1..30c193a1207e 100644
--- a/include/linux/zswap.h
+++ b/include/linux/zswap.h
@@ -26,7 +26,7 @@ struct zswap_lruvec_state {
 
 unsigned long zswap_total_pages(void);
 bool zswap_store(struct folio *folio);
-bool zswap_load(struct folio *folio);
+int zswap_load(struct folio *folio);
 void zswap_invalidate(swp_entry_t swp);
 int zswap_swapon(int type, unsigned long nr_pages);
 void zswap_swapoff(int type);
@@ -44,9 +44,9 @@ static inline bool zswap_store(struct folio *folio)
 	return false;
 }
 
-static inline bool zswap_load(struct folio *folio)
+static inline int zswap_load(struct folio *folio)
 {
-	return false;
+	return -ENOENT;
 }
 
 static inline void zswap_invalidate(swp_entry_t swp) {}
diff --git a/mm/page_io.c b/mm/page_io.c
index 9b983de351f9..4bce19df557b 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -638,11 +638,11 @@ void swap_read_folio(struct folio *folio, struct swap_iocb **plug)
 	if (swap_read_folio_zeromap(folio)) {
 		folio_unlock(folio);
 		goto finish;
-	} else if (zswap_load(folio)) {
-		folio_unlock(folio);
-		goto finish;
 	}
 
+	if (zswap_load(folio) != -ENOENT)
+		goto finish;
+
 	/* We have to read from slower devices. Increase zswap protection. */
 	zswap_folio_swapin(folio);
 
diff --git a/mm/zswap.c b/mm/zswap.c
index 138b50ba832b..799b22c19b5e 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -62,6 +62,8 @@ static u64 zswap_reject_reclaim_fail;
 static u64 zswap_reject_compress_fail;
 /* Compressed page was too big for the allocator to (optimally) store */
 static u64 zswap_reject_compress_poor;
+/* Load or writeback failed due to decompression failure */
+static u64 zswap_decompress_fail;
 /* Store failed because underlying allocator could not get memory */
 static u64 zswap_reject_alloc_fail;
 /* Store failed because the entry metadata could not be allocated (rare) */
@@ -985,11 +987,12 @@ static bool zswap_compress(struct page *page, struct zswap_entry *entry,
 	return comp_ret == 0 && alloc_ret == 0;
 }
 
-static void zswap_decompress(struct zswap_entry *entry, struct folio *folio)
+static bool zswap_decompress(struct zswap_entry *entry, struct folio *folio)
 {
 	struct zpool *zpool = entry->pool->zpool;
 	struct scatterlist input, output;
 	struct crypto_acomp_ctx *acomp_ctx;
+	int decomp_ret, dlen;
 	u8 *src, *obj;
 
 	acomp_ctx = acomp_ctx_get_cpu_lock(entry->pool);
@@ -1012,11 +1015,21 @@ static void zswap_decompress(struct zswap_entry *entry, struct folio *folio)
 	sg_init_table(&output, 1);
 	sg_set_folio(&output, folio, PAGE_SIZE, 0);
 	acomp_request_set_params(acomp_ctx->req, &input, &output, entry->length, PAGE_SIZE);
-	BUG_ON(crypto_wait_req(crypto_acomp_decompress(acomp_ctx->req), &acomp_ctx->wait));
-	BUG_ON(acomp_ctx->req->dlen != PAGE_SIZE);
+	decomp_ret = crypto_wait_req(crypto_acomp_decompress(acomp_ctx->req), &acomp_ctx->wait);
+	dlen = acomp_ctx->req->dlen;
 
 	zpool_obj_read_end(zpool, entry->handle, obj);
 	acomp_ctx_put_unlock(acomp_ctx);
+
+	if (!decomp_ret && dlen == PAGE_SIZE)
+		return true;
+
+	zswap_decompress_fail++;
+	pr_alert_ratelimited("Decompression error from zswap (%d:%lu %s %u->%d)\n",
+						swp_type(entry->swpentry),
+						swp_offset(entry->swpentry),
+						entry->pool->tfm_name, entry->length, dlen);
+	return false;
 }
 
 /*********************************
@@ -1046,6 +1059,7 @@ static int zswap_writeback_entry(struct zswap_entry *entry,
 	struct writeback_control wbc = {
 		.sync_mode = WB_SYNC_NONE,
 	};
+	int ret = 0;
 
 	/* try to allocate swap cache folio */
 	si = get_swap_device(swpentry);
@@ -1067,8 +1081,8 @@ static int zswap_writeback_entry(struct zswap_entry *entry,
 	 * and freed when invalidated by the concurrent shrinker anyway.
 	 */
 	if (!folio_was_allocated) {
-		folio_put(folio);
-		return -EEXIST;
+		ret = -EEXIST;
+		goto out;
 	}
 
 	/*
@@ -1081,14 +1095,17 @@ static int zswap_writeback_entry(struct zswap_entry *entry,
 	 * be dereferenced.
 	 */
 	tree = swap_zswap_tree(swpentry);
-	if (entry != xa_cmpxchg(tree, offset, entry, NULL, GFP_KERNEL)) {
-		delete_from_swap_cache(folio);
-		folio_unlock(folio);
-		folio_put(folio);
-		return -ENOMEM;
+	if (entry != xa_load(tree, offset)) {
+		ret = -ENOMEM;
+		goto out;
 	}
 
-	zswap_decompress(entry, folio);
+	if (!zswap_decompress(entry, folio)) {
+		ret = -EIO;
+		goto out;
+	}
+
+	xa_erase(tree, offset);
 
 	count_vm_event(ZSWPWB);
 	if (entry->objcg)
@@ -1104,9 +1121,14 @@ static int zswap_writeback_entry(struct zswap_entry *entry,
 
 	/* start writeback */
 	__swap_writepage(folio, &wbc);
-	folio_put(folio);
 
-	return 0;
+out:
+	if (ret && ret != -EEXIST) {
+		delete_from_swap_cache(folio);
+		folio_unlock(folio);
+	}
+	folio_put(folio);
+	return ret;
 }
 
 /*********************************
@@ -1606,7 +1628,26 @@ bool zswap_store(struct folio *folio)
 	return ret;
 }
 
-bool zswap_load(struct folio *folio)
+/**
+ * zswap_load() - load a page from zswap
+ * @folio: folio to load
+ *
+ * Return: 0 on success, or one of the following error codes:
+ *
+ *  -EIO: if the swapped out content was in zswap, but could not be loaded
+ *  into the page due to a decompression failure. The folio is unlocked, but
+ *  NOT marked up-to-date, so that an IO error is emitted (e.g. do_swap_page()
+ *  will SIGBUS).
+ *
+ *  -EINVAL: if the swapped out content was in zswap, but the page belongs
+ *  to a large folio, which is not supported by zswap. The folio is unlocked,
+ *  but NOT marked up-to-date, so that an IO error is emitted (e.g.
+ *  do_swap_page() will SIGBUS).
+ *
+ *  -ENOENT: if the swapped out content was not in zswap. The folio remains
+ *  locked on return.
+ */
+int zswap_load(struct folio *folio)
 {
 	swp_entry_t swp = folio->swap;
 	pgoff_t offset = swp_offset(swp);
@@ -1617,18 +1658,32 @@ bool zswap_load(struct folio *folio)
 	VM_WARN_ON_ONCE(!folio_test_locked(folio));
 
 	if (zswap_never_enabled())
-		return false;
+		return -ENOENT;
 
 	/*
 	 * Large folios should not be swapped in while zswap is being used, as
 	 * they are not properly handled. Zswap does not properly load large
 	 * folios, and a large folio may only be partially in zswap.
-	 *
-	 * Return true without marking the folio uptodate so that an IO error is
-	 * emitted (e.g. do_swap_page() will sigbus).
 	 */
-	if (WARN_ON_ONCE(folio_test_large(folio)))
-		return true;
+	if (WARN_ON_ONCE(folio_test_large(folio))) {
+		folio_unlock(folio);
+		return -EINVAL;
+	}
+
+	entry = xa_load(tree, offset);
+	if (!entry)
+		return -ENOENT;
+
+	if (!zswap_decompress(entry, folio)) {
+		folio_unlock(folio);
+		return -EIO;
+	}
+
+	folio_mark_uptodate(folio);
+
+	count_vm_event(ZSWPIN);
+	if (entry->objcg)
+		count_objcg_events(entry->objcg, ZSWPIN, 1);
 
 	/*
 	 * When reading into the swapcache, invalidate our entry. The
@@ -1642,27 +1697,14 @@ bool zswap_load(struct folio *folio)
 	 * files, which reads into a private page and may free it if
 	 * the fault fails. We remain the primary owner of the entry.)
 	 */
-	if (swapcache)
-		entry = xa_erase(tree, offset);
-	else
-		entry = xa_load(tree, offset);
-
-	if (!entry)
-		return false;
-
-	zswap_decompress(entry, folio);
-
-	count_vm_event(ZSWPIN);
-	if (entry->objcg)
-		count_objcg_events(entry->objcg, ZSWPIN, 1);
-
 	if (swapcache) {
-		zswap_entry_free(entry);
 		folio_mark_dirty(folio);
+		xa_erase(tree, offset);
+		zswap_entry_free(entry);
 	}
 
-	folio_mark_uptodate(folio);
-	return true;
+	folio_unlock(folio);
+	return 0;
 }
 
 void zswap_invalidate(swp_entry_t swp)
@@ -1757,6 +1799,8 @@ static int zswap_debugfs_init(void)
 			   zswap_debugfs_root, &zswap_reject_compress_fail);
 	debugfs_create_u64("reject_compress_poor", 0444,
 			   zswap_debugfs_root, &zswap_reject_compress_poor);
+	debugfs_create_u64("decompress_fail", 0444,
+			   zswap_debugfs_root, &zswap_decompress_fail);
 	debugfs_create_u64("written_back_pages", 0444,
 			   zswap_debugfs_root, &zswap_written_back_pages);
 	debugfs_create_file("pool_total_size", 0444,

base-commit: 36768063c930199d5e01cde4d7a14fae3ccad51c
-- 
2.43.5

Re: [PATCH v4] page_io: zswap: do not crash the kernel on decompression failure

Posted by Matthew Wilcox 9 months ago

On Thu, Mar 06, 2025 at 12:50:10PM -0800, Nhat Pham wrote:
> -static void zswap_decompress(struct zswap_entry *entry, struct folio *folio)
> +static bool zswap_decompress(struct zswap_entry *entry, struct folio *folio)

Hm, why do it this way?  I had it as:

-static void zswap_decompress(struct zswap_entry *entry, struct folio *folio)
+static int zswap_decompress(struct zswap_entry *entry, struct folio *folio)
...
+       err = crypto_acomp_decompress(acomp_ctx->req);
+       err = crypto_wait_req(err, &acomp_ctx->wait);
+       if (!err && acomp_ctx->req->dlen != PAGE_SIZE)
+               err = -EIO;

which allows us to return something more meaningful than -EIO.  Or is
doing that a bad idea and we should squash all decompression failures
to EIO?

(also i really dislike the chained approach:

        decomp_ret = crypto_wait_req(crypto_acomp_decompress(acomp_ctx->req), &acomp_ctx->wait);

that's much harder to understand than the two lines i have above)

Re: [PATCH v4] page_io: zswap: do not crash the kernel on decompression failure

Posted by Nhat Pham 9 months ago

On Mon, May 12, 2025 at 3:03 PM Matthew Wilcox <willy@infradead.org> wrote:
>
> On Thu, Mar 06, 2025 at 12:50:10PM -0800, Nhat Pham wrote:
> > -static void zswap_decompress(struct zswap_entry *entry, struct folio *folio)
> > +static bool zswap_decompress(struct zswap_entry *entry, struct folio *folio)
>
> Hm, why do it this way?  I had it as:
>
> -static void zswap_decompress(struct zswap_entry *entry, struct folio *folio)
> +static int zswap_decompress(struct zswap_entry *entry, struct folio *folio)
> ...
> +       err = crypto_acomp_decompress(acomp_ctx->req);
> +       err = crypto_wait_req(err, &acomp_ctx->wait);
> +       if (!err && acomp_ctx->req->dlen != PAGE_SIZE)
> +               err = -EIO;
>
> which allows us to return something more meaningful than -EIO.  Or is
> doing that a bad idea and we should squash all decompression failures
> to EIO?

Not a bad idea, just no usecase yet. From the POV of
zswap_decompress()'s callers, any decompression failure is handled the
same way, no matter the cause.

Where it might be useful is the debug print statement right below
this. However, zstd and lz4 only return 0 or -EINVAL, I think - not
super useful.

https://github.com/torvalds/linux/blob/master/crypto/zstd.c#L163-L165
https://github.com/torvalds/linux/blob/master/crypto/lz4.c#L61

Not sure about other compressors.

>
> (also i really dislike the chained approach:
>
>         decomp_ret = crypto_wait_req(crypto_acomp_decompress(acomp_ctx->req), &acomp_ctx->wait);
>
> that's much harder to understand than the two lines i have above)

I can send a style fix sometimes if people dislike this - no big deal :)

BTW, hopefully you don't mind the Suggested-by: tag. I know it
deviated a bit from your original suggestion, but the main spirit of
that suggestion remains, so I feel like I should credit you and Yosry.

Re: [PATCH v4] page_io: zswap: do not crash the kernel on decompression failure

Posted by Matthew Wilcox 9 months ago

On Mon, May 12, 2025 at 03:49:29PM -0400, Nhat Pham wrote:
> Not a bad idea, just no usecase yet. From the POV of
> zswap_decompress()'s callers, any decompression failure is handled the
> same way, no matter the cause.
> 
> Where it might be useful is the debug print statement right below
> this. However, zstd and lz4 only return 0 or -EINVAL, I think - not
> super useful.
> 
> https://github.com/torvalds/linux/blob/master/crypto/zstd.c#L163-L165
> https://github.com/torvalds/linux/blob/master/crypto/lz4.c#L61
> 
> Not sure about other compressors.

OK, fair enough.  If there's no better information to return, then
this way is fine.

> > (also i really dislike the chained approach:
> >
> >         decomp_ret = crypto_wait_req(crypto_acomp_decompress(acomp_ctx->req), &acomp_ctx->wait);
> >
> > that's much harder to understand than the two lines i have above)
> 
> I can send a style fix sometimes if people dislike this - no big deal :)
> 
> BTW, hopefully you don't mind the Suggested-by: tag. I know it
> deviated a bit from your original suggestion, but the main spirit of
> that suggestion remains, so I feel like I should credit you and Yosry.

The Suggested-by is appropriate, I think.

Re: [PATCH v4] page_io: zswap: do not crash the kernel on decompression failure

Posted by Chengming Zhou 11 months, 1 week ago

On 2025/3/7 04:50, Nhat Pham wrote:
> Currently, we crash the kernel when a decompression failure occurs in
> zswap (either because of memory corruption, or a bug in the compression
> algorithm). This is overkill. We should only SIGBUS the unfortunate
> process asking for the zswap entry on zswap load, and skip the corrupted
> entry in zswap writeback.
> 
> See [1] for a recent upstream discussion about this.
> 
> The zswap writeback case is relatively straightforward to fix. For the
> zswap_load() case, we change the return behavior:
> 
> * Return 0 on success.
> * Return -ENOENT (with the folio locked) if zswap does not own the
>    swapped out content.
> * Return -EIO if zswap owns the swapped out content, but encounters a
>    decompression failure for some reasons. The folio will be unlocked,
>    but not be marked up-to-date, which will eventually cause the process
>    requesting the page to SIGBUS (see the handling of not-up-to-date
>    folio in do_swap_page() in mm/memory.c), without crashing the kernel.
> * Return -EINVAL if we encounter a large folio, as large folio should
>    not be swapped in while zswap is being used. Similar to the -EIO case,
>    we also unlock the folio but do not mark it as up-to-date to SIGBUS
>    the faulting process.
> 
> As a side effect, we require one extra zswap tree traversal in the load
> and writeback paths. Quick benchmarking on a kernel build test shows no
> performance difference:
> 
> With the new scheme:
> real: mean: 125.1s, stdev: 0.12s
> user: mean: 3265.23s, stdev: 9.62s
> sys: mean: 2156.41s, stdev: 13.98s
> 
> The old scheme:
> real: mean: 125.78s, stdev: 0.45s
> user: mean: 3287.18s, stdev: 5.95s
> sys: mean: 2177.08s, stdev: 26.52s
> 
> [1]: https://lore.kernel.org/all/ZsiLElTykamcYZ6J@casper.infradead.org/
> 
> Suggested-by: Matthew Wilcox <willy@infradead.org>
> Suggested-by: Yosry Ahmed <yosry.ahmed@linux.dev>
> Suggested-by: Johannes Weiner <hannes@cmpxchg.org>
> Signed-off-by: Nhat Pham <nphamcs@gmail.com>

Reviewed-by: Chengming Zhou <chengming.zhou@linux.dev>

Thanks!

Re: [PATCH v4] page_io: zswap: do not crash the kernel on decompression failure

Posted by Johannes Weiner 11 months, 1 week ago

On Thu, Mar 06, 2025 at 12:50:10PM -0800, Nhat Pham wrote:
> Currently, we crash the kernel when a decompression failure occurs in
> zswap (either because of memory corruption, or a bug in the compression
> algorithm). This is overkill. We should only SIGBUS the unfortunate
> process asking for the zswap entry on zswap load, and skip the corrupted
> entry in zswap writeback.
> 
> See [1] for a recent upstream discussion about this.
> 
> The zswap writeback case is relatively straightforward to fix. For the
> zswap_load() case, we change the return behavior:
> 
> * Return 0 on success.
> * Return -ENOENT (with the folio locked) if zswap does not own the
>   swapped out content.
> * Return -EIO if zswap owns the swapped out content, but encounters a
>   decompression failure for some reasons. The folio will be unlocked,
>   but not be marked up-to-date, which will eventually cause the process
>   requesting the page to SIGBUS (see the handling of not-up-to-date
>   folio in do_swap_page() in mm/memory.c), without crashing the kernel.
> * Return -EINVAL if we encounter a large folio, as large folio should
>   not be swapped in while zswap is being used. Similar to the -EIO case,
>   we also unlock the folio but do not mark it as up-to-date to SIGBUS
>   the faulting process.
> 
> As a side effect, we require one extra zswap tree traversal in the load
> and writeback paths. Quick benchmarking on a kernel build test shows no
> performance difference:
> 
> With the new scheme:
> real: mean: 125.1s, stdev: 0.12s
> user: mean: 3265.23s, stdev: 9.62s
> sys: mean: 2156.41s, stdev: 13.98s
> 
> The old scheme:
> real: mean: 125.78s, stdev: 0.45s
> user: mean: 3287.18s, stdev: 5.95s
> sys: mean: 2177.08s, stdev: 26.52s
> 
> [1]: https://lore.kernel.org/all/ZsiLElTykamcYZ6J@casper.infradead.org/
> 
> Suggested-by: Matthew Wilcox <willy@infradead.org>
> Suggested-by: Yosry Ahmed <yosry.ahmed@linux.dev>
> Suggested-by: Johannes Weiner <hannes@cmpxchg.org>
> Signed-off-by: Nhat Pham <nphamcs@gmail.com>

Acked-by: Johannes Weiner <hannes@cmpxchg.org>

Re: [PATCH v4] page_io: zswap: do not crash the kernel on decompression failure

Posted by Yosry Ahmed 11 months, 1 week ago

On Thu, Mar 06, 2025 at 12:50:10PM -0800, Nhat Pham wrote:
> Currently, we crash the kernel when a decompression failure occurs in
> zswap (either because of memory corruption, or a bug in the compression
> algorithm). This is overkill. We should only SIGBUS the unfortunate
> process asking for the zswap entry on zswap load, and skip the corrupted
> entry in zswap writeback.
> 
> See [1] for a recent upstream discussion about this.
> 
> The zswap writeback case is relatively straightforward to fix. For the
> zswap_load() case, we change the return behavior:
> 
> * Return 0 on success.
> * Return -ENOENT (with the folio locked) if zswap does not own the
>   swapped out content.
> * Return -EIO if zswap owns the swapped out content, but encounters a
>   decompression failure for some reasons. The folio will be unlocked,
>   but not be marked up-to-date, which will eventually cause the process
>   requesting the page to SIGBUS (see the handling of not-up-to-date
>   folio in do_swap_page() in mm/memory.c), without crashing the kernel.
> * Return -EINVAL if we encounter a large folio, as large folio should
>   not be swapped in while zswap is being used. Similar to the -EIO case,
>   we also unlock the folio but do not mark it as up-to-date to SIGBUS
>   the faulting process.
> 
> As a side effect, we require one extra zswap tree traversal in the load
> and writeback paths. Quick benchmarking on a kernel build test shows no
> performance difference:
> 
> With the new scheme:
> real: mean: 125.1s, stdev: 0.12s
> user: mean: 3265.23s, stdev: 9.62s
> sys: mean: 2156.41s, stdev: 13.98s
> 
> The old scheme:
> real: mean: 125.78s, stdev: 0.45s
> user: mean: 3287.18s, stdev: 5.95s
> sys: mean: 2177.08s, stdev: 26.52s
> 
> [1]: https://lore.kernel.org/all/ZsiLElTykamcYZ6J@casper.infradead.org/
> 
> Suggested-by: Matthew Wilcox <willy@infradead.org>
> Suggested-by: Yosry Ahmed <yosry.ahmed@linux.dev>
> Suggested-by: Johannes Weiner <hannes@cmpxchg.org>
> Signed-off-by: Nhat Pham <nphamcs@gmail.com>

Couple of nits below, but otherwise LGTM:

Acked-by: Yosry Ahmed <yosry.ahmed@linux.dev>

(I did expect the swap zeromap change in the same series, so if you send
it separately make sure to mention it's on top of this one because they
will conflict otherwise)

[..]
> @@ -1606,7 +1628,26 @@ bool zswap_store(struct folio *folio)
>  	return ret;
>  }
>  
> -bool zswap_load(struct folio *folio)
> +/**
> + * zswap_load() - load a page from zswap

nit: folio

> + * @folio: folio to load
> + *
> + * Return: 0 on success, or one of the following error codes:

nit: Maybe worth mentioning that the folio is unlocked and marked
uptodate on success for completeness.

> + *
> + *  -EIO: if the swapped out content was in zswap, but could not be loaded
> + *  into the page due to a decompression failure. The folio is unlocked, but
> + *  NOT marked up-to-date, so that an IO error is emitted (e.g. do_swap_page()
> + *  will SIGBUS).
> + *
> + *  -EINVAL: if the swapped out content was in zswap, but the page belongs
> + *  to a large folio, which is not supported by zswap. The folio is unlocked,
> + *  but NOT marked up-to-date, so that an IO error is emitted (e.g.
> + *  do_swap_page() will SIGBUS).
> + *
> + *  -ENOENT: if the swapped out content was not in zswap. The folio remains
> + *  locked on return.
> + */
> +int zswap_load(struct folio *folio)
[..]

Re: [PATCH v4] page_io: zswap: do not crash the kernel on decompression failure

Posted by Nhat Pham 11 months, 1 week ago

On Thu, Mar 6, 2025 at 1:32 PM Yosry Ahmed <yosry.ahmed@linux.dev> wrote:
>
> On Thu, Mar 06, 2025 at 12:50:10PM -0800, Nhat Pham wrote:
>
> Couple of nits below, but otherwise LGTM:
>
> Acked-by: Yosry Ahmed <yosry.ahmed@linux.dev>
>
> (I did expect the swap zeromap change in the same series, so if you send
> it separately make sure to mention it's on top of this one because they
> will conflict otherwise)

Yeah of course. I actually have already finished that second patch,
but wanna spend a bit more time proofreading things :)

>
> [..]
> > @@ -1606,7 +1628,26 @@ bool zswap_store(struct folio *folio)
> >       return ret;
> >  }
> >
> > -bool zswap_load(struct folio *folio)
> > +/**
> > + * zswap_load() - load a page from zswap
>
> nit: folio
>
> > + * @folio: folio to load
> > + *
> > + * Return: 0 on success, or one of the following error codes:
>
> nit: Maybe worth mentioning that the folio is unlocked and marked
> uptodate on success for completeness.

Will do!

>
> > + *
> > + *  -EIO: if the swapped out content was in zswap, but could not be loaded
> > + *  into the page due to a decompression failure. The folio is unlocked, but
> > + *  NOT marked up-to-date, so that an IO error is emitted (e.g. do_swap_page()
> > + *  will SIGBUS).
> > + *
> > + *  -EINVAL: if the swapped out content was in zswap, but the page belongs
> > + *  to a large folio, which is not supported by zswap. The folio is unlocked,
> > + *  but NOT marked up-to-date, so that an IO error is emitted (e.g.
> > + *  do_swap_page() will SIGBUS).
> > + *
> > + *  -ENOENT: if the swapped out content was not in zswap. The folio remains
> > + *  locked on return.
> > + */
> > +int zswap_load(struct folio *folio)
> [..]

[PATCH v4] page_io: zswap: do not crash the kernel on decompression failure (fix)

Posted by Nhat Pham 11 months, 1 week ago

Fix the documentation of zswap_load() - should say load a folio (rather
than a page), and should be explicit that the folio is unlocked and
marked up-to-date in the success case.

Suggested-by: Yosry Ahmed <yosry.ahmed@linux.dev>
Signed-off-by: Nhat Pham <nphamcs@gmail.com>
---
 mm/zswap.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/mm/zswap.c b/mm/zswap.c
index 799b22c19b5e..c470073c17cc 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -1629,10 +1629,11 @@ bool zswap_store(struct folio *folio)
 }
 
 /**
- * zswap_load() - load a page from zswap
+ * zswap_load() - load a folio from zswap
  * @folio: folio to load
  *
- * Return: 0 on success, or one of the following error codes:
+ * Return: 0 on success, with the folio unlocked and marked up-to-date, or one
+ * of the following error codes:
  *
  *  -EIO: if the swapped out content was in zswap, but could not be loaded
  *  into the page due to a decompression failure. The folio is unlocked, but
-- 
2.43.5