zswap IAA decompress batching

[RFC PATCH v1 2/7] mm: swap: Add IAA batch decompression API swap_crypto_acomp_decompress_batch().

Posted by Kanchana P Sridhar 1 month, 1 week ago

Add a new API, swap_crypto_acomp_decompress_batch(), that does batch
decompression. A system that has Intel IAA can use this API to submit a
batch of decompress jobs for parallel decompression in the hardware, to improve
performance. On a system without IAA, this API will process each decompress
job sequentially.

The purpose of this API is to be invocable from any swap module that needs
to decompress multiple 4K folios, or a batch of pages in the general case.
For instance, zswap would decompress the folios prefetched by
swapin_readahead(), up to (1UL << SWAP_RA_ORDER_CEILING) of them, in batches
of SWAP_CRYPTO_SUB_BATCH_SIZE pages (i.e. 8 if the system has IAA), which
would improve readahead performance.

Towards this eventual goal, the swap_crypto_acomp_decompress_batch()
interface is implemented in swap_state.c and exported via mm/swap.h. It
would be preferable for swap_crypto_acomp_decompress_batch() to be exported
via include/linux/swap.h so that modules outside mm (e.g. zram) can
potentially use the API for batch decompressions with IAA, since the
swapin_readahead() batching interface is common to all swap modules.
I would appreciate comments on this as part of the RFC.

Signed-off-by: Kanchana P Sridhar <kanchana.p.sridhar@intel.com>
---
 mm/swap.h       |  42 +++++++++++++++++--
 mm/swap_state.c | 109 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 147 insertions(+), 4 deletions(-)

diff --git a/mm/swap.h b/mm/swap.h
index 08c04954304f..0bb386b5fdee 100644
--- a/mm/swap.h
+++ b/mm/swap.h
@@ -10,11 +10,12 @@ struct mempolicy;
 #include <linux/crypto.h>
 
 /*
- * For IAA compression batching:
- * Maximum number of IAA acomp compress requests that will be processed
- * in a sub-batch.
+ * For IAA compression/decompression batching:
+ * Maximum number of IAA acomp compress/decompress requests that will be
+ * processed in a sub-batch.
  */
-#if defined(CONFIG_ZSWAP_STORE_BATCHING_ENABLED)
+#if defined(CONFIG_ZSWAP_STORE_BATCHING_ENABLED) || \
+	defined(CONFIG_ZSWAP_LOAD_BATCHING_ENABLED)
 #define SWAP_CRYPTO_SUB_BATCH_SIZE 8UL
 #else
 #define SWAP_CRYPTO_SUB_BATCH_SIZE 1UL
@@ -60,6 +61,29 @@ void swap_crypto_acomp_compress_batch(
 	int nr_pages,
 	struct crypto_acomp_ctx *acomp_ctx);
 
+/**
+ * swap_crypto_acomp_decompress_batch() - Decompress a sub-batch of pages.
+ * @srcs: The src buffers to be decompressed.
+ * @pages: The pages to store the buffers decompressed by IAA.
+ * @slens: src buffers' compressed lengths.
+ * @errors: Will contain a 0 if the page was successfully decompressed, or a
+ *          non-0 error value to be processed by the calling function.
+ * @nr_pages: The number of pages, up to SWAP_CRYPTO_SUB_BATCH_SIZE,
+ *            to be decompressed.
+ * @acomp_ctx: The acomp context for iaa_crypto/other compressor.
+ *
+ * Provides IAA decompress batching functionality for use by swap modules.
+ * The acomp_ctx mutex should be locked before, and unlocked after, calling
+ * this procedure.
+ */
+void swap_crypto_acomp_decompress_batch(
+	u8 *srcs[],
+	struct page *pages[],
+	unsigned int slens[],
+	int errors[],
+	int nr_pages,
+	struct crypto_acomp_ctx *acomp_ctx);
+
 /* linux/mm/vmscan.c, linux/mm/page_io.c, linux/mm/zswap.c */
 /* For batching of compressions in reclaim path. */
 struct swap_in_memory_cache_cb {
@@ -204,6 +228,16 @@ static inline void swap_write_in_memory_cache_unplug(
 {
 }
 
+static inline void swap_crypto_acomp_decompress_batch(
+	u8 *srcs[],
+	struct page *pages[],
+	unsigned int slens[],
+	int errors[],
+	int nr_pages,
+	struct crypto_acomp_ctx *acomp_ctx)
+{
+}
+
 static inline void swap_read_folio(struct folio *folio, struct swap_iocb **plug)
 {
 }
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 117c3caa5679..3cebbff40804 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -855,6 +855,115 @@ void swap_crypto_acomp_compress_batch(
 }
 EXPORT_SYMBOL_GPL(swap_crypto_acomp_compress_batch);
 
+/**
+ * swap_crypto_acomp_decompress_batch() - Decompress a sub-batch of pages.
+ * @srcs: The src buffers to be decompressed.
+ * @pages: The pages to store the buffers decompressed by IAA.
+ * @slens: src buffers' compressed lengths.
+ * @errors: Will contain a 0 if the page was successfully decompressed, or a
+ *          non-0 error value to be processed by the calling function.
+ * @nr_pages: The number of pages, up to SWAP_CRYPTO_SUB_BATCH_SIZE,
+ *            to be decompressed.
+ * @acomp_ctx: The acomp context for iaa_crypto/other compressor.
+ *
+ * Provides IAA decompress batching functionality for use by swap modules.
+ * The acomp_ctx mutex should be locked before, and unlocked after, calling
+ * this procedure.
+ */
+void swap_crypto_acomp_decompress_batch(
+	u8 *srcs[],
+	struct page *pages[],
+	unsigned int slens[],
+	int errors[],
+	int nr_pages,
+	struct crypto_acomp_ctx *acomp_ctx)
+{
+	struct scatterlist inputs[SWAP_CRYPTO_SUB_BATCH_SIZE];
+	struct scatterlist outputs[SWAP_CRYPTO_SUB_BATCH_SIZE];
+	unsigned int dlens[SWAP_CRYPTO_SUB_BATCH_SIZE];
+	bool decompressions_done = false;
+	int i, j;
+
+	BUG_ON(nr_pages > SWAP_CRYPTO_SUB_BATCH_SIZE);
+
+	/*
+	 * Prepare and submit acomp_reqs to IAA. In async mode, IAA processes
+	 * the jobs in parallel, with page i submitted on acomp_ctx->req[i].
+	 * If the compressor does not support a poll() method, or if IAA is
+	 * used in sync mode, the jobs will be processed sequentially using
+	 * acomp_ctx->req[0] and acomp_ctx->wait.
+	 */
+	for (i = 0; i < nr_pages; ++i) {
+		j = acomp_ctx->acomp->poll ? i : 0;
+
+		dlens[i] = PAGE_SIZE;
+		sg_init_one(&inputs[i], srcs[i], slens[i]);
+		sg_init_table(&outputs[i], 1);
+		sg_set_page(&outputs[i], pages[i], PAGE_SIZE, 0);
+		acomp_request_set_params(acomp_ctx->req[j], &inputs[i],
+					&outputs[i], slens[i], dlens[i]);
+		/*
+		 * With a poll() interface: submit the request to the driver
+		 * now and poll for completion later, once all descriptors
+		 * have been submitted. -EAGAIN marks the page pending; any
+		 * return other than -EINPROGRESS is recorded as -EINVAL.
+		 * Without a poll() interface: submit the request and wait for
+		 * it to complete synchronously before the next request.
+		 */
+		if (acomp_ctx->acomp->poll) {
+			errors[i] = crypto_acomp_decompress(acomp_ctx->req[j]);
+
+			if (errors[i] != -EINPROGRESS)
+				errors[i] = -EINVAL;
+			else
+				errors[i] = -EAGAIN;
+		} else {
+			errors[i] = crypto_wait_req(
+					crypto_acomp_decompress(acomp_ctx->req[j]),
+					&acomp_ctx->wait);
+			if (!errors[i]) {
+				dlens[i] = acomp_ctx->req[j]->dlen;
+				BUG_ON(dlens[i] != PAGE_SIZE);
+			}
+		}
+	}
+
+	/*
+	 * Without a poll() method, every request was waited on in the loop
+	 * above, so the batch is complete and errors[] is final.
+	 */
+	if (!acomp_ctx->acomp->poll)
+		return;
+
+	/*
+	 * Poll for IAA decompress job completions, in whatever order they
+	 * finish, until no page is left in the -EAGAIN (pending) state.
+	 */
+	while (!decompressions_done) {
+		decompressions_done = true;
+
+		for (i = 0; i < nr_pages; ++i) {
+			/*
+			 * Skip pages that are no longer pending, i.e. that
+			 * already completed successfully or with an error.
+			 */
+			if (errors[i] != -EAGAIN)
+				continue;
+
+			errors[i] = crypto_acomp_poll(acomp_ctx->req[i]);
+
+			if (errors[i]) {
+				if (errors[i] == -EAGAIN)
+					decompressions_done = false;
+			} else {
+				dlens[i] = acomp_ctx->req[i]->dlen;
+				BUG_ON(dlens[i] != PAGE_SIZE);
+			}
+		}
+	}
+}
+EXPORT_SYMBOL_GPL(swap_crypto_acomp_decompress_batch);
+
 #endif /* CONFIG_SWAP */
 
 static int swap_vma_ra_win(struct vm_fault *vmf, unsigned long *start,
-- 
2.27.0

[RFC PATCH v1 1/7] mm: zswap: Config variable to enable zswap loads with decompress batching.
[RFC PATCH v1 2/7] mm: swap: Add IAA batch decompression API swap_crypto_acomp_decompress_batch().
[RFC PATCH v1 3/7] pagevec: struct folio_batch changes for decompress batching interface.
[RFC PATCH v1 4/7] mm: swap: swap_read_folio() can add a folio to a folio_batch if it is in zswap.
[RFC PATCH v1 5/7] mm: swap, zswap: zswap folio_batch processing with IAA decompression batching.
[RFC PATCH v1 6/7] mm: do_swap_page() calls swapin_readahead() zswap load batching interface.
[RFC PATCH v1 7/7] mm: For IAA batching, reduce SWAP_BATCH and modify swap slot cache thresholds.