[PATCH v1 1/2] mm: zswap: Modified zswap_store_page() to process multiple pages in a folio.

Kanchana P Sridhar posted 2 patches 1 year, 2 months ago
Posted by Kanchana P Sridhar 1 year, 2 months ago
Modified zswap_store() to store the folio in batches of
SWAP_CRYPTO_BATCH_SIZE pages. Accordingly, refactored zswap_store_page()
into zswap_store_pages() that processes a range of pages in the folio.
zswap_store_pages() is a vectorized version of zswap_store_page().

For now, zswap_store_pages() will sequentially compress these pages with
zswap_compress().

These changes are a follow-up to code review comments received for [1], and
are intended to set up zswap_store() for batching with Intel IAA.

[1]: https://patchwork.kernel.org/project/linux-mm/patch/20241123070127.332773-11-kanchana.p.sridhar@intel.com/

Signed-off-by: Kanchana P Sridhar <kanchana.p.sridhar@intel.com>
---
 include/linux/zswap.h |   1 +
 mm/zswap.c            | 154 ++++++++++++++++++++++++------------------
 2 files changed, 88 insertions(+), 67 deletions(-)

diff --git a/include/linux/zswap.h b/include/linux/zswap.h
index d961ead91bf1..05a81e750744 100644
--- a/include/linux/zswap.h
+++ b/include/linux/zswap.h
@@ -7,6 +7,7 @@
 
 struct lruvec;
 
+#define SWAP_CRYPTO_BATCH_SIZE 8UL
 extern atomic_long_t zswap_stored_pages;
 
 #ifdef CONFIG_ZSWAP
diff --git a/mm/zswap.c b/mm/zswap.c
index f6316b66fb23..b09d1023e775 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -1409,78 +1409,96 @@ static void shrink_worker(struct work_struct *w)
 * main API
 **********************************/
 
-static ssize_t zswap_store_page(struct page *page,
-				struct obj_cgroup *objcg,
-				struct zswap_pool *pool)
+/*
+ * Store multiple pages in @folio, starting from the page at index @si up to
+ * and including the page at index @ei.
+ */
+static ssize_t zswap_store_pages(struct folio *folio,
+				 long si,
+				 long ei,
+				 struct obj_cgroup *objcg,
+				 struct zswap_pool *pool)
 {
-	swp_entry_t page_swpentry = page_swap_entry(page);
+	struct page *page;
+	swp_entry_t page_swpentry;
 	struct zswap_entry *entry, *old;
+	size_t compressed_bytes = 0;
+	u8 nr_pages = ei - si + 1;
+	u8 i;
+
+	for (i = 0; i < nr_pages; ++i) {
+		page = folio_page(folio, si + i);
+		page_swpentry = page_swap_entry(page);
+
+		/* allocate entry */
+		entry = zswap_entry_cache_alloc(GFP_KERNEL, page_to_nid(page));
+		if (!entry) {
+			zswap_reject_kmemcache_fail++;
+			return -EINVAL;
+		}
 
-	/* allocate entry */
-	entry = zswap_entry_cache_alloc(GFP_KERNEL, page_to_nid(page));
-	if (!entry) {
-		zswap_reject_kmemcache_fail++;
-		return -EINVAL;
-	}
-
-	if (!zswap_compress(page, entry, pool))
-		goto compress_failed;
+		if (!zswap_compress(page, entry, pool))
+			goto compress_failed;
 
-	old = xa_store(swap_zswap_tree(page_swpentry),
-		       swp_offset(page_swpentry),
-		       entry, GFP_KERNEL);
-	if (xa_is_err(old)) {
-		int err = xa_err(old);
+		old = xa_store(swap_zswap_tree(page_swpentry),
+			       swp_offset(page_swpentry),
+			       entry, GFP_KERNEL);
+		if (xa_is_err(old)) {
+			int err = xa_err(old);
 
-		WARN_ONCE(err != -ENOMEM, "unexpected xarray error: %d\n", err);
-		zswap_reject_alloc_fail++;
-		goto store_failed;
-	}
+			WARN_ONCE(err != -ENOMEM, "unexpected xarray error: %d\n", err);
+			zswap_reject_alloc_fail++;
+			goto store_failed;
+		}
 
-	/*
-	 * We may have had an existing entry that became stale when
-	 * the folio was redirtied and now the new version is being
-	 * swapped out. Get rid of the old.
-	 */
-	if (old)
-		zswap_entry_free(old);
+		/*
+		 * We may have had an existing entry that became stale when
+		 * the folio was redirtied and now the new version is being
+		 * swapped out. Get rid of the old.
+		 */
+		if (old)
+			zswap_entry_free(old);
 
-	/*
-	 * The entry is successfully compressed and stored in the tree, there is
-	 * no further possibility of failure. Grab refs to the pool and objcg.
-	 * These refs will be dropped by zswap_entry_free() when the entry is
-	 * removed from the tree.
-	 */
-	zswap_pool_get(pool);
-	if (objcg)
-		obj_cgroup_get(objcg);
+		/*
+		 * The entry is successfully compressed and stored in the tree, there is
+		 * no further possibility of failure. Grab refs to the pool and objcg.
+		 * These refs will be dropped by zswap_entry_free() when the entry is
+		 * removed from the tree.
+		 */
+		zswap_pool_get(pool);
+		if (objcg)
+			obj_cgroup_get(objcg);
 
-	/*
-	 * We finish initializing the entry while it's already in xarray.
-	 * This is safe because:
-	 *
-	 * 1. Concurrent stores and invalidations are excluded by folio lock.
-	 *
-	 * 2. Writeback is excluded by the entry not being on the LRU yet.
-	 *    The publishing order matters to prevent writeback from seeing
-	 *    an incoherent entry.
-	 */
-	entry->pool = pool;
-	entry->swpentry = page_swpentry;
-	entry->objcg = objcg;
-	entry->referenced = true;
-	if (entry->length) {
-		INIT_LIST_HEAD(&entry->lru);
-		zswap_lru_add(&zswap_list_lru, entry);
-	}
+		/*
+		 * We finish initializing the entry while it's already in xarray.
+		 * This is safe because:
+		 *
+		 * 1. Concurrent stores and invalidations are excluded by folio lock.
+		 *
+		 * 2. Writeback is excluded by the entry not being on the LRU yet.
+		 *    The publishing order matters to prevent writeback from seeing
+		 *    an incoherent entry.
+		 */
+		entry->pool = pool;
+		entry->swpentry = page_swpentry;
+		entry->objcg = objcg;
+		entry->referenced = true;
+		if (entry->length) {
+			INIT_LIST_HEAD(&entry->lru);
+			zswap_lru_add(&zswap_list_lru, entry);
+		}
 
-	return entry->length;
+		compressed_bytes += entry->length;
+		continue;
 
 store_failed:
-	zpool_free(pool->zpool, entry->handle);
+		zpool_free(pool->zpool, entry->handle);
 compress_failed:
-	zswap_entry_cache_free(entry);
-	return -EINVAL;
+		zswap_entry_cache_free(entry);
+		return -EINVAL;
+	}
+
+	return compressed_bytes;
 }
 
 bool zswap_store(struct folio *folio)
@@ -1492,7 +1510,7 @@ bool zswap_store(struct folio *folio)
 	struct zswap_pool *pool;
 	size_t compressed_bytes = 0;
 	bool ret = false;
-	long index;
+	long si, ei, incr = SWAP_CRYPTO_BATCH_SIZE;
 
 	VM_WARN_ON_ONCE(!folio_test_locked(folio));
 	VM_WARN_ON_ONCE(!folio_test_swapcache(folio));
@@ -1526,11 +1544,13 @@ bool zswap_store(struct folio *folio)
 		mem_cgroup_put(memcg);
 	}
 
-	for (index = 0; index < nr_pages; ++index) {
-		struct page *page = folio_page(folio, index);
+	/* Store the folio in batches of SWAP_CRYPTO_BATCH_SIZE pages. */
+	for (si = 0, ei = min(si + incr - 1, nr_pages - 1);
+	     ((si < nr_pages) && (ei < nr_pages));
+	     si = ei + 1, ei = min(si + incr - 1, nr_pages - 1)) {
 		ssize_t bytes;
 
-		bytes = zswap_store_page(page, objcg, pool);
+		bytes = zswap_store_pages(folio, si, ei, objcg, pool);
 		if (bytes < 0)
 			goto put_pool;
 		compressed_bytes += bytes;
@@ -1565,9 +1585,9 @@ bool zswap_store(struct folio *folio)
 		struct zswap_entry *entry;
 		struct xarray *tree;
 
-		for (index = 0; index < nr_pages; ++index) {
-			tree = swap_zswap_tree(swp_entry(type, offset + index));
-			entry = xa_erase(tree, offset + index);
+		for (si = 0; si < nr_pages; ++si) {
+			tree = swap_zswap_tree(swp_entry(type, offset + si));
+			entry = xa_erase(tree, offset + si);
 			if (entry)
 				zswap_entry_free(entry);
 		}
-- 
2.27.0
Re: [PATCH v1 1/2] mm: zswap: Modified zswap_store_page() to process multiple pages in a folio.
Posted by Chengming Zhou 1 year, 2 months ago
On 2024/11/28 06:53, Kanchana P Sridhar wrote:
> Modified zswap_store() to store the folio in batches of
> SWAP_CRYPTO_BATCH_SIZE pages. Accordingly, refactored zswap_store_page()
> into zswap_store_pages() that processes a range of pages in the folio.
> zswap_store_pages() is a vectorized version of zswap_store_page().

Maybe I missed something, but this refactoring looks confusing to me.

Why not have zswap_store_pages() just reuse the existing zswap_store_page()?
```
zswap_store_pages()
	compressed_bytes = 0
	for each page in this batch
		size = zswap_store_page(page)
		if (size < 0)
			return size;
		compressed_bytes += size;
	return compressed_bytes;
```
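
In C terms, roughly (untested sketch, assuming the existing
zswap_store_page() is kept with its current signature):

```
static ssize_t zswap_store_pages(struct folio *folio, long si, long ei,
				 struct obj_cgroup *objcg,
				 struct zswap_pool *pool)
{
	size_t compressed_bytes = 0;
	long i;

	for (i = si; i <= ei; i++) {
		/* Reuse the existing single-page store for each page. */
		ssize_t bytes = zswap_store_page(folio_page(folio, i),
						 objcg, pool);

		if (bytes < 0)
			return bytes;
		compressed_bytes += bytes;
	}

	return compressed_bytes;
}
```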

Thanks.

> 
> For now, zswap_store_pages() will sequentially compress these pages with
> zswap_compress().
> 
> These changes are follow-up to code review comments received for [1], and
> are intended to set up zswap_store() for batching with Intel IAA.
> 
> [1]: https://patchwork.kernel.org/project/linux-mm/patch/20241123070127.332773-11-kanchana.p.sridhar@intel.com/
> 
> Signed-off-by: Kanchana P Sridhar <kanchana.p.sridhar@intel.com>
> ---
>   include/linux/zswap.h |   1 +
>   mm/zswap.c            | 154 ++++++++++++++++++++++++------------------
>   2 files changed, 88 insertions(+), 67 deletions(-)
> 
> diff --git a/include/linux/zswap.h b/include/linux/zswap.h
> index d961ead91bf1..05a81e750744 100644
> --- a/include/linux/zswap.h
> +++ b/include/linux/zswap.h
> @@ -7,6 +7,7 @@
>   
>   struct lruvec;
>   
> +#define SWAP_CRYPTO_BATCH_SIZE 8UL
>   extern atomic_long_t zswap_stored_pages;
>   
>   #ifdef CONFIG_ZSWAP
> diff --git a/mm/zswap.c b/mm/zswap.c
> index f6316b66fb23..b09d1023e775 100644
> --- a/mm/zswap.c
> +++ b/mm/zswap.c
> @@ -1409,78 +1409,96 @@ static void shrink_worker(struct work_struct *w)
>   * main API
>   **********************************/
>   
> -static ssize_t zswap_store_page(struct page *page,
> -				struct obj_cgroup *objcg,
> -				struct zswap_pool *pool)
> +/*
> + * Store multiple pages in @folio, starting from the page at index @si up to
> + * and including the page at index @ei.
> + */
> +static ssize_t zswap_store_pages(struct folio *folio,
> +				 long si,
> +				 long ei,
> +				 struct obj_cgroup *objcg,
> +				 struct zswap_pool *pool)
>   {
> -	swp_entry_t page_swpentry = page_swap_entry(page);
> +	struct page *page;
> +	swp_entry_t page_swpentry;
>   	struct zswap_entry *entry, *old;
> +	size_t compressed_bytes = 0;
> +	u8 nr_pages = ei - si + 1;
> +	u8 i;
> +
> +	for (i = 0; i < nr_pages; ++i) {
> +		page = folio_page(folio, si + i);
> +		page_swpentry = page_swap_entry(page);
> +
> +		/* allocate entry */
> +		entry = zswap_entry_cache_alloc(GFP_KERNEL, page_to_nid(page));
> +		if (!entry) {
> +			zswap_reject_kmemcache_fail++;
> +			return -EINVAL;
> +		}
>   
> -	/* allocate entry */
> -	entry = zswap_entry_cache_alloc(GFP_KERNEL, page_to_nid(page));
> -	if (!entry) {
> -		zswap_reject_kmemcache_fail++;
> -		return -EINVAL;
> -	}
> -
> -	if (!zswap_compress(page, entry, pool))
> -		goto compress_failed;
> +		if (!zswap_compress(page, entry, pool))
> +			goto compress_failed;
>   
> -	old = xa_store(swap_zswap_tree(page_swpentry),
> -		       swp_offset(page_swpentry),
> -		       entry, GFP_KERNEL);
> -	if (xa_is_err(old)) {
> -		int err = xa_err(old);
> +		old = xa_store(swap_zswap_tree(page_swpentry),
> +			       swp_offset(page_swpentry),
> +			       entry, GFP_KERNEL);
> +		if (xa_is_err(old)) {
> +			int err = xa_err(old);
>   
> -		WARN_ONCE(err != -ENOMEM, "unexpected xarray error: %d\n", err);
> -		zswap_reject_alloc_fail++;
> -		goto store_failed;
> -	}
> +			WARN_ONCE(err != -ENOMEM, "unexpected xarray error: %d\n", err);
> +			zswap_reject_alloc_fail++;
> +			goto store_failed;
> +		}
>   
> -	/*
> -	 * We may have had an existing entry that became stale when
> -	 * the folio was redirtied and now the new version is being
> -	 * swapped out. Get rid of the old.
> -	 */
> -	if (old)
> -		zswap_entry_free(old);
> +		/*
> +		 * We may have had an existing entry that became stale when
> +		 * the folio was redirtied and now the new version is being
> +		 * swapped out. Get rid of the old.
> +		 */
> +		if (old)
> +			zswap_entry_free(old);
>   
> -	/*
> -	 * The entry is successfully compressed and stored in the tree, there is
> -	 * no further possibility of failure. Grab refs to the pool and objcg.
> -	 * These refs will be dropped by zswap_entry_free() when the entry is
> -	 * removed from the tree.
> -	 */
> -	zswap_pool_get(pool);
> -	if (objcg)
> -		obj_cgroup_get(objcg);
> +		/*
> +		 * The entry is successfully compressed and stored in the tree, there is
> +		 * no further possibility of failure. Grab refs to the pool and objcg.
> +		 * These refs will be dropped by zswap_entry_free() when the entry is
> +		 * removed from the tree.
> +		 */
> +		zswap_pool_get(pool);
> +		if (objcg)
> +			obj_cgroup_get(objcg);
>   
> -	/*
> -	 * We finish initializing the entry while it's already in xarray.
> -	 * This is safe because:
> -	 *
> -	 * 1. Concurrent stores and invalidations are excluded by folio lock.
> -	 *
> -	 * 2. Writeback is excluded by the entry not being on the LRU yet.
> -	 *    The publishing order matters to prevent writeback from seeing
> -	 *    an incoherent entry.
> -	 */
> -	entry->pool = pool;
> -	entry->swpentry = page_swpentry;
> -	entry->objcg = objcg;
> -	entry->referenced = true;
> -	if (entry->length) {
> -		INIT_LIST_HEAD(&entry->lru);
> -		zswap_lru_add(&zswap_list_lru, entry);
> -	}
> +		/*
> +		 * We finish initializing the entry while it's already in xarray.
> +		 * This is safe because:
> +		 *
> +		 * 1. Concurrent stores and invalidations are excluded by folio lock.
> +		 *
> +		 * 2. Writeback is excluded by the entry not being on the LRU yet.
> +		 *    The publishing order matters to prevent writeback from seeing
> +		 *    an incoherent entry.
> +		 */
> +		entry->pool = pool;
> +		entry->swpentry = page_swpentry;
> +		entry->objcg = objcg;
> +		entry->referenced = true;
> +		if (entry->length) {
> +			INIT_LIST_HEAD(&entry->lru);
> +			zswap_lru_add(&zswap_list_lru, entry);
> +		}
>   
> -	return entry->length;
> +		compressed_bytes += entry->length;
> +		continue;
>   
>   store_failed:
> -	zpool_free(pool->zpool, entry->handle);
> +		zpool_free(pool->zpool, entry->handle);
>   compress_failed:
> -	zswap_entry_cache_free(entry);
> -	return -EINVAL;
> +		zswap_entry_cache_free(entry);
> +		return -EINVAL;
> +	}
> +
> +	return compressed_bytes;
>   }
>   
>   bool zswap_store(struct folio *folio)
> @@ -1492,7 +1510,7 @@ bool zswap_store(struct folio *folio)
>   	struct zswap_pool *pool;
>   	size_t compressed_bytes = 0;
>   	bool ret = false;
> -	long index;
> +	long si, ei, incr = SWAP_CRYPTO_BATCH_SIZE;
>   
>   	VM_WARN_ON_ONCE(!folio_test_locked(folio));
>   	VM_WARN_ON_ONCE(!folio_test_swapcache(folio));
> @@ -1526,11 +1544,13 @@ bool zswap_store(struct folio *folio)
>   		mem_cgroup_put(memcg);
>   	}
>   
> -	for (index = 0; index < nr_pages; ++index) {
> -		struct page *page = folio_page(folio, index);
> +	/* Store the folio in batches of SWAP_CRYPTO_BATCH_SIZE pages. */
> +	for (si = 0, ei = min(si + incr - 1, nr_pages - 1);
> +	     ((si < nr_pages) && (ei < nr_pages));
> +	     si = ei + 1, ei = min(si + incr - 1, nr_pages - 1)) {
>   		ssize_t bytes;
>   
> -		bytes = zswap_store_page(page, objcg, pool);
> +		bytes = zswap_store_pages(folio, si, ei, objcg, pool);
>   		if (bytes < 0)
>   			goto put_pool;
>   		compressed_bytes += bytes;
> @@ -1565,9 +1585,9 @@ bool zswap_store(struct folio *folio)
>   		struct zswap_entry *entry;
>   		struct xarray *tree;
>   
> -		for (index = 0; index < nr_pages; ++index) {
> -			tree = swap_zswap_tree(swp_entry(type, offset + index));
> -			entry = xa_erase(tree, offset + index);
> +		for (si = 0; si < nr_pages; ++si) {
> +			tree = swap_zswap_tree(swp_entry(type, offset + si));
> +			entry = xa_erase(tree, offset + si);
>   			if (entry)
>   				zswap_entry_free(entry);
>   		}
RE: [PATCH v1 1/2] mm: zswap: Modified zswap_store_page() to process multiple pages in a folio.
Posted by Sridhar, Kanchana P 1 year, 2 months ago
> -----Original Message-----
> From: Chengming Zhou <chengming.zhou@linux.dev>
> Sent: Monday, December 2, 2024 7:24 PM
> To: Sridhar, Kanchana P <kanchana.p.sridhar@intel.com>; linux-
> kernel@vger.kernel.org; linux-mm@kvack.org; hannes@cmpxchg.org;
> yosryahmed@google.com; nphamcs@gmail.com; usamaarif642@gmail.com;
> ryan.roberts@arm.com; 21cnbao@gmail.com; akpm@linux-foundation.org
> Cc: Feghali, Wajdi K <wajdi.k.feghali@intel.com>; Gopal, Vinodh
> <vinodh.gopal@intel.com>
> Subject: Re: [PATCH v1 1/2] mm: zswap: Modified zswap_store_page() to
> process multiple pages in a folio.
> 
> On 2024/11/28 06:53, Kanchana P Sridhar wrote:
> > Modified zswap_store() to store the folio in batches of
> > SWAP_CRYPTO_BATCH_SIZE pages. Accordingly, refactored
> zswap_store_page()
> > into zswap_store_pages() that processes a range of pages in the folio.
> > zswap_store_pages() is a vectorized version of zswap_store_page().
> 
> Maybe I missed something, but this refactor change looks confused to me.
> 
> Why not zswap_store_pages() just reuse the existing zswap_store_page()?
> ```
> zswap_store_pages()
> 	compressed_bytes = 0
> 	for each page in this batch
> 		size = zswap_store_page(page)
> 		if (size < 0)
> 			return size;
> 		compressed_bytes += size;
> 	return compressed_bytes;
> ```

zswap_store_pages() is just a vectorized version of zswap_store_page() that
operates on a chunk of the pages of a large folio. If the compressor supports
batching, this allows us to batch-compress that chunk of pages; for example,
IAA can potentially compress the chunk of pages in parallel. If the compressor
does not support batching, each page in the "batch" (i.e., the "chunk of
pages" in the folio) will be compressed with an individual call to
zswap_compress(), as is done in patch 2/2 currently.

If we agree with the zswap_store() -> zswap_store_pages() structure
introduced in this patch series, it allows me to add batching in a really
seamless manner: the batching would be confined to a code branch that either
calls zswap_batch_compress(all pages) or zswap_compress(each page).
Yosry had also suggested this in an earlier comment to [1]:

"For example, most of zswap_store() should remain the same. It is still
getting a folio to compress, the only difference is that we will
parallelize the page compressions. zswap_store_page() is where some
changes need to be made. Instead of a single function that handles the
storage of each page, we need a vectorized function that handles the
storage of N pages in a folio (allocate zswap_entry's, do xarray
insertions, etc). This should be refactoring in a separate patch."
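
To make the branch I mentioned above concrete, it would be shaped roughly
like this inside zswap_store_pages() (rough sketch only: pool_can_batch(),
zswap_batch_compress(), the entries[] array and the error label are
hypothetical placeholders, not code in this series):

```
	/*
	 * Hypothetical sketch: pool_can_batch() and zswap_batch_compress()
	 * stand in for a compressor-level batching API (e.g. IAA), and
	 * entries[] for a per-page array of zswap_entry pointers.
	 */
	if (pool_can_batch(pool)) {
		/* Compress all pages of the batch in one call, e.g. in parallel. */
		if (!zswap_batch_compress(folio, si, ei, entries, pool))
			goto batch_failed;
	} else {
		/* No batching support: compress each page sequentially. */
		for (i = 0; i < nr_pages; ++i) {
			if (!zswap_compress(folio_page(folio, si + i),
					    entries[i], pool))
				goto batch_failed;
		}
	}
```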

Besides these immediate benefits, I believe processing the pages of
a large folio in chunks allows us to explore batch-processing of other
ops, for instance, adding the zswap entries to the xarray, to further
reduce zswap_store() latency for large folios.

I hope this explains things a little better.

[1]: https://patchwork.kernel.org/project/linux-mm/patch/20241123070127.332773-11-kanchana.p.sridhar@intel.com/

Thanks,
Kanchana

> 
> Thanks.
> 
> >
> > For now, zswap_store_pages() will sequentially compress these pages with
> > zswap_compress().
> >
> > These changes are follow-up to code review comments received for [1], and
> > are intended to set up zswap_store() for batching with Intel IAA.
> >
> > [1]: https://patchwork.kernel.org/project/linux-
> mm/patch/20241123070127.332773-11-kanchana.p.sridhar@intel.com/
> >
> > Signed-off-by: Kanchana P Sridhar <kanchana.p.sridhar@intel.com>
> > ---
> >   include/linux/zswap.h |   1 +
> >   mm/zswap.c            | 154 ++++++++++++++++++++++++------------------
> >   2 files changed, 88 insertions(+), 67 deletions(-)
> >
> > diff --git a/include/linux/zswap.h b/include/linux/zswap.h
> > index d961ead91bf1..05a81e750744 100644
> > --- a/include/linux/zswap.h
> > +++ b/include/linux/zswap.h
> > @@ -7,6 +7,7 @@
> >
> >   struct lruvec;
> >
> > +#define SWAP_CRYPTO_BATCH_SIZE 8UL
> >   extern atomic_long_t zswap_stored_pages;
> >
> >   #ifdef CONFIG_ZSWAP
> > diff --git a/mm/zswap.c b/mm/zswap.c
> > index f6316b66fb23..b09d1023e775 100644
> > --- a/mm/zswap.c
> > +++ b/mm/zswap.c
> > @@ -1409,78 +1409,96 @@ static void shrink_worker(struct work_struct
> *w)
> >   * main API
> >   **********************************/
> >
> > -static ssize_t zswap_store_page(struct page *page,
> > -				struct obj_cgroup *objcg,
> > -				struct zswap_pool *pool)
> > +/*
> > + * Store multiple pages in @folio, starting from the page at index @si up to
> > + * and including the page at index @ei.
> > + */
> > +static ssize_t zswap_store_pages(struct folio *folio,
> > +				 long si,
> > +				 long ei,
> > +				 struct obj_cgroup *objcg,
> > +				 struct zswap_pool *pool)
> >   {
> > -	swp_entry_t page_swpentry = page_swap_entry(page);
> > +	struct page *page;
> > +	swp_entry_t page_swpentry;
> >   	struct zswap_entry *entry, *old;
> > +	size_t compressed_bytes = 0;
> > +	u8 nr_pages = ei - si + 1;
> > +	u8 i;
> > +
> > +	for (i = 0; i < nr_pages; ++i) {
> > +		page = folio_page(folio, si + i);
> > +		page_swpentry = page_swap_entry(page);
> > +
> > +		/* allocate entry */
> > +		entry = zswap_entry_cache_alloc(GFP_KERNEL,
> page_to_nid(page));
> > +		if (!entry) {
> > +			zswap_reject_kmemcache_fail++;
> > +			return -EINVAL;
> > +		}
> >
> > -	/* allocate entry */
> > -	entry = zswap_entry_cache_alloc(GFP_KERNEL, page_to_nid(page));
> > -	if (!entry) {
> > -		zswap_reject_kmemcache_fail++;
> > -		return -EINVAL;
> > -	}
> > -
> > -	if (!zswap_compress(page, entry, pool))
> > -		goto compress_failed;
> > +		if (!zswap_compress(page, entry, pool))
> > +			goto compress_failed;
> >
> > -	old = xa_store(swap_zswap_tree(page_swpentry),
> > -		       swp_offset(page_swpentry),
> > -		       entry, GFP_KERNEL);
> > -	if (xa_is_err(old)) {
> > -		int err = xa_err(old);
> > +		old = xa_store(swap_zswap_tree(page_swpentry),
> > +			       swp_offset(page_swpentry),
> > +			       entry, GFP_KERNEL);
> > +		if (xa_is_err(old)) {
> > +			int err = xa_err(old);
> >
> > -		WARN_ONCE(err != -ENOMEM, "unexpected xarray error:
> %d\n", err);
> > -		zswap_reject_alloc_fail++;
> > -		goto store_failed;
> > -	}
> > +			WARN_ONCE(err != -ENOMEM, "unexpected xarray
> error: %d\n", err);
> > +			zswap_reject_alloc_fail++;
> > +			goto store_failed;
> > +		}
> >
> > -	/*
> > -	 * We may have had an existing entry that became stale when
> > -	 * the folio was redirtied and now the new version is being
> > -	 * swapped out. Get rid of the old.
> > -	 */
> > -	if (old)
> > -		zswap_entry_free(old);
> > +		/*
> > +		 * We may have had an existing entry that became stale when
> > +		 * the folio was redirtied and now the new version is being
> > +		 * swapped out. Get rid of the old.
> > +		 */
> > +		if (old)
> > +			zswap_entry_free(old);
> >
> > -	/*
> > -	 * The entry is successfully compressed and stored in the tree, there is
> > -	 * no further possibility of failure. Grab refs to the pool and objcg.
> > -	 * These refs will be dropped by zswap_entry_free() when the entry is
> > -	 * removed from the tree.
> > -	 */
> > -	zswap_pool_get(pool);
> > -	if (objcg)
> > -		obj_cgroup_get(objcg);
> > +		/*
> > +		 * The entry is successfully compressed and stored in the tree,
> there is
> > +		 * no further possibility of failure. Grab refs to the pool and
> objcg.
> > +		 * These refs will be dropped by zswap_entry_free() when the
> entry is
> > +		 * removed from the tree.
> > +		 */
> > +		zswap_pool_get(pool);
> > +		if (objcg)
> > +			obj_cgroup_get(objcg);
> >
> > -	/*
> > -	 * We finish initializing the entry while it's already in xarray.
> > -	 * This is safe because:
> > -	 *
> > -	 * 1. Concurrent stores and invalidations are excluded by folio lock.
> > -	 *
> > -	 * 2. Writeback is excluded by the entry not being on the LRU yet.
> > -	 *    The publishing order matters to prevent writeback from seeing
> > -	 *    an incoherent entry.
> > -	 */
> > -	entry->pool = pool;
> > -	entry->swpentry = page_swpentry;
> > -	entry->objcg = objcg;
> > -	entry->referenced = true;
> > -	if (entry->length) {
> > -		INIT_LIST_HEAD(&entry->lru);
> > -		zswap_lru_add(&zswap_list_lru, entry);
> > -	}
> > +		/*
> > +		 * We finish initializing the entry while it's already in xarray.
> > +		 * This is safe because:
> > +		 *
> > +		 * 1. Concurrent stores and invalidations are excluded by folio
> lock.
> > +		 *
> > +		 * 2. Writeback is excluded by the entry not being on the LRU
> yet.
> > +		 *    The publishing order matters to prevent writeback from
> seeing
> > +		 *    an incoherent entry.
> > +		 */
> > +		entry->pool = pool;
> > +		entry->swpentry = page_swpentry;
> > +		entry->objcg = objcg;
> > +		entry->referenced = true;
> > +		if (entry->length) {
> > +			INIT_LIST_HEAD(&entry->lru);
> > +			zswap_lru_add(&zswap_list_lru, entry);
> > +		}
> >
> > -	return entry->length;
> > +		compressed_bytes += entry->length;
> > +		continue;
> >
> >   store_failed:
> > -	zpool_free(pool->zpool, entry->handle);
> > +		zpool_free(pool->zpool, entry->handle);
> >   compress_failed:
> > -	zswap_entry_cache_free(entry);
> > -	return -EINVAL;
> > +		zswap_entry_cache_free(entry);
> > +		return -EINVAL;
> > +	}
> > +
> > +	return compressed_bytes;
> >   }
> >
> >   bool zswap_store(struct folio *folio)
> > @@ -1492,7 +1510,7 @@ bool zswap_store(struct folio *folio)
> >   	struct zswap_pool *pool;
> >   	size_t compressed_bytes = 0;
> >   	bool ret = false;
> > -	long index;
> > +	long si, ei, incr = SWAP_CRYPTO_BATCH_SIZE;
> >
> >   	VM_WARN_ON_ONCE(!folio_test_locked(folio));
> >   	VM_WARN_ON_ONCE(!folio_test_swapcache(folio));
> > @@ -1526,11 +1544,13 @@ bool zswap_store(struct folio *folio)
> >   		mem_cgroup_put(memcg);
> >   	}
> >
> > -	for (index = 0; index < nr_pages; ++index) {
> > -		struct page *page = folio_page(folio, index);
> > +	/* Store the folio in batches of SWAP_CRYPTO_BATCH_SIZE pages. */
> > +	for (si = 0, ei = min(si + incr - 1, nr_pages - 1);
> > +	     ((si < nr_pages) && (ei < nr_pages));
> > +	     si = ei + 1, ei = min(si + incr - 1, nr_pages - 1)) {
> >   		ssize_t bytes;
> >
> > -		bytes = zswap_store_page(page, objcg, pool);
> > +		bytes = zswap_store_pages(folio, si, ei, objcg, pool);
> >   		if (bytes < 0)
> >   			goto put_pool;
> >   		compressed_bytes += bytes;
> > @@ -1565,9 +1585,9 @@ bool zswap_store(struct folio *folio)
> >   		struct zswap_entry *entry;
> >   		struct xarray *tree;
> >
> > -		for (index = 0; index < nr_pages; ++index) {
> > -			tree = swap_zswap_tree(swp_entry(type, offset +
> index));
> > -			entry = xa_erase(tree, offset + index);
> > +		for (si = 0; si < nr_pages; ++si) {
> > +			tree = swap_zswap_tree(swp_entry(type, offset +
> si));
> > +			entry = xa_erase(tree, offset + si);
> >   			if (entry)
> >   				zswap_entry_free(entry);
> >   		}
Re: [PATCH v1 1/2] mm: zswap: Modified zswap_store_page() to process multiple pages in a folio.
Posted by Yosry Ahmed 1 year, 2 months ago
On Wed, Nov 27, 2024 at 2:53 PM Kanchana P Sridhar
<kanchana.p.sridhar@intel.com> wrote:
>
> Modified zswap_store() to store the folio in batches of
> SWAP_CRYPTO_BATCH_SIZE pages. Accordingly, refactored zswap_store_page()
> into zswap_store_pages() that processes a range of pages in the folio.
> zswap_store_pages() is a vectorized version of zswap_store_page().
>
> For now, zswap_store_pages() will sequentially compress these pages with
> zswap_compress().
>
> These changes are follow-up to code review comments received for [1], and
> are intended to set up zswap_store() for batching with Intel IAA.
>
> [1]: https://patchwork.kernel.org/project/linux-mm/patch/20241123070127.332773-11-kanchana.p.sridhar@intel.com/
>
> Signed-off-by: Kanchana P Sridhar <kanchana.p.sridhar@intel.com>
> ---
>  include/linux/zswap.h |   1 +
>  mm/zswap.c            | 154 ++++++++++++++++++++++++------------------
>  2 files changed, 88 insertions(+), 67 deletions(-)
>
> diff --git a/include/linux/zswap.h b/include/linux/zswap.h
> index d961ead91bf1..05a81e750744 100644
> --- a/include/linux/zswap.h
> +++ b/include/linux/zswap.h
> @@ -7,6 +7,7 @@
>
>  struct lruvec;
>
> +#define SWAP_CRYPTO_BATCH_SIZE 8UL
>  extern atomic_long_t zswap_stored_pages;
>
>  #ifdef CONFIG_ZSWAP
> diff --git a/mm/zswap.c b/mm/zswap.c
> index f6316b66fb23..b09d1023e775 100644
> --- a/mm/zswap.c
> +++ b/mm/zswap.c
> @@ -1409,78 +1409,96 @@ static void shrink_worker(struct work_struct *w)
>  * main API
>  **********************************/
>
> -static ssize_t zswap_store_page(struct page *page,
> -                               struct obj_cgroup *objcg,
> -                               struct zswap_pool *pool)
> +/*
> + * Store multiple pages in @folio, starting from the page at index @si up to
> + * and including the page at index @ei.
> + */
> +static ssize_t zswap_store_pages(struct folio *folio,
> +                                long si,
> +                                long ei,
> +                                struct obj_cgroup *objcg,
> +                                struct zswap_pool *pool)
>  {
> -       swp_entry_t page_swpentry = page_swap_entry(page);
> +       struct page *page;
> +       swp_entry_t page_swpentry;
>         struct zswap_entry *entry, *old;
> +       size_t compressed_bytes = 0;
> +       u8 nr_pages = ei - si + 1;
> +       u8 i;
> +
> +       for (i = 0; i < nr_pages; ++i) {
> +               page = folio_page(folio, si + i);
> +               page_swpentry = page_swap_entry(page);
> +
> +               /* allocate entry */
> +               entry = zswap_entry_cache_alloc(GFP_KERNEL, page_to_nid(page));
> +               if (!entry) {
> +                       zswap_reject_kmemcache_fail++;
> +                       return -EINVAL;
> +               }

I think this patch is wrong on its own: for example, if an allocation
fails in the above loop, we exit without cleaning up previous
allocations. I think it's fixed in patch 2, but we cannot introduce
bugs in between patches. I think the helpers in patch 2 don't really
help, as I mentioned. Please combine the changes and keep them in the
main series (unless you have a reason not to).

>
> -       /* allocate entry */
> -       entry = zswap_entry_cache_alloc(GFP_KERNEL, page_to_nid(page));
> -       if (!entry) {
> -               zswap_reject_kmemcache_fail++;
> -               return -EINVAL;
> -       }
> -
> -       if (!zswap_compress(page, entry, pool))
> -               goto compress_failed;
> +               if (!zswap_compress(page, entry, pool))
> +                       goto compress_failed;
>
> -       old = xa_store(swap_zswap_tree(page_swpentry),
> -                      swp_offset(page_swpentry),
> -                      entry, GFP_KERNEL);
> -       if (xa_is_err(old)) {
> -               int err = xa_err(old);
> +               old = xa_store(swap_zswap_tree(page_swpentry),
> +                              swp_offset(page_swpentry),
> +                              entry, GFP_KERNEL);
> +               if (xa_is_err(old)) {
> +                       int err = xa_err(old);
>
> -               WARN_ONCE(err != -ENOMEM, "unexpected xarray error: %d\n", err);
> -               zswap_reject_alloc_fail++;
> -               goto store_failed;
> -       }
> +                       WARN_ONCE(err != -ENOMEM, "unexpected xarray error: %d\n", err);
> +                       zswap_reject_alloc_fail++;
> +                       goto store_failed;
> +               }
>
> -       /*
> -        * We may have had an existing entry that became stale when
> -        * the folio was redirtied and now the new version is being
> -        * swapped out. Get rid of the old.
> -        */
> -       if (old)
> -               zswap_entry_free(old);
> +               /*
> +                * We may have had an existing entry that became stale when
> +                * the folio was redirtied and now the new version is being
> +                * swapped out. Get rid of the old.
> +                */
> +               if (old)
> +                       zswap_entry_free(old);
>
> -       /*
> -        * The entry is successfully compressed and stored in the tree, there is
> -        * no further possibility of failure. Grab refs to the pool and objcg.
> -        * These refs will be dropped by zswap_entry_free() when the entry is
> -        * removed from the tree.
> -        */
> -       zswap_pool_get(pool);
> -       if (objcg)
> -               obj_cgroup_get(objcg);
> +               /*
> +                * The entry is successfully compressed and stored in the tree, there is
> +                * no further possibility of failure. Grab refs to the pool and objcg.
> +                * These refs will be dropped by zswap_entry_free() when the entry is
> +                * removed from the tree.
> +                */
> +               zswap_pool_get(pool);
> +               if (objcg)
> +                       obj_cgroup_get(objcg);
>
> -       /*
> -        * We finish initializing the entry while it's already in xarray.
> -        * This is safe because:
> -        *
> -        * 1. Concurrent stores and invalidations are excluded by folio lock.
> -        *
> -        * 2. Writeback is excluded by the entry not being on the LRU yet.
> -        *    The publishing order matters to prevent writeback from seeing
> -        *    an incoherent entry.
> -        */
> -       entry->pool = pool;
> -       entry->swpentry = page_swpentry;
> -       entry->objcg = objcg;
> -       entry->referenced = true;
> -       if (entry->length) {
> -               INIT_LIST_HEAD(&entry->lru);
> -               zswap_lru_add(&zswap_list_lru, entry);
> -       }
> +               /*
> +                * We finish initializing the entry while it's already in xarray.
> +                * This is safe because:
> +                *
> +                * 1. Concurrent stores and invalidations are excluded by folio lock.
> +                *
> +                * 2. Writeback is excluded by the entry not being on the LRU yet.
> +                *    The publishing order matters to prevent writeback from seeing
> +                *    an incoherent entry.
> +                */
> +               entry->pool = pool;
> +               entry->swpentry = page_swpentry;
> +               entry->objcg = objcg;
> +               entry->referenced = true;
> +               if (entry->length) {
> +                       INIT_LIST_HEAD(&entry->lru);
> +                       zswap_lru_add(&zswap_list_lru, entry);
> +               }
>
> -       return entry->length;
> +               compressed_bytes += entry->length;
> +               continue;
>
>  store_failed:
> -       zpool_free(pool->zpool, entry->handle);
> +               zpool_free(pool->zpool, entry->handle);
>  compress_failed:
> -       zswap_entry_cache_free(entry);
> -       return -EINVAL;
> +               zswap_entry_cache_free(entry);
> +               return -EINVAL;
> +       }
> +
> +       return compressed_bytes;
>  }
>
>  bool zswap_store(struct folio *folio)
> @@ -1492,7 +1510,7 @@ bool zswap_store(struct folio *folio)
>         struct zswap_pool *pool;
>         size_t compressed_bytes = 0;
>         bool ret = false;
> -       long index;
> +       long si, ei, incr = SWAP_CRYPTO_BATCH_SIZE;
>
>         VM_WARN_ON_ONCE(!folio_test_locked(folio));
>         VM_WARN_ON_ONCE(!folio_test_swapcache(folio));
> @@ -1526,11 +1544,13 @@ bool zswap_store(struct folio *folio)
>                 mem_cgroup_put(memcg);
>         }
>
> -       for (index = 0; index < nr_pages; ++index) {
> -               struct page *page = folio_page(folio, index);
> +       /* Store the folio in batches of SWAP_CRYPTO_BATCH_SIZE pages. */
> +       for (si = 0, ei = min(si + incr - 1, nr_pages - 1);
> +            ((si < nr_pages) && (ei < nr_pages));
> +            si = ei + 1, ei = min(si + incr - 1, nr_pages - 1)) {
>                 ssize_t bytes;
>
> -               bytes = zswap_store_page(page, objcg, pool);
> +               bytes = zswap_store_pages(folio, si, ei, objcg, pool);
>                 if (bytes < 0)
>                         goto put_pool;
>                 compressed_bytes += bytes;
> @@ -1565,9 +1585,9 @@ bool zswap_store(struct folio *folio)
>                 struct zswap_entry *entry;
>                 struct xarray *tree;
>
> -               for (index = 0; index < nr_pages; ++index) {
> -                       tree = swap_zswap_tree(swp_entry(type, offset + index));
> -                       entry = xa_erase(tree, offset + index);
> +               for (si = 0; si < nr_pages; ++si) {
> +                       tree = swap_zswap_tree(swp_entry(type, offset + si));
> +                       entry = xa_erase(tree, offset + si);
>                         if (entry)
>                                 zswap_entry_free(entry);
>                 }
> --
> 2.27.0
>
RE: [PATCH v1 1/2] mm: zswap: Modified zswap_store_page() to process multiple pages in a folio.
Posted by Sridhar, Kanchana P 1 year, 2 months ago
> -----Original Message-----
> From: Yosry Ahmed <yosryahmed@google.com>
> Sent: Monday, December 2, 2024 11:34 AM
> To: Sridhar, Kanchana P <kanchana.p.sridhar@intel.com>
> Cc: linux-kernel@vger.kernel.org; linux-mm@kvack.org;
> hannes@cmpxchg.org; nphamcs@gmail.com; chengming.zhou@linux.dev;
> usamaarif642@gmail.com; ryan.roberts@arm.com; 21cnbao@gmail.com;
> akpm@linux-foundation.org; Feghali, Wajdi K <wajdi.k.feghali@intel.com>;
> Gopal, Vinodh <vinodh.gopal@intel.com>
> Subject: Re: [PATCH v1 1/2] mm: zswap: Modified zswap_store_page() to
> process multiple pages in a folio.
> 
> On Wed, Nov 27, 2024 at 2:53 PM Kanchana P Sridhar
> <kanchana.p.sridhar@intel.com> wrote:
> >
> > Modified zswap_store() to store the folio in batches of
> > SWAP_CRYPTO_BATCH_SIZE pages. Accordingly, refactored
> zswap_store_page()
> > into zswap_store_pages() that processes a range of pages in the folio.
> > zswap_store_pages() is a vectorized version of zswap_store_page().
> >
> > For now, zswap_store_pages() will sequentially compress these pages with
> > zswap_compress().
> >
> > These changes are follow-up to code review comments received for [1], and
> > are intended to set up zswap_store() for batching with Intel IAA.
> >
> > [1]: https://patchwork.kernel.org/project/linux-
> mm/patch/20241123070127.332773-11-kanchana.p.sridhar@intel.com/
> >
> > Signed-off-by: Kanchana P Sridhar <kanchana.p.sridhar@intel.com>
> > ---
> >  include/linux/zswap.h |   1 +
> >  mm/zswap.c            | 154 ++++++++++++++++++++++++------------------
> >  2 files changed, 88 insertions(+), 67 deletions(-)
> >
> > diff --git a/include/linux/zswap.h b/include/linux/zswap.h
> > index d961ead91bf1..05a81e750744 100644
> > --- a/include/linux/zswap.h
> > +++ b/include/linux/zswap.h
> > @@ -7,6 +7,7 @@
> >
> >  struct lruvec;
> >
> > +#define SWAP_CRYPTO_BATCH_SIZE 8UL
> >  extern atomic_long_t zswap_stored_pages;
> >
> >  #ifdef CONFIG_ZSWAP
> > diff --git a/mm/zswap.c b/mm/zswap.c
> > index f6316b66fb23..b09d1023e775 100644
> > --- a/mm/zswap.c
> > +++ b/mm/zswap.c
> > @@ -1409,78 +1409,96 @@ static void shrink_worker(struct work_struct
> *w)
> >  * main API
> >  **********************************/
> >
> > -static ssize_t zswap_store_page(struct page *page,
> > -                               struct obj_cgroup *objcg,
> > -                               struct zswap_pool *pool)
> > +/*
> > + * Store multiple pages in @folio, starting from the page at index @si up to
> > + * and including the page at index @ei.
> > + */
> > +static ssize_t zswap_store_pages(struct folio *folio,
> > +                                long si,
> > +                                long ei,
> > +                                struct obj_cgroup *objcg,
> > +                                struct zswap_pool *pool)
> >  {
> > -       swp_entry_t page_swpentry = page_swap_entry(page);
> > +       struct page *page;
> > +       swp_entry_t page_swpentry;
> >         struct zswap_entry *entry, *old;
> > +       size_t compressed_bytes = 0;
> > +       u8 nr_pages = ei - si + 1;
> > +       u8 i;
> > +
> > +       for (i = 0; i < nr_pages; ++i) {
> > +               page = folio_page(folio, si + i);
> > +               page_swpentry = page_swap_entry(page);
> > +
> > +               /* allocate entry */
> > +               entry = zswap_entry_cache_alloc(GFP_KERNEL,
> page_to_nid(page));
> > +               if (!entry) {
> > +                       zswap_reject_kmemcache_fail++;
> > +                       return -EINVAL;
> > +               }
> 
> I think this patch is wrong on its own, for example if an allocation
> fails in the above loop we exit without cleaning up previous
> allocations. I think it's fixed in patch 2 but we cannot introduce

I think there might be a misunderstanding. zswap_store_pages() will
clean up the local resources allocated during an iteration of the for loop
upon an error in that iteration; the "goto store_failed" and
"goto compress_failed" paths handle exactly this. If an allocation
fails for an iteration, we simply return -EINVAL, and zswap_store()
will jump to the "put_pool" label with "ret" still false, which will delete
all zswap entries for the folio (those allocated up until the error iteration
in zswap_store_pages(), or potentially already in the xarray).
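
Putting the two relevant pieces of this patch side by side (abridged from
the diff, comments added):

```
	/* zswap_store(): per-batch loop */
	bytes = zswap_store_pages(folio, si, ei, objcg, pool);
	if (bytes < 0)
		goto put_pool;	/* "ret" is still false here */

	/* zswap_store(): cleanup done when "ret" is false */
	for (si = 0; si < nr_pages; ++si) {
		tree = swap_zswap_tree(swp_entry(type, offset + si));
		entry = xa_erase(tree, offset + si);
		if (entry)
			zswap_entry_free(entry);
	}
```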

Hence, there is no bug and each of the two patches is correct by
itself AFAICT, but please let me know if I am missing anything. Thanks!

> bugs in-between patches. I think the helpers in patch 2 don't really
> help as I mentioned. Please combine the changes and keep them in the
> main series (unless you have a reason not to).

Sure. As noted in my earlier response to comments received for patch 2,
I can either inline all iterations or create helpers for all iterations over
the pages in a batch. I'd appreciate your suggestion on which would be the
better approach.

Thanks,
Kanchana

> 
> >
> > -       /* allocate entry */
> > -       entry = zswap_entry_cache_alloc(GFP_KERNEL, page_to_nid(page));
> > -       if (!entry) {
> > -               zswap_reject_kmemcache_fail++;
> > -               return -EINVAL;
> > -       }
> > -
> > -       if (!zswap_compress(page, entry, pool))
> > -               goto compress_failed;
> > +               if (!zswap_compress(page, entry, pool))
> > +                       goto compress_failed;
> >
> > -       old = xa_store(swap_zswap_tree(page_swpentry),
> > -                      swp_offset(page_swpentry),
> > -                      entry, GFP_KERNEL);
> > -       if (xa_is_err(old)) {
> > -               int err = xa_err(old);
> > +               old = xa_store(swap_zswap_tree(page_swpentry),
> > +                              swp_offset(page_swpentry),
> > +                              entry, GFP_KERNEL);
> > +               if (xa_is_err(old)) {
> > +                       int err = xa_err(old);
> >
> > -               WARN_ONCE(err != -ENOMEM, "unexpected xarray error: %d\n",
> err);
> > -               zswap_reject_alloc_fail++;
> > -               goto store_failed;
> > -       }
> > +                       WARN_ONCE(err != -ENOMEM, "unexpected xarray error:
> %d\n", err);
> > +                       zswap_reject_alloc_fail++;
> > +                       goto store_failed;
> > +               }
> >
> > -       /*
> > -        * We may have had an existing entry that became stale when
> > -        * the folio was redirtied and now the new version is being
> > -        * swapped out. Get rid of the old.
> > -        */
> > -       if (old)
> > -               zswap_entry_free(old);
> > +               /*
> > +                * We may have had an existing entry that became stale when
> > +                * the folio was redirtied and now the new version is being
> > +                * swapped out. Get rid of the old.
> > +                */
> > +               if (old)
> > +                       zswap_entry_free(old);
> >
> > -       /*
> > -        * The entry is successfully compressed and stored in the tree, there is
> > -        * no further possibility of failure. Grab refs to the pool and objcg.
> > -        * These refs will be dropped by zswap_entry_free() when the entry is
> > -        * removed from the tree.
> > -        */
> > -       zswap_pool_get(pool);
> > -       if (objcg)
> > -               obj_cgroup_get(objcg);
> > +               /*
> > +                * The entry is successfully compressed and stored in the tree,
> there is
> > +                * no further possibility of failure. Grab refs to the pool and objcg.
> > +                * These refs will be dropped by zswap_entry_free() when the
> entry is
> > +                * removed from the tree.
> > +                */
> > +               zswap_pool_get(pool);
> > +               if (objcg)
> > +                       obj_cgroup_get(objcg);
> >
> > -       /*
> > -        * We finish initializing the entry while it's already in xarray.
> > -        * This is safe because:
> > -        *
> > -        * 1. Concurrent stores and invalidations are excluded by folio lock.
> > -        *
> > -        * 2. Writeback is excluded by the entry not being on the LRU yet.
> > -        *    The publishing order matters to prevent writeback from seeing
> > -        *    an incoherent entry.
> > -        */
> > -       entry->pool = pool;
> > -       entry->swpentry = page_swpentry;
> > -       entry->objcg = objcg;
> > -       entry->referenced = true;
> > -       if (entry->length) {
> > -               INIT_LIST_HEAD(&entry->lru);
> > -               zswap_lru_add(&zswap_list_lru, entry);
> > -       }
> > +               /*
> > +                * We finish initializing the entry while it's already in xarray.
> > +                * This is safe because:
> > +                *
> > +                * 1. Concurrent stores and invalidations are excluded by folio
> lock.
> > +                *
> > +                * 2. Writeback is excluded by the entry not being on the LRU yet.
> > +                *    The publishing order matters to prevent writeback from seeing
> > +                *    an incoherent entry.
> > +                */
> > +               entry->pool = pool;
> > +               entry->swpentry = page_swpentry;
> > +               entry->objcg = objcg;
> > +               entry->referenced = true;
> > +               if (entry->length) {
> > +                       INIT_LIST_HEAD(&entry->lru);
> > +                       zswap_lru_add(&zswap_list_lru, entry);
> > +               }
> >
> > -       return entry->length;
> > +               compressed_bytes += entry->length;
> > +               continue;
> >
> >  store_failed:
> > -       zpool_free(pool->zpool, entry->handle);
> > +               zpool_free(pool->zpool, entry->handle);
> >  compress_failed:
> > -       zswap_entry_cache_free(entry);
> > -       return -EINVAL;
> > +               zswap_entry_cache_free(entry);
> > +               return -EINVAL;
> > +       }
> > +
> > +       return compressed_bytes;
> >  }
> >
> >  bool zswap_store(struct folio *folio)
> > @@ -1492,7 +1510,7 @@ bool zswap_store(struct folio *folio)
> >         struct zswap_pool *pool;
> >         size_t compressed_bytes = 0;
> >         bool ret = false;
> > -       long index;
> > +       long si, ei, incr = SWAP_CRYPTO_BATCH_SIZE;
> >
> >         VM_WARN_ON_ONCE(!folio_test_locked(folio));
> >         VM_WARN_ON_ONCE(!folio_test_swapcache(folio));
> > @@ -1526,11 +1544,13 @@ bool zswap_store(struct folio *folio)
> >                 mem_cgroup_put(memcg);
> >         }
> >
> > -       for (index = 0; index < nr_pages; ++index) {
> > -               struct page *page = folio_page(folio, index);
> > +       /* Store the folio in batches of SWAP_CRYPTO_BATCH_SIZE pages. */
> > +       for (si = 0, ei = min(si + incr - 1, nr_pages - 1);
> > +            ((si < nr_pages) && (ei < nr_pages));
> > +            si = ei + 1, ei = min(si + incr - 1, nr_pages - 1)) {
> >                 ssize_t bytes;
> >
> > -               bytes = zswap_store_page(page, objcg, pool);
> > +               bytes = zswap_store_pages(folio, si, ei, objcg, pool);
> >                 if (bytes < 0)
> >                         goto put_pool;
> >                 compressed_bytes += bytes;
> > @@ -1565,9 +1585,9 @@ bool zswap_store(struct folio *folio)
> >                 struct zswap_entry *entry;
> >                 struct xarray *tree;
> >
> > -               for (index = 0; index < nr_pages; ++index) {
> > -                       tree = swap_zswap_tree(swp_entry(type, offset + index));
> > -                       entry = xa_erase(tree, offset + index);
> > +               for (si = 0; si < nr_pages; ++si) {
> > +                       tree = swap_zswap_tree(swp_entry(type, offset + si));
> > +                       entry = xa_erase(tree, offset + si);
> >                         if (entry)
> >                                 zswap_entry_free(entry);
> >                 }
> > --
> > 2.27.0
> >
Re: [PATCH v1 1/2] mm: zswap: Modified zswap_store_page() to process multiple pages in a folio.
Posted by Yosry Ahmed 1 year, 2 months ago
On Mon, Dec 2, 2024 at 5:13 PM Sridhar, Kanchana P
<kanchana.p.sridhar@intel.com> wrote:
>
>
> > -----Original Message-----
> > From: Yosry Ahmed <yosryahmed@google.com>
> > Sent: Monday, December 2, 2024 11:34 AM
> > To: Sridhar, Kanchana P <kanchana.p.sridhar@intel.com>
> > Cc: linux-kernel@vger.kernel.org; linux-mm@kvack.org;
> > hannes@cmpxchg.org; nphamcs@gmail.com; chengming.zhou@linux.dev;
> > usamaarif642@gmail.com; ryan.roberts@arm.com; 21cnbao@gmail.com;
> > akpm@linux-foundation.org; Feghali, Wajdi K <wajdi.k.feghali@intel.com>;
> > Gopal, Vinodh <vinodh.gopal@intel.com>
> > Subject: Re: [PATCH v1 1/2] mm: zswap: Modified zswap_store_page() to
> > process multiple pages in a folio.
> >
> > On Wed, Nov 27, 2024 at 2:53 PM Kanchana P Sridhar
> > <kanchana.p.sridhar@intel.com> wrote:
> > >
> > > Modified zswap_store() to store the folio in batches of
> > > SWAP_CRYPTO_BATCH_SIZE pages. Accordingly, refactored
> > zswap_store_page()
> > > into zswap_store_pages() that processes a range of pages in the folio.
> > > zswap_store_pages() is a vectorized version of zswap_store_page().
> > >
> > > For now, zswap_store_pages() will sequentially compress these pages with
> > > zswap_compress().
> > >
> > > These changes are follow-up to code review comments received for [1], and
> > > are intended to set up zswap_store() for batching with Intel IAA.
> > >
> > > [1]: https://patchwork.kernel.org/project/linux-
> > mm/patch/20241123070127.332773-11-kanchana.p.sridhar@intel.com/
> > >
> > > Signed-off-by: Kanchana P Sridhar <kanchana.p.sridhar@intel.com>
> > > ---
> > >  include/linux/zswap.h |   1 +
> > >  mm/zswap.c            | 154 ++++++++++++++++++++++++------------------
> > >  2 files changed, 88 insertions(+), 67 deletions(-)
> > >
> > > diff --git a/include/linux/zswap.h b/include/linux/zswap.h
> > > index d961ead91bf1..05a81e750744 100644
> > > --- a/include/linux/zswap.h
> > > +++ b/include/linux/zswap.h
> > > @@ -7,6 +7,7 @@
> > >
> > >  struct lruvec;
> > >
> > > +#define SWAP_CRYPTO_BATCH_SIZE 8UL
> > >  extern atomic_long_t zswap_stored_pages;
> > >
> > >  #ifdef CONFIG_ZSWAP
> > > diff --git a/mm/zswap.c b/mm/zswap.c
> > > index f6316b66fb23..b09d1023e775 100644
> > > --- a/mm/zswap.c
> > > +++ b/mm/zswap.c
> > > @@ -1409,78 +1409,96 @@ static void shrink_worker(struct work_struct
> > *w)
> > >  * main API
> > >  **********************************/
> > >
> > > -static ssize_t zswap_store_page(struct page *page,
> > > -                               struct obj_cgroup *objcg,
> > > -                               struct zswap_pool *pool)
> > > +/*
> > > + * Store multiple pages in @folio, starting from the page at index @si up to
> > > + * and including the page at index @ei.
> > > + */
> > > +static ssize_t zswap_store_pages(struct folio *folio,
> > > +                                long si,
> > > +                                long ei,
> > > +                                struct obj_cgroup *objcg,
> > > +                                struct zswap_pool *pool)
> > >  {
> > > -       swp_entry_t page_swpentry = page_swap_entry(page);
> > > +       struct page *page;
> > > +       swp_entry_t page_swpentry;
> > >         struct zswap_entry *entry, *old;
> > > +       size_t compressed_bytes = 0;
> > > +       u8 nr_pages = ei - si + 1;
> > > +       u8 i;
> > > +
> > > +       for (i = 0; i < nr_pages; ++i) {
> > > +               page = folio_page(folio, si + i);
> > > +               page_swpentry = page_swap_entry(page);
> > > +
> > > +               /* allocate entry */
> > > +               entry = zswap_entry_cache_alloc(GFP_KERNEL,
> > page_to_nid(page));
> > > +               if (!entry) {
> > > +                       zswap_reject_kmemcache_fail++;
> > > +                       return -EINVAL;
> > > +               }
> >
> > I think this patch is wrong on its own, for example if an allocation
> > fails in the above loop we exit without cleaning up previous
> > allocations. I think it's fixed in patch 2 but we cannot introduce
>
> I think there might be a misunderstanding. zswap_store_pages() will
> clean up local resources allocated during an iteration of the for loop,
> upon an error in that iteration. If you see the "goto store_failed" and
> "goto compress_failed" this would explain what I mean. If an allocation
> fails for an iteration, we simply return -EINVAL, and zswap_store()
> will goto the "put_pool" label with "ret" still false, which will delete
> all zswap entries for the folio (allocated up until the error iteration in
> zswap_store_pages(); or potentially already in the xarray).
>
> Hence, there is no bug and each of the two patches are correct by
> themselves AFAICT, but please let me know if I am missing anything. Thanks!

Uh yes, the cleanup is in zswap_store().

>
> > bugs in-between patches. I think the helpers in patch 2 don't really
> > help as I mentioned. Please combine the changes and keep them in the
> > main series (unless you have a reason not to).
>
> Sure. As noted in my earlier response to comments received for patch 2,
> I can either inline all iterations or create helpers for all iterations over
> the pages in a batch. Appreciate your suggestions on which would be a
> better approach.

I think leaving them open-coded will be clearer for now. We can
revisit the code organization later if it gets out of hand.
RE: [PATCH v1 1/2] mm: zswap: Modified zswap_store_page() to process multiple pages in a folio.
Posted by Sridhar, Kanchana P 1 year, 2 months ago
> -----Original Message-----
> From: Yosry Ahmed <yosryahmed@google.com>
> Sent: Monday, December 2, 2024 9:34 PM
> To: Sridhar, Kanchana P <kanchana.p.sridhar@intel.com>
> Cc: linux-kernel@vger.kernel.org; linux-mm@kvack.org;
> hannes@cmpxchg.org; nphamcs@gmail.com; chengming.zhou@linux.dev;
> usamaarif642@gmail.com; ryan.roberts@arm.com; 21cnbao@gmail.com;
> akpm@linux-foundation.org; Feghali, Wajdi K <wajdi.k.feghali@intel.com>;
> Gopal, Vinodh <vinodh.gopal@intel.com>
> Subject: Re: [PATCH v1 1/2] mm: zswap: Modified zswap_store_page() to
> process multiple pages in a folio.
> 
> On Mon, Dec 2, 2024 at 5:13 PM Sridhar, Kanchana P
> <kanchana.p.sridhar@intel.com> wrote:
> >
> >
> > > -----Original Message-----
> > > From: Yosry Ahmed <yosryahmed@google.com>
> > > Sent: Monday, December 2, 2024 11:34 AM
> > > To: Sridhar, Kanchana P <kanchana.p.sridhar@intel.com>
> > > Cc: linux-kernel@vger.kernel.org; linux-mm@kvack.org;
> > > hannes@cmpxchg.org; nphamcs@gmail.com;
> chengming.zhou@linux.dev;
> > > usamaarif642@gmail.com; ryan.roberts@arm.com; 21cnbao@gmail.com;
> > > akpm@linux-foundation.org; Feghali, Wajdi K
> <wajdi.k.feghali@intel.com>;
> > > Gopal, Vinodh <vinodh.gopal@intel.com>
> > > Subject: Re: [PATCH v1 1/2] mm: zswap: Modified zswap_store_page() to
> > > process multiple pages in a folio.
> > >
> > > On Wed, Nov 27, 2024 at 2:53 PM Kanchana P Sridhar
> > > <kanchana.p.sridhar@intel.com> wrote:
> > > >
> > > > Modified zswap_store() to store the folio in batches of
> > > > SWAP_CRYPTO_BATCH_SIZE pages. Accordingly, refactored
> > > zswap_store_page()
> > > > into zswap_store_pages() that processes a range of pages in the folio.
> > > > zswap_store_pages() is a vectorized version of zswap_store_page().
> > > >
> > > > For now, zswap_store_pages() will sequentially compress these pages
> with
> > > > zswap_compress().
> > > >
> > > > These changes are follow-up to code review comments received for [1],
> and
> > > > are intended to set up zswap_store() for batching with Intel IAA.
> > > >
> > > > [1]: https://patchwork.kernel.org/project/linux-
> > > mm/patch/20241123070127.332773-11-kanchana.p.sridhar@intel.com/
> > > >
> > > > Signed-off-by: Kanchana P Sridhar <kanchana.p.sridhar@intel.com>
> > > > ---
> > > >  include/linux/zswap.h |   1 +
> > > >  mm/zswap.c            | 154 ++++++++++++++++++++++++------------------
> > > >  2 files changed, 88 insertions(+), 67 deletions(-)
> > > >
> > > > diff --git a/include/linux/zswap.h b/include/linux/zswap.h
> > > > index d961ead91bf1..05a81e750744 100644
> > > > --- a/include/linux/zswap.h
> > > > +++ b/include/linux/zswap.h
> > > > @@ -7,6 +7,7 @@
> > > >
> > > >  struct lruvec;
> > > >
> > > > +#define SWAP_CRYPTO_BATCH_SIZE 8UL
> > > >  extern atomic_long_t zswap_stored_pages;
> > > >
> > > >  #ifdef CONFIG_ZSWAP
> > > > diff --git a/mm/zswap.c b/mm/zswap.c
> > > > index f6316b66fb23..b09d1023e775 100644
> > > > --- a/mm/zswap.c
> > > > +++ b/mm/zswap.c
> > > > @@ -1409,78 +1409,96 @@ static void shrink_worker(struct
> work_struct
> > > *w)
> > > >  * main API
> > > >  **********************************/
> > > >
> > > > -static ssize_t zswap_store_page(struct page *page,
> > > > -                               struct obj_cgroup *objcg,
> > > > -                               struct zswap_pool *pool)
> > > > +/*
> > > > + * Store multiple pages in @folio, starting from the page at index @si
> up to
> > > > + * and including the page at index @ei.
> > > > + */
> > > > +static ssize_t zswap_store_pages(struct folio *folio,
> > > > +                                long si,
> > > > +                                long ei,
> > > > +                                struct obj_cgroup *objcg,
> > > > +                                struct zswap_pool *pool)
> > > >  {
> > > > -       swp_entry_t page_swpentry = page_swap_entry(page);
> > > > +       struct page *page;
> > > > +       swp_entry_t page_swpentry;
> > > >         struct zswap_entry *entry, *old;
> > > > +       size_t compressed_bytes = 0;
> > > > +       u8 nr_pages = ei - si + 1;
> > > > +       u8 i;
> > > > +
> > > > +       for (i = 0; i < nr_pages; ++i) {
> > > > +               page = folio_page(folio, si + i);
> > > > +               page_swpentry = page_swap_entry(page);
> > > > +
> > > > +               /* allocate entry */
> > > > +               entry = zswap_entry_cache_alloc(GFP_KERNEL,
> > > page_to_nid(page));
> > > > +               if (!entry) {
> > > > +                       zswap_reject_kmemcache_fail++;
> > > > +                       return -EINVAL;
> > > > +               }
> > >
> > > I think this patch is wrong on its own, for example if an allocation
> > > fails in the above loop we exit without cleaning up previous
> > > allocations. I think it's fixed in patch 2 but we cannot introduce
> >
> > I think there might be a misunderstanding. zswap_store_pages() will
> > clean up local resources allocated during an iteration of the for loop,
> > upon an error in that iteration. If you see the "goto store_failed" and
> > "goto compress_failed" this would explain what I mean. If an allocation
> > fails for an iteration, we simply return -EINVAL, and zswap_store()
> > will goto the "put_pool" label with "ret" still false, which will delete
> > all zswap entries for the folio (allocated up until the error iteration in
> > zswap_store_pages(); or potentially already in the xarray).
> >
> > Hence, there is no bug and each of the two patches are correct by
> > themselves AFAICT, but please let me know if I am missing anything.
> Thanks!
> 
> Uh yes, the cleanup is in zswap_store().
> 
> >
> > > bugs in-between patches. I think the helpers in patch 2 don't really
> > > help as I mentioned. Please combine the changes and keep them in the
> > > main series (unless you have a reason not to).
> >
> > Sure. As noted in my earlier response to comments received for patch 2,
> > I can either inline all iterations or create helpers for all iterations over
> > the pages in a batch. Appreciate your suggestions on which would be a
> > better approach.
> 
> I think leaving them open-coded will be clearer for now. We can
> revisit the code organization later if it gets out of hand.

Sounds good!

Thanks,
Kanchana