From nobody Fri Dec 19 00:18:08 2025 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 2D97DC46CA7 for ; Wed, 6 Dec 2023 09:46:40 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1377271AbjLFJqb (ORCPT ); Wed, 6 Dec 2023 04:46:31 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:57126 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1377255AbjLFJq3 (ORCPT ); Wed, 6 Dec 2023 04:46:29 -0500 Received: from out-184.mta1.migadu.com (out-184.mta1.migadu.com [95.215.58.184]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 2C2EFBA for ; Wed, 6 Dec 2023 01:46:35 -0800 (PST) X-Report-Abuse: Please report any abuse attempt to abuse@migadu.com and include these headers. From: Chengming Zhou Date: Wed, 06 Dec 2023 09:46:24 +0000 Subject: [PATCH 1/7] mm/zswap: make sure each swapfile always have zswap rb-tree MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable Message-Id: <20231206-zswap-lock-optimize-v1-1-e25b059f9c3a@bytedance.com> References: <20231206-zswap-lock-optimize-v1-0-e25b059f9c3a@bytedance.com> In-Reply-To: <20231206-zswap-lock-optimize-v1-0-e25b059f9c3a@bytedance.com> To: Vitaly Wool , Nhat Pham , Johannes Weiner , Michal Hocko , Seth Jennings , Dan Streetman , Andrew Morton , Yosry Ahmed Cc: linux-mm@kvack.org, linux-kernel@vger.kernel.org, Chengming Zhou X-Developer-Signature: v=1; a=ed25519-sha256; t=1701855988; l=3698; i=zhouchengming@bytedance.com; s=20231204; h=from:subject:message-id; bh=PZN0xLXpa6FJXo43SkR7sRqIVKPQ5jd6uVBrSVx9EAE=; b=/IE8MsA85GAZ6MZCk9fTWWd8zckSLavOzPjJFd5hfxDZQxaW88TjMxNyw9krvi0fXcdtJlBTb 6fS45gqAMpsAiMJo+PpOcT2QfWAipRf/YXZ1gMVtSGgZ7M7zveNY0XP X-Developer-Key: i=zhouchengming@bytedance.com; a=ed25519; 
pk=xFTmRtMG3vELGJBUiml7OYNdM393WOMv0iWWeQEVVdA= X-Migadu-Flow: FLOW_OUT Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Not all zswap interfaces can handle the absence of the zswap rb-tree; currently only zswap_store() handles it. To keep things simple, make sure each swapfile always has its zswap rb-tree prepared before it is enabled and used. The preparation is unlikely to fail in practice; this patch just makes it explicit. Signed-off-by: Chengming Zhou --- include/linux/zswap.h | 4 ++-- mm/swapfile.c | 10 +++++++--- mm/zswap.c | 7 ++++--- 3 files changed, 13 insertions(+), 8 deletions(-) diff --git a/include/linux/zswap.h b/include/linux/zswap.h index 08c240e16a01..7cccc02cb9e9 100644 --- a/include/linux/zswap.h +++ b/include/linux/zswap.h @@ -30,7 +30,7 @@ struct zswap_lruvec_state { bool zswap_store(struct folio *folio); bool zswap_load(struct folio *folio); void zswap_invalidate(int type, pgoff_t offset); -void zswap_swapon(int type); +int zswap_swapon(int type); void zswap_swapoff(int type); void zswap_memcg_offline_cleanup(struct mem_cgroup *memcg); void zswap_lruvec_state_init(struct lruvec *lruvec); @@ -50,7 +50,7 @@ static inline bool zswap_load(struct folio *folio) } =20 static inline void zswap_invalidate(int type, pgoff_t offset) {} -static inline void zswap_swapon(int type) {} +static inline int zswap_swapon(int type) {} static inline void zswap_swapoff(int type) {} static inline void zswap_memcg_offline_cleanup(struct mem_cgroup *memcg) {} static inline void zswap_lruvec_state_init(struct lruvec *lruvec) {} diff --git a/mm/swapfile.c b/mm/swapfile.c index 8be70912e298..939e7590feda 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -2346,8 +2346,6 @@ static void enable_swap_info(struct swap_info_struct = *p, int prio, unsigned char *swap_map, struct swap_cluster_info *cluster_info) { - zswap_swapon(p->type); - spin_lock(&swap_lock); spin_lock(&p->lock); setup_swap_info(p, prio, swap_map, cluster_info); @@ -3165,6 
+3163,10 @@ SYSCALL_DEFINE2(swapon, const char __user *, special= file, int, swap_flags) if (error) goto bad_swap_unlock_inode; =20 + error =3D zswap_swapon(p->type); + if (error) + goto free_swap_address_space; + /* * Flush any pending IO and dirty mappings before we start using this * swap device. @@ -3173,7 +3175,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialf= ile, int, swap_flags) error =3D inode_drain_writes(inode); if (error) { inode->i_flags &=3D ~S_SWAPFILE; - goto free_swap_address_space; + goto free_swap_zswap; } =20 mutex_lock(&swapon_mutex); @@ -3197,6 +3199,8 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialf= ile, int, swap_flags) =20 error =3D 0; goto out; +free_swap_zswap: + zswap_swapoff(p->type); free_swap_address_space: exit_swap_address_space(p->type); bad_swap_unlock_inode: diff --git a/mm/zswap.c b/mm/zswap.c index 0f086ffd7b6a..5e2b8d5ee33b 100644 --- a/mm/zswap.c +++ b/mm/zswap.c @@ -1560,7 +1560,7 @@ bool zswap_store(struct folio *folio) if (folio_test_large(folio)) return false; =20 - if (!zswap_enabled || !tree) + if (!zswap_enabled) return false; =20 /* @@ -1850,19 +1850,20 @@ void zswap_invalidate(int type, pgoff_t offset) spin_unlock(&tree->lock); } =20 -void zswap_swapon(int type) +int zswap_swapon(int type) { struct zswap_tree *tree; =20 tree =3D kzalloc(sizeof(*tree), GFP_KERNEL); if (!tree) { pr_err("alloc failed, zswap disabled for swap type %d\n", type); - return; + return -ENOMEM; } =20 tree->rbroot =3D RB_ROOT; spin_lock_init(&tree->lock); zswap_trees[type] =3D tree; + return 0; } =20 void zswap_swapoff(int type) --=20 b4 0.10.1 From nobody Fri Dec 19 00:18:08 2025 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 5475BC10F04 for ; Wed, 6 Dec 2023 09:46:45 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via 
listexpand id S1377287AbjLFJqf (ORCPT ); Wed, 6 Dec 2023 04:46:35 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:57134 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1377267AbjLFJqb (ORCPT ); Wed, 6 Dec 2023 04:46:31 -0500 Received: from out-178.mta1.migadu.com (out-178.mta1.migadu.com [95.215.58.178]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 4CA4EB9 for ; Wed, 6 Dec 2023 01:46:37 -0800 (PST) X-Report-Abuse: Please report any abuse attempt to abuse@migadu.com and include these headers. From: Chengming Zhou Date: Wed, 06 Dec 2023 09:46:25 +0000 Subject: [PATCH 2/7] mm/zswap: split zswap rb-tree MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable Message-Id: <20231206-zswap-lock-optimize-v1-2-e25b059f9c3a@bytedance.com> References: <20231206-zswap-lock-optimize-v1-0-e25b059f9c3a@bytedance.com> In-Reply-To: <20231206-zswap-lock-optimize-v1-0-e25b059f9c3a@bytedance.com> To: Vitaly Wool , Nhat Pham , Johannes Weiner , Michal Hocko , Seth Jennings , Dan Streetman , Andrew Morton , Yosry Ahmed Cc: linux-mm@kvack.org, linux-kernel@vger.kernel.org, Chengming Zhou X-Developer-Signature: v=1; a=ed25519-sha256; t=1701855988; l=6878; i=zhouchengming@bytedance.com; s=20231204; h=from:subject:message-id; bh=91S6pF6My/bUROdLpQALha8Vf8LlqYuhabIDQmuzuWI=; b=wPMG7SuyQz02Gmf6/zoPkNXypnlp9pzAs3LaGP2c2Co5uxHZeHfDwL1yFMzUvPUEcTV7ChSyQ ajsHFI83VQLCQGcg29JkEtY4nc6utk5FaXQG6gU7Y5SrzXyxOn7RFyY X-Developer-Key: i=zhouchengming@bytedance.com; a=ed25519; pk=xFTmRtMG3vELGJBUiml7OYNdM393WOMv0iWWeQEVVdA= X-Migadu-Flow: FLOW_OUT Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Each swapfile has one rb-tree to search the mapping of swp_entry_t to zswap_entry, that use a spinlock to protect, which can cause heavy lock contention if multiple tasks zswap_store/load concurrently. 
Address this scalability problem by splitting the zswap rb-tree into multiple rb-trees, each corresponding to SWAP_ADDRESS_SPACE_PAGES (64M), just like we did in the swap cache address_space splitting. Signed-off-by: Chengming Zhou --- include/linux/zswap.h | 4 +-- mm/swapfile.c | 2 +- mm/zswap.c | 69 ++++++++++++++++++++++++++++++++---------------= ---- 3 files changed, 47 insertions(+), 28 deletions(-) diff --git a/include/linux/zswap.h b/include/linux/zswap.h index 7cccc02cb9e9..d3a8bc300b70 100644 --- a/include/linux/zswap.h +++ b/include/linux/zswap.h @@ -30,7 +30,7 @@ struct zswap_lruvec_state { bool zswap_store(struct folio *folio); bool zswap_load(struct folio *folio); void zswap_invalidate(int type, pgoff_t offset); -int zswap_swapon(int type); +int zswap_swapon(int type, unsigned long nr_pages); void zswap_swapoff(int type); void zswap_memcg_offline_cleanup(struct mem_cgroup *memcg); void zswap_lruvec_state_init(struct lruvec *lruvec); @@ -50,7 +50,7 @@ static inline bool zswap_load(struct folio *folio) } =20 static inline void zswap_invalidate(int type, pgoff_t offset) {} -static inline int zswap_swapon(int type) {} +static inline int zswap_swapon(int type, unsigned long nr_pages) {} static inline void zswap_swapoff(int type) {} static inline void zswap_memcg_offline_cleanup(struct mem_cgroup *memcg) {} static inline void zswap_lruvec_state_init(struct lruvec *lruvec) {} diff --git a/mm/swapfile.c b/mm/swapfile.c index 939e7590feda..da8367a3e076 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -3163,7 +3163,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialf= ile, int, swap_flags) if (error) goto bad_swap_unlock_inode; =20 - error =3D zswap_swapon(p->type); + error =3D zswap_swapon(p->type, maxpages); if (error) goto free_swap_address_space; =20 diff --git a/mm/zswap.c b/mm/zswap.c index 5e2b8d5ee33b..a6b4859a0164 100644 --- a/mm/zswap.c +++ b/mm/zswap.c @@ -234,6 +234,7 @@ struct zswap_tree { }; =20 static struct zswap_tree 
*zswap_trees[MAX_SWAPFILES]; +static unsigned int nr_zswap_trees[MAX_SWAPFILES]; =20 /* RCU-protected iteration */ static LIST_HEAD(zswap_pools); @@ -260,6 +261,10 @@ static bool zswap_has_pool; * helpers and fwd declarations **********************************/ =20 +#define swap_zswap_tree(entry) \ + (&zswap_trees[swp_type(entry)][swp_offset(entry) \ + >> SWAP_ADDRESS_SPACE_SHIFT]) + #define zswap_pool_debug(msg, p) \ pr_debug("%s pool %s/%s\n", msg, (p)->tfm_name, \ zpool_get_type((p)->zpools[0])) @@ -885,7 +890,7 @@ static enum lru_status shrink_memcg_cb(struct list_head= *item, struct list_lru_o * until the entry is verified to still be alive in the tree. */ swpoffset =3D swp_offset(entry->swpentry); - tree =3D zswap_trees[swp_type(entry->swpentry)]; + tree =3D swap_zswap_tree(entry->swpentry); list_lru_isolate(l, item); /* * It's safe to drop the lock here because we return either @@ -1535,10 +1540,9 @@ static void zswap_fill_page(void *ptr, unsigned long= value) bool zswap_store(struct folio *folio) { swp_entry_t swp =3D folio->swap; - int type =3D swp_type(swp); pgoff_t offset =3D swp_offset(swp); struct page *page =3D &folio->page; - struct zswap_tree *tree =3D zswap_trees[type]; + struct zswap_tree *tree =3D swap_zswap_tree(swp); struct zswap_entry *entry, *dupentry; struct scatterlist input, output; struct crypto_acomp_ctx *acomp_ctx; @@ -1610,7 +1614,7 @@ bool zswap_store(struct folio *folio) src =3D kmap_local_page(page); if (zswap_is_page_same_filled(src, &value)) { kunmap_local(src); - entry->swpentry =3D swp_entry(type, offset); + entry->swpentry =3D swp; entry->length =3D 0; entry->value =3D value; atomic_inc(&zswap_same_filled_pages); @@ -1688,7 +1692,7 @@ bool zswap_store(struct folio *folio) mutex_unlock(acomp_ctx->mutex); =20 /* populate entry */ - entry->swpentry =3D swp_entry(type, offset); + entry->swpentry =3D swp; entry->handle =3D handle; entry->length =3D dlen; =20 @@ -1748,10 +1752,9 @@ bool zswap_store(struct folio *folio) bool 
zswap_load(struct folio *folio) { swp_entry_t swp =3D folio->swap; - int type =3D swp_type(swp); pgoff_t offset =3D swp_offset(swp); struct page *page =3D &folio->page; - struct zswap_tree *tree =3D zswap_trees[type]; + struct zswap_tree *tree =3D swap_zswap_tree(swp); struct zswap_entry *entry; struct scatterlist input, output; struct crypto_acomp_ctx *acomp_ctx; @@ -1835,7 +1838,7 @@ bool zswap_load(struct folio *folio) =20 void zswap_invalidate(int type, pgoff_t offset) { - struct zswap_tree *tree =3D zswap_trees[type]; + struct zswap_tree *tree =3D swap_zswap_tree(swp_entry(type, offset)); struct zswap_entry *entry; =20 /* find */ @@ -1850,37 +1853,53 @@ void zswap_invalidate(int type, pgoff_t offset) spin_unlock(&tree->lock); } =20 -int zswap_swapon(int type) +int zswap_swapon(int type, unsigned long nr_pages) { - struct zswap_tree *tree; + struct zswap_tree *trees, *tree; + unsigned int nr, i; =20 - tree =3D kzalloc(sizeof(*tree), GFP_KERNEL); - if (!tree) { + nr =3D DIV_ROUND_UP(nr_pages, SWAP_ADDRESS_SPACE_PAGES); + trees =3D kvcalloc(nr, sizeof(*tree), GFP_KERNEL); + if (!trees) { pr_err("alloc failed, zswap disabled for swap type %d\n", type); return -ENOMEM; } =20 - tree->rbroot =3D RB_ROOT; - spin_lock_init(&tree->lock); - zswap_trees[type] =3D tree; + for (i =3D 0; i < nr; i++) { + tree =3D trees + i; + tree->rbroot =3D RB_ROOT; + spin_lock_init(&tree->lock); + } + + nr_zswap_trees[type] =3D nr; + zswap_trees[type] =3D trees; return 0; } =20 void zswap_swapoff(int type) { - struct zswap_tree *tree =3D zswap_trees[type]; - struct zswap_entry *entry, *n; + struct zswap_tree *trees =3D zswap_trees[type]; + unsigned int i; =20 - if (!tree) + if (!trees) return; =20 - /* walk the tree and free everything */ - spin_lock(&tree->lock); - rbtree_postorder_for_each_entry_safe(entry, n, &tree->rbroot, rbnode) - zswap_free_entry(entry); - tree->rbroot =3D RB_ROOT; - spin_unlock(&tree->lock); - kfree(tree); + for (i =3D 0; i < nr_zswap_trees[type]; i++) { + struct 
zswap_tree *tree =3D trees + i; + struct zswap_entry *entry, *n; + + /* walk the tree and free everything */ + spin_lock(&tree->lock); + rbtree_postorder_for_each_entry_safe(entry, n, + &tree->rbroot, + rbnode) + zswap_free_entry(entry); + tree->rbroot =3D RB_ROOT; + spin_unlock(&tree->lock); + } + + kvfree(trees); + nr_zswap_trees[type] =3D 0; zswap_trees[type] =3D NULL; } =20 --=20 b4 0.10.1 From nobody Fri Dec 19 00:18:08 2025 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 1A7CEC10DC1 for ; Wed, 6 Dec 2023 09:46:46 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1377282AbjLFJqh (ORCPT ); Wed, 6 Dec 2023 04:46:37 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:33404 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1377274AbjLFJqc (ORCPT ); Wed, 6 Dec 2023 04:46:32 -0500 Received: from out-171.mta1.migadu.com (out-171.mta1.migadu.com [95.215.58.171]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 28393D1 for ; Wed, 6 Dec 2023 01:46:39 -0800 (PST) X-Report-Abuse: Please report any abuse attempt to abuse@migadu.com and include these headers. 
From: Chengming Zhou Date: Wed, 06 Dec 2023 09:46:26 +0000 Subject: [PATCH 3/7] mm/zswap: reuse dstmem when decompress MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable Message-Id: <20231206-zswap-lock-optimize-v1-3-e25b059f9c3a@bytedance.com> References: <20231206-zswap-lock-optimize-v1-0-e25b059f9c3a@bytedance.com> In-Reply-To: <20231206-zswap-lock-optimize-v1-0-e25b059f9c3a@bytedance.com> To: Vitaly Wool , Nhat Pham , Johannes Weiner , Michal Hocko , Seth Jennings , Dan Streetman , Andrew Morton , Yosry Ahmed Cc: linux-mm@kvack.org, linux-kernel@vger.kernel.org, Chengming Zhou X-Developer-Signature: v=1; a=ed25519-sha256; t=1701855988; l=2376; i=zhouchengming@bytedance.com; s=20231204; h=from:subject:message-id; bh=fpCgC2uqFVZCuogrKgOI9EeZNXHQYwaI+M0J57wRCA4=; b=wudSyMvTqyzG7OZuJjTkGo+D9zCpS8Tofe/rODy3CP0YFRdLMSz4sWGiyGOubX4khTnGFFSyX Zm343RFwsfDDmmErMRr+FCJT56Wg4VhAosu9lLVrHVfRY+DTTGtIZ9b X-Developer-Key: i=zhouchengming@bytedance.com; a=ed25519; pk=xFTmRtMG3vELGJBUiml7OYNdM393WOMv0iWWeQEVVdA= X-Migadu-Flow: FLOW_OUT Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org In the !zpool_can_sleep_mapped() case such as zsmalloc, we need to first copy the entry->handle memory to a temporary memory, which is allocated using kmalloc. Obviously we can reuse the per-compressor dstmem to avoid allocating every time, since it's percpu-compressor and protected in mutex. 
Signed-off-by: Chengming Zhou Reviewed-by: Nhat Pham --- mm/zswap.c | 29 +++++++++-------------------- 1 file changed, 9 insertions(+), 20 deletions(-) diff --git a/mm/zswap.c b/mm/zswap.c index a6b4859a0164..d93a7b58b5af 100644 --- a/mm/zswap.c +++ b/mm/zswap.c @@ -1758,9 +1758,9 @@ bool zswap_load(struct folio *folio) struct zswap_entry *entry; struct scatterlist input, output; struct crypto_acomp_ctx *acomp_ctx; - u8 *src, *dst, *tmp; + unsigned int dlen =3D PAGE_SIZE; + u8 *src, *dst; struct zpool *zpool; - unsigned int dlen; bool ret; =20 VM_WARN_ON_ONCE(!folio_test_locked(folio)); @@ -1782,27 +1782,18 @@ bool zswap_load(struct folio *folio) goto stats; } =20 - zpool =3D zswap_find_zpool(entry); - if (!zpool_can_sleep_mapped(zpool)) { - tmp =3D kmalloc(entry->length, GFP_KERNEL); - if (!tmp) { - ret =3D false; - goto freeentry; - } - } - /* decompress */ - dlen =3D PAGE_SIZE; - src =3D zpool_map_handle(zpool, entry->handle, ZPOOL_MM_RO); + acomp_ctx =3D raw_cpu_ptr(entry->pool->acomp_ctx); + mutex_lock(acomp_ctx->mutex); =20 + zpool =3D zswap_find_zpool(entry); + src =3D zpool_map_handle(zpool, entry->handle, ZPOOL_MM_RO); if (!zpool_can_sleep_mapped(zpool)) { - memcpy(tmp, src, entry->length); - src =3D tmp; + memcpy(acomp_ctx->dstmem, src, entry->length); + src =3D acomp_ctx->dstmem; zpool_unmap_handle(zpool, entry->handle); } =20 - acomp_ctx =3D raw_cpu_ptr(entry->pool->acomp_ctx); - mutex_lock(acomp_ctx->mutex); sg_init_one(&input, src, entry->length); sg_init_table(&output, 1); sg_set_page(&output, page, PAGE_SIZE, 0); @@ -1813,15 +1804,13 @@ bool zswap_load(struct folio *folio) =20 if (zpool_can_sleep_mapped(zpool)) zpool_unmap_handle(zpool, entry->handle); - else - kfree(tmp); =20 ret =3D true; stats: count_vm_event(ZSWPIN); if (entry->objcg) count_objcg_event(entry->objcg, ZSWPIN); -freeentry: + spin_lock(&tree->lock); if (ret && zswap_exclusive_loads_enabled) { zswap_invalidate_entry(tree, entry); --=20 b4 0.10.1 From nobody Fri Dec 19 00:18:08 2025 
Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id A899BC10F04 for ; Wed, 6 Dec 2023 09:46:49 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1377298AbjLFJqk (ORCPT ); Wed, 6 Dec 2023 04:46:40 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:33572 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1377299AbjLFJqh (ORCPT ); Wed, 6 Dec 2023 04:46:37 -0500 Received: from out-176.mta1.migadu.com (out-176.mta1.migadu.com [IPv6:2001:41d0:203:375::b0]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id A0FB8D53 for ; Wed, 6 Dec 2023 01:46:42 -0800 (PST) X-Report-Abuse: Please report any abuse attempt to abuse@migadu.com and include these headers. From: Chengming Zhou Date: Wed, 06 Dec 2023 09:46:27 +0000 Subject: [PATCH 4/7] mm/zswap: change dstmem size to one page MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable Message-Id: <20231206-zswap-lock-optimize-v1-4-e25b059f9c3a@bytedance.com> References: <20231206-zswap-lock-optimize-v1-0-e25b059f9c3a@bytedance.com> In-Reply-To: <20231206-zswap-lock-optimize-v1-0-e25b059f9c3a@bytedance.com> To: Vitaly Wool , Nhat Pham , Johannes Weiner , Michal Hocko , Seth Jennings , Dan Streetman , Andrew Morton , Yosry Ahmed Cc: linux-mm@kvack.org, linux-kernel@vger.kernel.org, Chengming Zhou X-Developer-Signature: v=1; a=ed25519-sha256; t=1701855988; l=1265; i=zhouchengming@bytedance.com; s=20231204; h=from:subject:message-id; bh=HqwNJM8tE/N/sls10yfGIW7SQYFQNs7CH/U23vKZJJA=; b=vLSavMUZY2NbU7Ht3zXNimzw+b1It8sOxPqmY7gDO7tM7+foam8clg5CtKs4bOcNaCYz3xPso WsTwNYwXlA9BQKJg3zkaJ53yg9R0yBfDoIjVp0o2j0FWuC1shqBwv4r X-Developer-Key: i=zhouchengming@bytedance.com; a=ed25519; pk=xFTmRtMG3vELGJBUiml7OYNdM393WOMv0iWWeQEVVdA= 
X-Migadu-Flow: FLOW_OUT Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Maybe I missed something, but the dstmem size of 2 * PAGE_SIZE is very confusing, since we need at most one page when compressing, and the "dlen" passed to acomp_request_set_params() is also PAGE_SIZE. So change it to one page, and remove the comment that referred to the old size. Signed-off-by: Chengming Zhou --- mm/zswap.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/mm/zswap.c b/mm/zswap.c index d93a7b58b5af..999671dcb469 100644 --- a/mm/zswap.c +++ b/mm/zswap.c @@ -699,7 +699,7 @@ static int zswap_dstmem_prepare(unsigned int cpu) struct mutex *mutex; u8 *dst; =20 - dst =3D kmalloc_node(PAGE_SIZE * 2, GFP_KERNEL, cpu_to_node(cpu)); + dst =3D kmalloc_node(PAGE_SIZE, GFP_KERNEL, cpu_to_node(cpu)); if (!dst) return -ENOMEM; =20 @@ -1649,8 +1649,7 @@ bool zswap_store(struct folio *folio) sg_init_table(&input, 1); sg_set_page(&input, page, PAGE_SIZE, 0); =20 - /* zswap_dstmem is of size (PAGE_SIZE * 2). Reflect same in sg_list */ - sg_init_one(&output, dst, PAGE_SIZE * 2); + sg_init_one(&output, dst, PAGE_SIZE); acomp_request_set_params(acomp_ctx->req, &input, &output, PAGE_SIZE, dlen= ); /* * it maybe looks a little bit silly that we send an asynchronous request, --=20 b4 0.10.1 From nobody Fri Dec 19 00:18:08 2025 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 45A4CC4167B for ; Wed, 6 Dec 2023 09:46:52 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1377318AbjLFJqn (ORCPT ); Wed, 6 Dec 2023 04:46:43 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:33546 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1377296AbjLFJqi (ORCPT ); Wed, 6 Dec 2023 04:46:38 -0500 Received: from out-170.mta1.migadu.com 
(out-170.mta1.migadu.com [95.215.58.170]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id AAD6DD40 for ; Wed, 6 Dec 2023 01:46:43 -0800 (PST) X-Report-Abuse: Please report any abuse attempt to abuse@migadu.com and include these headers. From: Chengming Zhou Date: Wed, 06 Dec 2023 09:46:28 +0000 Subject: [PATCH 5/7] mm/zswap: refactor out __zswap_load() MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable Message-Id: <20231206-zswap-lock-optimize-v1-5-e25b059f9c3a@bytedance.com> References: <20231206-zswap-lock-optimize-v1-0-e25b059f9c3a@bytedance.com> In-Reply-To: <20231206-zswap-lock-optimize-v1-0-e25b059f9c3a@bytedance.com> To: Vitaly Wool , Nhat Pham , Johannes Weiner , Michal Hocko , Seth Jennings , Dan Streetman , Andrew Morton , Yosry Ahmed Cc: linux-mm@kvack.org, linux-kernel@vger.kernel.org, Chengming Zhou X-Developer-Signature: v=1; a=ed25519-sha256; t=1701855988; l=5290; i=zhouchengming@bytedance.com; s=20231204; h=from:subject:message-id; bh=ju3QBW8cgbkzkTilyJdKpdv6IgD35PF5PphWVt2TBXg=; b=sN2QzBgyvVSvfWoqMPvo+BA+Z/wX8hCyJ3NhGmP4yMNcUemm3UiYwXWjJiunDYw+3nKWn0X+s um4PqJ1hXePCeO2kEV8Gev6JvZihkII6L9Ti8GacRbiWlvx29DlAv96 X-Developer-Key: i=zhouchengming@bytedance.com; a=ed25519; pk=xFTmRtMG3vELGJBUiml7OYNdM393WOMv0iWWeQEVVdA= X-Migadu-Flow: FLOW_OUT Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org The zswap_load() and zswap_writeback_entry() have the same part that decompress the data from zswap_entry to page, so refactor out the common part as __zswap_load(entry, page). 
Signed-off-by: Chengming Zhou Reviewed-by: Nhat Pham --- mm/zswap.c | 108 ++++++++++++++++++++++-----------------------------------= ---- 1 file changed, 39 insertions(+), 69 deletions(-) diff --git a/mm/zswap.c b/mm/zswap.c index 999671dcb469..667b66a3911b 100644 --- a/mm/zswap.c +++ b/mm/zswap.c @@ -1380,6 +1380,42 @@ static int zswap_enabled_param_set(const char *val, return ret; } =20 +static void __zswap_load(struct zswap_entry *entry, struct page *page) +{ + struct scatterlist input, output; + unsigned int dlen =3D PAGE_SIZE; + struct crypto_acomp_ctx *acomp_ctx; + struct zpool *zpool; + u8 *src; + int ret; + + /* decompress */ + acomp_ctx =3D raw_cpu_ptr(entry->pool->acomp_ctx); + mutex_lock(acomp_ctx->mutex); + + zpool =3D zswap_find_zpool(entry); + src =3D zpool_map_handle(zpool, entry->handle, ZPOOL_MM_RO); + if (!zpool_can_sleep_mapped(zpool)) { + memcpy(acomp_ctx->dstmem, src, entry->length); + src =3D acomp_ctx->dstmem; + zpool_unmap_handle(zpool, entry->handle); + } + + sg_init_one(&input, src, entry->length); + sg_init_table(&output, 1); + sg_set_page(&output, page, PAGE_SIZE, 0); + acomp_request_set_params(acomp_ctx->req, &input, &output, entry->length, = dlen); + ret =3D crypto_wait_req(crypto_acomp_decompress(acomp_ctx->req), &acomp_c= tx->wait); + dlen =3D acomp_ctx->req->dlen; + mutex_unlock(acomp_ctx->mutex); + + if (zpool_can_sleep_mapped(zpool)) + zpool_unmap_handle(zpool, entry->handle); + + BUG_ON(ret); + BUG_ON(dlen !=3D PAGE_SIZE); +} + /********************************* * writeback code **********************************/ @@ -1401,23 +1437,12 @@ static int zswap_writeback_entry(struct zswap_entry= *entry, swp_entry_t swpentry =3D entry->swpentry; struct page *page; struct mempolicy *mpol; - struct scatterlist input, output; - struct crypto_acomp_ctx *acomp_ctx; - struct zpool *pool =3D zswap_find_zpool(entry); bool page_was_allocated; - u8 *src, *tmp =3D NULL; - unsigned int dlen; int ret; struct writeback_control wbc =3D { .sync_mode 
=3D WB_SYNC_NONE, }; =20 - if (!zpool_can_sleep_mapped(pool)) { - tmp =3D kmalloc(PAGE_SIZE, GFP_KERNEL); - if (!tmp) - return -ENOMEM; - } - /* try to allocate swap cache page */ mpol =3D get_task_policy(current); page =3D __read_swap_cache_async(swpentry, GFP_KERNEL, mpol, @@ -1450,33 +1475,7 @@ static int zswap_writeback_entry(struct zswap_entry = *entry, } spin_unlock(&tree->lock); =20 - /* decompress */ - acomp_ctx =3D raw_cpu_ptr(entry->pool->acomp_ctx); - dlen =3D PAGE_SIZE; - - src =3D zpool_map_handle(pool, entry->handle, ZPOOL_MM_RO); - if (!zpool_can_sleep_mapped(pool)) { - memcpy(tmp, src, entry->length); - src =3D tmp; - zpool_unmap_handle(pool, entry->handle); - } - - mutex_lock(acomp_ctx->mutex); - sg_init_one(&input, src, entry->length); - sg_init_table(&output, 1); - sg_set_page(&output, page, PAGE_SIZE, 0); - acomp_request_set_params(acomp_ctx->req, &input, &output, entry->length, = dlen); - ret =3D crypto_wait_req(crypto_acomp_decompress(acomp_ctx->req), &acomp_c= tx->wait); - dlen =3D acomp_ctx->req->dlen; - mutex_unlock(acomp_ctx->mutex); - - if (!zpool_can_sleep_mapped(pool)) - kfree(tmp); - else - zpool_unmap_handle(pool, entry->handle); - - BUG_ON(ret); - BUG_ON(dlen !=3D PAGE_SIZE); + __zswap_load(entry, page); =20 /* page is up to date */ SetPageUptodate(page); @@ -1496,9 +1495,6 @@ static int zswap_writeback_entry(struct zswap_entry *= entry, return ret; =20 fail: - if (!zpool_can_sleep_mapped(pool)) - kfree(tmp); - /* * If we get here because the page is already in swapcache, a * load may be happening concurrently. 
It is safe and okay to @@ -1755,11 +1751,7 @@ bool zswap_load(struct folio *folio) struct page *page =3D &folio->page; struct zswap_tree *tree =3D swap_zswap_tree(swp); struct zswap_entry *entry; - struct scatterlist input, output; - struct crypto_acomp_ctx *acomp_ctx; - unsigned int dlen =3D PAGE_SIZE; - u8 *src, *dst; - struct zpool *zpool; + u8 *dst; bool ret; =20 VM_WARN_ON_ONCE(!folio_test_locked(folio)); @@ -1781,29 +1773,7 @@ bool zswap_load(struct folio *folio) goto stats; } =20 - /* decompress */ - acomp_ctx =3D raw_cpu_ptr(entry->pool->acomp_ctx); - mutex_lock(acomp_ctx->mutex); - - zpool =3D zswap_find_zpool(entry); - src =3D zpool_map_handle(zpool, entry->handle, ZPOOL_MM_RO); - if (!zpool_can_sleep_mapped(zpool)) { - memcpy(acomp_ctx->dstmem, src, entry->length); - src =3D acomp_ctx->dstmem; - zpool_unmap_handle(zpool, entry->handle); - } - - sg_init_one(&input, src, entry->length); - sg_init_table(&output, 1); - sg_set_page(&output, page, PAGE_SIZE, 0); - acomp_request_set_params(acomp_ctx->req, &input, &output, entry->length, = dlen); - if (crypto_wait_req(crypto_acomp_decompress(acomp_ctx->req), &acomp_ctx->= wait)) - WARN_ON(1); - mutex_unlock(acomp_ctx->mutex); - - if (zpool_can_sleep_mapped(zpool)) - zpool_unmap_handle(zpool, entry->handle); - + __zswap_load(entry, page); ret =3D true; stats: count_vm_event(ZSWPIN); --=20 b4 0.10.1 From nobody Fri Dec 19 00:18:08 2025 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 18723C10DC1 for ; Wed, 6 Dec 2023 09:46:54 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1377328AbjLFJqp (ORCPT ); Wed, 6 Dec 2023 04:46:45 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:33594 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1377292AbjLFJqj 
(ORCPT ); Wed, 6 Dec 2023 04:46:39 -0500 Received: from out-184.mta1.migadu.com (out-184.mta1.migadu.com [IPv6:2001:41d0:203:375::b8]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id C6595135 for ; Wed, 6 Dec 2023 01:46:45 -0800 (PST) X-Report-Abuse: Please report any abuse attempt to abuse@migadu.com and include these headers. From: Chengming Zhou Date: Wed, 06 Dec 2023 09:46:29 +0000 Subject: [PATCH 6/7] mm/zswap: cleanup zswap_load() MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable Message-Id: <20231206-zswap-lock-optimize-v1-6-e25b059f9c3a@bytedance.com> References: <20231206-zswap-lock-optimize-v1-0-e25b059f9c3a@bytedance.com> In-Reply-To: <20231206-zswap-lock-optimize-v1-0-e25b059f9c3a@bytedance.com> To: Vitaly Wool , Nhat Pham , Johannes Weiner , Michal Hocko , Seth Jennings , Dan Streetman , Andrew Morton , Yosry Ahmed Cc: linux-mm@kvack.org, linux-kernel@vger.kernel.org, Chengming Zhou X-Developer-Signature: v=1; a=ed25519-sha256; t=1701855988; l=1423; i=zhouchengming@bytedance.com; s=20231204; h=from:subject:message-id; bh=4dWwyqNx8c2zAiSbKiIuSiLwoPGaeKufk6rm1H1OjyU=; b=mZ9Iu4zkHefQk19MezNP9JCSi8K14mS0atJNAlahjaBU1aLDqTWfFZSjHbYanEfMEBZ7TUpMw gqC7DA3KfbRCEXYKYzA63WfdprZF2BjpqhLC6VPExYC46NfHkEsxMYf X-Developer-Key: i=zhouchengming@bytedance.com; a=ed25519; pk=xFTmRtMG3vELGJBUiml7OYNdM393WOMv0iWWeQEVVdA= X-Migadu-Flow: FLOW_OUT Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org After the common decompress part goes to __zswap_load(), we can cleanup the zswap_load() a little. 
Signed-off-by: Chengming Zhou --- mm/zswap.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/mm/zswap.c b/mm/zswap.c index 667b66a3911b..50405811cd7b 100644 --- a/mm/zswap.c +++ b/mm/zswap.c @@ -1752,7 +1752,6 @@ bool zswap_load(struct folio *folio) struct zswap_tree *tree =3D swap_zswap_tree(swp); struct zswap_entry *entry; u8 *dst; - bool ret; =20 VM_WARN_ON_ONCE(!folio_test_locked(folio)); =20 @@ -1769,19 +1768,16 @@ bool zswap_load(struct folio *folio) dst =3D kmap_local_page(page); zswap_fill_page(dst, entry->value); kunmap_local(dst); - ret =3D true; - goto stats; + } else { + __zswap_load(entry, page); } =20 - __zswap_load(entry, page); - ret =3D true; -stats: count_vm_event(ZSWPIN); if (entry->objcg) count_objcg_event(entry->objcg, ZSWPIN); =20 spin_lock(&tree->lock); - if (ret && zswap_exclusive_loads_enabled) { + if (zswap_exclusive_loads_enabled) { zswap_invalidate_entry(tree, entry); folio_mark_dirty(folio); } else if (entry->length) { @@ -1791,7 +1787,7 @@ bool zswap_load(struct folio *folio) zswap_entry_put(tree, entry); spin_unlock(&tree->lock); =20 - return ret; + return true; } =20 void zswap_invalidate(int type, pgoff_t offset) --=20 b4 0.10.1 From nobody Fri Dec 19 00:18:08 2025 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 97726C4167B for ; Wed, 6 Dec 2023 09:47:03 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1377362AbjLFJqy (ORCPT ); Wed, 6 Dec 2023 04:46:54 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:52456 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1377321AbjLFJqn (ORCPT ); Wed, 6 Dec 2023 04:46:43 -0500 Received: from out-171.mta1.migadu.com (out-171.mta1.migadu.com [95.215.58.171]) by lindbergh.monkeyblade.net (Postfix) 
with ESMTPS id C2EDDD67 for ; Wed, 6 Dec 2023 01:46:47 -0800 (PST) X-Report-Abuse: Please report any abuse attempt to abuse@migadu.com and include these headers. From: Chengming Zhou Date: Wed, 06 Dec 2023 09:46:30 +0000 Subject: [PATCH 7/7] mm/zswap: cleanup zswap_reclaim_entry() MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable Message-Id: <20231206-zswap-lock-optimize-v1-7-e25b059f9c3a@bytedance.com> References: <20231206-zswap-lock-optimize-v1-0-e25b059f9c3a@bytedance.com> In-Reply-To: <20231206-zswap-lock-optimize-v1-0-e25b059f9c3a@bytedance.com> To: Vitaly Wool , Nhat Pham , Johannes Weiner , Michal Hocko , Seth Jennings , Dan Streetman , Andrew Morton , Yosry Ahmed Cc: linux-mm@kvack.org, linux-kernel@vger.kernel.org, Chengming Zhou X-Developer-Signature: v=1; a=ed25519-sha256; t=1701855988; l=1963; i=zhouchengming@bytedance.com; s=20231204; h=from:subject:message-id; bh=WQp3TO7eGK4NEJ2KSQiW8NgcHiI75uDxqzzNLxV8SyA=; b=cX50JTACBKj/y4OSJ3fqN9XlNGNG8ePq/CbHqPflo46e6ZMG5upWYYRApku021YWMN1SgS3i9 B/xOj7S/bMGBm4R4og7jAUlPoBl15i7PcZIVp5JnUC1yxy4hhNQBj7H X-Developer-Key: i=zhouchengming@bytedance.com; a=ed25519; pk=xFTmRtMG3vELGJBUiml7OYNdM393WOMv0iWWeQEVVdA= X-Migadu-Flow: FLOW_OUT Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Likewise, now that the common decompression code has moved into __zswap_load(), we can clean up zswap_writeback_entry() a little: the 'ret' variable and the 'fail' label are no longer needed, so each error path returns its error code directly. 
Signed-off-by: Chengming Zhou --- mm/zswap.c | 23 +++++------------------ 1 file changed, 5 insertions(+), 18 deletions(-) diff --git a/mm/zswap.c b/mm/zswap.c index 50405811cd7b..d3fedda0d774 100644 --- a/mm/zswap.c +++ b/mm/zswap.c @@ -1438,7 +1438,6 @@ static int zswap_writeback_entry(struct zswap_entry *= entry, struct page *page; struct mempolicy *mpol; bool page_was_allocated; - int ret; struct writeback_control wbc =3D { .sync_mode =3D WB_SYNC_NONE, }; @@ -1447,16 +1446,13 @@ static int zswap_writeback_entry(struct zswap_entry= *entry, mpol =3D get_task_policy(current); page =3D __read_swap_cache_async(swpentry, GFP_KERNEL, mpol, NO_INTERLEAVE_INDEX, &page_was_allocated, true); - if (!page) { - ret =3D -ENOMEM; - goto fail; - } + if (!page) + return -ENOMEM; =20 /* Found an existing page, we raced with load/swapin */ if (!page_was_allocated) { put_page(page); - ret =3D -EEXIST; - goto fail; + return -EEXIST; } =20 /* @@ -1470,8 +1466,7 @@ static int zswap_writeback_entry(struct zswap_entry *= entry, if (zswap_rb_search(&tree->rbroot, swp_offset(entry->swpentry)) !=3D entr= y) { spin_unlock(&tree->lock); delete_from_swap_cache(page_folio(page)); - ret =3D -ENOMEM; - goto fail; + return -ENOMEM; } spin_unlock(&tree->lock); =20 @@ -1492,15 +1487,7 @@ static int zswap_writeback_entry(struct zswap_entry = *entry, __swap_writepage(page, &wbc); put_page(page); =20 - return ret; - -fail: - /* - * If we get here because the page is already in swapcache, a - * load may be happening concurrently. It is safe and okay to - * not free the entry. It is also okay to return !0. - */ - return ret; + return 0; } =20 static int zswap_is_page_same_filled(void *ptr, unsigned long *value) --=20 b4 0.10.1