From nobody Fri Dec 19 15:19:31 2025
From: Sergey Senozhatsky
To: Minchan Kim
Cc: Andrew Morton, Richard Chang, linux-kernel@vger.kernel.org, Sergey Senozhatsky
Subject: [RFC PATCH 1/3] zram: introduce ZRAM_PP_SLOT flag
Date: Wed, 4 Sep 2024 22:24:53 +0900
Message-ID: <20240904132508.2000743-2-senozhatsky@chromium.org>
In-Reply-To: <20240904132508.2000743-1-senozhatsky@chromium.org>
References: <20240904132508.2000743-1-senozhatsky@chromium.org>
MIME-Version: 1.0
Content-Type: text/plain; charset="utf-8"

This flag will indicate that the slot was selected as a candidate slot
for post-processing (pp) and was assigned to a pp group.

Signed-off-by: Sergey Senozhatsky
---
 drivers/block/zram/zram_drv.c | 3 +++
 drivers/block/zram/zram_drv.h | 1 +
 2 files changed, 4 insertions(+)

diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 1f1bf175a6c3..c91fdf399d1b 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -1368,6 +1368,9 @@ static void zram_free_page(struct zram *zram, size_t index)
                 goto out;
         }
 
+        if (zram_test_flag(zram, index, ZRAM_PP_SLOT))
+                zram_clear_flag(zram, index, ZRAM_PP_SLOT);
+
         handle = zram_get_handle(zram, index);
         if (!handle)
                 return;
diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h
index b976824ead67..e0578b3542ce 100644
--- a/drivers/block/zram/zram_drv.h
+++ b/drivers/block/zram/zram_drv.h
@@ -50,6 +50,7 @@ enum zram_pageflags {
         ZRAM_SAME,      /* Page consists the same element */
         ZRAM_WB,        /* page is stored on backing_device */
         ZRAM_UNDER_WB,  /* page is under writeback */
+        ZRAM_PP_SLOT,   /* Selected for post-processing */
         ZRAM_HUGE,      /* Incompressible page */
         ZRAM_IDLE,      /* not accessed page since last idle marking */
         ZRAM_INCOMPRESSIBLE, /* none of the algorithms could compress it */
-- 
2.46.0.469.g59c65b2a67-goog

From nobody Fri Dec 19 15:19:31 2025
From: Sergey Senozhatsky
To: Minchan Kim
Cc: Andrew Morton, Richard Chang, linux-kernel@vger.kernel.org, Sergey Senozhatsky
Subject: [RFC PATCH 2/3] zram: rework recompress target selection logic
Date: Wed, 4 Sep 2024 22:24:54 +0900
Message-ID: <20240904132508.2000743-3-senozhatsky@chromium.org>
In-Reply-To: <20240904132508.2000743-1-senozhatsky@chromium.org>
References: <20240904132508.2000743-1-senozhatsky@chromium.org>
MIME-Version: 1.0
Content-Type: text/plain; charset="utf-8"

Target slot selection for recompression is just a simple iteration over
zram->table entries (stored pages) from slot 0 to the max slot.
Given that zram->table slots are written in random order and are not
sorted by size, a simple iteration over slots selects suboptimal targets
for recompression. This is not a problem if we recompress every single
zram->table slot, but we never do that in reality. In reality we limit
the number of slots we can recompress (via the max_pages parameter), so
proper slot selection becomes very important.

The strategy is quite simple: suppose we have two candidate slots for
recompression, one of size 48 bytes and one of size 2800 bytes, and we
can recompress only one of them. It certainly makes more sense to pick
the 2800-byte entry, because even if we manage to compress the 48-byte
object further, the savings are going to be very small. Potential
savings after a good re-compression of a 2800-byte object are much
higher.

This patch reworks slot selection and introduces the strategy described
above: among candidate slots, always select the biggest ones first. For
that, the patch introduces a zram_pp_ctl (post-processing) structure
which holds 16 groups of slots. Slots are assigned to a particular group
based on their sizes - the larger the slot, the higher the group index.
This, basically, sorts slots by size in linear time (we still perform
just one iteration over zram->table slots). When we select a slot for
recompression, we always look in the higher pp groups first (those that
hold the largest slots), which achieves the desired behavior.

TEST
====

A very simple demonstration: zram is configured with zstd, and zstd with
dict as a recompression stream. A limited recompression (max 4096 pages)
is then performed, with a log of the sizes of slots that were
recompressed. You can see that patched zram selects slots for
recompression in a significantly different manner, which leads to higher
memory savings (see column #2 of the mm_stat output).

BASE
----

*** initial state of zram device
/sys/block/zram0/mm_stat
1750994944 504491413 514203648        0 514203648        1        0    34204    34204

*** recompress idle max_pages=4096
/sys/block/zram0/mm_stat
1750994944 504262229 514953216        0 514203648        1        0    34204    34204

Sizes of selected objects for recompression:
... 45 58 24 226 91 40 24 24 24 424 2104 93 2078 2078 2078 959 154 ...

PATCHED
-------

*** initial state of zram device
/sys/block/zram0/mm_stat
1750982656 504492801 514170880        0 514170880        1        0    34204    34204

*** recompress idle max_pages=4096
/sys/block/zram0/mm_stat
1750982656 503716710 517586944        0 514170880        1        0    34204    34204

Sizes of selected objects for recompression:
... 2826 2861 2829 2989 2713 2583 2698 2685 2748 2617 ...
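To illustrate the grouping and selection strategy described above, here
is a minimal, self-contained user-space sketch (an editorial
illustration, not part of the patch; all names are hypothetical and only
mirror the zram_pp_ctl logic added by the diff below, assuming a 4K page
size):

#include <stdio.h>
#include <stdlib.h>

#define SKETCH_PAGE_SIZE 4096
#define NUM_GROUPS       17   /* mirrors NUM_PP_GROUPS in the patch */

/* One candidate slot; only its compressed object size matters here. */
struct candidate {
        unsigned int size;
        struct candidate *next;
};

static struct candidate *groups[NUM_GROUPS];

/* Larger objects land in higher groups; bucketing is O(1) per slot. */
static void place_candidate(struct candidate *c)
{
        unsigned int span = SKETCH_PAGE_SIZE / (NUM_GROUPS - 1); /* 256 */
        unsigned int idx = c->size / span;

        c->next = groups[idx];
        groups[idx] = c;
}

/*
 * Always pop from the highest non-empty group first. Like the patch,
 * group 0 (the smallest objects) is never selected; its candidates are
 * simply left in place.
 */
static struct candidate *select_candidate(void)
{
        for (int idx = NUM_GROUPS - 1; idx > 0; idx--) {
                if (groups[idx]) {
                        struct candidate *c = groups[idx];

                        groups[idx] = c->next;
                        return c;
                }
        }
        return NULL;
}

int main(void)
{
        /* Sizes loosely based on the examples in the commit message. */
        unsigned int sizes[] = { 48, 2800, 226, 2104, 959, 3680 };
        struct candidate *c;

        for (size_t i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++) {
                c = malloc(sizeof(*c));
                if (!c)
                        return 1;
                c->size = sizes[i];
                place_candidate(c);
        }

        /* Candidates come out largest size group first. */
        while ((c = select_candidate())) {
                printf("recompress candidate: %u bytes\n", c->size);
                free(c);
        }
        return 0;
}

Candidates are bucketed in constant time per slot and always consumed
from the highest non-empty group, which is the ordering visible in the
PATCHED log above.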
Signed-off-by: Sergey Senozhatsky
---
 drivers/block/zram/zram_drv.c | 180 ++++++++++++++++++++++++++++------
 1 file changed, 152 insertions(+), 28 deletions(-)

diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index c91fdf399d1b..998efe3979f8 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -659,8 +659,9 @@ static ssize_t writeback_store(struct device *dev,
                         goto next;
 
                 if (zram_test_flag(zram, index, ZRAM_WB) ||
-                    zram_test_flag(zram, index, ZRAM_SAME) ||
-                    zram_test_flag(zram, index, ZRAM_UNDER_WB))
+                    zram_test_flag(zram, index, ZRAM_SAME) ||
+                    zram_test_flag(zram, index, ZRAM_UNDER_WB) ||
+                    zram_test_flag(zram, index, ZRAM_PP_SLOT))
                         goto next;
 
                 if (mode & IDLE_WRITEBACK &&
@@ -1648,6 +1649,115 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
 }
 
 #ifdef CONFIG_ZRAM_MULTI_COMP
+struct zram_pp_slot {
+        unsigned long index;
+        struct list_head entry;
+};
+
+#define NUM_PP_GROUPS 17
+
+struct zram_pp_ctl {
+        struct list_head slots[NUM_PP_GROUPS];
+};
+
+static void init_pp_ctl(struct zram_pp_ctl *ctl)
+{
+        u32 idx;
+
+        for (idx = 0; idx < NUM_PP_GROUPS; idx++)
+                INIT_LIST_HEAD(&ctl->slots[idx]);
+}
+
+static void release_pp_slot(struct zram *zram, struct zram_pp_slot *pps)
+{
+        zram_slot_lock(zram, pps->index);
+        if (zram_test_flag(zram, pps->index, ZRAM_PP_SLOT))
+                zram_clear_flag(zram, pps->index, ZRAM_PP_SLOT);
+        zram_slot_unlock(zram, pps->index);
+        kfree(pps);
+}
+
+static void release_pp_ctl(struct zram *zram, struct zram_pp_ctl *ctl)
+{
+        u32 idx;
+
+        for (idx = 0; idx < NUM_PP_GROUPS; idx++) {
+                while (!list_empty(&ctl->slots[idx])) {
+                        struct zram_pp_slot *pps;
+
+                        pps = list_first_entry(&ctl->slots[idx],
+                                               struct zram_pp_slot,
+                                               entry);
+                        list_del_init(&pps->entry);
+                        release_pp_slot(zram, pps);
+                }
+        }
+}
+
+static void place_pp_slot(struct zram *zram, struct zram_pp_ctl *ctl,
+                          struct zram_pp_slot *pps)
+{
+        s32 diff, idx;
+
+        /*
+         * On 4K system this keeps PP slot groups 256 bytes apart. The
+         * higher the group IDX the larger the slot size.
+         */
+        diff = PAGE_SIZE / (NUM_PP_GROUPS - 1);
+        idx = zram_get_obj_size(zram, pps->index) / diff;
+        list_add(&pps->entry, &ctl->slots[idx]);
+
+        zram_set_flag(zram, pps->index, ZRAM_PP_SLOT);
+}
+
+#define RECOMPRESS_IDLE (1 << 0)
+#define RECOMPRESS_HUGE (1 << 1)
+
+static int scan_slots_for_recompress(struct zram *zram, u32 mode,
+                                     struct zram_pp_ctl *ctl)
+{
+        unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
+        struct zram_pp_slot *pps = NULL;
+        unsigned long index;
+
+        for (index = 0; index < nr_pages; index++) {
+                if (!pps)
+                        pps = kmalloc(sizeof(*pps), GFP_KERNEL);
+                if (!pps)
+                        return -ENOMEM;
+
+                INIT_LIST_HEAD(&pps->entry);
+
+                zram_slot_lock(zram, index);
+                if (!zram_allocated(zram, index))
+                        goto next;
+
+                if (mode & RECOMPRESS_IDLE &&
+                    !zram_test_flag(zram, index, ZRAM_IDLE))
+                        goto next;
+
+                if (mode & RECOMPRESS_HUGE &&
+                    !zram_test_flag(zram, index, ZRAM_HUGE))
+                        goto next;
+
+                if (zram_test_flag(zram, index, ZRAM_WB) ||
+                    zram_test_flag(zram, index, ZRAM_UNDER_WB) ||
+                    zram_test_flag(zram, index, ZRAM_PP_SLOT) ||
+                    zram_test_flag(zram, index, ZRAM_SAME) ||
+                    zram_test_flag(zram, index, ZRAM_INCOMPRESSIBLE))
+                        goto next;
+
+                pps->index = index;
+                place_pp_slot(zram, ctl, pps);
+                pps = NULL;
+next:
+                zram_slot_unlock(zram, index);
+        }
+
+        kfree(pps);
+        return 0;
+}
+
 /*
  * This function will decompress (unless it's ZRAM_HUGE) the page and then
  * attempt to compress it using provided compression algorithm priority
@@ -1655,7 +1765,7 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
  *
  * Corresponding ZRAM slot should be locked.
  */
-static int zram_recompress(struct zram *zram, u32 index, struct page *page,
+static int recompress_slot(struct zram *zram, u32 index, struct page *page,
                            u64 *num_recomp_pages, u32 threshold, u32 prio,
                            u32 prio_max)
 {
@@ -1675,6 +1785,7 @@ static int zram_recompress(struct zram *zram, u32 index, struct page *page,
                 return -EINVAL;
 
         comp_len_old = zram_get_obj_size(zram, index);
+
         /*
          * Do not recompress objects that are already "small enough".
          */
@@ -1798,8 +1909,28 @@ static int zram_recompress(struct zram *zram, u32 index, struct page *page,
         return 0;
 }
 
-#define RECOMPRESS_IDLE (1 << 0)
-#define RECOMPRESS_HUGE (1 << 1)
+static struct zram_pp_slot *select_slot_for_recompress(struct zram_pp_ctl *ctl)
+{
+        struct zram_pp_slot *pps = NULL;
+        s32 idx = NUM_PP_GROUPS - 1;
+
+        /*
+         * Select PP-slots starting from the highest group, which should
+         * give us the best candidate for recompression.
+         */
+        while (idx > 0) {
+                pps = list_first_entry_or_null(&ctl->slots[idx],
+                                               struct zram_pp_slot,
+                                               entry);
+                if (pps) {
+                        list_del_init(&pps->entry);
+                        break;
+                }
+
+                idx--;
+        }
+        return pps;
+}
 
 static ssize_t recompress_store(struct device *dev,
                                 struct device_attribute *attr,
@@ -1807,14 +1938,16 @@ static ssize_t recompress_store(struct device *dev,
 {
         u32 prio = ZRAM_SECONDARY_COMP, prio_max = ZRAM_MAX_COMPS;
         struct zram *zram = dev_to_zram(dev);
-        unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
         char *args, *param, *val, *algo = NULL;
         u64 num_recomp_pages = ULLONG_MAX;
+        struct zram_pp_slot *pps;
+        struct zram_pp_ctl ctl;
         u32 mode = 0, threshold = 0;
-        unsigned long index;
         struct page *page;
         ssize_t ret;
 
+        init_pp_ctl(&ctl);
+
         args = skip_spaces(buf);
         while (*args) {
                 args = next_arg(args, &param, &val);
@@ -1907,36 +2040,26 @@ static ssize_t recompress_store(struct device *dev,
                 goto release_init_lock;
         }
 
+        scan_slots_for_recompress(zram, mode, &ctl);
+
         ret = len;
-        for (index = 0; index < nr_pages; index++) {
+        while ((pps = select_slot_for_recompress(&ctl))) {
                 int err = 0;
 
                 if (!num_recomp_pages)
                         break;
 
-                zram_slot_lock(zram, index);
-
-                if (!zram_allocated(zram, index))
-                        goto next;
-
-                if (mode & RECOMPRESS_IDLE &&
-                    !zram_test_flag(zram, index, ZRAM_IDLE))
-                        goto next;
-
-                if (mode & RECOMPRESS_HUGE &&
-                    !zram_test_flag(zram, index, ZRAM_HUGE))
-                        goto next;
-
-                if (zram_test_flag(zram, index, ZRAM_WB) ||
-                    zram_test_flag(zram, index, ZRAM_UNDER_WB) ||
-                    zram_test_flag(zram, index, ZRAM_SAME) ||
-                    zram_test_flag(zram, index, ZRAM_INCOMPRESSIBLE))
+                zram_slot_lock(zram, pps->index);
+                if (!zram_test_flag(zram, pps->index, ZRAM_PP_SLOT))
                         goto next;
 
-                err = zram_recompress(zram, index, page, &num_recomp_pages,
-                                      threshold, prio, prio_max);
+                err = recompress_slot(zram, pps->index, page,
+                                      &num_recomp_pages, threshold,
+                                      prio, prio_max);
 next:
-                zram_slot_unlock(zram, index);
+                zram_slot_unlock(zram, pps->index);
+                release_pp_slot(zram, pps);
+
                 if (err) {
                         ret = err;
                         break;
@@ -1948,6 +2071,7 @@ static ssize_t recompress_store(struct device *dev,
         __free_page(page);
 
 release_init_lock:
+        release_pp_ctl(zram, &ctl);
         up_read(&zram->init_lock);
         return ret;
 }
-- 
2.46.0.469.g59c65b2a67-goog

From nobody Fri Dec 19 15:19:31 2025
From: Sergey Senozhatsky
To: Minchan Kim
Cc: Andrew Morton, Richard Chang, linux-kernel@vger.kernel.org, Sergey Senozhatsky
Subject: [RFC PATCH 3/3] zram: rework writeback target selection logic
Date: Wed, 4 Sep 2024 22:24:55 +0900
Message-ID: <20240904132508.2000743-4-senozhatsky@chromium.org>
In-Reply-To: <20240904132508.2000743-1-senozhatsky@chromium.org>
References: <20240904132508.2000743-1-senozhatsky@chromium.org>
MIME-Version: 1.0
Content-Type: text/plain; charset="utf-8"

Writeback suffers from the same problem as recompression did before:
target slot selection for writeback is just a simple iteration over
zram->table entries (stored pages), which selects suboptimal targets
for writeback.
This is especially problematic for writeback, because we uncompress
objects before writeback, so each of them takes 4K out of the limited
writeback storage. For example, when we take a 48-byte slot and store
it as a 4K object on the writeback device, we only save 48 bytes of
memory (released from the zsmalloc pool). We naturally want to pick the
largest objects for writeback, because then each writeback releases the
largest amount of memory.

This patch applies the same solution and strategy as for recompression
target selection: a pp control (post-processing) structure with 16
groups of candidate pp slots. Slots are assigned to pp groups based on
their sizes - the larger the slot, the higher the group index. This
gives us size-sorted lists of candidate slots (in linear time), so that
among candidate slots we always select the largest ones first.

TEST
====

A very simple demonstration: zram is configured with a writeback
device. A limited writeback (wb_limit 2500 pages) is then performed,
with a log of the sizes of slots that were written back. You can see
that patched zram selects slots for writeback in a significantly
different manner, which leads to higher memory savings (see column #2
of the mm_stat output).

BASE
----

*** initial state of zram device
/sys/block/zram0/mm_stat
1750327296 619765836 631902208        0 631902208        1        0    34278    34278

*** writeback idle wb_limit 2500
/sys/block/zram0/mm_stat
1750327296 617622333 631578624        0 631902208        1        0    34278    34278

Sizes of selected objects for writeback:
... 193 349 46 46 46 46 852 1002 543 162 107 49 34 34 34 ...

PATCHED
-------

*** initial state of zram device
/sys/block/zram0/mm_stat
1750319104 619760957 631992320        0 631992320        1        0    34278    34278

*** writeback idle wb_limit 2500
/sys/block/zram0/mm_stat
1750319104 612672056 626135040        0 631992320        1        0    34278    34278

Sizes of selected objects for writeback:
... 3680 3614 3694 3667 3553 3537 3342 3362 ...

Signed-off-by: Sergey Senozhatsky
---
 drivers/block/zram/zram_drv.c | 232 ++++++++++++++++++++++------------
 1 file changed, 151 insertions(+), 81 deletions(-)

diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 998efe3979f8..a384939b2501 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -183,6 +183,69 @@ static void zram_accessed(struct zram *zram, u32 index)
 #endif
 }
 
+#if defined CONFIG_ZRAM_WRITEBACK || defined CONFIG_ZRAM_MULTI_COMP
+struct zram_pp_slot {
+        unsigned long index;
+        struct list_head entry;
+};
+
+#define NUM_PP_GROUPS 17
+
+struct zram_pp_ctl {
+        struct list_head slots[NUM_PP_GROUPS];
+};
+
+static void init_pp_ctl(struct zram_pp_ctl *ctl)
+{
+        u32 idx;
+
+        for (idx = 0; idx < NUM_PP_GROUPS; idx++)
+                INIT_LIST_HEAD(&ctl->slots[idx]);
+}
+
+static void release_pp_slot(struct zram *zram, struct zram_pp_slot *pps)
+{
+        zram_slot_lock(zram, pps->index);
+        if (zram_test_flag(zram, pps->index, ZRAM_PP_SLOT))
+                zram_clear_flag(zram, pps->index, ZRAM_PP_SLOT);
+        zram_slot_unlock(zram, pps->index);
+        kfree(pps);
+}
+
+static void release_pp_ctl(struct zram *zram, struct zram_pp_ctl *ctl)
+{
+        u32 idx;
+
+        for (idx = 0; idx < NUM_PP_GROUPS; idx++) {
+                while (!list_empty(&ctl->slots[idx])) {
+                        struct zram_pp_slot *pps;
+
+                        pps = list_first_entry(&ctl->slots[idx],
+                                               struct zram_pp_slot,
+                                               entry);
+                        list_del_init(&pps->entry);
+                        release_pp_slot(zram, pps);
+                }
+        }
+}
+
+static void place_pp_slot(struct zram *zram, struct zram_pp_ctl *ctl,
+                          struct zram_pp_slot *pps)
+{
+        s32 diff, idx;
+
+        /*
+         * On 4K system this keeps PP slot groups 256 bytes apart. The
+         * higher the group IDX the larger the slot size.
+         */
+        diff = PAGE_SIZE / (NUM_PP_GROUPS - 1);
+        idx = zram_get_obj_size(zram, pps->index) / diff;
+        list_add(&pps->entry, &ctl->slots[idx]);
+
+        zram_set_flag(zram, pps->index, ZRAM_PP_SLOT);
+}
+#endif
+
 static inline void update_used_max(struct zram *zram,
                                    const unsigned long pages)
 {
@@ -587,11 +650,82 @@ static void read_from_bdev_async(struct zram *zram, struct page *page,
 #define IDLE_WRITEBACK (1<<1)
 #define INCOMPRESSIBLE_WRITEBACK (1<<2)
 
+static int scan_slots_for_writeback(struct zram *zram, u32 mode,
+                                    unsigned long nr_pages,
+                                    unsigned long index,
+                                    struct zram_pp_ctl *ctl)
+{
+        struct zram_pp_slot *pps = NULL;
+
+        for (; nr_pages != 0; index++, nr_pages--) {
+                if (!pps)
+                        pps = kmalloc(sizeof(*pps), GFP_KERNEL);
+                if (!pps)
+                        return -ENOMEM;
+
+                INIT_LIST_HEAD(&pps->entry);
+
+                zram_slot_lock(zram, index);
+                if (!zram_allocated(zram, index))
+                        goto next;
+
+                if (zram_test_flag(zram, index, ZRAM_WB) ||
+                    zram_test_flag(zram, index, ZRAM_SAME) ||
+                    zram_test_flag(zram, index, ZRAM_UNDER_WB) ||
+                    zram_test_flag(zram, index, ZRAM_PP_SLOT))
+                        goto next;
+
+                if (mode & IDLE_WRITEBACK &&
+                    !zram_test_flag(zram, index, ZRAM_IDLE))
+                        goto next;
+                if (mode & HUGE_WRITEBACK &&
+                    !zram_test_flag(zram, index, ZRAM_HUGE))
+                        goto next;
+                if (mode & INCOMPRESSIBLE_WRITEBACK &&
+                    !zram_test_flag(zram, index, ZRAM_INCOMPRESSIBLE))
+                        goto next;
+
+                pps->index = index;
+                place_pp_slot(zram, ctl, pps);
+                pps = NULL;
+next:
+                zram_slot_unlock(zram, index);
+        }
+
+        kfree(pps);
+        return 0;
+}
+
+static struct zram_pp_slot *select_slot_for_writeback(struct zram_pp_ctl *ctl)
+{
+        struct zram_pp_slot *pps = NULL;
+        s32 idx = NUM_PP_GROUPS - 1;
+
+        /*
+         * Select PP-slots starting from the highest group, which should
+         * give us the best candidate for writeback.
+         */
+        while (idx > 0) {
+                pps = list_first_entry_or_null(&ctl->slots[idx],
+                                               struct zram_pp_slot,
+                                               entry);
+                if (pps) {
+                        list_del_init(&pps->entry);
+                        break;
+                }
+
+                idx--;
+        }
+        return pps;
+}
+
 static ssize_t writeback_store(struct device *dev,
                 struct device_attribute *attr, const char *buf, size_t len)
 {
         struct zram *zram = dev_to_zram(dev);
         unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
+        struct zram_pp_slot *pps;
+        struct zram_pp_ctl ctl;
         unsigned long index = 0;
         struct bio bio;
         struct bio_vec bio_vec;
@@ -600,6 +734,8 @@ static ssize_t writeback_store(struct device *dev,
         int mode, err;
         unsigned long blk_idx = 0;
 
+        init_pp_ctl(&ctl);
+
         if (sysfs_streq(buf, "idle"))
                 mode = IDLE_WRITEBACK;
         else if (sysfs_streq(buf, "huge"))
@@ -637,11 +773,14 @@ static ssize_t writeback_store(struct device *dev,
                 goto release_init_lock;
         }
 
-        for (; nr_pages != 0; index++, nr_pages--) {
+        scan_slots_for_writeback(zram, mode, nr_pages, index, &ctl);
+
+        while ((pps = select_slot_for_writeback(&ctl))) {
                 spin_lock(&zram->wb_limit_lock);
                 if (zram->wb_limit_enable && !zram->bd_wb_limit) {
                         spin_unlock(&zram->wb_limit_lock);
                         ret = -EIO;
+                        release_pp_slot(zram, pps);
                         break;
                 }
                 spin_unlock(&zram->wb_limit_lock);
@@ -650,30 +789,15 @@ static ssize_t writeback_store(struct device *dev,
                         blk_idx = alloc_block_bdev(zram);
                         if (!blk_idx) {
                                 ret = -ENOSPC;
+                                release_pp_slot(zram, pps);
                                 break;
                         }
                 }
 
+                index = pps->index;
                 zram_slot_lock(zram, index);
-                if (!zram_allocated(zram, index))
+                if (!zram_test_flag(zram, index, ZRAM_PP_SLOT))
                         goto next;
-
-                if (zram_test_flag(zram, index, ZRAM_WB) ||
-                    zram_test_flag(zram, index, ZRAM_SAME) ||
-                    zram_test_flag(zram, index, ZRAM_UNDER_WB) ||
-                    zram_test_flag(zram, index, ZRAM_PP_SLOT))
-                        goto next;
-
-                if (mode & IDLE_WRITEBACK &&
-                    !zram_test_flag(zram, index, ZRAM_IDLE))
-                        goto next;
-                if (mode & HUGE_WRITEBACK &&
-                    !zram_test_flag(zram, index, ZRAM_HUGE))
-                        goto next;
-                if (mode & INCOMPRESSIBLE_WRITEBACK &&
-                    !zram_test_flag(zram, index, ZRAM_INCOMPRESSIBLE))
-                        goto next;
-
                 /*
                  * Clearing ZRAM_UNDER_WB is duty of caller.
                  * IOW, zram_free_page never clear it.
@@ -682,11 +806,14 @@ static ssize_t writeback_store(struct device *dev,
                 /* Need for hugepage writeback racing */
                 zram_set_flag(zram, index, ZRAM_IDLE);
                 zram_slot_unlock(zram, index);
+
                 if (zram_read_page(zram, page, index, NULL)) {
                         zram_slot_lock(zram, index);
                         zram_clear_flag(zram, index, ZRAM_UNDER_WB);
                         zram_clear_flag(zram, index, ZRAM_IDLE);
                         zram_slot_unlock(zram, index);
+
+                        release_pp_slot(zram, pps);
                         continue;
                 }
 
@@ -705,6 +832,8 @@ static ssize_t writeback_store(struct device *dev,
                         zram_clear_flag(zram, index, ZRAM_UNDER_WB);
                         zram_clear_flag(zram, index, ZRAM_IDLE);
                         zram_slot_unlock(zram, index);
+
+                        release_pp_slot(zram, pps);
                         /*
                          * BIO errors are not fatal, we continue and simply
                          * attempt to writeback the remaining objects (pages).
@@ -729,7 +858,7 @@ static ssize_t writeback_store(struct device *dev,
                  */
                 zram_slot_lock(zram, index);
                 if (!zram_allocated(zram, index) ||
-                   !zram_test_flag(zram, index, ZRAM_IDLE)) {
+                    !zram_test_flag(zram, index, ZRAM_IDLE)) {
                         zram_clear_flag(zram, index, ZRAM_UNDER_WB);
                         zram_clear_flag(zram, index, ZRAM_IDLE);
                         goto next;
@@ -747,12 +876,14 @@ static ssize_t writeback_store(struct device *dev,
                 spin_unlock(&zram->wb_limit_lock);
 next:
                 zram_slot_unlock(zram, index);
+                release_pp_slot(zram, pps);
         }
 
         if (blk_idx)
                 free_block_bdev(zram, blk_idx);
         __free_page(page);
 release_init_lock:
+        release_pp_ctl(zram, &ctl);
         up_read(&zram->init_lock);
 
         return ret;
@@ -1649,67 +1780,6 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
 }
 
 #ifdef CONFIG_ZRAM_MULTI_COMP
-struct zram_pp_slot {
-        unsigned long index;
-        struct list_head entry;
-};
-
-#define NUM_PP_GROUPS 17
-
-struct zram_pp_ctl {
-        struct list_head slots[NUM_PP_GROUPS];
-};
-
-static void init_pp_ctl(struct zram_pp_ctl *ctl)
-{
-        u32 idx;
-
-        for (idx = 0; idx < NUM_PP_GROUPS; idx++)
-                INIT_LIST_HEAD(&ctl->slots[idx]);
-}
-
-static void release_pp_slot(struct zram *zram, struct zram_pp_slot *pps)
-{
-        zram_slot_lock(zram, pps->index);
-        if (zram_test_flag(zram, pps->index, ZRAM_PP_SLOT))
-                zram_clear_flag(zram, pps->index, ZRAM_PP_SLOT);
-        zram_slot_unlock(zram, pps->index);
-        kfree(pps);
-}
-
-static void release_pp_ctl(struct zram *zram, struct zram_pp_ctl *ctl)
-{
-        u32 idx;
-
-        for (idx = 0; idx < NUM_PP_GROUPS; idx++) {
-                while (!list_empty(&ctl->slots[idx])) {
-                        struct zram_pp_slot *pps;
-
-                        pps = list_first_entry(&ctl->slots[idx],
-                                               struct zram_pp_slot,
-                                               entry);
-                        list_del_init(&pps->entry);
-                        release_pp_slot(zram, pps);
-                }
-        }
-}
-
-static void place_pp_slot(struct zram *zram, struct zram_pp_ctl *ctl,
-                          struct zram_pp_slot *pps)
-{
-        s32 diff, idx;
-
-        /*
-         * On 4K system this keeps PP slot groups 256 bytes apart. The
-         * higher the group IDX the larger the slot size.
-         */
-        diff = PAGE_SIZE / (NUM_PP_GROUPS - 1);
-        idx = zram_get_obj_size(zram, pps->index) / diff;
-        list_add(&pps->entry, &ctl->slots[idx]);
-
-        zram_set_flag(zram, pps->index, ZRAM_PP_SLOT);
-}
-
 #define RECOMPRESS_IDLE (1 << 0)
 #define RECOMPRESS_HUGE (1 << 1)
 
-- 
2.46.0.469.g59c65b2a67-goog
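For reference, a small stand-alone sketch of the arithmetic behind the
writeback ordering (an editorial addition, not part of the series; a 4K
page size is assumed, and the sizes come from the logs in the commit
messages). It prints the pp group a slot of a given size lands in and
how much zsmalloc memory its writeback frees per backing-device page
consumed:

#include <stdio.h>

#define SKETCH_PAGE_SIZE 4096
#define NUM_GROUPS       17   /* mirrors NUM_PP_GROUPS in the patches */

int main(void)
{
        /* Slot sizes taken from the writeback logs above. */
        unsigned int sizes[] = { 34, 48, 193, 852, 3680 };
        unsigned int span = SKETCH_PAGE_SIZE / (NUM_GROUPS - 1); /* 256 */

        for (size_t i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++) {
                unsigned int size = sizes[i];

                /* Each written-back slot costs one full backing-device page. */
                printf("slot %4u bytes -> pp group %2u, frees %4u of %u bytes written\n",
                       size, size / span, size, SKETCH_PAGE_SIZE);
        }
        return 0;
}

With a fixed 4K cost per written-back slot, selecting from the highest
groups first maximizes the memory released per page of writeback
storage, which is what the PATCHED mm_stat numbers show.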