From nobody Fri Dec 19 08:08:54 2025 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 8901EC4167B for ; Wed, 6 Dec 2023 09:11:24 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1346790AbjLFJLP (ORCPT ); Wed, 6 Dec 2023 04:11:15 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:34742 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1346664AbjLFJLJ (ORCPT ); Wed, 6 Dec 2023 04:11:09 -0500 Received: from out30-101.freemail.mail.aliyun.com (out30-101.freemail.mail.aliyun.com [115.124.30.101]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id BF47BD68 for ; Wed, 6 Dec 2023 01:11:14 -0800 (PST) X-Alimail-AntiSpam: AC=PASS;BC=-1|-1;BR=01201311R831e4;CH=green;DM=||false|;DS=||;FP=0|-1|-1|-1|0|-1|-1|-1;HT=ay29a033018046049;MF=hsiangkao@linux.alibaba.com;NM=1;PH=DS;RN=4;SR=0;TI=SMTPD_---0VxxSRPy_1701853871; Received: from e69b19392.et15sqa.tbsite.net(mailfrom:hsiangkao@linux.alibaba.com fp:SMTPD_---0VxxSRPy_1701853871) by smtp.aliyun-inc.com; Wed, 06 Dec 2023 17:11:12 +0800 From: Gao Xiang To: linux-erofs@lists.ozlabs.org Cc: LKML , dhavale@google.com, Gao Xiang Subject: [PATCH 1/5] erofs: support I/O submission for sub-page compressed blocks Date: Wed, 6 Dec 2023 17:10:53 +0800 Message-Id: <20231206091057.87027-2-hsiangkao@linux.alibaba.com> X-Mailer: git-send-email 2.39.3 In-Reply-To: <20231206091057.87027-1-hsiangkao@linux.alibaba.com> References: <20231206091057.87027-1-hsiangkao@linux.alibaba.com> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Type: text/plain; charset="utf-8" Add a basic I/O submission path first to support sub-page blocks: - Temporary short-lived pages will be used entirely; - 
In-place I/O pages can be used partially, but compressed pages need to be mappable in contiguous virtual memory. As a start, cache decompression is explicitly disabled for sub-page blocks; it will be supported in the future. Signed-off-by: Gao Xiang --- fs/erofs/zdata.c | 156 ++++++++++++++++++++++------------------------- 1 file changed, 74 insertions(+), 82 deletions(-) diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c index a33cd6757f98..421c0a88a0ca 100644 --- a/fs/erofs/zdata.c +++ b/fs/erofs/zdata.c @@ -1435,86 +1435,85 @@ static void z_erofs_decompress_kickoff(struct z_ero= fs_decompressqueue *io, z_erofs_decompressqueue_work(&io->u.work); } =20 -static struct page *pickup_page_for_submission(struct z_erofs_pcluster *pc= l, - unsigned int nr, - struct page **pagepool, - struct address_space *mc) +static void z_erofs_fill_bio_vec(struct bio_vec *bvec, + struct z_erofs_decompress_frontend *f, + struct z_erofs_pcluster *pcl, + unsigned int nr, + struct address_space *mc) { - const pgoff_t index =3D pcl->obj.index; gfp_t gfp =3D mapping_gfp_mask(mc); bool tocache =3D false; - + struct z_erofs_bvec *zbv =3D pcl->compressed_bvecs + nr; struct address_space *mapping; - struct page *oldpage, *page; - int justfound; + struct page *page, *oldpage; + int justfound, bs =3D i_blocksize(f->inode); =20 + /* Except for inplace pages, the entire page can be used for I/Os */ + bvec->bv_offset =3D 0; + bvec->bv_len =3D PAGE_SIZE; repeat: - page =3D READ_ONCE(pcl->compressed_bvecs[nr].page); - oldpage =3D page; - - if (!page) + oldpage =3D READ_ONCE(zbv->page); + if (!oldpage) goto out_allocpage; =20 - justfound =3D (unsigned long)page & 1UL; - page =3D (struct page *)((unsigned long)page & ~1UL); + justfound =3D (unsigned long)oldpage & 1UL; + page =3D (struct page *)((unsigned long)oldpage & ~1UL); + bvec->bv_page =3D page; =20 + DBG_BUGON(z_erofs_is_shortlived_page(page)); /* - * preallocated cached pages, which is used to avoid direct reclaim - * 
otherwise, it will go inplace I/O path instead. + * Handle preallocated cached pages. We tried to allocate such pages + * without triggering direct reclaim. If allocation failed, inplace + * file-backed pages will be used instead. */ if (page->private =3D=3D Z_EROFS_PREALLOCATED_PAGE) { - WRITE_ONCE(pcl->compressed_bvecs[nr].page, page); set_page_private(page, 0); + WRITE_ONCE(zbv->page, page); tocache =3D true; goto out_tocache; } - mapping =3D READ_ONCE(page->mapping); =20 + mapping =3D READ_ONCE(page->mapping); /* - * file-backed online pages in plcuster are all locked steady, - * therefore it is impossible for `mapping' to be NULL. + * File-backed pages for inplace I/Os are all locked steady, + * therefore it is impossible for `mapping` to be NULL. */ - if (mapping && mapping !=3D mc) - /* ought to be unmanaged pages */ - goto out; - - /* directly return for shortlived page as well */ - if (z_erofs_is_shortlived_page(page)) - goto out; + if (mapping && mapping !=3D mc) { + if (zbv->offset < 0) + bvec->bv_offset =3D round_up(-zbv->offset, bs); + bvec->bv_len =3D round_up(zbv->end, bs) - bvec->bv_offset; + return; + } =20 lock_page(page); - /* only true if page reclaim goes wrong, should never happen */ DBG_BUGON(justfound && PagePrivate(page)); =20 - /* the page is still in manage cache */ + /* the cached page is still in managed cache */ if (page->mapping =3D=3D mc) { - WRITE_ONCE(pcl->compressed_bvecs[nr].page, page); - + WRITE_ONCE(zbv->page, page); + /* + * The cached page is still available but without a valid + * `->private` pcluster hint. Let's reconnect them. + */ if (!PagePrivate(page)) { - /* - * impossible to be !PagePrivate(page) for - * the current restriction as well if - * the page is already in compressed_bvecs[]. 
- */ DBG_BUGON(!justfound); - - justfound =3D 0; - set_page_private(page, (unsigned long)pcl); - SetPagePrivate(page); + /* compressed_bvecs[] already takes a ref */ + attach_page_private(page, pcl); + put_page(page); } =20 - /* no need to submit io if it is already up-to-date */ + /* no need to submit if it is already up-to-date */ if (PageUptodate(page)) { unlock_page(page); - page =3D NULL; + bvec->bv_page =3D NULL; } - goto out; + return; } =20 /* - * the managed page has been truncated, it's unsafe to - * reuse this one, let's allocate a new cache-managed page. + * It has been truncated, so it's unsafe to reuse this one. Let's + * allocate a new page for compressed data. */ DBG_BUGON(page->mapping); DBG_BUGON(!justfound); @@ -1523,25 +1522,23 @@ static struct page *pickup_page_for_submission(stru= ct z_erofs_pcluster *pcl, unlock_page(page); put_page(page); out_allocpage: - page =3D erofs_allocpage(pagepool, gfp | __GFP_NOFAIL); - if (oldpage !=3D cmpxchg(&pcl->compressed_bvecs[nr].page, - oldpage, page)) { - erofs_pagepool_add(pagepool, page); + page =3D erofs_allocpage(&f->pagepool, gfp | __GFP_NOFAIL); + if (oldpage !=3D cmpxchg(&zbv->page, oldpage, page)) { + erofs_pagepool_add(&f->pagepool, page); cond_resched(); goto repeat; } + bvec->bv_page =3D page; out_tocache: - if (!tocache || add_to_page_cache_lru(page, mc, index + nr, gfp)) { - /* turn into temporary page if fails (1 ref) */ + if (!tocache || bs !=3D PAGE_SIZE || + add_to_page_cache_lru(page, mc, pcl->obj.index + nr, gfp)) { + /* turn into a temporary shortlived page (1 ref) */ set_page_private(page, Z_EROFS_SHORTLIVED_PAGE); - goto out; + return; } attach_page_private(page, pcl); - /* drop a refcount added by allocpage (then we have 2 refs here) */ + /* drop a refcount added by allocpage (then 2 refs in total here) */ put_page(page); - -out: /* the only exit (for tracing and debugging) */ - return page; } =20 static struct z_erofs_decompressqueue *jobqueue_init(struct super_block *s= b, @@ 
-1596,7 +1593,7 @@ static void move_to_bypass_jobqueue(struct z_erofs_pc= luster *pcl, qtail[JQ_BYPASS] =3D &pcl->next; } =20 -static void z_erofs_decompressqueue_endio(struct bio *bio) +static void z_erofs_submissionqueue_endio(struct bio *bio) { struct z_erofs_decompressqueue *q =3D bio->bi_private; blk_status_t err =3D bio->bi_status; @@ -1608,7 +1605,6 @@ static void z_erofs_decompressqueue_endio(struct bio = *bio) =20 DBG_BUGON(PageUptodate(page)); DBG_BUGON(z_erofs_page_is_invalidated(page)); - if (erofs_page_is_managed(EROFS_SB(q->sb), page)) { if (!err) SetPageUptodate(page); @@ -1631,17 +1627,14 @@ static void z_erofs_submit_queue(struct z_erofs_dec= ompress_frontend *f, struct z_erofs_decompressqueue *q[NR_JOBQUEUES]; z_erofs_next_pcluster_t owned_head =3D f->owned_head; /* bio is NULL initially, so no need to initialize last_{index,bdev} */ - pgoff_t last_index; + erofs_off_t last_pa; struct block_device *last_bdev; unsigned int nr_bios =3D 0; struct bio *bio =3D NULL; unsigned long pflags; int memstall =3D 0; =20 - /* - * if managed cache is enabled, bypass jobqueue is needed, - * no need to read from device for all pclusters in this queue. - */ + /* No need to read from device for pclusters in the bypass queue. 
*/ q[JQ_BYPASS] =3D jobqueue_init(sb, fgq + JQ_BYPASS, NULL); q[JQ_SUBMIT] =3D jobqueue_init(sb, fgq + JQ_SUBMIT, force_fg); =20 @@ -1654,7 +1647,8 @@ static void z_erofs_submit_queue(struct z_erofs_decom= press_frontend *f, do { struct erofs_map_dev mdev; struct z_erofs_pcluster *pcl; - pgoff_t cur, end; + erofs_off_t cur, end; + struct bio_vec bvec; unsigned int i =3D 0; bool bypass =3D true; =20 @@ -1673,18 +1667,14 @@ static void z_erofs_submit_queue(struct z_erofs_dec= ompress_frontend *f, }; (void)erofs_map_dev(sb, &mdev); =20 - cur =3D erofs_blknr(sb, mdev.m_pa); - end =3D cur + pcl->pclusterpages; - + cur =3D mdev.m_pa; + end =3D cur + (pcl->pclusterpages << PAGE_SHIFT); do { - struct page *page; - - page =3D pickup_page_for_submission(pcl, i++, - &f->pagepool, mc); - if (!page) + z_erofs_fill_bio_vec(&bvec, f, pcl, i++, mc); + if (!bvec.bv_page) continue; =20 - if (bio && (cur !=3D last_index + 1 || + if (bio && (cur !=3D last_pa || last_bdev !=3D mdev.m_bdev)) { submit_bio_retry: submit_bio(bio); @@ -1695,7 +1685,8 @@ static void z_erofs_submit_queue(struct z_erofs_decom= press_frontend *f, bio =3D NULL; } =20 - if (unlikely(PageWorkingset(page)) && !memstall) { + if (unlikely(PageWorkingset(bvec.bv_page)) && + !memstall) { psi_memstall_enter(&pflags); memstall =3D 1; } @@ -1703,23 +1694,24 @@ static void z_erofs_submit_queue(struct z_erofs_dec= ompress_frontend *f, if (!bio) { bio =3D bio_alloc(mdev.m_bdev, BIO_MAX_VECS, REQ_OP_READ, GFP_NOIO); - bio->bi_end_io =3D z_erofs_decompressqueue_endio; - - last_bdev =3D mdev.m_bdev; - bio->bi_iter.bi_sector =3D (sector_t)cur << - (sb->s_blocksize_bits - 9); + bio->bi_end_io =3D z_erofs_submissionqueue_endio; + bio->bi_iter.bi_sector =3D cur >> 9; bio->bi_private =3D q[JQ_SUBMIT]; if (readahead) bio->bi_opf |=3D REQ_RAHEAD; ++nr_bios; + last_bdev =3D mdev.m_bdev; } =20 - if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) + if (cur + bvec.bv_len > end) + bvec.bv_len =3D end - cur; + if (!bio_add_page(bio, 
bvec.bv_page, bvec.bv_len, + bvec.bv_offset)) goto submit_bio_retry; =20 - last_index =3D cur; + last_pa =3D cur + bvec.bv_len; bypass =3D false; - } while (++cur < end); + } while ((cur +=3D bvec.bv_len) < end); =20 if (!bypass) qtail[JQ_SUBMIT] =3D &pcl->next; --=20 2.39.3