From nobody Fri Jun 19 08:29:54 2026 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 2BAE6C433F5 for ; Wed, 6 Apr 2022 12:12:13 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S230360AbiDFMOK (ORCPT ); Wed, 6 Apr 2022 08:14:10 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:44316 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S231659AbiDFMNw (ORCPT ); Wed, 6 Apr 2022 08:13:52 -0400 Received: from out30-57.freemail.mail.aliyun.com (out30-57.freemail.mail.aliyun.com [115.124.30.57]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id E3348424F05; Wed, 6 Apr 2022 00:56:19 -0700 (PDT) X-Alimail-AntiSpam: AC=PASS;BC=-1|-1;BR=01201311R171e4;CH=green;DM=||false|;DS=||;FP=0|-1|-1|-1|0|-1|-1|-1;HT=e01e04400;MF=jefflexu@linux.alibaba.com;NM=1;PH=DS;RN=18;SR=0;TI=SMTPD_---0V9Kyhod_1649231774; Received: from localhost(mailfrom:jefflexu@linux.alibaba.com fp:SMTPD_---0V9Kyhod_1649231774) by smtp.aliyun-inc.com(127.0.0.1); Wed, 06 Apr 2022 15:56:15 +0800 From: Jeffle Xu To: dhowells@redhat.com, linux-cachefs@redhat.com, xiang@kernel.org, chao@kernel.org, linux-erofs@lists.ozlabs.org Cc: torvalds@linux-foundation.org, gregkh@linuxfoundation.org, willy@infradead.org, linux-fsdevel@vger.kernel.org, joseph.qi@linux.alibaba.com, bo.liu@linux.alibaba.com, tao.peng@linux.alibaba.com, gerry@linux.alibaba.com, eguan@linux.alibaba.com, linux-kernel@vger.kernel.org, luodaowen.backend@bytedance.com, tianzichen@kuaishou.com, fannaihao@baidu.com Subject: [PATCH v8 01/20] cachefiles: unmark inode in use in error path Date: Wed, 6 Apr 2022 15:55:53 +0800 Message-Id: <20220406075612.60298-2-jefflexu@linux.alibaba.com> X-Mailer: git-send-email 2.27.0 In-Reply-To: <20220406075612.60298-1-jefflexu@linux.alibaba.com> 
References: <20220406075612.60298-1-jefflexu@linux.alibaba.com> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Type: text/plain; charset="utf-8" Unmark inode in use if error encountered. If the in-use flag leakage occurs in cachefiles_open_file(), Cachefiles will complain "Inode already in use" when later another cookie with the same index key is looked up. If the in-use flag leakage occurs in cachefiles_create_tmpfile(), though the "Inode already in use" warning won't be triggered, fix the leakage anyway. Reported-by: Gao Xiang Fixes: 1f08c925e7a3 ("cachefiles: Implement backing file wrangling") Signed-off-by: Jeffle Xu Tested-by: Zichen Tian --- fs/cachefiles/namei.c | 33 ++++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index f256c8aff7bb..fe1bab0f36d4 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c @@ -57,6 +57,16 @@ static void __cachefiles_unmark_inode_in_use(struct cach= efiles_object *object, trace_cachefiles_mark_inactive(object, inode); } =20 +static void cachefiles_do_unmark_inode_in_use(struct cachefiles_object *ob= ject, + struct dentry *dentry) +{ + struct inode *inode =3D d_backing_inode(dentry); + + inode_lock(inode); + __cachefiles_unmark_inode_in_use(object, dentry); + inode_unlock(inode); +} + /* * Unmark a backing inode and tell cachefilesd that there's something that= can * be culled. 
@@ -68,9 +78,7 @@ void cachefiles_unmark_inode_in_use(struct cachefiles_obj= ect *object, struct inode *inode =3D file_inode(file); =20 if (inode) { - inode_lock(inode); - __cachefiles_unmark_inode_in_use(object, file->f_path.dentry); - inode_unlock(inode); + cachefiles_do_unmark_inode_in_use(object, file->f_path.dentry); =20 if (!test_bit(CACHEFILES_OBJECT_USING_TMPFILE, &object->flags)) { atomic_long_add(inode->i_blocks, &cache->b_released); @@ -484,7 +492,7 @@ struct file *cachefiles_create_tmpfile(struct cachefile= s_object *object) object, d_backing_inode(path.dentry), ret, cachefiles_trace_trunc_error); file =3D ERR_PTR(ret); - goto out_dput; + goto out_unuse; } } =20 @@ -494,15 +502,20 @@ struct file *cachefiles_create_tmpfile(struct cachefi= les_object *object) trace_cachefiles_vfs_error(object, d_backing_inode(path.dentry), PTR_ERR(file), cachefiles_trace_open_error); - goto out_dput; + goto out_unuse; } if (unlikely(!file->f_op->read_iter) || unlikely(!file->f_op->write_iter)) { fput(file); pr_notice("Cache does not support read_iter and write_iter\n"); file =3D ERR_PTR(-EINVAL); + goto out_unuse; } =20 + goto out_dput; + +out_unuse: + cachefiles_do_unmark_inode_in_use(object, path.dentry); out_dput: dput(path.dentry); out: @@ -590,14 +603,16 @@ static bool cachefiles_open_file(struct cachefiles_ob= ject *object, check_failed: fscache_cookie_lookup_negative(object->cookie); cachefiles_unmark_inode_in_use(object, file); - if (ret =3D=3D -ESTALE) { - fput(file); - dput(dentry); + fput(file); + dput(dentry); + if (ret =3D=3D -ESTALE) return cachefiles_create_file(object); - } + return false; + error_fput: fput(file); error: + cachefiles_do_unmark_inode_in_use(object, dentry); dput(dentry); return false; } --=20 2.27.0 From nobody Fri Jun 19 08:29:54 2026 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org 
(Postfix) with ESMTP id 85210C433F5 for ; Wed, 6 Apr 2022 12:12:19 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S231607AbiDFMOR (ORCPT ); Wed, 6 Apr 2022 08:14:17 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:39798 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S229512AbiDFMNy (ORCPT ); Wed, 6 Apr 2022 08:13:54 -0400 Received: from out30-131.freemail.mail.aliyun.com (out30-131.freemail.mail.aliyun.com [115.124.30.131]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 1C56E42540C; Wed, 6 Apr 2022 00:56:20 -0700 (PDT) X-Alimail-AntiSpam: AC=PASS;BC=-1|-1;BR=01201311R781e4;CH=green;DM=||false|;DS=||;FP=0|-1|-1|-1|0|-1|-1|-1;HT=e01e04423;MF=jefflexu@linux.alibaba.com;NM=1;PH=DS;RN=18;SR=0;TI=SMTPD_---0V9L1BIW_1649231775; Received: from localhost(mailfrom:jefflexu@linux.alibaba.com fp:SMTPD_---0V9L1BIW_1649231775) by smtp.aliyun-inc.com(127.0.0.1); Wed, 06 Apr 2022 15:56:16 +0800 From: Jeffle Xu To: dhowells@redhat.com, linux-cachefs@redhat.com, xiang@kernel.org, chao@kernel.org, linux-erofs@lists.ozlabs.org Cc: torvalds@linux-foundation.org, gregkh@linuxfoundation.org, willy@infradead.org, linux-fsdevel@vger.kernel.org, joseph.qi@linux.alibaba.com, bo.liu@linux.alibaba.com, tao.peng@linux.alibaba.com, gerry@linux.alibaba.com, eguan@linux.alibaba.com, linux-kernel@vger.kernel.org, luodaowen.backend@bytedance.com, tianzichen@kuaishou.com, fannaihao@baidu.com Subject: [PATCH v8 02/20] cachefiles: extract write routine Date: Wed, 6 Apr 2022 15:55:54 +0800 Message-Id: <20220406075612.60298-3-jefflexu@linux.alibaba.com> X-Mailer: git-send-email 2.27.0 In-Reply-To: <20220406075612.60298-1-jefflexu@linux.alibaba.com> References: <20220406075612.60298-1-jefflexu@linux.alibaba.com> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Type: text/plain; charset="utf-8" Extract the generic 
routine of writing data to cache files, and make it generally available. This will be used by the following patch implementing on-demand read mode. Since it's called inside cachefiles module in this case, make the interface generic and unrelated to netfs_cache_resources. It is worth noting that ki->inval_counter is not initialized after this cleanup. It shall not make any visible difference, since inval_counter is no longer used in the write completion routine, i.e. cachefiles_write_complete(). Signed-off-by: Jeffle Xu Tested-by: Zichen Tian --- fs/cachefiles/internal.h | 10 +++++++ fs/cachefiles/io.c | 61 +++++++++++++++++++++++----------------- 2 files changed, 45 insertions(+), 26 deletions(-) diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h index c793d33b0224..e80673d0ab97 100644 --- a/fs/cachefiles/internal.h +++ b/fs/cachefiles/internal.h @@ -201,6 +201,16 @@ extern void cachefiles_put_object(struct cachefiles_ob= ject *object, */ extern bool cachefiles_begin_operation(struct netfs_cache_resources *cres, enum fscache_want_state want_state); +extern int __cachefiles_prepare_write(struct cachefiles_object *object, + struct file *file, + loff_t *_start, size_t *_len, + bool no_space_allocated_yet); +extern int __cachefiles_write(struct cachefiles_object *object, + struct file *file, + loff_t start_pos, + struct iov_iter *iter, + netfs_io_terminated_t term_func, + void *term_func_priv); =20 /* * key.c diff --git a/fs/cachefiles/io.c b/fs/cachefiles/io.c index 9dc81e781f2b..50a14e8f0aac 100644 --- a/fs/cachefiles/io.c +++ b/fs/cachefiles/io.c @@ -277,36 +277,33 @@ static void cachefiles_write_complete(struct kiocb *i= ocb, long ret) /* * Initiate a write to the cache. 
*/ -static int cachefiles_write(struct netfs_cache_resources *cres, - loff_t start_pos, - struct iov_iter *iter, - netfs_io_terminated_t term_func, - void *term_func_priv) +int __cachefiles_write(struct cachefiles_object *object, + struct file *file, + loff_t start_pos, + struct iov_iter *iter, + netfs_io_terminated_t term_func, + void *term_func_priv) { - struct cachefiles_object *object; struct cachefiles_cache *cache; struct cachefiles_kiocb *ki; struct inode *inode; - struct file *file; unsigned int old_nofs; - ssize_t ret =3D -ENOBUFS; + ssize_t ret; size_t len =3D iov_iter_count(iter); =20 - if (!fscache_wait_for_operation(cres, FSCACHE_WANT_WRITE)) - goto presubmission_error; fscache_count_write(); - object =3D cachefiles_cres_object(cres); cache =3D object->volume->cache; - file =3D cachefiles_cres_file(cres); =20 _enter("%pD,%li,%llx,%zx/%llx", file, file_inode(file)->i_ino, start_pos, len, i_size_read(file_inode(file))); =20 - ret =3D -ENOMEM; ki =3D kzalloc(sizeof(struct cachefiles_kiocb), GFP_KERNEL); - if (!ki) - goto presubmission_error; + if (!ki) { + if (term_func) + term_func(term_func_priv, -ENOMEM, false); + return -ENOMEM; + } =20 refcount_set(&ki->ki_refcnt, 2); ki->iocb.ki_filp =3D file; @@ -314,7 +311,6 @@ static int cachefiles_write(struct netfs_cache_resource= s *cres, ki->iocb.ki_flags =3D IOCB_DIRECT | IOCB_WRITE; ki->iocb.ki_ioprio =3D get_current_ioprio(); ki->object =3D object; - ki->inval_counter =3D cres->inval_counter; ki->start =3D start_pos; ki->len =3D len; ki->term_func =3D term_func; @@ -369,11 +365,24 @@ static int cachefiles_write(struct netfs_cache_resour= ces *cres, cachefiles_put_kiocb(ki); _leave(" =3D %zd", ret); return ret; +} =20 -presubmission_error: - if (term_func) - term_func(term_func_priv, ret, false); - return ret; +static int cachefiles_write(struct netfs_cache_resources *cres, + loff_t start_pos, + struct iov_iter *iter, + netfs_io_terminated_t term_func, + void *term_func_priv) +{ + if 
(!fscache_wait_for_operation(cres, FSCACHE_WANT_WRITE)) { + if (term_func) + term_func(term_func_priv, -ENOBUFS, false); + return -ENOBUFS; + } + + return __cachefiles_write(cachefiles_cres_object(cres), + cachefiles_cres_file(cres), + start_pos, iter, + term_func, term_func_priv); } =20 /* @@ -484,13 +493,12 @@ static enum netfs_io_source cachefiles_prepare_read(s= truct netfs_io_subrequest * /* * Prepare for a write to occur. */ -static int __cachefiles_prepare_write(struct netfs_cache_resources *cres, - loff_t *_start, size_t *_len, loff_t i_size, - bool no_space_allocated_yet) +int __cachefiles_prepare_write(struct cachefiles_object *object, + struct file *file, + loff_t *_start, size_t *_len, + bool no_space_allocated_yet) { - struct cachefiles_object *object =3D cachefiles_cres_object(cres); struct cachefiles_cache *cache =3D object->volume->cache; - struct file *file =3D cachefiles_cres_file(cres); loff_t start =3D *_start, pos; size_t len =3D *_len, down; int ret; @@ -577,7 +585,8 @@ static int cachefiles_prepare_write(struct netfs_cache_= resources *cres, } =20 cachefiles_begin_secure(cache, &saved_cred); - ret =3D __cachefiles_prepare_write(cres, _start, _len, i_size, + ret =3D __cachefiles_prepare_write(object, cachefiles_cres_file(cres), + _start, _len, no_space_allocated_yet); cachefiles_end_secure(cache, saved_cred); return ret; --=20 2.27.0 From nobody Fri Jun 19 08:29:54 2026 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 022E3C433FE for ; Wed, 6 Apr 2022 12:12:55 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S231912AbiDFMOy (ORCPT ); Wed, 6 Apr 2022 08:14:54 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:38162 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id 
S230082AbiDFMOB (ORCPT ); Wed, 6 Apr 2022 08:14:01 -0400 Received: from out30-133.freemail.mail.aliyun.com (out30-133.freemail.mail.aliyun.com [115.124.30.133]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id E115A35A87; Wed, 6 Apr 2022 00:56:23 -0700 (PDT) X-Alimail-AntiSpam: AC=PASS;BC=-1|-1;BR=01201311R651e4;CH=green;DM=||false|;DS=||;FP=0|-1|-1|-1|0|-1|-1|-1;HT=e01e04400;MF=jefflexu@linux.alibaba.com;NM=1;PH=DS;RN=18;SR=0;TI=SMTPD_---0V9LC82N_1649231777; Received: from localhost(mailfrom:jefflexu@linux.alibaba.com fp:SMTPD_---0V9LC82N_1649231777) by smtp.aliyun-inc.com(127.0.0.1); Wed, 06 Apr 2022 15:56:18 +0800 From: Jeffle Xu To: dhowells@redhat.com, linux-cachefs@redhat.com, xiang@kernel.org, chao@kernel.org, linux-erofs@lists.ozlabs.org Cc: torvalds@linux-foundation.org, gregkh@linuxfoundation.org, willy@infradead.org, linux-fsdevel@vger.kernel.org, joseph.qi@linux.alibaba.com, bo.liu@linux.alibaba.com, tao.peng@linux.alibaba.com, gerry@linux.alibaba.com, eguan@linux.alibaba.com, linux-kernel@vger.kernel.org, luodaowen.backend@bytedance.com, tianzichen@kuaishou.com, fannaihao@baidu.com Subject: [PATCH v8 03/20] cachefiles: notify user daemon with anon_fd when looking up cookie Date: Wed, 6 Apr 2022 15:55:55 +0800 Message-Id: <20220406075612.60298-4-jefflexu@linux.alibaba.com> X-Mailer: git-send-email 2.27.0 In-Reply-To: <20220406075612.60298-1-jefflexu@linux.alibaba.com> References: <20220406075612.60298-1-jefflexu@linux.alibaba.com> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Type: text/plain; charset="utf-8" Fscache/cachefiles used to serve as a local cache for remote fs. This patch, along with the following patches, introduces a new on-demand read mode for cachefiles, which can boost the scenario where on-demand read semantics is needed, e.g. container image distribution. 
The essential difference between the original mode and on-demand read mode is that, in the original mode, when a cache miss occurs, netfs itself will fetch data from remote, and then write the fetched data into the cache file, whereas in on-demand read mode, a user daemon is responsible for fetching data and then writing it to the cache file. As the first step, notify user daemon with anon_fd when looking up cookie. Send the anonymous fd to user daemon when looking up cookie, no matter whether the cache file exists there or not. With the given anonymous fd, user daemon can fetch and then write data into cache file in advance, even when a cache miss has not happened yet. Also add one advisory flag (FSCACHE_ADV_WANT_CACHE_SIZE) suggesting that cache file size shall be retrieved at runtime. This helps, e.g., the scenario where one cache file can contain multiple netfs files for the purpose of deduplication. In this case, netfs itself has no idea of the cache file size, so the user daemon needs to offer a hint on the cache file size. Signed-off-by: Jeffle Xu Tested-by: Zichen Tian --- fs/cachefiles/Kconfig | 11 + fs/cachefiles/Makefile | 1 + fs/cachefiles/daemon.c | 77 +++++-- fs/cachefiles/internal.h | 43 ++++ fs/cachefiles/namei.c | 16 +- fs/cachefiles/ondemand.c | 360 ++++++++++++++++++++++++++++++ include/linux/fscache.h | 1 + include/trace/events/cachefiles.h | 2 + include/uapi/linux/cachefiles.h | 49 ++++ 9 files changed, 545 insertions(+), 15 deletions(-) create mode 100644 fs/cachefiles/ondemand.c create mode 100644 include/uapi/linux/cachefiles.h diff --git a/fs/cachefiles/Kconfig b/fs/cachefiles/Kconfig index 719faeeda168..58aad1fb4c5c 100644 --- a/fs/cachefiles/Kconfig +++ b/fs/cachefiles/Kconfig @@ -26,3 +26,14 @@ config CACHEFILES_ERROR_INJECTION help This permits error injection to be enabled in cachefiles whilst a cache is in service. 
+ +config CACHEFILES_ONDEMAND + bool "Support for on-demand read" + depends on CACHEFILES + default n + help + This permits on-demand read mode of cachefiles. In this mode, when + cache miss, the cachefiles backend instead of netfs, is responsible + for fetching data, e.g. through user daemon. + + If unsure, say N. diff --git a/fs/cachefiles/Makefile b/fs/cachefiles/Makefile index 16d811f1a2fa..c37a7a9af10b 100644 --- a/fs/cachefiles/Makefile +++ b/fs/cachefiles/Makefile @@ -16,5 +16,6 @@ cachefiles-y :=3D \ xattr.o =20 cachefiles-$(CONFIG_CACHEFILES_ERROR_INJECTION) +=3D error_inject.o +cachefiles-$(CONFIG_CACHEFILES_ONDEMAND) +=3D ondemand.o =20 obj-$(CONFIG_CACHEFILES) :=3D cachefiles.o diff --git a/fs/cachefiles/daemon.c b/fs/cachefiles/daemon.c index 7ac04ee2c0a0..d155a6da90d3 100644 --- a/fs/cachefiles/daemon.c +++ b/fs/cachefiles/daemon.c @@ -75,6 +75,9 @@ static const struct cachefiles_daemon_cmd cachefiles_daem= on_cmds[] =3D { { "inuse", cachefiles_daemon_inuse }, { "secctx", cachefiles_daemon_secctx }, { "tag", cachefiles_daemon_tag }, +#ifdef CONFIG_CACHEFILES_ONDEMAND + { "copen", cachefiles_ondemand_copen }, +#endif { "", NULL } }; =20 @@ -108,6 +111,9 @@ static int cachefiles_daemon_open(struct inode *inode, = struct file *file) INIT_LIST_HEAD(&cache->volumes); INIT_LIST_HEAD(&cache->object_list); spin_lock_init(&cache->object_list_lock); +#ifdef CONFIG_CACHEFILES_ONDEMAND + xa_init_flags(&cache->reqs, XA_FLAGS_ALLOC); +#endif =20 /* set default caching limits * - limit at 1% free space and/or free files @@ -126,6 +132,27 @@ static int cachefiles_daemon_open(struct inode *inode,= struct file *file) return 0; } =20 +#ifdef CONFIG_CACHEFILES_ONDEMAND +static inline void cachefiles_flush_reqs(struct cachefiles_cache *cache) +{ + struct xarray *xa =3D &cache->reqs; + struct cachefiles_req *req; + unsigned long index; + + /* + * 1) Cache has been marked as dead state, and then 2) flush all + * pending requests in @reqs xarray. 
The barrier inside set_bit() + * will ensure that above two ops won't be reordered. + */ + xa_lock(xa); + xa_for_each(xa, index, req) { + req->error =3D -EIO; + complete(&req->done); + } + xa_unlock(xa); +} +#endif + /* * Release a cache. */ @@ -139,6 +166,11 @@ static int cachefiles_daemon_release(struct inode *ino= de, struct file *file) =20 set_bit(CACHEFILES_DEAD, &cache->flags); =20 +#ifdef CONFIG_CACHEFILES_ONDEMAND + cachefiles_flush_reqs(cache); + xa_destroy(&cache->reqs); +#endif + cachefiles_daemon_unbind(cache); =20 /* clean up the control file interface */ @@ -152,23 +184,14 @@ static int cachefiles_daemon_release(struct inode *in= ode, struct file *file) return 0; } =20 -/* - * Read the cache state. - */ -static ssize_t cachefiles_daemon_read(struct file *file, char __user *_buf= fer, - size_t buflen, loff_t *pos) +static ssize_t cachefiles_do_daemon_read(struct cachefiles_cache *cache, + char __user *_buffer, size_t buflen) { - struct cachefiles_cache *cache =3D file->private_data; unsigned long long b_released; unsigned f_released; char buffer[256]; int n; =20 - //_enter(",,%zu,", buflen); - - if (!test_bit(CACHEFILES_READY, &cache->flags)) - return 0; - /* check how much space the cache has */ cachefiles_has_space(cache, 0, 0, cachefiles_has_space_check); =20 @@ -206,6 +229,26 @@ static ssize_t cachefiles_daemon_read(struct file *fil= e, char __user *_buffer, return n; } =20 +/* + * Read the cache state. 
+ */ +static ssize_t cachefiles_daemon_read(struct file *file, char __user *_buf= fer, + size_t buflen, loff_t *pos) +{ + struct cachefiles_cache *cache =3D file->private_data; + + //_enter(",,%zu,", buflen); + + if (!test_bit(CACHEFILES_READY, &cache->flags)) + return 0; + + if (IS_ENABLED(CONFIG_CACHEFILES_ONDEMAND) && + test_bit(CACHEFILES_ONDEMAND_MODE, &cache->flags)) + return cachefiles_ondemand_daemon_read(cache, _buffer, buflen); + else + return cachefiles_do_daemon_read(cache, _buffer, buflen); +} + /* * Take a command from cachefilesd, parse it and act on it. */ @@ -297,8 +340,16 @@ static __poll_t cachefiles_daemon_poll(struct file *fi= le, poll_wait(file, &cache->daemon_pollwq, poll); mask =3D 0; =20 - if (test_bit(CACHEFILES_STATE_CHANGED, &cache->flags)) - mask |=3D EPOLLIN; + if (IS_ENABLED(CONFIG_CACHEFILES_ONDEMAND) && + test_bit(CACHEFILES_ONDEMAND_MODE, &cache->flags)) { +#ifdef CONFIG_CACHEFILES_ONDEMAND + if (!xa_empty(&cache->reqs)) + mask |=3D EPOLLIN; +#endif + } else { + if (test_bit(CACHEFILES_STATE_CHANGED, &cache->flags)) + mask |=3D EPOLLIN; + } =20 if (test_bit(CACHEFILES_CULLING, &cache->flags)) mask |=3D EPOLLOUT; diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h index e80673d0ab97..7d5c7d391fdb 100644 --- a/fs/cachefiles/internal.h +++ b/fs/cachefiles/internal.h @@ -15,6 +15,8 @@ #include #include #include +#include +#include =20 #define CACHEFILES_DIO_BLOCK_SIZE 4096 =20 @@ -58,6 +60,9 @@ struct cachefiles_object { enum cachefiles_content content_info:8; /* Info about content presence */ unsigned long flags; #define CACHEFILES_OBJECT_USING_TMPFILE 0 /* Have an unlinked tmpfile */ +#ifdef CONFIG_CACHEFILES_ONDEMAND + int fd; /* anonymous fd */ +#endif }; =20 /* @@ -98,11 +103,24 @@ struct cachefiles_cache { #define CACHEFILES_DEAD 1 /* T if cache dead */ #define CACHEFILES_CULLING 2 /* T if cull engaged */ #define CACHEFILES_STATE_CHANGED 3 /* T if state changed (poll trigger) */ +#define CACHEFILES_ONDEMAND_MODE 4 /* 
T if in on-demand read mode */ char *rootdirname; /* name of cache root directory */ char *secctx; /* LSM security context */ char *tag; /* cache binding tag */ +#ifdef CONFIG_CACHEFILES_ONDEMAND + struct xarray reqs; /* xarray of pending on-demand requests */ +#endif }; =20 +struct cachefiles_req { + struct cachefiles_object *object; + struct completion done; + int error; + struct cachefiles_msg msg; +}; + +#define CACHEFILES_REQ_NEW XA_MARK_1 + #include =20 static inline @@ -250,6 +268,31 @@ extern struct file *cachefiles_create_tmpfile(struct c= achefiles_object *object); extern bool cachefiles_commit_tmpfile(struct cachefiles_cache *cache, struct cachefiles_object *object); =20 +/* + * ondemand.c + */ +#ifdef CONFIG_CACHEFILES_ONDEMAND +extern ssize_t cachefiles_ondemand_daemon_read(struct cachefiles_cache *ca= che, + char __user *_buffer, size_t buflen); + +extern int cachefiles_ondemand_copen(struct cachefiles_cache *cache, + char *args); + +extern int cachefiles_ondemand_init_object(struct cachefiles_object *objec= t); + +#else +static inline ssize_t cachefiles_ondemand_daemon_read(struct cachefiles_ca= che *cache, + char __user *_buffer, size_t buflen) +{ + return -EOPNOTSUPP; +} + +static inline int cachefiles_ondemand_init_object(struct cachefiles_object= *object) +{ + return 0; +} +#endif + /* * security.c */ diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index fe1bab0f36d4..68213304e96b 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c @@ -452,10 +452,9 @@ struct file *cachefiles_create_tmpfile(struct cachefil= es_object *object) struct dentry *fan =3D volume->fanout[(u8)object->cookie->key_hash]; struct file *file; struct path path; - uint64_t ni_size =3D object->cookie->object_size; + uint64_t ni_size; long ret; =20 - ni_size =3D round_up(ni_size, CACHEFILES_DIO_BLOCK_SIZE); =20 cachefiles_begin_secure(cache, &saved_cred); =20 @@ -481,6 +480,15 @@ struct file *cachefiles_create_tmpfile(struct cachefil= es_object *object) goto 
out_dput; } =20 + ret =3D cachefiles_ondemand_init_object(object); + if (ret < 0) { + file =3D ERR_PTR(ret); + goto out_unuse; + } + + ni_size =3D object->cookie->object_size; + ni_size =3D round_up(ni_size, CACHEFILES_DIO_BLOCK_SIZE); + if (ni_size > 0) { trace_cachefiles_trunc(object, d_backing_inode(path.dentry), 0, ni_size, cachefiles_trunc_expand_tmpfile); @@ -586,6 +594,10 @@ static bool cachefiles_open_file(struct cachefiles_obj= ect *object, } _debug("file -> %pd positive", dentry); =20 + ret =3D cachefiles_ondemand_init_object(object); + if (ret < 0) + goto error_fput; + ret =3D cachefiles_check_auxdata(object, file); if (ret < 0) goto check_failed; diff --git a/fs/cachefiles/ondemand.c b/fs/cachefiles/ondemand.c new file mode 100644 index 000000000000..75180d02af91 --- /dev/null +++ b/fs/cachefiles/ondemand.c @@ -0,0 +1,360 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2022, Alibaba Cloud + */ +#include +#include +#include +#include "internal.h" + +static int cachefiles_ondemand_fd_release(struct inode *inode, + struct file *file) +{ + struct cachefiles_object *object =3D file->private_data; + + /* + * Uninstall anon_fd to the cachefiles object, so that no further + * associated requests will get enqueued. 
+ */ + object->fd =3D -1; + + cachefiles_put_object(object, cachefiles_obj_put_ondemand_fd); + return 0; +} + +static ssize_t cachefiles_ondemand_fd_write_iter(struct kiocb *kiocb, + struct iov_iter *iter) +{ + struct cachefiles_object *object =3D kiocb->ki_filp->private_data; + struct cachefiles_cache *cache =3D object->volume->cache; + struct file *file =3D object->file; + size_t len =3D iter->count; + loff_t pos =3D kiocb->ki_pos; + const struct cred *saved_cred; + int ret; + + if (!file) + return -ENOBUFS; + + cachefiles_begin_secure(cache, &saved_cred); + ret =3D __cachefiles_prepare_write(object, file, &pos, &len, true); + cachefiles_end_secure(cache, saved_cred); + if (ret < 0) + return ret; + + ret =3D __cachefiles_write(object, file, pos, iter, NULL, NULL); + if (!ret) + ret =3D len; + + return ret; +} + +static loff_t cachefiles_ondemand_fd_llseek(struct file *filp, loff_t pos, + int whence) +{ + struct cachefiles_object *object =3D filp->private_data; + struct file *file =3D object->file; + + if (!file) + return -ENOBUFS; + + return vfs_llseek(file, pos, whence); +} + +static const struct file_operations cachefiles_ondemand_fd_fops =3D { + .owner =3D THIS_MODULE, + .release =3D cachefiles_ondemand_fd_release, + .write_iter =3D cachefiles_ondemand_fd_write_iter, + .llseek =3D cachefiles_ondemand_fd_llseek, +}; + +/* + * OPEN request Completion (copen) + * - command: "copen ," + * represents the object size if >=3D0, error code if negat= ive + */ +int cachefiles_ondemand_copen(struct cachefiles_cache *cache, char *args) +{ + struct cachefiles_req *req; + struct fscache_cookie *cookie; + char *pid, *psize; + unsigned long id; + long size; + int ret; + + if (!test_bit(CACHEFILES_ONDEMAND_MODE, &cache->flags)) + return -EOPNOTSUPP; + + if (!*args) { + pr_err("Empty id specified\n"); + return -EINVAL; + } + + pid =3D args; + psize =3D strchr(args, ','); + if (!psize) { + pr_err("Cache size is not specified\n"); + return -EINVAL; + } + + *psize =3D 0; + 
psize++; + + ret =3D kstrtoul(pid, 0, &id); + if (ret) + return ret; + + req =3D xa_erase(&cache->reqs, id); + if (!req) + return -EINVAL; + + /* fail OPEN request if copen format is invalid */ + ret =3D kstrtol(psize, 0, &size); + if (ret) { + req->error =3D ret; + goto out; + } + + /* fail OPEN request if daemon reports an error */ + if (size < 0) { + if (!IS_ERR_VALUE(size)) + size =3D -EINVAL; + req->error =3D size; + goto out; + } + + cookie =3D req->object->cookie; + cookie->object_size =3D size; + if (size) + clear_bit(FSCACHE_COOKIE_NO_DATA_TO_READ, &cookie->flags); + else + set_bit(FSCACHE_COOKIE_NO_DATA_TO_READ, &cookie->flags); + +out: + complete(&req->done); + return ret; +} + +static int cachefiles_ondemand_get_fd(struct cachefiles_req *req) +{ + struct cachefiles_object *object; + struct cachefiles_open *load; + struct file *file; + int ret, fd; + + ret =3D get_unused_fd_flags(O_WRONLY); + if (ret < 0) + return ret; + fd =3D ret; + + object =3D cachefiles_grab_object(req->object, + cachefiles_obj_get_ondemand_fd); + + file =3D anon_inode_getfile("[cachefiles]", &cachefiles_ondemand_fd_fops, + object, O_WRONLY); + if (IS_ERR(file)) { + cachefiles_put_object(object, cachefiles_obj_put_ondemand_fd); + put_unused_fd(fd); + return PTR_ERR(file); + } + + file->f_mode |=3D FMODE_PWRITE | FMODE_LSEEK; + fd_install(fd, file); + + load =3D (void *)req->msg.data; + load->fd =3D fd; + object->fd =3D fd; + + return 0; +} + +ssize_t cachefiles_ondemand_daemon_read(struct cachefiles_cache *cache, + char __user *_buffer, size_t buflen) +{ + struct cachefiles_req *req; + struct cachefiles_msg *msg; + unsigned long id =3D 0; + size_t n; + int ret =3D 0; + XA_STATE(xas, &cache->reqs, 0); + + /* + * Search for request that has not ever been processed, to prevent + * requests from being sent to user daemon repeatedly. 
+ */ + xa_lock(&cache->reqs); + req =3D xas_find_marked(&xas, UINT_MAX, CACHEFILES_REQ_NEW); + if (!req) { + xa_unlock(&cache->reqs); + return 0; + } + + msg =3D &req->msg; + n =3D msg->len; + + if (n > buflen) { + xa_unlock(&cache->reqs); + return -EMSGSIZE; + } + + xas_clear_mark(&xas, CACHEFILES_REQ_NEW); + xa_unlock(&cache->reqs); + + msg->id =3D id =3D xas.xa_index; + + if (msg->opcode =3D=3D CACHEFILES_OP_OPEN) { + ret =3D cachefiles_ondemand_get_fd(req); + if (ret) + goto error; + } + + if (copy_to_user(_buffer, msg, n) !=3D 0) { + ret =3D -EFAULT; + goto err_put_fd; + } + + return n; + +err_put_fd: + if (msg->opcode =3D=3D CACHEFILES_OP_OPEN) + close_fd(req->object->fd); +error: + xa_erase(&cache->reqs, id); + req->error =3D ret; + complete(&req->done); + return ret; +} + +typedef int (*init_req_fn)(struct cachefiles_req *req, void *private); + +static int cachefiles_ondemand_send_req(struct cachefiles_object *object, + enum cachefiles_opcode opcode, + size_t data_len, + init_req_fn init_req, + void *private) +{ + struct cachefiles_cache *cache =3D object->volume->cache; + struct cachefiles_req *req; + XA_STATE(xas, &cache->reqs, 0); + int ret; + + if (!test_bit(CACHEFILES_ONDEMAND_MODE, &cache->flags)) + return 0; + + if (test_bit(CACHEFILES_DEAD, &cache->flags)) + return -EIO; + + req =3D kzalloc(sizeof(*req) + data_len, GFP_KERNEL); + if (!req) + return -ENOMEM; + + req->object =3D object; + init_completion(&req->done); + req->msg.opcode =3D opcode; + req->msg.len =3D sizeof(struct cachefiles_msg) + data_len; + + ret =3D init_req(req, private); + if (ret) + goto out; + + do { + /* + * Stop enqueuing the request when daemon is dying. So we need + * to 1) check cache state, and 2) enqueue request if cache is + * alive. + * + * These two ops need to be atomic as a whole. Otherwise request + * may be enqueued after xarray has been flushed, in which case + * the orphan request will never be completed and thus netfs + * will hang there forever. 
+ */ + xas_lock(&xas); + + /* recheck dead state with lock held */ + if (test_bit(CACHEFILES_DEAD, &cache->flags)) { + xas_unlock(&xas); + ret =3D -EIO; + goto out; + } + + xas.xa_index =3D 0; + xas_find_marked(&xas, UINT_MAX, XA_FREE_MARK); + if (xas.xa_node =3D=3D XAS_RESTART) + xas_set_err(&xas, -EBUSY); + xas_store(&xas, req); + xas_clear_mark(&xas, XA_FREE_MARK); + xas_set_mark(&xas, CACHEFILES_REQ_NEW); + xas_unlock(&xas); + } while (xas_nomem(&xas, GFP_KERNEL)); + + ret =3D xas_error(&xas); + if (ret) + goto out; + + wake_up_all(&cache->daemon_pollwq); + wait_for_completion(&req->done); + ret =3D req->error; +out: + kfree(req); + return ret; +} + +static int init_open_req(struct cachefiles_req *req, void *private) +{ + struct cachefiles_object *object =3D req->object; + struct fscache_cookie *cookie =3D object->cookie; + struct fscache_volume *volume =3D object->volume->vcookie; + struct cachefiles_open *load =3D (void *)req->msg.data; + size_t volume_key_size, cookie_key_size; + void *volume_key, *cookie_key; + + /* + * Volume key is of string format. + * key[0] stores strlen() of the string, while the remained part stores + * the content of the string (excluding the suffix '\0'). Append the + * suffix '\0' to the output volume_key, so that it's a valid string. + */ + volume_key_size =3D volume->key[0] + 1; + volume_key =3D volume->key + 1; + + /* Cookie key is of binary format, which is netfs specific. 
*/ + cookie_key_size =3D cookie->key_len; + cookie_key =3D fscache_get_key(cookie); + + if (!(object->cookie->advice & FSCACHE_ADV_WANT_CACHE_SIZE)) { + pr_err("WANT_CACHE_SIZE is needed for on-demand mode\n"); + return -EINVAL; + } + + load->volume_key_size =3D volume_key_size; + load->cookie_key_size =3D cookie_key_size; + memcpy(load->data, volume_key, volume_key_size); + memcpy(load->data + volume_key_size, cookie_key, cookie_key_size); + + return 0; +} + +int cachefiles_ondemand_init_object(struct cachefiles_object *object) +{ + struct fscache_cookie *cookie =3D object->cookie; + struct fscache_volume *volume =3D object->volume->vcookie; + size_t volume_key_size, cookie_key_size, data_len; + + /* + * Cachefiles will firstly check cache file under the root cache + * directory. If coherency check failed, it will fallback to creating a + * new tmpfile as the cache file. Reuse the previously created anon_fd + * if any. + */ + if (object->fd > 0) + return 0; + + volume_key_size =3D volume->key[0] + 1; + cookie_key_size =3D cookie->key_len; + data_len =3D sizeof(struct cachefiles_open) + + volume_key_size + cookie_key_size; + + return cachefiles_ondemand_send_req(object, + CACHEFILES_OP_OPEN, data_len, + init_open_req, NULL); +} diff --git a/include/linux/fscache.h b/include/linux/fscache.h index 6727fb0db619..663ab6f2ede6 100644 --- a/include/linux/fscache.h +++ b/include/linux/fscache.h @@ -39,6 +39,7 @@ struct fscache_cookie; #define FSCACHE_ADV_SINGLE_CHUNK 0x01 /* The object is a single chunk of d= ata */ #define FSCACHE_ADV_WRITE_CACHE 0x00 /* Do cache if written to locally */ #define FSCACHE_ADV_WRITE_NOCACHE 0x02 /* Don't cache if written to locall= y */ +#define FSCACHE_ADV_WANT_CACHE_SIZE 0x04 /* Retrieve cache size at runtime= */ =20 #define FSCACHE_INVAL_DIO_WRITE 0x01 /* Invalidate due to DIO write */ =20 diff --git a/include/trace/events/cachefiles.h b/include/trace/events/cache= files.h index 311c14a20e70..93df9391bd7f 100644 --- 
a/include/trace/events/cachefiles.h +++ b/include/trace/events/cachefiles.h @@ -31,6 +31,8 @@ enum cachefiles_obj_ref_trace { cachefiles_obj_see_lookup_failed, cachefiles_obj_see_withdraw_cookie, cachefiles_obj_see_withdrawal, + cachefiles_obj_get_ondemand_fd, + cachefiles_obj_put_ondemand_fd, }; =20 enum fscache_why_object_killed { diff --git a/include/uapi/linux/cachefiles.h b/include/uapi/linux/cachefile= s.h new file mode 100644 index 000000000000..41492f2653c9 --- /dev/null +++ b/include/uapi/linux/cachefiles.h @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _LINUX_CACHEFILES_H +#define _LINUX_CACHEFILES_H + +#include + +/* + * Fscache ensures that the maximum length of cookie key is 255. The volum= e key + * is controlled by netfs, and generally no bigger than 255. + */ +#define CACHEFILES_MSG_MAX_SIZE 1024 + +enum cachefiles_opcode { + CACHEFILES_OP_OPEN, +}; + +/* + * Message Header + * + * @id a unique ID identifying this message + * @opcode message type, CACHEFILE_OP_* + * @len message length, including message header and following data + * @data message type specific payload + */ +struct cachefiles_msg { + __u32 id; + __u32 opcode; + __u32 len; + __u8 data[]; +}; + +/* + * @data contains volume_key and cookie_key in sequence. + * + * volume_key is of string format, with a suffix '\0'; + * @volume_key_size identifies size of volume key, in bytes. + * + * cookie_key is of binary format, which is netfs specific; + * @cookie_key_size identifies size of cookie key, in bytes. 
+ */ +struct cachefiles_open { + __u32 volume_key_size; + __u32 cookie_key_size; + __u32 fd; + __u32 flags; + __u8 data[]; +}; + +#endif --=20 2.27.0 From nobody Fri Jun 19 08:29:54 2026 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 66ED3C433F5 for ; Wed, 6 Apr 2022 12:13:35 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S229972AbiDFMPd (ORCPT ); Wed, 6 Apr 2022 08:15:33 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:44444 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S231279AbiDFMOE (ORCPT ); Wed, 6 Apr 2022 08:14:04 -0400 Received: from out30-56.freemail.mail.aliyun.com (out30-56.freemail.mail.aliyun.com [115.124.30.56]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id BFC111B60E1; Wed, 6 Apr 2022 00:56:26 -0700 (PDT) X-Alimail-AntiSpam: AC=PASS;BC=-1|-1;BR=01201311R501e4;CH=green;DM=||false|;DS=||;FP=0|-1|-1|-1|0|-1|-1|-1;HT=e01e04426;MF=jefflexu@linux.alibaba.com;NM=1;PH=DS;RN=18;SR=0;TI=SMTPD_---0V9L0qp1_1649231779; Received: from localhost(mailfrom:jefflexu@linux.alibaba.com fp:SMTPD_---0V9L0qp1_1649231779) by smtp.aliyun-inc.com(127.0.0.1); Wed, 06 Apr 2022 15:56:20 +0800 From: Jeffle Xu To: dhowells@redhat.com, linux-cachefs@redhat.com, xiang@kernel.org, chao@kernel.org, linux-erofs@lists.ozlabs.org Cc: torvalds@linux-foundation.org, gregkh@linuxfoundation.org, willy@infradead.org, linux-fsdevel@vger.kernel.org, joseph.qi@linux.alibaba.com, bo.liu@linux.alibaba.com, tao.peng@linux.alibaba.com, gerry@linux.alibaba.com, eguan@linux.alibaba.com, linux-kernel@vger.kernel.org, luodaowen.backend@bytedance.com, tianzichen@kuaishou.com, fannaihao@baidu.com Subject: [PATCH v8 04/20] cachefiles: notify user daemon when withdrawing cookie Date: Wed, 6 Apr 2022 15:55:56 +0800 
Message-Id: <20220406075612.60298-5-jefflexu@linux.alibaba.com> X-Mailer: git-send-email 2.27.0 In-Reply-To: <20220406075612.60298-1-jefflexu@linux.alibaba.com> References: <20220406075612.60298-1-jefflexu@linux.alibaba.com> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Type: text/plain; charset="utf-8" Notify user daemon that cookie is going to be withdrawn, providing a hint that the associated anon_fd can be closed. The anon_fd attached in the CLOSE request shall be same with that in the previous OPEN request. Be noted that this is only a hint. User daemon can close the anon_fd when receiving the CLOSE request, then it will receive another anon_fd if the cookie gets looked up. Or it can also ignore the CLOSE request, and keep writing data into the anon_fd. However the next time cookie gets looked up, the user daemon will still receive another anon_fd. Signed-off-by: Jeffle Xu Tested-by: Zichen Tian --- fs/cachefiles/interface.c | 2 ++ fs/cachefiles/internal.h | 5 +++++ fs/cachefiles/ondemand.c | 36 +++++++++++++++++++++++++++++++++ include/uapi/linux/cachefiles.h | 5 +++++ 4 files changed, 48 insertions(+) diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c index ae93cee9d25d..a69073a1d3f0 100644 --- a/fs/cachefiles/interface.c +++ b/fs/cachefiles/interface.c @@ -362,6 +362,8 @@ static void cachefiles_withdraw_cookie(struct fscache_c= ookie *cookie) spin_unlock(&cache->object_list_lock); } =20 + cachefiles_ondemand_clean_object(object); + if (object->file) { cachefiles_begin_secure(cache, &saved_cred); cachefiles_clean_up_object(object, cache); diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h index 7d5c7d391fdb..8a397d4da560 100644 --- a/fs/cachefiles/internal.h +++ b/fs/cachefiles/internal.h @@ -279,6 +279,7 @@ extern int cachefiles_ondemand_copen(struct cachefiles_= cache *cache, char *args); =20 extern int 
cachefiles_ondemand_init_object(struct cachefiles_object *objec= t); +extern void cachefiles_ondemand_clean_object(struct cachefiles_object *obj= ect); =20 #else static inline ssize_t cachefiles_ondemand_daemon_read(struct cachefiles_ca= che *cache, @@ -291,6 +292,10 @@ static inline int cachefiles_ondemand_init_object(stru= ct cachefiles_object *obje { return 0; } + +static inline void cachefiles_ondemand_clean_object(struct cachefiles_obje= ct *object) +{ +} #endif =20 /* diff --git a/fs/cachefiles/ondemand.c b/fs/cachefiles/ondemand.c index 75180d02af91..defd65124052 100644 --- a/fs/cachefiles/ondemand.c +++ b/fs/cachefiles/ondemand.c @@ -213,6 +213,12 @@ ssize_t cachefiles_ondemand_daemon_read(struct cachefi= les_cache *cache, goto err_put_fd; } =20 + /* CLOSE request has no reply */ + if (msg->opcode =3D=3D CACHEFILES_OP_CLOSE) { + xa_erase(&cache->reqs, id); + complete(&req->done); + } + return n; =20 err_put_fd: @@ -334,6 +340,28 @@ static int init_open_req(struct cachefiles_req *req, v= oid *private) return 0; } =20 +static int init_close_req(struct cachefiles_req *req, void *private) +{ + struct cachefiles_object *object =3D req->object; + struct cachefiles_close *load =3D (void *)req->msg.data; + int fd =3D object->fd; + + if (fd =3D=3D -1) { + pr_info_once("CLOSE: anonymous fd closed prematurely.\n"); + return -EIO; + } + + /* + * It's possible if the cookie looking up phase failed before READ + * request has ever been sent. 
+ */ + if (fd =3D=3D 0) + return -ENOENT; + + load->fd =3D fd; + return 0; +} + int cachefiles_ondemand_init_object(struct cachefiles_object *object) { struct fscache_cookie *cookie =3D object->cookie; @@ -358,3 +386,11 @@ int cachefiles_ondemand_init_object(struct cachefiles_= object *object) CACHEFILES_OP_OPEN, data_len, init_open_req, NULL); } + +void cachefiles_ondemand_clean_object(struct cachefiles_object *object) +{ + cachefiles_ondemand_send_req(object, + CACHEFILES_OP_CLOSE, + sizeof(struct cachefiles_close), + init_close_req, NULL); +} diff --git a/include/uapi/linux/cachefiles.h b/include/uapi/linux/cachefile= s.h index 41492f2653c9..73397e142ab3 100644 --- a/include/uapi/linux/cachefiles.h +++ b/include/uapi/linux/cachefiles.h @@ -12,6 +12,7 @@ =20 enum cachefiles_opcode { CACHEFILES_OP_OPEN, + CACHEFILES_OP_CLOSE, }; =20 /* @@ -46,4 +47,8 @@ struct cachefiles_open { __u8 data[]; }; =20 +struct cachefiles_close { + __u32 fd; +}; + #endif --=20 2.27.0 From nobody Fri Jun 19 08:29:54 2026 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id A876FC433FE for ; Wed, 6 Apr 2022 12:13:16 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S231355AbiDFMPN (ORCPT ); Wed, 6 Apr 2022 08:15:13 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:38252 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S231158AbiDFMOD (ORCPT ); Wed, 6 Apr 2022 08:14:03 -0400 Received: from out30-56.freemail.mail.aliyun.com (out30-56.freemail.mail.aliyun.com [115.124.30.56]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id E39B4D3712; Wed, 6 Apr 2022 00:56:25 -0700 (PDT) X-Alimail-AntiSpam: 
AC=PASS;BC=-1|-1;BR=01201311R171e4;CH=green;DM=||false|;DS=||;FP=0|-1|-1|-1|0|-1|-1|-1;HT=e01e04357;MF=jefflexu@linux.alibaba.com;NM=1;PH=DS;RN=18;SR=0;TI=SMTPD_---0V9L0qpr_1649231780; Received: from localhost(mailfrom:jefflexu@linux.alibaba.com fp:SMTPD_---0V9L0qpr_1649231780) by smtp.aliyun-inc.com(127.0.0.1); Wed, 06 Apr 2022 15:56:21 +0800 From: Jeffle Xu To: dhowells@redhat.com, linux-cachefs@redhat.com, xiang@kernel.org, chao@kernel.org, linux-erofs@lists.ozlabs.org Cc: torvalds@linux-foundation.org, gregkh@linuxfoundation.org, willy@infradead.org, linux-fsdevel@vger.kernel.org, joseph.qi@linux.alibaba.com, bo.liu@linux.alibaba.com, tao.peng@linux.alibaba.com, gerry@linux.alibaba.com, eguan@linux.alibaba.com, linux-kernel@vger.kernel.org, luodaowen.backend@bytedance.com, tianzichen@kuaishou.com, fannaihao@baidu.com Subject: [PATCH v8 05/20] cachefiles: implement on-demand read Date: Wed, 6 Apr 2022 15:55:57 +0800 Message-Id: <20220406075612.60298-6-jefflexu@linux.alibaba.com> X-Mailer: git-send-email 2.27.0 In-Reply-To: <20220406075612.60298-1-jefflexu@linux.alibaba.com> References: <20220406075612.60298-1-jefflexu@linux.alibaba.com> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Type: text/plain; charset="utf-8" Implement the data plane of on-demand read mode. A new NETFS_READ_HOLE_ONDEMAND flag is introduced to indicate that on-demand read should be done when a cache miss encountered. In this case, the read routine will send a READ request to user daemon, along with the anonymous fd and the file range that shall be read. Now user daemon is responsible for fetching data in the given file range, and then writing the fetched data into cache file with the given anonymous fd. After sending the READ request, the read routine will hang there, until the READ request is handled by user daemon. Then it will retry to read from the same file range. 
If a cache miss is encountered again on the same file range, the read routine will fail then. Signed-off-by: Jeffle Xu Tested-by: Zichen Tian --- fs/cachefiles/internal.h | 9 ++++ fs/cachefiles/io.c | 11 +++++ fs/cachefiles/ondemand.c | 83 +++++++++++++++++++++++++++++++++ include/linux/netfs.h | 1 + include/uapi/linux/cachefiles.h | 18 +++++++ 5 files changed, 122 insertions(+) diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h index 8a397d4da560..b4a834671b6b 100644 --- a/fs/cachefiles/internal.h +++ b/fs/cachefiles/internal.h @@ -281,6 +281,9 @@ extern int cachefiles_ondemand_copen(struct cachefiles_= cache *cache, extern int cachefiles_ondemand_init_object(struct cachefiles_object *objec= t); extern void cachefiles_ondemand_clean_object(struct cachefiles_object *obj= ect); =20 +extern int cachefiles_ondemand_read(struct cachefiles_object *object, + loff_t pos, size_t len); + #else static inline ssize_t cachefiles_ondemand_daemon_read(struct cachefiles_ca= che *cache, char __user *_buffer, size_t buflen) @@ -296,6 +299,12 @@ static inline int cachefiles_ondemand_init_object(stru= ct cachefiles_object *obje static inline void cachefiles_ondemand_clean_object(struct cachefiles_obje= ct *object) { } + +static inline int cachefiles_ondemand_read(struct cachefiles_object *objec= t, + loff_t pos, size_t len) +{ + return -EOPNOTSUPP; +} #endif =20 /* diff --git a/fs/cachefiles/io.c b/fs/cachefiles/io.c index 50a14e8f0aac..6f2e20cd41f4 100644 --- a/fs/cachefiles/io.c +++ b/fs/cachefiles/io.c @@ -95,6 +95,7 @@ static int cachefiles_read(struct netfs_cache_resources *= cres, file, file_inode(file)->i_ino, start_pos, len, i_size_read(file_inode(file))); =20 +retry: /* If the caller asked us to seek for data before doing the read, then * we should do that now. If we find a gap, we fill it with zeros. 
*/ @@ -119,6 +120,16 @@ static int cachefiles_read(struct netfs_cache_resource= s *cres, if (read_hole =3D=3D NETFS_READ_HOLE_FAIL) goto presubmission_error; =20 + if (read_hole =3D=3D NETFS_READ_HOLE_ONDEMAND) { + ret =3D cachefiles_ondemand_read(object, off, len); + if (ret) + goto presubmission_error; + + /* fail the read if no progress achieved */ + read_hole =3D NETFS_READ_HOLE_FAIL; + goto retry; + } + iov_iter_zero(len, iter); skipped =3D len; ret =3D 0; diff --git a/fs/cachefiles/ondemand.c b/fs/cachefiles/ondemand.c index defd65124052..149ae1923955 100644 --- a/fs/cachefiles/ondemand.c +++ b/fs/cachefiles/ondemand.c @@ -11,13 +11,30 @@ static int cachefiles_ondemand_fd_release(struct inode = *inode, struct file *file) { struct cachefiles_object *object =3D file->private_data; + struct cachefiles_cache *cache =3D object->volume->cache; + struct xarray *xa =3D &cache->reqs; + struct cachefiles_req *req; + unsigned long index; =20 + xa_lock(xa); /* * Uninstall anon_fd to the cachefiles object, so that no further * associated requests will get enqueued. */ object->fd =3D -1; =20 + /* + * Flush all pending READ requests since their completion depends on + * anon_fd. 
+ */ + xa_for_each(xa, index, req) { + if (req->msg.opcode =3D=3D CACHEFILES_OP_READ) { + req->error =3D -EIO; + complete(&req->done); + } + } + xa_unlock(xa); + cachefiles_put_object(object, cachefiles_obj_put_ondemand_fd); return 0; } @@ -61,11 +78,35 @@ static loff_t cachefiles_ondemand_fd_llseek(struct file= *filp, loff_t pos, return vfs_llseek(file, pos, whence); } =20 +static long cachefiles_ondemand_fd_ioctl(struct file *filp, unsigned int i= octl, + unsigned long arg) +{ + struct cachefiles_object *object =3D filp->private_data; + struct cachefiles_cache *cache =3D object->volume->cache; + struct cachefiles_req *req; + unsigned long id; + + if (ioctl !=3D CACHEFILES_IOC_CREAD) + return -EINVAL; + + if (!test_bit(CACHEFILES_ONDEMAND_MODE, &cache->flags)) + return -EOPNOTSUPP; + + id =3D arg; + req =3D xa_erase(&cache->reqs, id); + if (!req) + return -EINVAL; + + complete(&req->done); + return 0; +} + static const struct file_operations cachefiles_ondemand_fd_fops =3D { .owner =3D THIS_MODULE, .release =3D cachefiles_ondemand_fd_release, .write_iter =3D cachefiles_ondemand_fd_write_iter, .llseek =3D cachefiles_ondemand_fd_llseek, + .unlocked_ioctl =3D cachefiles_ondemand_fd_ioctl, }; =20 /* @@ -283,6 +324,13 @@ static int cachefiles_ondemand_send_req(struct cachefi= les_object *object, goto out; } =20 + /* recheck anon_fd for READ request with lock held */ + if (opcode =3D=3D CACHEFILES_OP_READ && object->fd =3D=3D -1) { + xas_unlock(&xas); + ret =3D -EIO; + goto out; + } + xas.xa_index =3D 0; xas_find_marked(&xas, UINT_MAX, XA_FREE_MARK); if (xas.xa_node =3D=3D XAS_RESTART) @@ -362,6 +410,30 @@ static int init_close_req(struct cachefiles_req *req, = void *private) return 0; } =20 +struct cachefiles_read_ctx { + loff_t off; + size_t len; +}; + +static int init_read_req(struct cachefiles_req *req, void *private) +{ + struct cachefiles_object *object =3D req->object; + struct cachefiles_read *load =3D (void *)&req->msg.data; + struct cachefiles_read_ctx 
*read_ctx =3D private; + int fd =3D object->fd; + + /* Stop enqueuing request when daemon closes anon_fd prematurely. */ + if (fd =3D=3D -1) { + pr_info_once("READ: anonymous fd closed prematurely.\n"); + return -EIO; + } + + load->off =3D read_ctx->off; + load->len =3D read_ctx->len; + load->fd =3D fd; + return 0; +} + int cachefiles_ondemand_init_object(struct cachefiles_object *object) { struct fscache_cookie *cookie =3D object->cookie; @@ -394,3 +466,14 @@ void cachefiles_ondemand_clean_object(struct cachefile= s_object *object) sizeof(struct cachefiles_close), init_close_req, NULL); } + +int cachefiles_ondemand_read(struct cachefiles_object *object, + loff_t pos, size_t len) +{ + struct cachefiles_read_ctx read_ctx =3D {pos, len}; + + return cachefiles_ondemand_send_req(object, + CACHEFILES_OP_READ, + sizeof(struct cachefiles_read), + init_read_req, &read_ctx); +} diff --git a/include/linux/netfs.h b/include/linux/netfs.h index c7bf1eaf51d5..c1854e92333e 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -222,6 +222,7 @@ enum netfs_read_from_hole { NETFS_READ_HOLE_IGNORE, NETFS_READ_HOLE_CLEAR, NETFS_READ_HOLE_FAIL, + NETFS_READ_HOLE_ONDEMAND, }; =20 /* diff --git a/include/uapi/linux/cachefiles.h b/include/uapi/linux/cachefile= s.h index 73397e142ab3..9506b1697e14 100644 --- a/include/uapi/linux/cachefiles.h +++ b/include/uapi/linux/cachefiles.h @@ -3,6 +3,7 @@ #define _LINUX_CACHEFILES_H =20 #include +#include =20 /* * Fscache ensures that the maximum length of cookie key is 255. The volum= e key @@ -13,6 +14,7 @@ enum cachefiles_opcode { CACHEFILES_OP_OPEN, CACHEFILES_OP_CLOSE, + CACHEFILES_OP_READ, }; =20 /* @@ -51,4 +53,20 @@ struct cachefiles_close { __u32 fd; }; =20 +/* + * @off identifies the starting offset of the requested file range. + * @len identifies the length of the requested file range. 
+ */ +struct cachefiles_read { + __u64 off; + __u64 len; + __u32 fd; +}; + +/* + * Reply for READ request (Completion for READ) + * arg for CACHEFILES_IOC_CREAD ioctl is the @id field of READ request. + */ +#define CACHEFILES_IOC_CREAD _IOW(0x98, 1, int) + #endif --=20 2.27.0 From nobody Fri Jun 19 08:29:54 2026 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id A6D7CC433EF for ; Wed, 6 Apr 2022 12:13:22 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S231598AbiDFMPU (ORCPT ); Wed, 6 Apr 2022 08:15:20 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:39664 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S231205AbiDFMOD (ORCPT ); Wed, 6 Apr 2022 08:14:03 -0400 Received: from out30-45.freemail.mail.aliyun.com (out30-45.freemail.mail.aliyun.com [115.124.30.45]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id D32E1127592; Wed, 6 Apr 2022 00:56:28 -0700 (PDT) X-Alimail-AntiSpam: AC=PASS;BC=-1|-1;BR=01201311R111e4;CH=green;DM=||false|;DS=||;FP=0|-1|-1|-1|0|-1|-1|-1;HT=e01e04357;MF=jefflexu@linux.alibaba.com;NM=1;PH=DS;RN=18;SR=0;TI=SMTPD_---0V9LC84J_1649231782; Received: from localhost(mailfrom:jefflexu@linux.alibaba.com fp:SMTPD_---0V9LC84J_1649231782) by smtp.aliyun-inc.com(127.0.0.1); Wed, 06 Apr 2022 15:56:23 +0800 From: Jeffle Xu To: dhowells@redhat.com, linux-cachefs@redhat.com, xiang@kernel.org, chao@kernel.org, linux-erofs@lists.ozlabs.org Cc: torvalds@linux-foundation.org, gregkh@linuxfoundation.org, willy@infradead.org, linux-fsdevel@vger.kernel.org, joseph.qi@linux.alibaba.com, bo.liu@linux.alibaba.com, tao.peng@linux.alibaba.com, gerry@linux.alibaba.com, eguan@linux.alibaba.com, linux-kernel@vger.kernel.org, luodaowen.backend@bytedance.com, tianzichen@kuaishou.com, fannaihao@baidu.com 
Subject: [PATCH v8 06/20] cachefiles: enable on-demand read mode Date: Wed, 6 Apr 2022 15:55:58 +0800 Message-Id: <20220406075612.60298-7-jefflexu@linux.alibaba.com> X-Mailer: git-send-email 2.27.0 In-Reply-To: <20220406075612.60298-1-jefflexu@linux.alibaba.com> References: <20220406075612.60298-1-jefflexu@linux.alibaba.com> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Type: text/plain; charset="utf-8" Enable on-demand read mode by adding an optional parameter to the "bind" command. On-demand mode will be turned on when this parameter is "ondemand", i.e. "bind ondemand". Otherwise cachefiles will work in the original mode. Signed-off-by: Jeffle Xu Tested-by: Zichen Tian --- fs/cachefiles/daemon.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/fs/cachefiles/daemon.c b/fs/cachefiles/daemon.c index d155a6da90d3..bd902a4c4cd8 100644 --- a/fs/cachefiles/daemon.c +++ b/fs/cachefiles/daemon.c @@ -738,11 +738,6 @@ static int cachefiles_daemon_bind(struct cachefiles_ca= che *cache, char *args) cache->brun_percent >=3D 100) return -ERANGE; =20 - if (*args) { - pr_err("'bind' command doesn't take an argument\n"); - return -EINVAL; - } - if (!cache->rootdirname) { pr_err("No cache directory specified\n"); return -EINVAL; @@ -754,6 +749,14 @@ static int cachefiles_daemon_bind(struct cachefiles_ca= che *cache, char *args) return -EBUSY; } =20 + if (IS_ENABLED(CONFIG_CACHEFILES_ONDEMAND) && + !strcmp(args, "ondemand")) { + set_bit(CACHEFILES_ONDEMAND_MODE, &cache->flags); + } else if (*args) { + pr_err("'bind' command doesn't take an argument\n"); + return -EINVAL; + } + /* Make sure we have copies of the tag string */ if (!cache->tag) { /* --=20 2.27.0 From nobody Fri Jun 19 08:29:54 2026 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org 
[23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id F0BB4C433EF for ; Wed, 6 Apr 2022 12:13:28 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S232274AbiDFMP0 (ORCPT ); Wed, 6 Apr 2022 08:15:26 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:57138 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S231234AbiDFMOD (ORCPT ); Wed, 6 Apr 2022 08:14:03 -0400 Received: from out30-130.freemail.mail.aliyun.com (out30-130.freemail.mail.aliyun.com [115.124.30.130]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id D342E12759C; Wed, 6 Apr 2022 00:56:28 -0700 (PDT) X-Alimail-AntiSpam: AC=PASS;BC=-1|-1;BR=01201311R201e4;CH=green;DM=||false|;DS=||;FP=0|-1|-1|-1|0|-1|-1|-1;HT=e01e04395;MF=jefflexu@linux.alibaba.com;NM=1;PH=DS;RN=18;SR=0;TI=SMTPD_---0V9L0qr._1649231783; Received: from localhost(mailfrom:jefflexu@linux.alibaba.com fp:SMTPD_---0V9L0qr._1649231783) by smtp.aliyun-inc.com(127.0.0.1); Wed, 06 Apr 2022 15:56:24 +0800 From: Jeffle Xu To: dhowells@redhat.com, linux-cachefs@redhat.com, xiang@kernel.org, chao@kernel.org, linux-erofs@lists.ozlabs.org Cc: torvalds@linux-foundation.org, gregkh@linuxfoundation.org, willy@infradead.org, linux-fsdevel@vger.kernel.org, joseph.qi@linux.alibaba.com, bo.liu@linux.alibaba.com, tao.peng@linux.alibaba.com, gerry@linux.alibaba.com, eguan@linux.alibaba.com, linux-kernel@vger.kernel.org, luodaowen.backend@bytedance.com, tianzichen@kuaishou.com, fannaihao@baidu.com Subject: [PATCH v8 07/20] cachefiles: document on-demand read mode Date: Wed, 6 Apr 2022 15:55:59 +0800 Message-Id: <20220406075612.60298-8-jefflexu@linux.alibaba.com> X-Mailer: git-send-email 2.27.0 In-Reply-To: <20220406075612.60298-1-jefflexu@linux.alibaba.com> References: <20220406075612.60298-1-jefflexu@linux.alibaba.com> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Type: 
text/plain; charset="utf-8" Document new user interface introduced by on-demand read mode. Signed-off-by: Jeffle Xu Tested-by: Zichen Tian --- .../filesystems/caching/cachefiles.rst | 165 ++++++++++++++++++ 1 file changed, 165 insertions(+) diff --git a/Documentation/filesystems/caching/cachefiles.rst b/Documentati= on/filesystems/caching/cachefiles.rst index 8bf396b76359..386801135027 100644 --- a/Documentation/filesystems/caching/cachefiles.rst +++ b/Documentation/filesystems/caching/cachefiles.rst @@ -28,6 +28,8 @@ Cache on Already Mounted Filesystem =20 (*) Debugging. =20 + (*) On-demand Read. + =20 =20 Overview @@ -482,3 +484,166 @@ the control file. For example:: echo $((1|4|8)) >/sys/module/cachefiles/parameters/debug =20 will turn on all function entry debugging. + + +On-demand Read +=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D + +When working in original mode, cachefiles mainly serves as a local cache f= or +remote networking fs, while in on-demand read mode, cachefiles can boost t= he +scenario where on-demand read semantics is needed, e.g. container image +distribution. + +The essential difference between these two modes is that, in original mode, +when cache miss, netfs itself will fetch data from remote, and then write = the +fetched data into cache file. While in on-demand read mode, a user daemon = is +responsible for fetching data and then writing to the cache file. + +``CONFIG_CACHEFILES_ONDEMAND`` shall be enabled to support on-demand read = mode. + + +Protocol Communication +---------------------- + +The on-demand read mode relies on a simple protocol used for communication +between kernel and user daemon. The model is like:: + + kernel --[request]--> user daemon --[reply]--> kernel + +The cachefiles kernel module will send requests to user daemon when needed. +User daemon needs to poll on the devnode ('/dev/cachefiles') to check if +there's pending request to be processed. A POLLIN event will be returned +when there's pending request. 
+ +Then user daemon needs to read the devnode to fetch one request and proces= s it +accordingly. It is worth noting that each read only gets one request. When +finished processing the request, user daemon needs to write the reply to t= he +devnode. + +Each request is started with a message header like:: + + struct cachefiles_msg { + __u32 id; + __u32 opcode; + __u32 len; + __u8 data[]; + }; + + * ``id`` is a unique ID identifying this request among all pending + requests. + + * ``opcode`` identifies the type of this request. + + * ``data`` identifies the payload of this request. + + * ``len`` identifies the whole length of this request, including the + header and following type specific payload. + + +Turn on On-demand Mode +---------------------- + +An optional parameter is added to "bind" command:: + + bind [ondemand] + +When "bind" command is given no argument, it defaults to the original mo= de. +When "bind" command is given "ondemand" argument, i.e. "bind ondemand", +on-demand read mode will be enabled. + + +OPEN Request +------------ + +When netfs opens a cache file for the first time, a request with +CACHEFILES_OP_OPEN opcode, a.k.a OPEN request will be sent to user daemon.= The +payload format is like:: + + struct cachefiles_open { + __u32 volume_key_size; + __u32 cookie_key_size; + __u32 fd; + __u32 flags; + __u8 data[]; + }; + + * ``data`` contains volume_key and cookie_key in sequence. + + * ``volume_key_size`` identifies the size of volume key of the cache + file, in bytes. volume_key is of string format, with a suffix '\0'. + + * ``cookie_key_size`` identifies the size of cookie key of the cache + file, in bytes. cookie_key is of binary format, which is netfs + specific. + + * ``fd`` identifies the anonymous fd of the cache file, with which user + daemon can perform write/llseek file operations on the cache file. + + +OPEN request contains (volume_key, cookie_key, anon_fd) triple for corresp= onding +cache file.
With this triple, user daemon could fetch and write data into = the +cache file in the background, even when kernel has not triggered the cache= miss +yet. User daemon is able to distinguish the requested cache file with the = given +(volume_key, cookie_key), and write the fetched data into cache file with = the +given anon_fd. + +After recording the (volume_key, cookie_key, anon_fd) triple, user daemon = shall +reply with "copen" (complete open) command:: + + copen <id>,<cache_size> + + * ``id`` is exactly the id field of the previous OPEN request. + + * When >=3D 0, ``cache_size`` identifies the size of the cache file; + when < 0, ``cache_size`` identifies the error code encountered by the + user daemon. + + +CLOSE Request +------------- +On cookie withdrawal, a request with CACHEFILES_OP_CLOSE opcode, a.k.a C= LOSE +request, will be sent to user daemon. It will notify user daemon to close = the +attached anon_fd. The payload format is like:: + + struct cachefiles_close { + __u32 fd; + }; + + * ``fd`` identifies the anon_fd to be closed, which is exactly the same + as that in OPEN request. + + +READ Request +------------ + +When on-demand read mode is turned on, and cache miss encountered, kernel = will +send a request with CACHEFILES_OP_READ opcode, a.k.a READ request, to user +daemon. It will notify user daemon to fetch data in the requested file ran= ge. +The payload format is like:: + + struct cachefiles_read { + __u64 off; + __u64 len; + __u32 fd; + }; + + * ``off`` identifies the starting offset of the requested file range. + + * ``len`` identifies the length of the requested file range. + + * ``fd`` identifies the anonymous fd of the requested cache file. It is + guaranteed that it shall be the same as the fd field in the previous + OPEN request. + +When receiving one READ request, user daemon needs to fetch data of the +requested file range, and then write the fetched data into cache file with= the +given anonymous fd.
+ +When finished processing the READ request, user daemon needs to reply with +CACHEFILES_IOC_CREAD ioctl on the corresponding anon_fd:: + + ioctl(fd, CACHEFILES_IOC_CREAD, id); + + * ``fd`` is exactly the fd field of the previous READ request. + + * ``id`` is exactly the id field of the previous READ request. --=20 2.27.0 From nobody Fri Jun 19 08:29:54 2026 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 2DE90C433EF for ; Wed, 6 Apr 2022 12:13:39 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S232361AbiDFMPh (ORCPT ); Wed, 6 Apr 2022 08:15:37 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:39798 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S231726AbiDFMOI (ORCPT ); Wed, 6 Apr 2022 08:14:08 -0400 Received: from out30-131.freemail.mail.aliyun.com (out30-131.freemail.mail.aliyun.com [115.124.30.131]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id B45411AEC9F; Wed, 6 Apr 2022 00:56:30 -0700 (PDT) X-Alimail-AntiSpam: AC=PASS;BC=-1|-1;BR=01201311R111e4;CH=green;DM=||false|;DS=||;FP=0|-1|-1|-1|0|-1|-1|-1;HT=e01e04407;MF=jefflexu@linux.alibaba.com;NM=1;PH=DS;RN=18;SR=0;TI=SMTPD_---0V9KyhrA_1649231785; Received: from localhost(mailfrom:jefflexu@linux.alibaba.com fp:SMTPD_---0V9KyhrA_1649231785) by smtp.aliyun-inc.com(127.0.0.1); Wed, 06 Apr 2022 15:56:26 +0800 From: Jeffle Xu To: dhowells@redhat.com, linux-cachefs@redhat.com, xiang@kernel.org, chao@kernel.org, linux-erofs@lists.ozlabs.org Cc: torvalds@linux-foundation.org, gregkh@linuxfoundation.org, willy@infradead.org, linux-fsdevel@vger.kernel.org, joseph.qi@linux.alibaba.com, bo.liu@linux.alibaba.com, tao.peng@linux.alibaba.com, gerry@linux.alibaba.com, eguan@linux.alibaba.com, linux-kernel@vger.kernel.org, 
luodaowen.backend@bytedance.com, tianzichen@kuaishou.com, fannaihao@baidu.com Subject: [PATCH v8 08/20] erofs: make erofs_map_blocks() generally available Date: Wed, 6 Apr 2022 15:56:00 +0800 Message-Id: <20220406075612.60298-9-jefflexu@linux.alibaba.com> X-Mailer: git-send-email 2.27.0 In-Reply-To: <20220406075612.60298-1-jefflexu@linux.alibaba.com> References: <20220406075612.60298-1-jefflexu@linux.alibaba.com> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Type: text/plain; charset="utf-8" ... so that it can be used in the following introduced fscache mode. Signed-off-by: Jeffle Xu Reviewed-by: Gao Xiang Tested-by: Zichen Tian --- fs/erofs/data.c | 4 ++-- fs/erofs/internal.h | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/erofs/data.c b/fs/erofs/data.c index 780db1e5f4b7..bc22642358ec 100644 --- a/fs/erofs/data.c +++ b/fs/erofs/data.c @@ -110,8 +110,8 @@ static int erofs_map_blocks_flatmode(struct inode *inod= e, return 0; } =20 -static int erofs_map_blocks(struct inode *inode, - struct erofs_map_blocks *map, int flags) +int erofs_map_blocks(struct inode *inode, + struct erofs_map_blocks *map, int flags) { struct super_block *sb =3D inode->i_sb; struct erofs_inode *vi =3D EROFS_I(inode); diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h index 5298c4ee277d..fe9564e5091e 100644 --- a/fs/erofs/internal.h +++ b/fs/erofs/internal.h @@ -486,6 +486,8 @@ void *erofs_read_metabuf(struct erofs_buf *buf, struct = super_block *sb, int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *dev); int erofs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, u64 start, u64 len); +int erofs_map_blocks(struct inode *inode, + struct erofs_map_blocks *map, int flags); =20 /* inode.c */ static inline unsigned long erofs_inode_hash(erofs_nid_t nid) --=20 2.27.0 From nobody Fri Jun 19 08:29:54 2026 Return-Path: X-Spam-Checker-Version: 
SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 4FD59C433FE for ; Wed, 6 Apr 2022 12:13:44 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S232376AbiDFMPl (ORCPT ); Wed, 6 Apr 2022 08:15:41 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:36180 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S231766AbiDFMOI (ORCPT ); Wed, 6 Apr 2022 08:14:08 -0400 Received: from out30-43.freemail.mail.aliyun.com (out30-43.freemail.mail.aliyun.com [115.124.30.43]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id C0402425432; Wed, 6 Apr 2022 00:56:32 -0700 (PDT) X-Alimail-AntiSpam: AC=PASS;BC=-1|-1;BR=01201311R191e4;CH=green;DM=||false|;DS=||;FP=0|-1|-1|-1|0|-1|-1|-1;HT=e01e01424;MF=jefflexu@linux.alibaba.com;NM=1;PH=DS;RN=18;SR=0;TI=SMTPD_---0V9L1BLM_1649231786; Received: from localhost(mailfrom:jefflexu@linux.alibaba.com fp:SMTPD_---0V9L1BLM_1649231786) by smtp.aliyun-inc.com(127.0.0.1); Wed, 06 Apr 2022 15:56:27 +0800 From: Jeffle Xu To: dhowells@redhat.com, linux-cachefs@redhat.com, xiang@kernel.org, chao@kernel.org, linux-erofs@lists.ozlabs.org Cc: torvalds@linux-foundation.org, gregkh@linuxfoundation.org, willy@infradead.org, linux-fsdevel@vger.kernel.org, joseph.qi@linux.alibaba.com, bo.liu@linux.alibaba.com, tao.peng@linux.alibaba.com, gerry@linux.alibaba.com, eguan@linux.alibaba.com, linux-kernel@vger.kernel.org, luodaowen.backend@bytedance.com, tianzichen@kuaishou.com, fannaihao@baidu.com Subject: [PATCH v8 09/20] erofs: add mode checking helper Date: Wed, 6 Apr 2022 15:56:01 +0800 Message-Id: <20220406075612.60298-10-jefflexu@linux.alibaba.com> X-Mailer: git-send-email 2.27.0 In-Reply-To: <20220406075612.60298-1-jefflexu@linux.alibaba.com> References: <20220406075612.60298-1-jefflexu@linux.alibaba.com> MIME-Version: 1.0 
Content-Transfer-Encoding: quoted-printable Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Type: text/plain; charset="utf-8" Until then erofs is exactly blockdev based filesystem. A new fscache-based mode is going to be introduced for erofs to support scenarios where on-demand read semantics is needed, e.g. container image distribution. In this case, erofs could be mounted from data blobs through fscache. Add a helper checking which mode erofs works in. Signed-off-by: Jeffle Xu Reviewed-by: Gao Xiang Tested-by: Zichen Tian --- fs/erofs/internal.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h index fe9564e5091e..05a97533b1e9 100644 --- a/fs/erofs/internal.h +++ b/fs/erofs/internal.h @@ -161,6 +161,11 @@ struct erofs_sb_info { #define set_opt(opt, option) ((opt)->mount_opt |=3D EROFS_MOUNT_##option) #define test_opt(opt, option) ((opt)->mount_opt & EROFS_MOUNT_##option) =20 +static inline bool erofs_is_fscache_mode(struct super_block *sb) +{ + return IS_ENABLED(CONFIG_EROFS_FS_ONDEMAND) && !sb->s_bdev; +} + enum { EROFS_ZIP_CACHE_DISABLED, EROFS_ZIP_CACHE_READAHEAD, --=20 2.27.0 From nobody Fri Jun 19 08:29:54 2026 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 8FB19C433F5 for ; Wed, 6 Apr 2022 12:13:56 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S231545AbiDFMPx (ORCPT ); Wed, 6 Apr 2022 08:15:53 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:50690 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S231508AbiDFMOW (ORCPT ); Wed, 6 Apr 2022 08:14:22 -0400 Received: from out199-16.us.a.mail.aliyun.com (out199-16.us.a.mail.aliyun.com [47.90.199.16]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 
2A88D425E01; Wed, 6 Apr 2022 00:56:34 -0700 (PDT) X-Alimail-AntiSpam: AC=PASS;BC=-1|-1;BR=01201311R351e4;CH=green;DM=||false|;DS=||;FP=0|-1|-1|-1|0|-1|-1|-1;HT=e01e04426;MF=jefflexu@linux.alibaba.com;NM=1;PH=DS;RN=18;SR=0;TI=SMTPD_---0V9Ko7Ui_1649231788; Received: from localhost(mailfrom:jefflexu@linux.alibaba.com fp:SMTPD_---0V9Ko7Ui_1649231788) by smtp.aliyun-inc.com(127.0.0.1); Wed, 06 Apr 2022 15:56:29 +0800 From: Jeffle Xu To: dhowells@redhat.com, linux-cachefs@redhat.com, xiang@kernel.org, chao@kernel.org, linux-erofs@lists.ozlabs.org Cc: torvalds@linux-foundation.org, gregkh@linuxfoundation.org, willy@infradead.org, linux-fsdevel@vger.kernel.org, joseph.qi@linux.alibaba.com, bo.liu@linux.alibaba.com, tao.peng@linux.alibaba.com, gerry@linux.alibaba.com, eguan@linux.alibaba.com, linux-kernel@vger.kernel.org, luodaowen.backend@bytedance.com, tianzichen@kuaishou.com, fannaihao@baidu.com Subject: [PATCH v8 10/20] erofs: register fscache volume Date: Wed, 6 Apr 2022 15:56:02 +0800 Message-Id: <20220406075612.60298-11-jefflexu@linux.alibaba.com> X-Mailer: git-send-email 2.27.0 In-Reply-To: <20220406075612.60298-1-jefflexu@linux.alibaba.com> References: <20220406075612.60298-1-jefflexu@linux.alibaba.com> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Type: text/plain; charset="utf-8" A new fscache based mode is going to be introduced for erofs, in which case on-demand read semantics is implemented through fscache. As the first step, register fscache volume for each erofs filesystem. That means, data blobs can not be shared among erofs filesystems. In the following iteration, we are going to introduce the domain semantics, in which case several erofs filesystems can belong to one domain, and data blobs can be shared among these erofs filesystems of one domain. 
Signed-off-by: Jeffle Xu Reviewed-by: Gao Xiang Tested-by: Zichen Tian --- fs/erofs/Kconfig | 10 ++++++++++ fs/erofs/Makefile | 1 + fs/erofs/fscache.c | 37 +++++++++++++++++++++++++++++++++++++ fs/erofs/internal.h | 13 +++++++++++++ fs/erofs/super.c | 7 +++++++ 5 files changed, 68 insertions(+) create mode 100644 fs/erofs/fscache.c diff --git a/fs/erofs/Kconfig b/fs/erofs/Kconfig index f57255ab88ed..3d05265e3e8e 100644 --- a/fs/erofs/Kconfig +++ b/fs/erofs/Kconfig @@ -98,3 +98,13 @@ config EROFS_FS_ZIP_LZMA systems will be readable without selecting this option. =20 If unsure, say N. + +config EROFS_FS_ONDEMAND + bool "EROFS fscache-based ondemand-read" + depends on CACHEFILES_ONDEMAND && (EROFS_FS=3Dm && FSCACHE || EROFS_FS=3D= y && FSCACHE=3Dy) + default n + help + EROFS is mounted from data blobs and on-demand read semantics is + implemented through fscache. + + If unsure, say N. diff --git a/fs/erofs/Makefile b/fs/erofs/Makefile index 8a3317e38e5a..99bbc597a3e9 100644 --- a/fs/erofs/Makefile +++ b/fs/erofs/Makefile @@ -5,3 +5,4 @@ erofs-objs :=3D super.o inode.o data.o namei.o dir.o utils.= o pcpubuf.o sysfs.o erofs-$(CONFIG_EROFS_FS_XATTR) +=3D xattr.o erofs-$(CONFIG_EROFS_FS_ZIP) +=3D decompressor.o zmap.o zdata.o erofs-$(CONFIG_EROFS_FS_ZIP_LZMA) +=3D decompressor_lzma.o +erofs-$(CONFIG_EROFS_FS_ONDEMAND) +=3D fscache.o diff --git a/fs/erofs/fscache.c b/fs/erofs/fscache.c new file mode 100644 index 000000000000..7a6d0239ebb1 --- /dev/null +++ b/fs/erofs/fscache.c @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2022, Alibaba Cloud + */ +#include <linux/fscache.h> +#include "internal.h" + +int erofs_fscache_register_fs(struct super_block *sb) +{ + struct erofs_sb_info *sbi =3D EROFS_SB(sb); + struct fscache_volume *volume; + char *name; + int ret =3D 0; + + name =3D kasprintf(GFP_KERNEL, "erofs,%s", sbi->opt.fsid); + if (!name) + return -ENOMEM; + + volume =3D fscache_acquire_volume(name, NULL, NULL, 0); + if (IS_ERR_OR_NULL(volume)) { + 
erofs_err(sb, "failed to register volume for %s", name); + ret =3D volume ? PTR_ERR(volume) : -EOPNOTSUPP; + volume =3D NULL; + } + + sbi->volume =3D volume; + kfree(name); + return ret; +} + +void erofs_fscache_unregister_fs(struct super_block *sb) +{ + struct erofs_sb_info *sbi =3D EROFS_SB(sb); + + fscache_relinquish_volume(sbi->volume, NULL, false); + sbi->volume =3D NULL; +} diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h index 05a97533b1e9..952a2f483f94 100644 --- a/fs/erofs/internal.h +++ b/fs/erofs/internal.h @@ -74,6 +74,7 @@ struct erofs_mount_opts { unsigned int max_sync_decompress_pages; #endif unsigned int mount_opt; + char *fsid; }; =20 struct erofs_dev_context { @@ -146,6 +147,9 @@ struct erofs_sb_info { /* sysfs support */ struct kobject s_kobj; /* /sys/fs/erofs/ */ struct completion s_kobj_unregister; + + /* fscache support */ + struct fscache_volume *volume; }; =20 #define EROFS_SB(sb) ((struct erofs_sb_info *)(sb)->s_fs_info) @@ -618,6 +622,15 @@ static inline int z_erofs_load_lzma_config(struct supe= r_block *sb, } #endif /* !CONFIG_EROFS_FS_ZIP */ =20 +/* fscache.c */ +#ifdef CONFIG_EROFS_FS_ONDEMAND +int erofs_fscache_register_fs(struct super_block *sb); +void erofs_fscache_unregister_fs(struct super_block *sb); +#else +static inline int erofs_fscache_register_fs(struct super_block *sb) { retu= rn 0; } +static inline void erofs_fscache_unregister_fs(struct super_block *sb) {} +#endif + #define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */ =20 #endif /* __EROFS_INTERNAL_H */ diff --git a/fs/erofs/super.c b/fs/erofs/super.c index 0c4b41130c2f..6590ed1b7d3b 100644 --- a/fs/erofs/super.c +++ b/fs/erofs/super.c @@ -601,6 +601,12 @@ static int erofs_fc_fill_super(struct super_block *sb,= struct fs_context *fc) sbi->devs =3D ctx->devs; ctx->devs =3D NULL; =20 + if (erofs_is_fscache_mode(sb)) { + err =3D erofs_fscache_register_fs(sb); + if (err) + return err; + } + err =3D erofs_read_superblock(sb); if (err) return err; @@ -757,6 +763,7 @@ 
static void erofs_kill_sb(struct super_block *sb) =20 erofs_free_dev_context(sbi->devs); fs_put_dax(sbi->dax_dev); + erofs_fscache_unregister_fs(sb); kfree(sbi); sb->s_fs_info =3D NULL; } --=20 2.27.0 From nobody Fri Jun 19 08:29:54 2026 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 6C536C433FE for ; Wed, 6 Apr 2022 12:13:50 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S232446AbiDFMPq (ORCPT ); Wed, 6 Apr 2022 08:15:46 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:36178 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S231821AbiDFMOJ (ORCPT ); Wed, 6 Apr 2022 08:14:09 -0400 Received: from out30-131.freemail.mail.aliyun.com (out30-131.freemail.mail.aliyun.com [115.124.30.131]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id C9A29425E00; Wed, 6 Apr 2022 00:56:34 -0700 (PDT) X-Alimail-AntiSpam: AC=PASS;BC=-1|-1;BR=01201311R181e4;CH=green;DM=||false|;DS=||;FP=0|-1|-1|-1|0|-1|-1|-1;HT=e01e04400;MF=jefflexu@linux.alibaba.com;NM=1;PH=DS;RN=18;SR=0;TI=SMTPD_---0V9LC86s_1649231790; Received: from localhost(mailfrom:jefflexu@linux.alibaba.com fp:SMTPD_---0V9LC86s_1649231790) by smtp.aliyun-inc.com(127.0.0.1); Wed, 06 Apr 2022 15:56:31 +0800 From: Jeffle Xu To: dhowells@redhat.com, linux-cachefs@redhat.com, xiang@kernel.org, chao@kernel.org, linux-erofs@lists.ozlabs.org Cc: torvalds@linux-foundation.org, gregkh@linuxfoundation.org, willy@infradead.org, linux-fsdevel@vger.kernel.org, joseph.qi@linux.alibaba.com, bo.liu@linux.alibaba.com, tao.peng@linux.alibaba.com, gerry@linux.alibaba.com, eguan@linux.alibaba.com, linux-kernel@vger.kernel.org, luodaowen.backend@bytedance.com, tianzichen@kuaishou.com, fannaihao@baidu.com Subject: [PATCH v8 11/20] erofs: add fscache context helper functions 
Date: Wed, 6 Apr 2022 15:56:03 +0800 Message-Id: <20220406075612.60298-12-jefflexu@linux.alibaba.com> X-Mailer: git-send-email 2.27.0 In-Reply-To: <20220406075612.60298-1-jefflexu@linux.alibaba.com> References: <20220406075612.60298-1-jefflexu@linux.alibaba.com> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Type: text/plain; charset="utf-8" Introduce a context structure for managing data blobs, and helper functions for initializing and cleaning up this context structure. Signed-off-by: Jeffle Xu Reviewed-by: Gao Xiang Tested-by: Zichen Tian --- fs/erofs/fscache.c | 46 +++++++++++++++++++++++++++++++++++++++++++++ fs/erofs/internal.h | 19 +++++++++++++++++++ 2 files changed, 65 insertions(+) diff --git a/fs/erofs/fscache.c b/fs/erofs/fscache.c index 7a6d0239ebb1..67a3c4935245 100644 --- a/fs/erofs/fscache.c +++ b/fs/erofs/fscache.c @@ -5,6 +5,52 @@ #include #include "internal.h" =20 +/* + * Create an fscache context for data blob. + * Return: 0 on success and allocated fscache context is assigned to @fsca= che, + * negative error number on failure. 
+ */ +int erofs_fscache_register_cookie(struct super_block *sb, + struct erofs_fscache **fscache, char *name) +{ + struct fscache_volume *volume =3D EROFS_SB(sb)->volume; + struct erofs_fscache *ctx; + struct fscache_cookie *cookie; + + ctx =3D kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + cookie =3D fscache_acquire_cookie(volume, FSCACHE_ADV_WANT_CACHE_SIZE, + name, strlen(name), NULL, 0, 0); + if (!cookie) { + erofs_err(sb, "failed to get cookie for %s", name); + kfree(name); + return -EINVAL; + } + + fscache_use_cookie(cookie, false); + ctx->cookie =3D cookie; + + *fscache =3D ctx; + return 0; +} + +void erofs_fscache_unregister_cookie(struct erofs_fscache **fscache) +{ + struct erofs_fscache *ctx =3D *fscache; + + if (!ctx) + return; + + fscache_unuse_cookie(ctx->cookie, NULL, NULL); + fscache_relinquish_cookie(ctx->cookie, false); + ctx->cookie =3D NULL; + + kfree(ctx); + *fscache =3D NULL; +} + int erofs_fscache_register_fs(struct super_block *sb) { struct erofs_sb_info *sbi =3D EROFS_SB(sb); diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h index 952a2f483f94..c6a3351a4d7d 100644 --- a/fs/erofs/internal.h +++ b/fs/erofs/internal.h @@ -97,6 +97,10 @@ struct erofs_sb_lz4_info { u16 max_pclusterblks; }; =20 +struct erofs_fscache { + struct fscache_cookie *cookie; +}; + struct erofs_sb_info { struct erofs_mount_opts opt; /* options */ #ifdef CONFIG_EROFS_FS_ZIP @@ -626,9 +630,24 @@ static inline int z_erofs_load_lzma_config(struct supe= r_block *sb, #ifdef CONFIG_EROFS_FS_ONDEMAND int erofs_fscache_register_fs(struct super_block *sb); void erofs_fscache_unregister_fs(struct super_block *sb); + +int erofs_fscache_register_cookie(struct super_block *sb, + struct erofs_fscache **fscache, char *name); +void erofs_fscache_unregister_cookie(struct erofs_fscache **fscache); #else static inline int erofs_fscache_register_fs(struct super_block *sb) { retu= rn 0; } static inline void erofs_fscache_unregister_fs(struct super_block *sb) {} + 
+static inline int erofs_fscache_register_cookie(struct super_block *sb, + struct erofs_fscache **fscache, + char *name) +{ + return -EOPNOTSUPP; +} + +static inline void erofs_fscache_unregister_cookie(struct erofs_fscache **= fscache) +{ +} #endif =20 #define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */ --=20 2.27.0 From nobody Fri Jun 19 08:29:54 2026 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 1C1A3C433EF for ; Wed, 6 Apr 2022 12:14:05 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S232511AbiDFMQD (ORCPT ); Wed, 6 Apr 2022 08:16:03 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:40102 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S232096AbiDFMOX (ORCPT ); Wed, 6 Apr 2022 08:14:23 -0400 Received: from out30-132.freemail.mail.aliyun.com (out30-132.freemail.mail.aliyun.com [115.124.30.132]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id D62893713DE; Wed, 6 Apr 2022 00:56:36 -0700 (PDT) X-Alimail-AntiSpam: AC=PASS;BC=-1|-1;BR=01201311R671e4;CH=green;DM=||false|;DS=||;FP=0|-1|-1|-1|0|-1|-1|-1;HT=e01e04426;MF=jefflexu@linux.alibaba.com;NM=1;PH=DS;RN=18;SR=0;TI=SMTPD_---0V9L3PCZ_1649231791; Received: from localhost(mailfrom:jefflexu@linux.alibaba.com fp:SMTPD_---0V9L3PCZ_1649231791) by smtp.aliyun-inc.com(127.0.0.1); Wed, 06 Apr 2022 15:56:32 +0800 From: Jeffle Xu To: dhowells@redhat.com, linux-cachefs@redhat.com, xiang@kernel.org, chao@kernel.org, linux-erofs@lists.ozlabs.org Cc: torvalds@linux-foundation.org, gregkh@linuxfoundation.org, willy@infradead.org, linux-fsdevel@vger.kernel.org, joseph.qi@linux.alibaba.com, bo.liu@linux.alibaba.com, tao.peng@linux.alibaba.com, gerry@linux.alibaba.com, eguan@linux.alibaba.com, linux-kernel@vger.kernel.org, 
luodaowen.backend@bytedance.com, tianzichen@kuaishou.com, fannaihao@baidu.com Subject: [PATCH v8 12/20] erofs: add anonymous inode managing page cache for data blob Date: Wed, 6 Apr 2022 15:56:04 +0800 Message-Id: <20220406075612.60298-13-jefflexu@linux.alibaba.com> X-Mailer: git-send-email 2.27.0 In-Reply-To: <20220406075612.60298-1-jefflexu@linux.alibaba.com> References: <20220406075612.60298-1-jefflexu@linux.alibaba.com> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Type: text/plain; charset="utf-8" Introduce one anonymous inode managing page cache for data blob. Then erofs could read directly from the address space of the anonymous inode when cache hit. Signed-off-by: Jeffle Xu Reviewed-by: Gao Xiang Tested-by: Zichen Tian --- fs/erofs/fscache.c | 39 ++++++++++++++++++++++++++++++++++++--- fs/erofs/internal.h | 6 ++++-- 2 files changed, 40 insertions(+), 5 deletions(-) diff --git a/fs/erofs/fscache.c b/fs/erofs/fscache.c index 67a3c4935245..1c88614203d2 100644 --- a/fs/erofs/fscache.c +++ b/fs/erofs/fscache.c @@ -5,17 +5,22 @@ #include #include "internal.h" =20 +static const struct address_space_operations erofs_fscache_meta_aops =3D { +}; + /* * Create an fscache context for data blob. * Return: 0 on success and allocated fscache context is assigned to @fsca= che, * negative error number on failure. 
*/ int erofs_fscache_register_cookie(struct super_block *sb, - struct erofs_fscache **fscache, char *name) + struct erofs_fscache **fscache, + char *name, bool need_inode) { struct fscache_volume *volume =3D EROFS_SB(sb)->volume; struct erofs_fscache *ctx; struct fscache_cookie *cookie; + int ret; =20 ctx =3D kzalloc(sizeof(*ctx), GFP_KERNEL); if (!ctx) @@ -25,15 +30,40 @@ int erofs_fscache_register_cookie(struct super_block *s= b, name, strlen(name), NULL, 0, 0); if (!cookie) { erofs_err(sb, "failed to get cookie for %s", name); - kfree(name); - return -EINVAL; + ret =3D -EINVAL; + goto err; } =20 fscache_use_cookie(cookie, false); ctx->cookie =3D cookie; =20 + if (need_inode) { + struct inode *const inode =3D new_inode(sb); + + if (!inode) { + erofs_err(sb, "failed to get anon inode for %s", name); + ret =3D -ENOMEM; + goto err_cookie; + } + + set_nlink(inode, 1); + inode->i_size =3D OFFSET_MAX; + inode->i_mapping->a_ops =3D &erofs_fscache_meta_aops; + mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS); + + ctx->inode =3D inode; + } + *fscache =3D ctx; return 0; + +err_cookie: + fscache_unuse_cookie(ctx->cookie, NULL, NULL); + fscache_relinquish_cookie(ctx->cookie, false); + ctx->cookie =3D NULL; +err: + kfree(ctx); + return ret; } =20 void erofs_fscache_unregister_cookie(struct erofs_fscache **fscache) @@ -47,6 +77,9 @@ void erofs_fscache_unregister_cookie(struct erofs_fscache= **fscache) fscache_relinquish_cookie(ctx->cookie, false); ctx->cookie =3D NULL; =20 + iput(ctx->inode); + ctx->inode =3D NULL; + kfree(ctx); *fscache =3D NULL; } diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h index c6a3351a4d7d..3a4a344cfed3 100644 --- a/fs/erofs/internal.h +++ b/fs/erofs/internal.h @@ -99,6 +99,7 @@ struct erofs_sb_lz4_info { =20 struct erofs_fscache { struct fscache_cookie *cookie; + struct inode *inode; }; =20 struct erofs_sb_info { @@ -632,7 +633,8 @@ int erofs_fscache_register_fs(struct super_block *sb); void erofs_fscache_unregister_fs(struct super_block *sb); 
=20 int erofs_fscache_register_cookie(struct super_block *sb, - struct erofs_fscache **fscache, char *name); + struct erofs_fscache **fscache, + char *name, bool need_inode); void erofs_fscache_unregister_cookie(struct erofs_fscache **fscache); #else static inline int erofs_fscache_register_fs(struct super_block *sb) { retu= rn 0; } @@ -640,7 +642,7 @@ static inline void erofs_fscache_unregister_fs(struct s= uper_block *sb) {} =20 static inline int erofs_fscache_register_cookie(struct super_block *sb, struct erofs_fscache **fscache, - char *name) + char *name, bool need_inode) { return -EOPNOTSUPP; } --=20 2.27.0 From nobody Fri Jun 19 08:29:54 2026 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 741A2C433F5 for ; Wed, 6 Apr 2022 12:10:21 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S231251AbiDFMMP (ORCPT ); Wed, 6 Apr 2022 08:12:15 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:48134 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S230015AbiDFMLw (ORCPT ); Wed, 6 Apr 2022 08:11:52 -0400 Received: from out30-56.freemail.mail.aliyun.com (out30-56.freemail.mail.aliyun.com [115.124.30.56]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id ED063425E30; Wed, 6 Apr 2022 00:56:38 -0700 (PDT) X-Alimail-AntiSpam: AC=PASS;BC=-1|-1;BR=01201311R591e4;CH=green;DM=||false|;DS=||;FP=0|-1|-1|-1|0|-1|-1|-1;HT=e01e01424;MF=jefflexu@linux.alibaba.com;NM=1;PH=DS;RN=18;SR=0;TI=SMTPD_---0V9Ko7Vp_1649231793; Received: from localhost(mailfrom:jefflexu@linux.alibaba.com fp:SMTPD_---0V9Ko7Vp_1649231793) by smtp.aliyun-inc.com(127.0.0.1); Wed, 06 Apr 2022 15:56:34 +0800 From: Jeffle Xu To: dhowells@redhat.com, linux-cachefs@redhat.com, xiang@kernel.org, chao@kernel.org, linux-erofs@lists.ozlabs.org Cc: 
torvalds@linux-foundation.org, gregkh@linuxfoundation.org, willy@infradead.org, linux-fsdevel@vger.kernel.org, joseph.qi@linux.alibaba.com, bo.liu@linux.alibaba.com, tao.peng@linux.alibaba.com, gerry@linux.alibaba.com, eguan@linux.alibaba.com, linux-kernel@vger.kernel.org, luodaowen.backend@bytedance.com, tianzichen@kuaishou.com, fannaihao@baidu.com Subject: [PATCH v8 13/20] erofs: add erofs_fscache_read_folios() helper Date: Wed, 6 Apr 2022 15:56:05 +0800 Message-Id: <20220406075612.60298-14-jefflexu@linux.alibaba.com> X-Mailer: git-send-email 2.27.0 In-Reply-To: <20220406075612.60298-1-jefflexu@linux.alibaba.com> References: <20220406075612.60298-1-jefflexu@linux.alibaba.com> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Type: text/plain; charset="utf-8" Add erofs_fscache_read_folios() helper reading from fscache. It supports on-demand read semantics. That is, it will make the backend prepare for the data when cache miss. Once data ready, it will reinitiate a read from the cache. This helper can then be used to implement .readpage()/.readahead() of on-demand read semantics. Signed-off-by: Jeffle Xu Reviewed-by: Gao Xiang Tested-by: Zichen Tian --- fs/erofs/fscache.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/fs/erofs/fscache.c b/fs/erofs/fscache.c index 1c88614203d2..d38a6efc8e50 100644 --- a/fs/erofs/fscache.c +++ b/fs/erofs/fscache.c @@ -5,6 +5,35 @@ #include #include "internal.h" =20 +/* + * Read data from fscache and fill the read data into page cache described= by + * @start/len, which shall be both aligned with PAGE_SIZE. @pstart describ= es + * the start physical address in the cache file. 
+ */ +static int erofs_fscache_read_folios(struct fscache_cookie *cookie, + struct address_space *mapping, + loff_t start, size_t len, + loff_t pstart) +{ + struct netfs_cache_resources cres; + struct iov_iter iter; + int ret; + + memset(&cres, 0, sizeof(cres)); + + ret =3D fscache_begin_read_operation(&cres, cookie); + if (ret) + return ret; + + iov_iter_xarray(&iter, READ, &mapping->i_pages, start, len); + + ret =3D fscache_read(&cres, pstart, &iter, + NETFS_READ_HOLE_ONDEMAND, NULL, NULL); + + fscache_end_operation(&cres); + return ret; +} + static const struct address_space_operations erofs_fscache_meta_aops =3D { }; =20 --=20 2.27.0 From nobody Fri Jun 19 08:29:54 2026 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 72D77C433F5 for ; Wed, 6 Apr 2022 12:14:27 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S232316AbiDFMQZ (ORCPT ); Wed, 6 Apr 2022 08:16:25 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:35658 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S229935AbiDFMOb (ORCPT ); Wed, 6 Apr 2022 08:14:31 -0400 Received: from out30-132.freemail.mail.aliyun.com (out30-132.freemail.mail.aliyun.com [115.124.30.132]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id B80F61480CF; Wed, 6 Apr 2022 00:56:39 -0700 (PDT) X-Alimail-AntiSpam: AC=PASS;BC=-1|-1;BR=01201311R111e4;CH=green;DM=||false|;DS=||;FP=0|-1|-1|-1|0|-1|-1|-1;HT=e01e04426;MF=jefflexu@linux.alibaba.com;NM=1;PH=DS;RN=18;SR=0;TI=SMTPD_---0V9LC885_1649231794; Received: from localhost(mailfrom:jefflexu@linux.alibaba.com fp:SMTPD_---0V9LC885_1649231794) by smtp.aliyun-inc.com(127.0.0.1); Wed, 06 Apr 2022 15:56:35 +0800 From: Jeffle Xu To: dhowells@redhat.com, linux-cachefs@redhat.com, xiang@kernel.org, chao@kernel.org, 
linux-erofs@lists.ozlabs.org Cc: torvalds@linux-foundation.org, gregkh@linuxfoundation.org, willy@infradead.org, linux-fsdevel@vger.kernel.org, joseph.qi@linux.alibaba.com, bo.liu@linux.alibaba.com, tao.peng@linux.alibaba.com, gerry@linux.alibaba.com, eguan@linux.alibaba.com, linux-kernel@vger.kernel.org, luodaowen.backend@bytedance.com, tianzichen@kuaishou.com, fannaihao@baidu.com Subject: [PATCH v8 14/20] erofs: register fscache context for primary data blob Date: Wed, 6 Apr 2022 15:56:06 +0800 Message-Id: <20220406075612.60298-15-jefflexu@linux.alibaba.com> X-Mailer: git-send-email 2.27.0 In-Reply-To: <20220406075612.60298-1-jefflexu@linux.alibaba.com> References: <20220406075612.60298-1-jefflexu@linux.alibaba.com> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Type: text/plain; charset="utf-8" Registers fscache context for primary data blob. Also move the initialization of s_op and related fields forward, since anonymous inode will be allocated under the super block when registering the fscache context. Something worth mentioning about the cleanup routine. 1. The fscache context will instantiate anonymous inodes under the super block. Release these anonymous inodes when .put_super() is called, or we'll get "VFS: Busy inodes after unmount." warning. 2. The fscache context is initialized prior to the root inode. If .kill_sb() is called when mount failed, .put_super() won't be called when root inode has not been initialized yet. Thus .kill_sb() shall also contain the cleanup routine. 
Signed-off-by: Jeffle Xu Reviewed-by: Gao Xiang Tested-by: Zichen Tian --- fs/erofs/internal.h | 1 + fs/erofs/super.c | 15 +++++++++++---- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h index 3a4a344cfed3..eb37b33bce37 100644 --- a/fs/erofs/internal.h +++ b/fs/erofs/internal.h @@ -155,6 +155,7 @@ struct erofs_sb_info { =20 /* fscache support */ struct fscache_volume *volume; + struct erofs_fscache *s_fscache; }; =20 #define EROFS_SB(sb) ((struct erofs_sb_info *)(sb)->s_fs_info) diff --git a/fs/erofs/super.c b/fs/erofs/super.c index 6590ed1b7d3b..9498b899b73b 100644 --- a/fs/erofs/super.c +++ b/fs/erofs/super.c @@ -585,6 +585,9 @@ static int erofs_fc_fill_super(struct super_block *sb, = struct fs_context *fc) int err; =20 sb->s_magic =3D EROFS_SUPER_MAGIC; + sb->s_flags |=3D SB_RDONLY | SB_NOATIME; + sb->s_maxbytes =3D MAX_LFS_FILESIZE; + sb->s_op =3D &erofs_sops; =20 if (!sb_set_blocksize(sb, EROFS_BLKSIZ)) { erofs_err(sb, "failed to set erofs blksize"); @@ -605,6 +608,11 @@ static int erofs_fc_fill_super(struct super_block *sb,= struct fs_context *fc) err =3D erofs_fscache_register_fs(sb); if (err) return err; + + err =3D erofs_fscache_register_cookie(sb, &sbi->s_fscache, + sbi->opt.fsid, true); + if (err) + return err; } =20 err =3D erofs_read_superblock(sb); @@ -619,11 +627,8 @@ static int erofs_fc_fill_super(struct super_block *sb,= struct fs_context *fc) clear_opt(&sbi->opt, DAX_ALWAYS); } } - sb->s_flags |=3D SB_RDONLY | SB_NOATIME; - sb->s_maxbytes =3D MAX_LFS_FILESIZE; - sb->s_time_gran =3D 1; =20 - sb->s_op =3D &erofs_sops; + sb->s_time_gran =3D 1; sb->s_xattr =3D erofs_xattr_handlers; =20 if (test_opt(&sbi->opt, POSIX_ACL)) @@ -763,6 +768,7 @@ static void erofs_kill_sb(struct super_block *sb) =20 erofs_free_dev_context(sbi->devs); fs_put_dax(sbi->dax_dev); + erofs_fscache_unregister_cookie(&sbi->s_fscache); erofs_fscache_unregister_fs(sb); kfree(sbi); sb->s_fs_info =3D NULL; @@ -781,6 +787,7 @@ 
static void erofs_put_super(struct super_block *sb) iput(sbi->managed_cache); sbi->managed_cache =3D NULL; #endif + erofs_fscache_unregister_cookie(&sbi->s_fscache); } =20 static struct file_system_type erofs_fs_type =3D { --=20 2.27.0 From nobody Fri Jun 19 08:29:54 2026 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 3C4B4C433F5 for ; Wed, 6 Apr 2022 12:11:26 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S230099AbiDFMNX (ORCPT ); Wed, 6 Apr 2022 08:13:23 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:57486 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S230354AbiDFMLx (ORCPT ); Wed, 6 Apr 2022 08:11:53 -0400 Received: from out30-54.freemail.mail.aliyun.com (out30-54.freemail.mail.aliyun.com [115.124.30.54]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id CF216432D4C; Wed, 6 Apr 2022 00:56:41 -0700 (PDT) X-Alimail-AntiSpam: AC=PASS;BC=-1|-1;BR=01201311R201e4;CH=green;DM=||false|;DS=||;FP=0|-1|-1|-1|0|-1|-1|-1;HT=e01e04423;MF=jefflexu@linux.alibaba.com;NM=1;PH=DS;RN=18;SR=0;TI=SMTPD_---0V9LC88e_1649231796; Received: from localhost(mailfrom:jefflexu@linux.alibaba.com fp:SMTPD_---0V9LC88e_1649231796) by smtp.aliyun-inc.com(127.0.0.1); Wed, 06 Apr 2022 15:56:37 +0800 From: Jeffle Xu To: dhowells@redhat.com, linux-cachefs@redhat.com, xiang@kernel.org, chao@kernel.org, linux-erofs@lists.ozlabs.org Cc: torvalds@linux-foundation.org, gregkh@linuxfoundation.org, willy@infradead.org, linux-fsdevel@vger.kernel.org, joseph.qi@linux.alibaba.com, bo.liu@linux.alibaba.com, tao.peng@linux.alibaba.com, gerry@linux.alibaba.com, eguan@linux.alibaba.com, linux-kernel@vger.kernel.org, luodaowen.backend@bytedance.com, tianzichen@kuaishou.com, fannaihao@baidu.com Subject: [PATCH v8 15/20] erofs: register 
fscache context for extra data blobs Date: Wed, 6 Apr 2022 15:56:07 +0800 Message-Id: <20220406075612.60298-16-jefflexu@linux.alibaba.com> X-Mailer: git-send-email 2.27.0 In-Reply-To: <20220406075612.60298-1-jefflexu@linux.alibaba.com> References: <20220406075612.60298-1-jefflexu@linux.alibaba.com> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Type: text/plain; charset="utf-8" Similar to the multi device mode, erofs could be mounted from one primary data blob (mandatory) and multiple extra data blobs (optional). Register fscache context for each extra data blob. Signed-off-by: Jeffle Xu Reviewed-by: Gao Xiang Tested-by: Zichen Tian --- fs/erofs/data.c | 3 +++ fs/erofs/internal.h | 2 ++ fs/erofs/super.c | 25 +++++++++++++++++-------- 3 files changed, 22 insertions(+), 8 deletions(-) diff --git a/fs/erofs/data.c b/fs/erofs/data.c index bc22642358ec..14b64d960541 100644 --- a/fs/erofs/data.c +++ b/fs/erofs/data.c @@ -199,6 +199,7 @@ int erofs_map_dev(struct super_block *sb, struct erofs_= map_dev *map) map->m_bdev =3D sb->s_bdev; map->m_daxdev =3D EROFS_SB(sb)->dax_dev; map->m_dax_part_off =3D EROFS_SB(sb)->dax_part_off; + map->m_fscache =3D EROFS_SB(sb)->s_fscache; =20 if (map->m_deviceid) { down_read(&devs->rwsem); @@ -210,6 +211,7 @@ int erofs_map_dev(struct super_block *sb, struct erofs_= map_dev *map) map->m_bdev =3D dif->bdev; map->m_daxdev =3D dif->dax_dev; map->m_dax_part_off =3D dif->dax_part_off; + map->m_fscache =3D dif->fscache; up_read(&devs->rwsem); } else if (devs->extra_devices) { down_read(&devs->rwsem); @@ -227,6 +229,7 @@ int erofs_map_dev(struct super_block *sb, struct erofs_= map_dev *map) map->m_bdev =3D dif->bdev; map->m_daxdev =3D dif->dax_dev; map->m_dax_part_off =3D dif->dax_part_off; + map->m_fscache =3D dif->fscache; break; } } diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h index eb37b33bce37..90f7d6286a4f 100644 --- a/fs/erofs/internal.h 
+++ b/fs/erofs/internal.h @@ -49,6 +49,7 @@ typedef u32 erofs_blk_t; =20 struct erofs_device_info { char *path; + struct erofs_fscache *fscache; struct block_device *bdev; struct dax_device *dax_dev; u64 dax_part_off; @@ -482,6 +483,7 @@ static inline int z_erofs_map_blocks_iter(struct inode = *inode, #endif /* !CONFIG_EROFS_FS_ZIP */ =20 struct erofs_map_dev { + struct erofs_fscache *m_fscache; struct block_device *m_bdev; struct dax_device *m_daxdev; u64 m_dax_part_off; diff --git a/fs/erofs/super.c b/fs/erofs/super.c index 9498b899b73b..8c7181cd37e6 100644 --- a/fs/erofs/super.c +++ b/fs/erofs/super.c @@ -259,15 +259,23 @@ static int erofs_init_devices(struct super_block *sb, } dis =3D ptr + erofs_blkoff(pos); =20 - bdev =3D blkdev_get_by_path(dif->path, - FMODE_READ | FMODE_EXCL, - sb->s_type); - if (IS_ERR(bdev)) { - err =3D PTR_ERR(bdev); - break; + if (erofs_is_fscache_mode(sb)) { + err =3D erofs_fscache_register_cookie(sb, &dif->fscache, + dif->path, false); + if (err) + break; + } else { + bdev =3D blkdev_get_by_path(dif->path, + FMODE_READ | FMODE_EXCL, + sb->s_type); + if (IS_ERR(bdev)) { + err =3D PTR_ERR(bdev); + break; + } + dif->bdev =3D bdev; + dif->dax_dev =3D fs_dax_get_by_bdev(bdev, &dif->dax_part_off); } - dif->bdev =3D bdev; - dif->dax_dev =3D fs_dax_get_by_bdev(bdev, &dif->dax_part_off); + dif->blocks =3D le32_to_cpu(dis->blocks); dif->mapped_blkaddr =3D le32_to_cpu(dis->mapped_blkaddr); sbi->total_blocks +=3D dif->blocks; @@ -701,6 +709,7 @@ static int erofs_release_device_info(int id, void *ptr,= void *data) fs_put_dax(dif->dax_dev); if (dif->bdev) blkdev_put(dif->bdev, FMODE_READ | FMODE_EXCL); + erofs_fscache_unregister_cookie(&dif->fscache); kfree(dif->path); kfree(dif); return 0; --=20 2.27.0 From nobody Fri Jun 19 08:29:54 2026 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org 
(Postfix) with ESMTP id 5FB8DC433F5 for ; Wed, 6 Apr 2022 12:11:17 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S230433AbiDFMM3 (ORCPT ); Wed, 6 Apr 2022 08:12:29 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:57488 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S230509AbiDFMLx (ORCPT ); Wed, 6 Apr 2022 08:11:53 -0400 Received: from out30-133.freemail.mail.aliyun.com (out30-133.freemail.mail.aliyun.com [115.124.30.133]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id AA816432D4D; Wed, 6 Apr 2022 00:56:42 -0700 (PDT) X-Alimail-AntiSpam: AC=PASS;BC=-1|-1;BR=01201311R131e4;CH=green;DM=||false|;DS=||;FP=0|-1|-1|-1|0|-1|-1|-1;HT=e01e04357;MF=jefflexu@linux.alibaba.com;NM=1;PH=DS;RN=18;SR=0;TI=SMTPD_---0V9L3PE9_1649231797; Received: from localhost(mailfrom:jefflexu@linux.alibaba.com fp:SMTPD_---0V9L3PE9_1649231797) by smtp.aliyun-inc.com(127.0.0.1); Wed, 06 Apr 2022 15:56:38 +0800 From: Jeffle Xu To: dhowells@redhat.com, linux-cachefs@redhat.com, xiang@kernel.org, chao@kernel.org, linux-erofs@lists.ozlabs.org Cc: torvalds@linux-foundation.org, gregkh@linuxfoundation.org, willy@infradead.org, linux-fsdevel@vger.kernel.org, joseph.qi@linux.alibaba.com, bo.liu@linux.alibaba.com, tao.peng@linux.alibaba.com, gerry@linux.alibaba.com, eguan@linux.alibaba.com, linux-kernel@vger.kernel.org, luodaowen.backend@bytedance.com, tianzichen@kuaishou.com, fannaihao@baidu.com Subject: [PATCH v8 16/20] erofs: implement fscache-based metadata read Date: Wed, 6 Apr 2022 15:56:08 +0800 Message-Id: <20220406075612.60298-17-jefflexu@linux.alibaba.com> X-Mailer: git-send-email 2.27.0 In-Reply-To: <20220406075612.60298-1-jefflexu@linux.alibaba.com> References: <20220406075612.60298-1-jefflexu@linux.alibaba.com> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Type: text/plain; charset="utf-8" 
Implement the data plane of reading metadata from primary data blob over fscache. Signed-off-by: Jeffle Xu Tested-by: Zichen Tian --- fs/erofs/data.c | 20 ++++++++++++++++++-- fs/erofs/fscache.c | 38 ++++++++++++++++++++++++++++++++++++++ fs/erofs/internal.h | 9 +++++++++ 3 files changed, 65 insertions(+), 2 deletions(-) diff --git a/fs/erofs/data.c b/fs/erofs/data.c index 14b64d960541..cb8fe299ad67 100644 --- a/fs/erofs/data.c +++ b/fs/erofs/data.c @@ -31,15 +31,26 @@ void erofs_put_metabuf(struct erofs_buf *buf) void *erofs_bread(struct erofs_buf *buf, struct inode *inode, erofs_blk_t blkaddr, enum erofs_kmap_type type) { - struct address_space *const mapping =3D inode->i_mapping; erofs_off_t offset =3D blknr_to_addr(blkaddr); pgoff_t index =3D offset >> PAGE_SHIFT; struct page *page =3D buf->page; =20 if (!page || page->index !=3D index) { erofs_put_metabuf(buf); - page =3D read_cache_page_gfp(mapping, index, + if (buf->sb) { + struct folio *folio; + + folio =3D erofs_fscache_get_folio(buf->sb, index); + if (IS_ERR(folio)) + page =3D ERR_CAST(folio); + else + page =3D folio_page(folio, 0); + } else { + struct address_space *const mapping =3D inode->i_mapping; + + page =3D read_cache_page_gfp(mapping, index, mapping_gfp_constraint(mapping, ~__GFP_FS)); + } if (IS_ERR(page)) return page; /* should already be PageUptodate, no need to lock page */ @@ -63,6 +74,11 @@ void *erofs_bread(struct erofs_buf *buf, struct inode *i= node, void *erofs_read_metabuf(struct erofs_buf *buf, struct super_block *sb, erofs_blk_t blkaddr, enum erofs_kmap_type type) { + if (erofs_is_fscache_mode(sb)) { + buf->sb =3D sb; + return erofs_bread(buf, NULL, blkaddr, type); + } + return erofs_bread(buf, sb->s_bdev->bd_inode, blkaddr, type); } =20 diff --git a/fs/erofs/fscache.c b/fs/erofs/fscache.c index d38a6efc8e50..158cc273f8fb 100644 --- a/fs/erofs/fscache.c +++ b/fs/erofs/fscache.c @@ -34,9 +34,47 @@ static int erofs_fscache_read_folios(struct fscache_cook= ie *cookie, return ret; } =20 
+static int erofs_fscache_meta_readpage(struct file *data, struct page *pag= e) +{ + int ret; + struct super_block *sb =3D (struct super_block *)data; + struct folio *folio =3D page_folio(page); + struct erofs_map_dev mdev =3D { + .m_deviceid =3D 0, + .m_pa =3D folio_pos(folio), + }; + + ret =3D erofs_map_dev(sb, &mdev); + if (ret) + goto out; + + ret =3D erofs_fscache_read_folios(mdev.m_fscache->cookie, + folio_file_mapping(folio), folio_pos(folio), + folio_size(folio), mdev.m_pa); + if (ret) + goto out; + + folio_mark_uptodate(folio); +out: + folio_unlock(folio); + return ret; +} + static const struct address_space_operations erofs_fscache_meta_aops =3D { + .readpage =3D erofs_fscache_meta_readpage, }; =20 +/* + * Get the page cache of data blob at the index offset. + * Return: up to date page on success, ERR_PTR() on failure. + */ +struct folio *erofs_fscache_get_folio(struct super_block *sb, pgoff_t inde= x) +{ + struct erofs_fscache *ctx =3D EROFS_SB(sb)->s_fscache; + + return read_mapping_folio(ctx->inode->i_mapping, index, (void *)sb); +} + /* * Create an fscache context for data blob. 
* Return: 0 on success and allocated fscache context is assigned to @fsca= che, diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h index 90f7d6286a4f..e186051f0640 100644 --- a/fs/erofs/internal.h +++ b/fs/erofs/internal.h @@ -276,6 +276,7 @@ enum erofs_kmap_type { }; =20 struct erofs_buf { + struct super_block *sb; struct page *page; void *base; enum erofs_kmap_type kmap_type; @@ -639,6 +640,8 @@ int erofs_fscache_register_cookie(struct super_block *s= b, struct erofs_fscache **fscache, char *name, bool need_inode); void erofs_fscache_unregister_cookie(struct erofs_fscache **fscache); + +struct folio *erofs_fscache_get_folio(struct super_block *sb, pgoff_t inde= x); #else static inline int erofs_fscache_register_fs(struct super_block *sb) { retu= rn 0; } static inline void erofs_fscache_unregister_fs(struct super_block *sb) {} @@ -653,6 +656,12 @@ static inline int erofs_fscache_register_cookie(struct= super_block *sb, static inline void erofs_fscache_unregister_cookie(struct erofs_fscache **= fscache) { } + +static inline struct folio *erofs_fscache_get_folio(struct super_block *sb, + pgoff_t index) +{ + return ERR_PTR(-EOPNOTSUPP); +} #endif =20 #define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */ --=20 2.27.0 From nobody Fri Jun 19 08:29:54 2026 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id CBBB1C433EF for ; Wed, 6 Apr 2022 12:14:21 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S230315AbiDFMQT (ORCPT ); Wed, 6 Apr 2022 08:16:19 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:36272 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S229951AbiDFMOj (ORCPT ); Wed, 6 Apr 2022 08:14:39 -0400 Received: from out30-57.freemail.mail.aliyun.com (out30-57.freemail.mail.aliyun.com 
[115.124.30.57]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id A4632370667; Wed, 6 Apr 2022 00:56:44 -0700 (PDT) X-Alimail-AntiSpam: AC=PASS;BC=-1|-1;BR=01201311R161e4;CH=green;DM=||false|;DS=||;FP=0|-1|-1|-1|0|-1|-1|-1;HT=e01e04395;MF=jefflexu@linux.alibaba.com;NM=1;PH=DS;RN=18;SR=0;TI=SMTPD_---0V9KyhvE_1649231799; Received: from localhost(mailfrom:jefflexu@linux.alibaba.com fp:SMTPD_---0V9KyhvE_1649231799) by smtp.aliyun-inc.com(127.0.0.1); Wed, 06 Apr 2022 15:56:40 +0800 From: Jeffle Xu To: dhowells@redhat.com, linux-cachefs@redhat.com, xiang@kernel.org, chao@kernel.org, linux-erofs@lists.ozlabs.org Cc: torvalds@linux-foundation.org, gregkh@linuxfoundation.org, willy@infradead.org, linux-fsdevel@vger.kernel.org, joseph.qi@linux.alibaba.com, bo.liu@linux.alibaba.com, tao.peng@linux.alibaba.com, gerry@linux.alibaba.com, eguan@linux.alibaba.com, linux-kernel@vger.kernel.org, luodaowen.backend@bytedance.com, tianzichen@kuaishou.com, fannaihao@baidu.com Subject: [PATCH v8 17/20] erofs: implement fscache-based data read for non-inline layout Date: Wed, 6 Apr 2022 15:56:09 +0800 Message-Id: <20220406075612.60298-18-jefflexu@linux.alibaba.com> X-Mailer: git-send-email 2.27.0 In-Reply-To: <20220406075612.60298-1-jefflexu@linux.alibaba.com> References: <20220406075612.60298-1-jefflexu@linux.alibaba.com> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Type: text/plain; charset="utf-8" Implement the data plane of reading data from data blobs over fscache for non-inline layout. 
Signed-off-by: Jeffle Xu Reviewed-by: Gao Xiang Tested-by: Zichen Tian --- fs/erofs/fscache.c | 52 +++++++++++++++++++++++++++++++++++++++++++++ fs/erofs/inode.c | 5 +++++ fs/erofs/internal.h | 2 ++ 3 files changed, 59 insertions(+) diff --git a/fs/erofs/fscache.c b/fs/erofs/fscache.c index 158cc273f8fb..65de1c754e80 100644 --- a/fs/erofs/fscache.c +++ b/fs/erofs/fscache.c @@ -60,10 +60,62 @@ static int erofs_fscache_meta_readpage(struct file *dat= a, struct page *page) return ret; } =20 +static int erofs_fscache_readpage(struct file *file, struct page *page) +{ + struct folio *folio =3D page_folio(page); + struct inode *inode =3D folio_file_mapping(folio)->host; + struct super_block *sb =3D inode->i_sb; + struct erofs_map_blocks map; + struct erofs_map_dev mdev; + erofs_off_t pos; + loff_t pstart; + int ret =3D 0; + + DBG_BUGON(folio_size(folio) !=3D EROFS_BLKSIZ); + + pos =3D folio_pos(folio); + map.m_la =3D pos; + + ret =3D erofs_map_blocks(inode, &map, EROFS_GET_BLOCKS_RAW); + if (ret) + goto out_unlock; + + if (!(map.m_flags & EROFS_MAP_MAPPED)) { + folio_zero_range(folio, 0, folio_size(folio)); + goto out_uptodate; + } + + /* no-inline readpage */ + mdev =3D (struct erofs_map_dev) { + .m_deviceid =3D map.m_deviceid, + .m_pa =3D map.m_pa, + }; + + ret =3D erofs_map_dev(sb, &mdev); + if (ret) + goto out_unlock; + + pstart =3D mdev.m_pa + (pos - map.m_la); + ret =3D erofs_fscache_read_folios(mdev.m_fscache->cookie, + folio_file_mapping(folio), folio_pos(folio), + folio_size(folio), pstart); + +out_uptodate: + if (!ret) + folio_mark_uptodate(folio); +out_unlock: + folio_unlock(folio); + return ret; +} + static const struct address_space_operations erofs_fscache_meta_aops =3D { .readpage =3D erofs_fscache_meta_readpage, }; =20 +const struct address_space_operations erofs_fscache_access_aops =3D { + .readpage =3D erofs_fscache_readpage, +}; + /* * Get the page cache of data blob at the index offset. * Return: up to date page on success, ERR_PTR() on failure. 
diff --git a/fs/erofs/inode.c b/fs/erofs/inode.c index e8b37ba5e9ad..88b51b5fb53f 100644 --- a/fs/erofs/inode.c +++ b/fs/erofs/inode.c @@ -296,7 +296,12 @@ static int erofs_fill_inode(struct inode *inode, int i= sdir) err =3D z_erofs_fill_inode(inode); goto out_unlock; } + inode->i_mapping->a_ops =3D &erofs_raw_access_aops; +#ifdef CONFIG_EROFS_FS_ONDEMAND + if (erofs_is_fscache_mode(inode->i_sb)) + inode->i_mapping->a_ops =3D &erofs_fscache_access_aops; +#endif =20 out_unlock: erofs_put_metabuf(&buf); diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h index e186051f0640..336d19647c96 100644 --- a/fs/erofs/internal.h +++ b/fs/erofs/internal.h @@ -642,6 +642,8 @@ int erofs_fscache_register_cookie(struct super_block *s= b, void erofs_fscache_unregister_cookie(struct erofs_fscache **fscache); =20 struct folio *erofs_fscache_get_folio(struct super_block *sb, pgoff_t inde= x); + +extern const struct address_space_operations erofs_fscache_access_aops; #else static inline int erofs_fscache_register_fs(struct super_block *sb) { retu= rn 0; } static inline void erofs_fscache_unregister_fs(struct super_block *sb) {} --=20 2.27.0 From nobody Fri Jun 19 08:29:54 2026 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 6C744C433EF for ; Wed, 6 Apr 2022 12:14:08 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S232541AbiDFMQF (ORCPT ); Wed, 6 Apr 2022 08:16:05 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:36192 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S231404AbiDFMOi (ORCPT ); Wed, 6 Apr 2022 08:14:38 -0400 Received: from out30-42.freemail.mail.aliyun.com (out30-42.freemail.mail.aliyun.com [115.124.30.42]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id C666C43370A; Wed, 6 Apr 2022 00:56:45 
-0700 (PDT) X-Alimail-AntiSpam: AC=PASS;BC=-1|-1;BR=01201311R611e4;CH=green;DM=||false|;DS=||;FP=0|-1|-1|-1|0|-1|-1|-1;HT=e01e04423;MF=jefflexu@linux.alibaba.com;NM=1;PH=DS;RN=18;SR=0;TI=SMTPD_---0V9L3PF9_1649231800; Received: from localhost(mailfrom:jefflexu@linux.alibaba.com fp:SMTPD_---0V9L3PF9_1649231800) by smtp.aliyun-inc.com(127.0.0.1); Wed, 06 Apr 2022 15:56:41 +0800 From: Jeffle Xu To: dhowells@redhat.com, linux-cachefs@redhat.com, xiang@kernel.org, chao@kernel.org, linux-erofs@lists.ozlabs.org Cc: torvalds@linux-foundation.org, gregkh@linuxfoundation.org, willy@infradead.org, linux-fsdevel@vger.kernel.org, joseph.qi@linux.alibaba.com, bo.liu@linux.alibaba.com, tao.peng@linux.alibaba.com, gerry@linux.alibaba.com, eguan@linux.alibaba.com, linux-kernel@vger.kernel.org, luodaowen.backend@bytedance.com, tianzichen@kuaishou.com, fannaihao@baidu.com Subject: [PATCH v8 18/20] erofs: implement fscache-based data read for inline layout Date: Wed, 6 Apr 2022 15:56:10 +0800 Message-Id: <20220406075612.60298-19-jefflexu@linux.alibaba.com> X-Mailer: git-send-email 2.27.0 In-Reply-To: <20220406075612.60298-1-jefflexu@linux.alibaba.com> References: <20220406075612.60298-1-jefflexu@linux.alibaba.com> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Type: text/plain; charset="utf-8" Implement the data plane of reading data from data blobs over fscache for inline layout. For the heading non-inline part, the data plane for non-inline layout is reused, while only the tail packing part needs special handling. 
Signed-off-by: Jeffle Xu Tested-by: Zichen Tian --- fs/erofs/fscache.c | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/fs/erofs/fscache.c b/fs/erofs/fscache.c index 65de1c754e80..d32cb5840c6d 100644 --- a/fs/erofs/fscache.c +++ b/fs/erofs/fscache.c @@ -60,6 +60,40 @@ static int erofs_fscache_meta_readpage(struct file *data= , struct page *page) return ret; } =20 +static int erofs_fscache_readpage_inline(struct folio *folio, + struct erofs_map_blocks *map) +{ + struct inode *inode =3D folio_file_mapping(folio)->host; + struct super_block *sb =3D inode->i_sb; + struct erofs_buf buf =3D __EROFS_BUF_INITIALIZER; + erofs_blk_t blknr; + size_t offset, len; + void *src, *dst; + + /* + * For inline (tail packing) layout, the offset may be non-zero, which + * can be calculated from corresponding physical address directly. + */ + offset =3D erofs_blkoff(map->m_pa); + blknr =3D erofs_blknr(map->m_pa); + len =3D map->m_llen; + + src =3D erofs_read_metabuf(&buf, sb, blknr, EROFS_KMAP); + if (IS_ERR(src)) + return PTR_ERR(src); + + DBG_BUGON(folio_size(folio) !=3D PAGE_SIZE); + + dst =3D kmap(folio_page(folio, 0)); + memcpy(dst, src + offset, len); + memset(dst + len, 0, PAGE_SIZE - len); + kunmap(folio_page(folio, 0)); + + erofs_put_metabuf(&buf); + + return 0; +} + static int erofs_fscache_readpage(struct file *file, struct page *page) { struct folio *folio =3D page_folio(page); @@ -85,6 +119,12 @@ static int erofs_fscache_readpage(struct file *file, st= ruct page *page) goto out_uptodate; } =20 + /* inline readpage */ + if (map.m_flags & EROFS_MAP_META) { + ret =3D erofs_fscache_readpage_inline(folio, &map); + goto out_uptodate; + } + /* no-inline readpage */ mdev =3D (struct erofs_map_dev) { .m_deviceid =3D map.m_deviceid, --=20 2.27.0 From nobody Fri Jun 19 08:29:54 2026 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org 
(vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 409ABC433F5 for ; Wed, 6 Apr 2022 12:14:54 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S232599AbiDFMQq (ORCPT ); Wed, 6 Apr 2022 08:16:46 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:44130 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S231575AbiDFMOk (ORCPT ); Wed, 6 Apr 2022 08:14:40 -0400 Received: from out30-132.freemail.mail.aliyun.com (out30-132.freemail.mail.aliyun.com [115.124.30.132]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 9693643371F; Wed, 6 Apr 2022 00:56:48 -0700 (PDT) X-Alimail-AntiSpam: AC=PASS;BC=-1|-1;BR=01201311R551e4;CH=green;DM=||false|;DS=||;FP=0|-1|-1|-1|0|-1|-1|-1;HT=e01e04357;MF=jefflexu@linux.alibaba.com;NM=1;PH=DS;RN=18;SR=0;TI=SMTPD_---0V9LC8Ab_1649231802; Received: from localhost(mailfrom:jefflexu@linux.alibaba.com fp:SMTPD_---0V9LC8Ab_1649231802) by smtp.aliyun-inc.com(127.0.0.1); Wed, 06 Apr 2022 15:56:43 +0800 From: Jeffle Xu To: dhowells@redhat.com, linux-cachefs@redhat.com, xiang@kernel.org, chao@kernel.org, linux-erofs@lists.ozlabs.org Cc: torvalds@linux-foundation.org, gregkh@linuxfoundation.org, willy@infradead.org, linux-fsdevel@vger.kernel.org, joseph.qi@linux.alibaba.com, bo.liu@linux.alibaba.com, tao.peng@linux.alibaba.com, gerry@linux.alibaba.com, eguan@linux.alibaba.com, linux-kernel@vger.kernel.org, luodaowen.backend@bytedance.com, tianzichen@kuaishou.com, fannaihao@baidu.com Subject: [PATCH v8 19/20] erofs: implement fscache-based data readahead Date: Wed, 6 Apr 2022 15:56:11 +0800 Message-Id: <20220406075612.60298-20-jefflexu@linux.alibaba.com> X-Mailer: git-send-email 2.27.0 In-Reply-To: <20220406075612.60298-1-jefflexu@linux.alibaba.com> References: <20220406075612.60298-1-jefflexu@linux.alibaba.com> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Precedence: bulk List-ID: X-Mailing-List: 
linux-kernel@vger.kernel.org Content-Type: text/plain; charset="utf-8" Implement fscache-based data readahead. Also registers an individual bdi for each erofs instance to enable readahead. Signed-off-by: Jeffle Xu Tested-by: Zichen Tian --- fs/erofs/fscache.c | 94 ++++++++++++++++++++++++++++++++++++++++++++++ fs/erofs/super.c | 4 ++ 2 files changed, 98 insertions(+) diff --git a/fs/erofs/fscache.c b/fs/erofs/fscache.c index d32cb5840c6d..620d44210809 100644 --- a/fs/erofs/fscache.c +++ b/fs/erofs/fscache.c @@ -148,12 +148,106 @@ static int erofs_fscache_readpage(struct file *file,= struct page *page) return ret; } =20 +static inline void erofs_fscache_unlock_folios(struct readahead_control *r= ac, + size_t len) +{ + while (len) { + struct folio *folio =3D readahead_folio(rac); + + len -=3D folio_size(folio); + folio_mark_uptodate(folio); + folio_unlock(folio); + } +} + +static void erofs_fscache_readahead(struct readahead_control *rac) +{ + struct inode *inode =3D rac->mapping->host; + struct super_block *sb =3D inode->i_sb; + size_t len, count, done =3D 0; + erofs_off_t pos; + loff_t start, offset; + int ret; + + if (!readahead_count(rac)) + return; + + start =3D readahead_pos(rac); + len =3D readahead_length(rac); + + do { + struct erofs_map_blocks map; + struct erofs_map_dev mdev; + + pos =3D start + done; + map.m_la =3D pos; + + ret =3D erofs_map_blocks(inode, &map, EROFS_GET_BLOCKS_RAW); + if (ret) + return; + + /* + * 1) For CHUNK_BASED layout, the output m_la is rounded down to + * the nearest chunk boundary, and the output m_llen actually + * starts from the start of the containing chunk. + * 2) For other cases, the output m_la is equal to o_la. 
+ */ + offset =3D start + done; + count =3D min_t(size_t, map.m_llen - (pos - map.m_la), len - done); + + /* Read-ahead Hole */ + if (!(map.m_flags & EROFS_MAP_MAPPED)) { + struct iov_iter iter; + + iov_iter_xarray(&iter, READ, &rac->mapping->i_pages, + offset, count); + iov_iter_zero(count, &iter); + + erofs_fscache_unlock_folios(rac, count); + ret =3D count; + continue; + } + + /* Read-ahead Inline */ + if (map.m_flags & EROFS_MAP_META) { + struct folio *folio =3D readahead_folio(rac); + + ret =3D erofs_fscache_readpage_inline(folio, &map); + if (!ret) { + folio_mark_uptodate(folio); + ret =3D folio_size(folio); + } + + folio_unlock(folio); + continue; + } + + /* Read-ahead No-inline */ + mdev =3D (struct erofs_map_dev) { + .m_deviceid =3D map.m_deviceid, + .m_pa =3D map.m_pa, + }; + ret =3D erofs_map_dev(sb, &mdev); + if (ret) + return; + + ret =3D erofs_fscache_read_folios(mdev.m_fscache->cookie, + rac->mapping, offset, count, + mdev.m_pa + (pos - map.m_la)); + if (!ret) { + erofs_fscache_unlock_folios(rac, count); + ret =3D count; + } + } while (ret > 0 && ((done +=3D ret) < len)); +} + static const struct address_space_operations erofs_fscache_meta_aops =3D { .readpage =3D erofs_fscache_meta_readpage, }; =20 const struct address_space_operations erofs_fscache_access_aops =3D { .readpage =3D erofs_fscache_readpage, + .readahead =3D erofs_fscache_readahead, }; =20 /* diff --git a/fs/erofs/super.c b/fs/erofs/super.c index 8c7181cd37e6..a5e4de60a0d8 100644 --- a/fs/erofs/super.c +++ b/fs/erofs/super.c @@ -621,6 +621,10 @@ static int erofs_fc_fill_super(struct super_block *sb,= struct fs_context *fc) sbi->opt.fsid, true); if (err) return err; + + err =3D super_setup_bdi(sb); + if (err) + return err; } =20 err =3D erofs_read_superblock(sb); --=20 2.27.0 From nobody Fri Jun 19 08:29:54 2026 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org 
[23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id AD626C433EF for ; Wed, 6 Apr 2022 12:14:50 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S232555AbiDFMQ3 (ORCPT ); Wed, 6 Apr 2022 08:16:29 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:51990 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S231673AbiDFMOj (ORCPT ); Wed, 6 Apr 2022 08:14:39 -0400 Received: from out30-42.freemail.mail.aliyun.com (out30-42.freemail.mail.aliyun.com [115.124.30.42]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 087881EA5F9; Wed, 6 Apr 2022 00:56:48 -0700 (PDT) X-Alimail-AntiSpam: AC=PASS;BC=-1|-1;BR=01201311R191e4;CH=green;DM=||false|;DS=||;FP=0|-1|-1|-1|0|-1|-1|-1;HT=e01e04400;MF=jefflexu@linux.alibaba.com;NM=1;PH=DS;RN=18;SR=0;TI=SMTPD_---0V9L1BOu_1649231803; Received: from localhost(mailfrom:jefflexu@linux.alibaba.com fp:SMTPD_---0V9L1BOu_1649231803) by smtp.aliyun-inc.com(127.0.0.1); Wed, 06 Apr 2022 15:56:44 +0800 From: Jeffle Xu To: dhowells@redhat.com, linux-cachefs@redhat.com, xiang@kernel.org, chao@kernel.org, linux-erofs@lists.ozlabs.org Cc: torvalds@linux-foundation.org, gregkh@linuxfoundation.org, willy@infradead.org, linux-fsdevel@vger.kernel.org, joseph.qi@linux.alibaba.com, bo.liu@linux.alibaba.com, tao.peng@linux.alibaba.com, gerry@linux.alibaba.com, eguan@linux.alibaba.com, linux-kernel@vger.kernel.org, luodaowen.backend@bytedance.com, tianzichen@kuaishou.com, fannaihao@baidu.com Subject: [PATCH v8 20/20] erofs: add 'fsid' mount option Date: Wed, 6 Apr 2022 15:56:12 +0800 Message-Id: <20220406075612.60298-21-jefflexu@linux.alibaba.com> X-Mailer: git-send-email 2.27.0 In-Reply-To: <20220406075612.60298-1-jefflexu@linux.alibaba.com> References: <20220406075612.60298-1-jefflexu@linux.alibaba.com> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Type: text/plain; 
charset="utf-8" Introduce 'fsid' mount option to enable on-demand read semantics, in which case, erofs will be mounted from data blobs. Users could specify the name of primary data blob by this mount option. Signed-off-by: Jeffle Xu Tested-by: Zichen Tian --- fs/erofs/super.c | 48 ++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 42 insertions(+), 6 deletions(-) diff --git a/fs/erofs/super.c b/fs/erofs/super.c index a5e4de60a0d8..292b4a70ce19 100644 --- a/fs/erofs/super.c +++ b/fs/erofs/super.c @@ -398,6 +398,7 @@ enum { Opt_dax, Opt_dax_enum, Opt_device, + Opt_fsid, Opt_err }; =20 @@ -422,6 +423,7 @@ static const struct fs_parameter_spec erofs_fs_paramete= rs[] =3D { fsparam_flag("dax", Opt_dax), fsparam_enum("dax", Opt_dax_enum, erofs_dax_param_enums), fsparam_string("device", Opt_device), + fsparam_string("fsid", Opt_fsid), {} }; =20 @@ -517,6 +519,16 @@ static int erofs_fc_parse_param(struct fs_context *fc, } ++ctx->devs->extra_devices; break; + case Opt_fsid: +#ifdef CONFIG_EROFS_FS_ONDEMAND + kfree(ctx->opt.fsid); + ctx->opt.fsid =3D kstrdup(param->string, GFP_KERNEL); + if (!ctx->opt.fsid) + return -ENOMEM; +#else + errorfc(fc, "fsid option not supported"); +#endif + break; default: return -ENOPARAM; } @@ -597,9 +609,14 @@ static int erofs_fc_fill_super(struct super_block *sb,= struct fs_context *fc) sb->s_maxbytes =3D MAX_LFS_FILESIZE; sb->s_op =3D &erofs_sops; =20 - if (!sb_set_blocksize(sb, EROFS_BLKSIZ)) { - erofs_err(sb, "failed to set erofs blksize"); - return -EINVAL; + if (erofs_is_fscache_mode(sb)) { + sb->s_blocksize =3D EROFS_BLKSIZ; + sb->s_blocksize_bits =3D LOG_BLOCK_SIZE; + } else { + if (!sb_set_blocksize(sb, EROFS_BLKSIZ)) { + erofs_err(sb, "failed to set erofs blksize"); + return -EINVAL; + } } =20 sbi =3D kzalloc(sizeof(*sbi), GFP_KERNEL); @@ -608,7 +625,7 @@ static int erofs_fc_fill_super(struct super_block *sb, = struct fs_context *fc) =20 sb->s_fs_info =3D sbi; sbi->opt =3D ctx->opt; - sbi->dax_dev =3D
fs_dax_get_by_bdev(sb->s_bdev, &sbi->dax_part_off); + ctx->opt.fsid =3D NULL; sbi->devs =3D ctx->devs; ctx->devs =3D NULL; =20 @@ -625,6 +642,8 @@ static int erofs_fc_fill_super(struct super_block *sb, = struct fs_context *fc) err =3D super_setup_bdi(sb); if (err) return err; + } else { + sbi->dax_dev =3D fs_dax_get_by_bdev(sb->s_bdev, &sbi->dax_part_off); } =20 err =3D erofs_read_superblock(sb); @@ -684,6 +703,11 @@ static int erofs_fc_fill_super(struct super_block *sb,= struct fs_context *fc) =20 static int erofs_fc_get_tree(struct fs_context *fc) { + struct erofs_fs_context *ctx =3D fc->fs_private; + + if (IS_ENABLED(CONFIG_EROFS_FS_ONDEMAND) && ctx->opt.fsid) + return get_tree_nodev(fc, erofs_fc_fill_super); + return get_tree_bdev(fc, erofs_fc_fill_super); } =20 @@ -733,6 +757,7 @@ static void erofs_fc_free(struct fs_context *fc) struct erofs_fs_context *ctx =3D fc->fs_private; =20 erofs_free_dev_context(ctx->devs); + kfree(ctx->opt.fsid); kfree(ctx); } =20 @@ -773,7 +798,10 @@ static void erofs_kill_sb(struct super_block *sb) =20 WARN_ON(sb->s_magic !=3D EROFS_SUPER_MAGIC); =20 - kill_block_super(sb); + if (erofs_is_fscache_mode(sb)) + generic_shutdown_super(sb); + else + kill_block_super(sb); =20 sbi =3D EROFS_SB(sb); if (!sbi) @@ -783,6 +811,7 @@ static void erofs_kill_sb(struct super_block *sb) fs_put_dax(sbi->dax_dev); erofs_fscache_unregister_cookie(&sbi->s_fscache); erofs_fscache_unregister_fs(sb); + kfree(sbi->opt.fsid); kfree(sbi); sb->s_fs_info =3D NULL; } @@ -884,7 +913,10 @@ static int erofs_statfs(struct dentry *dentry, struct = kstatfs *buf) { struct super_block *sb =3D dentry->d_sb; struct erofs_sb_info *sbi =3D EROFS_SB(sb); - u64 id =3D huge_encode_dev(sb->s_bdev->bd_dev); + u64 id =3D 0; + + if (!erofs_is_fscache_mode(sb)) + id =3D huge_encode_dev(sb->s_bdev->bd_dev); =20 buf->f_type =3D sb->s_magic; buf->f_bsize =3D EROFS_BLKSIZ; @@ -929,6 +961,10 @@ static int erofs_show_options(struct seq_file *seq, st= ruct dentry *root) seq_puts(seq, 
",dax=3Dalways"); if (test_opt(opt, DAX_NEVER)) seq_puts(seq, ",dax=3Dnever"); +#ifdef CONFIG_EROFS_FS_ONDEMAND + if (opt->fsid) + seq_printf(seq, ",fsid=3D%s", opt->fsid); +#endif return 0; } =20 --=20 2.27.0