fs/f2fs/data.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-)
For consecutive large hole mapping across {d,id,did}nodes, we don't
need to call f2fs_map_blocks() to check one hole block at a time;
instead, we can use map.m_next_pgofs as a hint of the next potential valid
block, so that we can skip calling f2fs_map_blocks() for the range of
[cur_pgofs + 1, .m_next_pgofs).
1) regular case
touch /mnt/f2fs/file
truncate -s $((1024*1024*1024)) /mnt/f2fs/file
time dd if=/mnt/f2fs/file of=/dev/null bs=1M count=1024
Before:
real 0m0.706s
user 0m0.000s
sys 0m0.706s
After:
real 0m0.620s
user 0m0.008s
sys 0m0.611s
2) large folio case
touch /mnt/f2fs/file
truncate -s $((1024*1024*1024)) /mnt/f2fs/file
f2fs_io setflags immutable /mnt/f2fs/file
sync
echo 3 > /proc/sys/vm/drop_caches
time dd if=/mnt/f2fs/file of=/dev/null bs=1M count=1024
Before:
real 0m0.438s
user 0m0.004s
sys 0m0.433s
After:
real 0m0.368s
user 0m0.004s
sys 0m0.364s
Signed-off-by: Chao Yu <chao@kernel.org>
---
fs/f2fs/data.c | 21 +++++++++++++++------
1 file changed, 15 insertions(+), 6 deletions(-)
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index a2c4769d0ae1..5b0642cd27ff 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -2176,10 +2176,13 @@ static int f2fs_read_single_page(struct inode *inode, struct folio *folio,
/*
* Map blocks using the previous result first.
*/
- if ((map->m_flags & F2FS_MAP_MAPPED) &&
- block_in_file > map->m_lblk &&
+ if (map->m_flags & F2FS_MAP_MAPPED) {
+ if (block_in_file > map->m_lblk &&
block_in_file < (map->m_lblk + map->m_len))
+ goto got_it;
+ } else if (block_in_file < *map->m_next_pgofs) {
goto got_it;
+ }
/*
* Then do more f2fs_map_blocks() calls until we are
@@ -2454,7 +2457,7 @@ static int f2fs_read_data_large_folio(struct inode *inode,
struct bio *bio = NULL;
sector_t last_block_in_bio = 0;
struct f2fs_map_blocks map = {0, };
- pgoff_t index, offset;
+ pgoff_t index, offset, next_pgofs = 0;
unsigned max_nr_pages = rac ? readahead_count(rac) :
folio_nr_pages(folio);
unsigned nrpages;
@@ -2487,16 +2490,21 @@ static int f2fs_read_data_large_folio(struct inode *inode,
/*
* Map blocks using the previous result first.
*/
- if ((map.m_flags & F2FS_MAP_MAPPED) &&
- index > map.m_lblk &&
+ if (map.m_flags & F2FS_MAP_MAPPED) {
+ if (index > map.m_lblk &&
index < (map.m_lblk + map.m_len))
+ goto got_it;
+ } else if (index < next_pgofs) {
+ /* hole case */
goto got_it;
+ }
/*
* Then do more f2fs_map_blocks() calls until we are
* done with this page.
*/
memset(&map, 0, sizeof(map));
+ map.m_next_pgofs = &next_pgofs;
map.m_seg_type = NO_CHECK_TYPE;
map.m_lblk = index;
map.m_len = max_nr_pages;
@@ -2617,6 +2625,7 @@ static int f2fs_mpage_readpages(struct inode *inode,
pgoff_t nc_cluster_idx = NULL_CLUSTER;
pgoff_t index;
#endif
+ pgoff_t next_pgofs = 0;
unsigned nr_pages = rac ? readahead_count(rac) : 1;
struct address_space *mapping = rac ? rac->mapping : folio->mapping;
unsigned max_nr_pages = nr_pages;
@@ -2637,7 +2646,7 @@ static int f2fs_mpage_readpages(struct inode *inode,
map.m_lblk = 0;
map.m_len = 0;
map.m_flags = 0;
- map.m_next_pgofs = NULL;
+ map.m_next_pgofs = &next_pgofs;
map.m_next_extent = NULL;
map.m_seg_type = NO_CHECK_TYPE;
map.m_may_create = false;
--
2.40.1
Hi Chao:
On 2026/1/12 09:33, Chao Yu via Linux-f2fs-devel wrote:
> For consecutive large hole mapping across {d,id,did}nodes , we don't
> need to call f2fs_map_blocks() to check one hole block per one time,
> instead, we can use map.m_next_pgofs as a hint of next potential valid
> block, so that we can skip calling f2fs_map_blocks the range of
> [cur_pgofs + 1, .m_next_pgofs).
>
> 1) regular case
>
> touch /mnt/f2fs/file
> truncate -s $((1024*1024*1024)) /mnt/f2fs/file
> time dd if=/mnt/f2fs/file of=/dev/null bs=1M count=1024
>
> Before:
> real 0m0.706s
> user 0m0.000s
> sys 0m0.706s
>
> After:
> real 0m0.620s
> user 0m0.008s
> sys 0m0.611s
>
> 2) large folio case
>
> touch /mnt/f2fs/file
> truncate -s $((1024*1024*1024)) /mnt/f2fs/file
> f2fs_io setflags immutable /mnt/f2fs/file
> sync
> echo 3 > /proc/sys/vm/drop_caches
> time dd if=/mnt/f2fs/file of=/dev/null bs=1M count=1024
>
> Before:
> real 0m0.438s
> user 0m0.004s
> sys 0m0.433s
>
> After:
> real 0m0.368s
> user 0m0.004s
> sys 0m0.364s
>
> Signed-off-by: Chao Yu <chao@kernel.org>
> ---
> fs/f2fs/data.c | 21 +++++++++++++++------
> 1 file changed, 15 insertions(+), 6 deletions(-)
>
> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> index a2c4769d0ae1..5b0642cd27ff 100644
> --- a/fs/f2fs/data.c
> +++ b/fs/f2fs/data.c
> @@ -2176,10 +2176,13 @@ static int f2fs_read_single_page(struct inode *inode, struct folio *folio,
> /*
> * Map blocks using the previous result first.
> */
> - if ((map->m_flags & F2FS_MAP_MAPPED) &&
> - block_in_file > map->m_lblk &&
> + if (map->m_flags & F2FS_MAP_MAPPED) {
> + if (block_in_file > map->m_lblk &&
> block_in_file < (map->m_lblk + map->m_len))
> + goto got_it;
> + } else if (block_in_file < *map->m_next_pgofs) {
> goto got_it;
> + }
>
> /*
> * Then do more f2fs_map_blocks() calls until we are
> @@ -2454,7 +2457,7 @@ static int f2fs_read_data_large_folio(struct inode *inode,
> struct bio *bio = NULL;
> sector_t last_block_in_bio = 0;
> struct f2fs_map_blocks map = {0, };
> - pgoff_t index, offset;
> + pgoff_t index, offset, next_pgofs = 0;
> unsigned max_nr_pages = rac ? readahead_count(rac) :
> folio_nr_pages(folio);
> unsigned nrpages;
> @@ -2487,16 +2490,21 @@ static int f2fs_read_data_large_folio(struct inode *inode,
> /*
> * Map blocks using the previous result first.
> */
> - if ((map.m_flags & F2FS_MAP_MAPPED) &&
> - index > map.m_lblk &&
> + if (map.m_flags & F2FS_MAP_MAPPED) {
> + if (index > map.m_lblk &&
> index < (map.m_lblk + map.m_len))
> + goto got_it;
> + } else if (index < next_pgofs) {
> + /* hole case */
> goto got_it;
> + }
>
> /*
> * Then do more f2fs_map_blocks() calls until we are
> * done with this page.
> */
> memset(&map, 0, sizeof(map));
> + map.m_next_pgofs = &next_pgofs;
> map.m_seg_type = NO_CHECK_TYPE;
> map.m_lblk = index;
> map.m_len = max_nr_pages;
> @@ -2617,6 +2625,7 @@ static int f2fs_mpage_readpages(struct inode *inode,
> pgoff_t nc_cluster_idx = NULL_CLUSTER;
> pgoff_t index;
> #endif
> + pgoff_t next_pgofs = 0;
> unsigned nr_pages = rac ? readahead_count(rac) : 1;
> struct address_space *mapping = rac ? rac->mapping : folio->mapping;
> unsigned max_nr_pages = nr_pages;
> @@ -2637,7 +2646,7 @@ static int f2fs_mpage_readpages(struct inode *inode,
> map.m_lblk = 0;
> map.m_len = 0;
> map.m_flags = 0;
> - map.m_next_pgofs = NULL;
> + map.m_next_pgofs = &next_pgofs;
> map.m_next_extent = NULL;
> map.m_seg_type = NO_CHECK_TYPE;
> map.m_may_create = false;
Do we have plans to also support reducing f2fs_map_blocks() calls for
consecutive holes in logical file position whose dnode has already been
allocated in buffered large folio read?
Such as consecutive NULL_ADDR or NEW_ADDR?
Thanks,
On 1/15/2026 8:48 PM, Nanzhe Zhao wrote:
> Hi Chao:
>
> On 2026/1/12 09:33, Chao Yu via Linux-f2fs-devel wrote:
>> For consecutive large hole mapping across {d,id,did}nodes , we don't
>> need to call f2fs_map_blocks() to check one hole block per one time,
>> instead, we can use map.m_next_pgofs as a hint of next potential valid
>> block, so that we can skip calling f2fs_map_blocks the range of
>> [cur_pgofs + 1, .m_next_pgofs).
>>
>> 1) regular case
>>
>> touch /mnt/f2fs/file
>> truncate -s $((1024*1024*1024)) /mnt/f2fs/file
>> time dd if=/mnt/f2fs/file of=/dev/null bs=1M count=1024
>>
>> Before:
>> real 0m0.706s
>> user 0m0.000s
>> sys 0m0.706s
>>
>> After:
>> real 0m0.620s
>> user 0m0.008s
>> sys 0m0.611s
>>
>> 2) large folio case
>>
>> touch /mnt/f2fs/file
>> truncate -s $((1024*1024*1024)) /mnt/f2fs/file
>> f2fs_io setflags immutable /mnt/f2fs/file
>> sync
>> echo 3 > /proc/sys/vm/drop_caches
>> time dd if=/mnt/f2fs/file of=/dev/null bs=1M count=1024
>>
>> Before:
>> real 0m0.438s
>> user 0m0.004s
>> sys 0m0.433s
>>
>> After:
>> real 0m0.368s
>> user 0m0.004s
>> sys 0m0.364s
>>
>> Signed-off-by: Chao Yu <chao@kernel.org>
>> ---
>> fs/f2fs/data.c | 21 +++++++++++++++------
>> 1 file changed, 15 insertions(+), 6 deletions(-)
>>
>> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
>> index a2c4769d0ae1..5b0642cd27ff 100644
>> --- a/fs/f2fs/data.c
>> +++ b/fs/f2fs/data.c
>> @@ -2176,10 +2176,13 @@ static int f2fs_read_single_page(struct inode *inode, struct folio *folio,
>> /*
>> * Map blocks using the previous result first.
>> */
>> - if ((map->m_flags & F2FS_MAP_MAPPED) &&
>> - block_in_file > map->m_lblk &&
>> + if (map->m_flags & F2FS_MAP_MAPPED) {
>> + if (block_in_file > map->m_lblk &&
>> block_in_file < (map->m_lblk + map->m_len))
>> + goto got_it;
>> + } else if (block_in_file < *map->m_next_pgofs) {
>> goto got_it;
>> + }
>>
>> /*
>> * Then do more f2fs_map_blocks() calls until we are
>> @@ -2454,7 +2457,7 @@ static int f2fs_read_data_large_folio(struct inode *inode,
>> struct bio *bio = NULL;
>> sector_t last_block_in_bio = 0;
>> struct f2fs_map_blocks map = {0, };
>> - pgoff_t index, offset;
>> + pgoff_t index, offset, next_pgofs = 0;
>> unsigned max_nr_pages = rac ? readahead_count(rac) :
>> folio_nr_pages(folio);
>> unsigned nrpages;
>> @@ -2487,16 +2490,21 @@ static int f2fs_read_data_large_folio(struct inode *inode,
>> /*
>> * Map blocks using the previous result first.
>> */
>> - if ((map.m_flags & F2FS_MAP_MAPPED) &&
>> - index > map.m_lblk &&
>> + if (map.m_flags & F2FS_MAP_MAPPED) {
>> + if (index > map.m_lblk &&
>> index < (map.m_lblk + map.m_len))
>> + goto got_it;
>> + } else if (index < next_pgofs) {
>> + /* hole case */
>> goto got_it;
>> + }
>>
>> /*
>> * Then do more f2fs_map_blocks() calls until we are
>> * done with this page.
>> */
>> memset(&map, 0, sizeof(map));
>> + map.m_next_pgofs = &next_pgofs;
>> map.m_seg_type = NO_CHECK_TYPE;
>> map.m_lblk = index;
>> map.m_len = max_nr_pages;
>> @@ -2617,6 +2625,7 @@ static int f2fs_mpage_readpages(struct inode *inode,
>> pgoff_t nc_cluster_idx = NULL_CLUSTER;
>> pgoff_t index;
>> #endif
>> + pgoff_t next_pgofs = 0;
>> unsigned nr_pages = rac ? readahead_count(rac) : 1;
>> struct address_space *mapping = rac ? rac->mapping : folio->mapping;
>> unsigned max_nr_pages = nr_pages;
>> @@ -2637,7 +2646,7 @@ static int f2fs_mpage_readpages(struct inode *inode,
>> map.m_lblk = 0;
>> map.m_len = 0;
>> map.m_flags = 0;
>> - map.m_next_pgofs = NULL;
>> + map.m_next_pgofs = &next_pgofs;
>> map.m_next_extent = NULL;
>> map.m_seg_type = NO_CHECK_TYPE;
>> map.m_may_create = false;
>
> Do we have plans to also support reducing f2fs_map_blocks() calls for
> consectives holes in logical file postion with dnode have already been
> allocated in buffered large folio read?
> Such as consective NULL_ADDR or NEW_ADDR?
Nanzhe,
We have supported that for large folio read w/ this patch?
Thanks,
>
> Thanks,
>
Hi Chao: At 2026-01-16 16:52:02, "Chao Yu" <chao@kernel.org> wrote: >> >> Do we have plans to also support reducing f2fs_map_blocks() calls for >> consecutive holes in logical file position whose dnode has already been >> allocated in buffered large folio read? >> Such as consecutive NULL_ADDR or NEW_ADDR? > >Nanzhe, > >We have supported that for large folio read w/ this patch? > >Thanks, > Sorry, I'm a bit confused. In the condition of F2FS_MAP_BLOCK_DEFAULT, the default: case will only set map->m_next_pgofs to pgofs + 1 then sync out. When we enter the next iteration and the index has advanced, the current index now turns to pgofs + 1 and index < next_pgofs becomes false. In consequence, we won't reduce f2fs_map_blocks() calls for a hole with its dnode allocated. Also, for NEW_ADDR, the default: case will directly go to sync out and bypass map_is_mergeable, so it will also not reduce f2fs_map_blocks calls. Or am I missing something? Thanks, Nanzhe Zhao
On 1/19/2026 3:43 PM, Nanzhe Zhao wrote: > Hi Chao: > At 2026-01-16 16:52:02, "Chao Yu" <chao@kernel.org> wrote: >>> >>> Do we have plans to also support reducing f2fs_map_blocks() calls for >>> consectives holes in logical file postion with dnode have already been >>> allocated in buffered large folio read? >>> Such as consective NULL_ADDR or NEW_ADDR? >> >> Nanzhe, >> >> We have supported that for large folio read w/ this patch? >> >> Thanks, >> > > Sorry, I'm a bit confused. > In the condition of F2FS_MAP_BLOCK_DEFAULT, the default: case will only > set map->m_next_pgofs to pgofs + 1 then sync out. When we enter > next iteration and the index advanced, currrent index now turns to pgofs + 1 > and index < next_pgofs become false.In consequence, we won't reduce > f2fs_map_blocks() calls for hole with dnode allocated. > > Also, for NEW_ADDR, the default: case will directly go to sync out and bypass > map_is_mergeable, so it will also not reduce f2fs_map_blocks calls. > > Or am I missing something? I guess f2fs_map_no_dnode() will update map->m_next_pgofs to pgofs of next potential valid dnode. Thanks, > > Thanks, > Nanzhe Zhao
Hi Chao: At 2026-01-19 21:44:48, "Chao Yu" <chao@kernel.org> wrote: > >I guess f2fs_map_no_dnode() will update map->m_next_pgofs to pgofs of next >potential valid dnode. > >Thanks, > I guess we were discussing the cases that f2fs_get_dnode_of_data won't return -ENOENT in f2fs_map_blocks but dn.blkaddr is still NULL_ADDR or NEW_ADDR ? I think I might understand the intention behind your repeated emphasis on the f2fs_map_no_dnode case? Are you saying that, on F2FS, the vast majority of sparse files fall into holes where the dnode hasn't been allocated at all, and that within the dnode the blkaddr values NULL_ADDR and NEW_ADDR—especially the latter on the read path —are relatively uncommon? Thanks, Nanzhe Zhao
Nanzhe,
On 1/20/2026 8:24 AM, Nanzhe Zhao wrote:
> Hi Chao:
> At 2026-01-19 21:44:48, "Chao Yu" <chao@kernel.org> wrote:
>>
>> I guess f2fs_map_no_dnode() will update map->m_next_pgofs to pgofs of next
>> potential valid dnode.
>>
>> Thanks,
>>
>
> I guess we were discussing the cases that f2fs_get_dnode_of_data won't return
> -ENOENT in f2fs_map_blocks but dn.blkaddr is still NULL_ADDR or NEW_ADDR ?
I may misunderstand your question at first place, sorry about that.
>
> I think I might understand the intention behind your repeated emphasis on the
> f2fs_map_no_dnode case? Are you saying that, on F2FS, the vast majority of sparse
> files fall into holes where the dnode hasn't been allocated at all, and that within the
> dnode the blkaddr values NULL_ADDR and NEW_ADDR—especially the latter on the read path
> —are relatively uncommon?
Actually, no, we'd better investigate the details of the hole data pattern on the device
to see whether 1) there are more small holes within a dnode (NULL_ADDR/NEW_ADDR case) or
2) there are more large holes across {di,i}dnodes; do you have a free slot to check that? :)
Thanks,
>
> Thanks,
> Nanzhe Zhao
© 2016 - 2026 Red Hat, Inc.