fs/f2fs/data.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-)
For consecutive large hole mapping across {d,id,did}nodes, we don't
need to call f2fs_map_blocks() to check one hole block at a time;
instead, we can use map.m_next_pgofs as a hint of the next potential valid
block, so that we can skip calling f2fs_map_blocks() for the range of
[cur_pgofs + 1, .m_next_pgofs).
1) regular case
touch /mnt/f2fs/file
truncate -s $((1024*1024*1024)) /mnt/f2fs/file
time dd if=/mnt/f2fs/file of=/dev/null bs=1M count=1024
Before:
real 0m0.706s
user 0m0.000s
sys 0m0.706s
After:
real 0m0.620s
user 0m0.008s
sys 0m0.611s
2) large folio case
touch /mnt/f2fs/file
truncate -s $((1024*1024*1024)) /mnt/f2fs/file
f2fs_io setflags immutable /mnt/f2fs/file
sync
echo 3 > /proc/sys/vm/drop_caches
time dd if=/mnt/f2fs/file of=/dev/null bs=1M count=1024
Before:
real 0m0.438s
user 0m0.004s
sys 0m0.433s
After:
real 0m0.368s
user 0m0.004s
sys 0m0.364s
Signed-off-by: Chao Yu <chao@kernel.org>
---
fs/f2fs/data.c | 21 +++++++++++++++------
1 file changed, 15 insertions(+), 6 deletions(-)
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index a2c4769d0ae1..5b0642cd27ff 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -2176,10 +2176,13 @@ static int f2fs_read_single_page(struct inode *inode, struct folio *folio,
/*
* Map blocks using the previous result first.
*/
- if ((map->m_flags & F2FS_MAP_MAPPED) &&
- block_in_file > map->m_lblk &&
+ if (map->m_flags & F2FS_MAP_MAPPED) {
+ if (block_in_file > map->m_lblk &&
block_in_file < (map->m_lblk + map->m_len))
+ goto got_it;
+ } else if (block_in_file < *map->m_next_pgofs) {
goto got_it;
+ }
/*
* Then do more f2fs_map_blocks() calls until we are
@@ -2454,7 +2457,7 @@ static int f2fs_read_data_large_folio(struct inode *inode,
struct bio *bio = NULL;
sector_t last_block_in_bio = 0;
struct f2fs_map_blocks map = {0, };
- pgoff_t index, offset;
+ pgoff_t index, offset, next_pgofs = 0;
unsigned max_nr_pages = rac ? readahead_count(rac) :
folio_nr_pages(folio);
unsigned nrpages;
@@ -2487,16 +2490,21 @@ static int f2fs_read_data_large_folio(struct inode *inode,
/*
* Map blocks using the previous result first.
*/
- if ((map.m_flags & F2FS_MAP_MAPPED) &&
- index > map.m_lblk &&
+ if (map.m_flags & F2FS_MAP_MAPPED) {
+ if (index > map.m_lblk &&
index < (map.m_lblk + map.m_len))
+ goto got_it;
+ } else if (index < next_pgofs) {
+ /* hole case */
goto got_it;
+ }
/*
* Then do more f2fs_map_blocks() calls until we are
* done with this page.
*/
memset(&map, 0, sizeof(map));
+ map.m_next_pgofs = &next_pgofs;
map.m_seg_type = NO_CHECK_TYPE;
map.m_lblk = index;
map.m_len = max_nr_pages;
@@ -2617,6 +2625,7 @@ static int f2fs_mpage_readpages(struct inode *inode,
pgoff_t nc_cluster_idx = NULL_CLUSTER;
pgoff_t index;
#endif
+ pgoff_t next_pgofs = 0;
unsigned nr_pages = rac ? readahead_count(rac) : 1;
struct address_space *mapping = rac ? rac->mapping : folio->mapping;
unsigned max_nr_pages = nr_pages;
@@ -2637,7 +2646,7 @@ static int f2fs_mpage_readpages(struct inode *inode,
map.m_lblk = 0;
map.m_len = 0;
map.m_flags = 0;
- map.m_next_pgofs = NULL;
+ map.m_next_pgofs = &next_pgofs;
map.m_next_extent = NULL;
map.m_seg_type = NO_CHECK_TYPE;
map.m_may_create = false;
--
2.40.1
Hi Chao:
On 2026/1/12 09:33, Chao Yu via Linux-f2fs-devel wrote:
> For consecutive large hole mapping across {d,id,did}nodes , we don't
> need to call f2fs_map_blocks() to check one hole block per one time,
> instead, we can use map.m_next_pgofs as a hint of next potential valid
> block, so that we can skip calling f2fs_map_blocks the range of
> [cur_pgofs + 1, .m_next_pgofs).
>
> 1) regular case
>
> touch /mnt/f2fs/file
> truncate -s $((1024*1024*1024)) /mnt/f2fs/file
> time dd if=/mnt/f2fs/file of=/dev/null bs=1M count=1024
>
> Before:
> real 0m0.706s
> user 0m0.000s
> sys 0m0.706s
>
> After:
> real 0m0.620s
> user 0m0.008s
> sys 0m0.611s
>
> 2) large folio case
>
> touch /mnt/f2fs/file
> truncate -s $((1024*1024*1024)) /mnt/f2fs/file
> f2fs_io setflags immutable /mnt/f2fs/file
> sync
> echo 3 > /proc/sys/vm/drop_caches
> time dd if=/mnt/f2fs/file of=/dev/null bs=1M count=1024
>
> Before:
> real 0m0.438s
> user 0m0.004s
> sys 0m0.433s
>
> After:
> real 0m0.368s
> user 0m0.004s
> sys 0m0.364s
>
> Signed-off-by: Chao Yu <chao@kernel.org>
> ---
> fs/f2fs/data.c | 21 +++++++++++++++------
> 1 file changed, 15 insertions(+), 6 deletions(-)
>
> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> index a2c4769d0ae1..5b0642cd27ff 100644
> --- a/fs/f2fs/data.c
> +++ b/fs/f2fs/data.c
> @@ -2176,10 +2176,13 @@ static int f2fs_read_single_page(struct inode *inode, struct folio *folio,
> /*
> * Map blocks using the previous result first.
> */
> - if ((map->m_flags & F2FS_MAP_MAPPED) &&
> - block_in_file > map->m_lblk &&
> + if (map->m_flags & F2FS_MAP_MAPPED) {
> + if (block_in_file > map->m_lblk &&
> block_in_file < (map->m_lblk + map->m_len))
> + goto got_it;
> + } else if (block_in_file < *map->m_next_pgofs) {
> goto got_it;
> + }
>
> /*
> * Then do more f2fs_map_blocks() calls until we are
> @@ -2454,7 +2457,7 @@ static int f2fs_read_data_large_folio(struct inode *inode,
> struct bio *bio = NULL;
> sector_t last_block_in_bio = 0;
> struct f2fs_map_blocks map = {0, };
> - pgoff_t index, offset;
> + pgoff_t index, offset, next_pgofs = 0;
> unsigned max_nr_pages = rac ? readahead_count(rac) :
> folio_nr_pages(folio);
> unsigned nrpages;
> @@ -2487,16 +2490,21 @@ static int f2fs_read_data_large_folio(struct inode *inode,
> /*
> * Map blocks using the previous result first.
> */
> - if ((map.m_flags & F2FS_MAP_MAPPED) &&
> - index > map.m_lblk &&
> + if (map.m_flags & F2FS_MAP_MAPPED) {
> + if (index > map.m_lblk &&
> index < (map.m_lblk + map.m_len))
> + goto got_it;
> + } else if (index < next_pgofs) {
> + /* hole case */
> goto got_it;
> + }
>
> /*
> * Then do more f2fs_map_blocks() calls until we are
> * done with this page.
> */
> memset(&map, 0, sizeof(map));
> + map.m_next_pgofs = &next_pgofs;
> map.m_seg_type = NO_CHECK_TYPE;
> map.m_lblk = index;
> map.m_len = max_nr_pages;
> @@ -2617,6 +2625,7 @@ static int f2fs_mpage_readpages(struct inode *inode,
> pgoff_t nc_cluster_idx = NULL_CLUSTER;
> pgoff_t index;
> #endif
> + pgoff_t next_pgofs = 0;
> unsigned nr_pages = rac ? readahead_count(rac) : 1;
> struct address_space *mapping = rac ? rac->mapping : folio->mapping;
> unsigned max_nr_pages = nr_pages;
> @@ -2637,7 +2646,7 @@ static int f2fs_mpage_readpages(struct inode *inode,
> map.m_lblk = 0;
> map.m_len = 0;
> map.m_flags = 0;
> - map.m_next_pgofs = NULL;
> + map.m_next_pgofs = &next_pgofs;
> map.m_next_extent = NULL;
> map.m_seg_type = NO_CHECK_TYPE;
> map.m_may_create = false;
Do we have plans to also support reducing f2fs_map_blocks() calls for
consecutive holes in logical file position whose dnode has already been
allocated in buffered large folio read?
Such as consecutive NULL_ADDR or NEW_ADDR?
Thanks,
On 1/15/2026 8:48 PM, Nanzhe Zhao wrote:
> Hi Chao:
>
> On 2026/1/12 09:33, Chao Yu via Linux-f2fs-devel wrote:
>> For consecutive large hole mapping across {d,id,did}nodes , we don't
>> need to call f2fs_map_blocks() to check one hole block per one time,
>> instead, we can use map.m_next_pgofs as a hint of next potential valid
>> block, so that we can skip calling f2fs_map_blocks the range of
>> [cur_pgofs + 1, .m_next_pgofs).
>>
>> 1) regular case
>>
>> touch /mnt/f2fs/file
>> truncate -s $((1024*1024*1024)) /mnt/f2fs/file
>> time dd if=/mnt/f2fs/file of=/dev/null bs=1M count=1024
>>
>> Before:
>> real 0m0.706s
>> user 0m0.000s
>> sys 0m0.706s
>>
>> After:
>> real 0m0.620s
>> user 0m0.008s
>> sys 0m0.611s
>>
>> 2) large folio case
>>
>> touch /mnt/f2fs/file
>> truncate -s $((1024*1024*1024)) /mnt/f2fs/file
>> f2fs_io setflags immutable /mnt/f2fs/file
>> sync
>> echo 3 > /proc/sys/vm/drop_caches
>> time dd if=/mnt/f2fs/file of=/dev/null bs=1M count=1024
>>
>> Before:
>> real 0m0.438s
>> user 0m0.004s
>> sys 0m0.433s
>>
>> After:
>> real 0m0.368s
>> user 0m0.004s
>> sys 0m0.364s
>>
>> Signed-off-by: Chao Yu <chao@kernel.org>
>> ---
>> fs/f2fs/data.c | 21 +++++++++++++++------
>> 1 file changed, 15 insertions(+), 6 deletions(-)
>>
>> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
>> index a2c4769d0ae1..5b0642cd27ff 100644
>> --- a/fs/f2fs/data.c
>> +++ b/fs/f2fs/data.c
>> @@ -2176,10 +2176,13 @@ static int f2fs_read_single_page(struct inode *inode, struct folio *folio,
>> /*
>> * Map blocks using the previous result first.
>> */
>> - if ((map->m_flags & F2FS_MAP_MAPPED) &&
>> - block_in_file > map->m_lblk &&
>> + if (map->m_flags & F2FS_MAP_MAPPED) {
>> + if (block_in_file > map->m_lblk &&
>> block_in_file < (map->m_lblk + map->m_len))
>> + goto got_it;
>> + } else if (block_in_file < *map->m_next_pgofs) {
>> goto got_it;
>> + }
>>
>> /*
>> * Then do more f2fs_map_blocks() calls until we are
>> @@ -2454,7 +2457,7 @@ static int f2fs_read_data_large_folio(struct inode *inode,
>> struct bio *bio = NULL;
>> sector_t last_block_in_bio = 0;
>> struct f2fs_map_blocks map = {0, };
>> - pgoff_t index, offset;
>> + pgoff_t index, offset, next_pgofs = 0;
>> unsigned max_nr_pages = rac ? readahead_count(rac) :
>> folio_nr_pages(folio);
>> unsigned nrpages;
>> @@ -2487,16 +2490,21 @@ static int f2fs_read_data_large_folio(struct inode *inode,
>> /*
>> * Map blocks using the previous result first.
>> */
>> - if ((map.m_flags & F2FS_MAP_MAPPED) &&
>> - index > map.m_lblk &&
>> + if (map.m_flags & F2FS_MAP_MAPPED) {
>> + if (index > map.m_lblk &&
>> index < (map.m_lblk + map.m_len))
>> + goto got_it;
>> + } else if (index < next_pgofs) {
>> + /* hole case */
>> goto got_it;
>> + }
>>
>> /*
>> * Then do more f2fs_map_blocks() calls until we are
>> * done with this page.
>> */
>> memset(&map, 0, sizeof(map));
>> + map.m_next_pgofs = &next_pgofs;
>> map.m_seg_type = NO_CHECK_TYPE;
>> map.m_lblk = index;
>> map.m_len = max_nr_pages;
>> @@ -2617,6 +2625,7 @@ static int f2fs_mpage_readpages(struct inode *inode,
>> pgoff_t nc_cluster_idx = NULL_CLUSTER;
>> pgoff_t index;
>> #endif
>> + pgoff_t next_pgofs = 0;
>> unsigned nr_pages = rac ? readahead_count(rac) : 1;
>> struct address_space *mapping = rac ? rac->mapping : folio->mapping;
>> unsigned max_nr_pages = nr_pages;
>> @@ -2637,7 +2646,7 @@ static int f2fs_mpage_readpages(struct inode *inode,
>> map.m_lblk = 0;
>> map.m_len = 0;
>> map.m_flags = 0;
>> - map.m_next_pgofs = NULL;
>> + map.m_next_pgofs = &next_pgofs;
>> map.m_next_extent = NULL;
>> map.m_seg_type = NO_CHECK_TYPE;
>> map.m_may_create = false;
>
> Do we have plans to also support reducing f2fs_map_blocks() calls for
> consectives holes in logical file postion with dnode have already been
> allocated in buffered large folio read?
> Such as consective NULL_ADDR or NEW_ADDR?
Nanzhe,
We have supported that for large folio read w/ this patch?
Thanks,
>
> Thanks,
>
Hi Chao: At 2026-01-16 16:52:02, "Chao Yu" <chao@kernel.org> wrote: >> >> Do we have plans to also support reducing f2fs_map_blocks() calls for >> consecutive holes in logical file position whose dnode has already been >> allocated in buffered large folio read? >> Such as consecutive NULL_ADDR or NEW_ADDR? > >Nanzhe, > >We have supported that for large folio read w/ this patch? > >Thanks, > Sorry, I'm a bit confused. In the condition of F2FS_MAP_BLOCK_DEFAULT, the default: case will only set map->m_next_pgofs to pgofs + 1 then sync out. When we enter the next iteration and the index has advanced, the current index now turns to pgofs + 1 and index < next_pgofs becomes false. In consequence, we won't reduce f2fs_map_blocks() calls for a hole with its dnode allocated. Also, for NEW_ADDR, the default: case will directly go to sync out and bypass map_is_mergeable, so it will also not reduce f2fs_map_blocks calls. Or am I missing something? Thanks, Nanzhe Zhao
On 1/19/2026 3:43 PM, Nanzhe Zhao wrote: > Hi Chao: > At 2026-01-16 16:52:02, "Chao Yu" <chao@kernel.org> wrote: >>> >>> Do we have plans to also support reducing f2fs_map_blocks() calls for >>> consectives holes in logical file postion with dnode have already been >>> allocated in buffered large folio read? >>> Such as consective NULL_ADDR or NEW_ADDR? >> >> Nanzhe, >> >> We have supported that for large folio read w/ this patch? >> >> Thanks, >> > > Sorry, I'm a bit confused. > In the condition of F2FS_MAP_BLOCK_DEFAULT, the default: case will only > set map->m_next_pgofs to pgofs + 1 then sync out. When we enter > next iteration and the index advanced, currrent index now turns to pgofs + 1 > and index < next_pgofs become false.In consequence, we won't reduce > f2fs_map_blocks() calls for hole with dnode allocated. > > Also, for NEW_ADDR, the default: case will directly go to sync out and bypass > map_is_mergeable, so it will also not reduce f2fs_map_blocks calls. > > Or am I missing something? I guess f2fs_map_no_dnode() will update map->m_next_pgofs to pgofs of next potential valid dnode. Thanks, > > Thanks, > Nanzhe Zhao
Hi Chao: At 2026-01-19 21:44:48, "Chao Yu" <chao@kernel.org> wrote: > >I guess f2fs_map_no_dnode() will update map->m_next_pgofs to pgofs of next >potential valid dnode. > >Thanks, > I guess we were discussing the cases that f2fs_get_dnode_of_data won't return -ENOENT in f2fs_map_blocks but dn.blkaddr is still NULL_ADDR or NEW_ADDR ? I think I might understand the intention behind your repeated emphasis on the f2fs_map_no_dnode case? Are you saying that, on F2FS, the vast majority of sparse files fall into holes where the dnode hasn't been allocated at all, and that within the dnode the blkaddr values NULL_ADDR and NEW_ADDR—especially the latter on the read path —are relatively uncommon? Thanks, Nanzhe Zhao
Nanzhe,
On 1/20/2026 8:24 AM, Nanzhe Zhao wrote:
> Hi Chao:
> At 2026-01-19 21:44:48, "Chao Yu" <chao@kernel.org> wrote:
>>
>> I guess f2fs_map_no_dnode() will update map->m_next_pgofs to pgofs of next
>> potential valid dnode.
>>
>> Thanks,
>>
>
> I guess we were discussing the cases that f2fs_get_dnode_of_data won't return
> -ENOENT in f2fs_map_blocks but dn.blkaddr is still NULL_ADDR or NEW_ADDR ?
I may misunderstand your question at first place, sorry about that.
>
> I think I might understand the intention behind your repeated emphasis on the
> f2fs_map_no_dnode case? Are you saying that, on F2FS, the vast majority of sparse
> files fall into holes where the dnode hasn't been allocated at all, and that within the
> dnode the blkaddr values NULL_ADDR and NEW_ADDR—especially the latter on the read path
> —are relatively uncommon?
Actually, no, we'd better investigate the details of the hole data pattern on the device
to see whether 1) there are more small holes within a dnode (NULL_ADDR/NEW_ADDR case) or
2) there are more large holes across {di,i}dnodes; do you have a free slot to check that? :)
Thanks,
>
> Thanks,
> Nanzhe Zhao
© 2016 - 2026 Red Hat, Inc.