In f2fs_precache_extents(), large files require reading many node
blocks. Instead of reading each node block with synchronous I/O, this
patch applies readahead so that node blocks can be fetched in advance.

This reduces the overhead of repeated synchronous reads and improves
efficiency when precaching extents of large files.
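
For context, the call path this change touches looks roughly like this
(names as used in this patch and its commit message; purely illustrative):

/*
 * f2fs_precache_extents()
 *   -> f2fs_map_blocks(..., F2FS_GET_BLOCK_PRECACHE)  [mode = LOOKUP_NODE_PRECACHE]
 *     -> f2fs_get_dnode_of_data(dn, index, mode)
 *       -> f2fs_get_node_folio_ra()                   [node block readahead]
 */
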
For the test, I created files that share the same largest-extent layout:
the largest extent starts at offset 0 with a size of 1GB, and the
remaining area is filled with 100MB extents.

5GB test file:
dd if=/dev/urandom of=test1 bs=1m count=5120
cp test1 test2
fsync test1
dd if=test1 of=test2 bs=1m skip=1024 seek=1024 count=100 conv=notrunc
dd if=test1 of=test2 bs=1m skip=1224 seek=1224 count=100 conv=notrunc
...
dd if=test1 of=test2 bs=1m skip=5024 seek=5024 count=100 conv=notrunc
reboot
I also created 10GB and 20GB files with large extents using the same
method.
ioctl(F2FS_IOC_PRECACHE_EXTENTS) test results are as follows:
+-----------+---------+---------+-----------+
| File size | Before  | After   | Reduction |
+-----------+---------+---------+-----------+
| 5GB       | 101.8ms | 37.0ms  | 72.1%     |
| 10GB      | 222.9ms | 56.0ms  | 74.9%     |
| 20GB      | 446.2ms | 116.4ms | 73.9%     |
+-----------+---------+---------+-----------+
Tested on a 256GB mobile device with an SM8750 chipset.
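
For reference, a minimal sketch of how the ioctl latency above can be
measured from userspace (not part of this patch; it assumes a uapi
<linux/f2fs.h> that exports F2FS_IOC_PRECACHE_EXTENTS):

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <time.h>
#include <unistd.h>
#include <linux/f2fs.h>

int main(int argc, char **argv)
{
	struct timespec t0, t1;
	int fd;

	if (argc < 2)
		return 1;
	fd = open(argv[1], O_RDONLY);
	if (fd < 0) {
		perror("open");
		return 1;
	}
	clock_gettime(CLOCK_MONOTONIC, &t0);
	if (ioctl(fd, F2FS_IOC_PRECACHE_EXTENTS) < 0)
		perror("F2FS_IOC_PRECACHE_EXTENTS");
	clock_gettime(CLOCK_MONOTONIC, &t1);
	printf("precache: %.1f ms\n",
	       (t1.tv_sec - t0.tv_sec) * 1000.0 +
	       (t1.tv_nsec - t0.tv_nsec) / 1000000.0);
	close(fd);
	return 0;
}

Since the test procedure reboots before measuring, the node blocks are
presumably cold when the ioctl runs, so the sketch should be invoked
right after remount to match that.
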
Reviewed-by: Sungjong Seo <sj1557.seo@samsung.com>
Reviewed-by: Sunmin Jeong <s_min.jeong@samsung.com>
Signed-off-by: Yunji Kang <yunji0.kang@samsung.com>
---
v2:
- Modify the readahead condition check routine for better code
readability.
- Update the title from 'node block' to 'node blocks'.

v3:
- Bug fix to allow more node pages to be read ahead.
- Updated with test results.

fs/f2fs/data.c | 3 +++
fs/f2fs/f2fs.h | 1 +
fs/f2fs/node.c | 4 +++-
3 files changed, 7 insertions(+), 1 deletion(-)
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 7961e0ddfca3..ab3117e3b24a 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1572,6 +1572,9 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int flag)
 	pgofs = (pgoff_t)map->m_lblk;
 	end = pgofs + maxblocks;
 
+	if (flag == F2FS_GET_BLOCK_PRECACHE)
+		mode = LOOKUP_NODE_PRECACHE;
+
 next_dnode:
 	if (map->m_may_create) {
 		if (f2fs_lfs_mode(sbi))
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 9d3bc9633c1d..3ce41528d48e 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -651,6 +651,7 @@ enum {
 					 * look up a node with readahead called
 					 * by get_data_block.
 					 */
+	LOOKUP_NODE_PRECACHE,		/* look up a node for F2FS_GET_BLOCK_PRECACHE */
 };
 
 #define	DEFAULT_RETRY_IO_COUNT	8	/* maximum retry read IO or flush count */
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 4254db453b2d..63e9ee7ab911 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -860,7 +860,9 @@ int f2fs_get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode)
 			set_nid(parent, offset[i - 1], nids[i], i == 1);
 			f2fs_alloc_nid_done(sbi, nids[i]);
 			done = true;
-		} else if (mode == LOOKUP_NODE_RA && i == level && level > 1) {
+		} else if ((mode == LOOKUP_NODE_RA ||
+				(mode == LOOKUP_NODE_PRECACHE))
+				&& i == level && level > 1) {
 			nfolio[i] = f2fs_get_node_folio_ra(parent, offset[i - 1]);
 			if (IS_ERR(nfolio[i])) {
 				err = PTR_ERR(nfolio[i]);
--
2.34.1
On 9/24/25 13:58, Yunji Kang wrote:
> @@ -1572,6 +1572,9 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int flag)
>  	pgofs = (pgoff_t)map->m_lblk;
>  	end = pgofs + maxblocks;
>
> +	if (flag == F2FS_GET_BLOCK_PRECACHE)
> +		mode = LOOKUP_NODE_PRECACHE;
If the trigger conditions of LOOKUP_NODE_RA and LOOKUP_NODE_PRECACHE are the same,
what about:

	if (flag == F2FS_GET_BLOCK_PRECACHE)
		mode = LOOKUP_NODE_RA;

Thanks,
> On 9/24/25 13:58, Yunji Kang wrote:
>> @@ -1572,6 +1572,9 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int flag)
>>  	pgofs = (pgoff_t)map->m_lblk;
>>  	end = pgofs + maxblocks;
>>
>> +	if (flag == F2FS_GET_BLOCK_PRECACHE)
>> +		mode = LOOKUP_NODE_PRECACHE;
>
> If the trigger conditions of LOOKUP_NODE_RA and LOOKUP_NODE_PRECACHE are the same,
> what about:
>
> 	if (flag == F2FS_GET_BLOCK_PRECACHE)
> 		mode = LOOKUP_NODE_RA;
>
> Thanks,
Hi Yunji and Chao,

I think so, we can directly use LOOKUP_NODE_RA for precache extents.

BTW, maybe there are more cases where LOOKUP_NODE_RA would be better, like the
defragment/compress ioctls (which need to call __f2fs_cluster_blocks()).
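
Roughly something like the below, just to illustrate the idea (an untested
sketch; it assumes __f2fs_cluster_blocks() still passes LOOKUP_NODE today,
which I have not re-checked against the current tree):

	/* fs/f2fs/compress.c, __f2fs_cluster_blocks() -- sketch only */
	set_new_dnode(&dn, inode, NULL, NULL, 0);
-	ret = f2fs_get_dnode_of_data(&dn, start_idx, LOOKUP_NODE);
+	ret = f2fs_get_dnode_of_data(&dn, start_idx, LOOKUP_NODE_RA);
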
On 9/24/25 14:53, wangzijie wrote:
>
> Hi Yunji and Chao,
>
> I think so, we can directly use LOOKUP_NODE_RA for precache extents.
>
> BTW, maybe there are more cases where LOOKUP_NODE_RA would be better, like the
> defragment/compress ioctls (which need to call __f2fs_cluster_blocks()).
Yeah, we can give it a try and check the benefits. :)

Thanks,