In f2fs_precache_extents(), large files require reading many node
blocks. Instead of reading each node block with synchronous I/O,
this patch applies readahead so that node blocks can be fetched in
advance.
It reduces the overhead of repeated sync reads and improves efficiency
when precaching extents of large files.
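To put the scale in perspective (assuming a 4KiB block size, where a
direct node block carries 1018 data block addresses and so maps about
4MiB of data), walking the mappings of a 5GB file touches on the order
of 5GiB / 4MiB, i.e. about 1,300 direct node blocks; waiting for each of
those reads to complete before issuing the next is what dominates the
precache time for large files.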
For the test, I created files sharing the same extent layout: the
largest extent is at offset 0 with a size of 1GB, and the remaining
area is made up of 100MB extents.
5GB test file:
dd if=/dev/urandom of=test1 bs=1m count=5120
cp test1 test2
fsync test1
dd if=test1 of=test2 bs=1m skip=1024 seek=1024 count=100 conv=notrunc
dd if=test1 of=test2 bs=1m skip=1224 seek=1224 count=100 conv=notrunc
...
dd if=test1 of=test2 bs=1m skip=5024 seek=5024 count=100 conv=notrunc
reboot
I also created 10GB and 20GB files with large extents using the same
method.
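The ioctl can be timed from userspace with a small helper along these
lines (a sketch, not necessarily the harness used for the numbers below;
it assumes F2FS_IOC_PRECACHE_EXTENTS is not already provided by a system
header and falls back to the f2fs uapi value _IO(0xf5, 15)):

/* precache_time.c: time ioctl(F2FS_IOC_PRECACHE_EXTENTS) on one file. */
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/ioctl.h>
#include <time.h>
#include <unistd.h>

#ifndef F2FS_IOC_PRECACHE_EXTENTS
#define F2FS_IOC_PRECACHE_EXTENTS	_IO(0xf5, 15)
#endif

int main(int argc, char **argv)
{
	struct timespec t0, t1;
	int fd;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <file on f2fs>\n", argv[0]);
		return 1;
	}

	fd = open(argv[1], O_RDONLY);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* measure only the precache call itself */
	clock_gettime(CLOCK_MONOTONIC, &t0);
	if (ioctl(fd, F2FS_IOC_PRECACHE_EXTENTS) < 0) {
		perror("ioctl(F2FS_IOC_PRECACHE_EXTENTS)");
		close(fd);
		return 1;
	}
	clock_gettime(CLOCK_MONOTONIC, &t1);

	printf("precache took %.1f ms\n",
	       (t1.tv_sec - t0.tv_sec) * 1000.0 +
	       (t1.tv_nsec - t0.tv_nsec) / 1000000.0);

	close(fd);
	return 0;
}

Build it with a normal toolchain (e.g. gcc -o precache_time
precache_time.c) and run it on the test file right after the reboot
step above, so the node blocks are cold.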
ioctl(F2FS_IOC_PRECACHE_EXTENTS) test results are as follows:
+-----------+---------+---------+-----------+
| File size | Before  | After   | Reduction |
+-----------+---------+---------+-----------+
| 5GB       | 101.8ms | 37.0ms  | 72.1%     |
| 10GB      | 222.9ms | 56.0ms  | 74.9%     |
| 20GB      | 446.2ms | 116.4ms | 73.9%     |
+-----------+---------+---------+-----------+
Tested on a 256GB mobile device with an SM8750 chipset.
Reviewed-by: Sungjong Seo <sj1557.seo@samsung.com>
Reviewed-by: Sunmin Jeong <s_min.jeong@samsung.com>
Signed-off-by: Yunji Kang <yunji0.kang@samsung.com>
---
v2:
- Modify the readahead condition check routine for better code
readability.
- Update the title from 'node block' to 'node blocks'.
v3:
- Fix a bug so that more node pages can be read ahead.
- Updated with test results.
fs/f2fs/data.c | 3 +++
fs/f2fs/f2fs.h | 1 +
fs/f2fs/node.c | 4 +++-
3 files changed, 7 insertions(+), 1 deletion(-)
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 7961e0ddfca3..ab3117e3b24a 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1572,6 +1572,9 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int flag)
 	pgofs = (pgoff_t)map->m_lblk;
 	end = pgofs + maxblocks;
 
+	if (flag == F2FS_GET_BLOCK_PRECACHE)
+		mode = LOOKUP_NODE_PRECACHE;
+
 next_dnode:
 	if (map->m_may_create) {
 		if (f2fs_lfs_mode(sbi))
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 9d3bc9633c1d..3ce41528d48e 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -651,6 +651,7 @@ enum {
 				 * look up a node with readahead called
 				 * by get_data_block.
 				 */
+	LOOKUP_NODE_PRECACHE,	/* look up a node for F2FS_GET_BLOCK_PRECACHE */
 };
 
 #define DEFAULT_RETRY_IO_COUNT	8	/* maximum retry read IO or flush count */
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 4254db453b2d..63e9ee7ab911 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -860,7 +860,9 @@ int f2fs_get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode)
 			set_nid(parent, offset[i - 1], nids[i], i == 1);
 			f2fs_alloc_nid_done(sbi, nids[i]);
 			done = true;
-		} else if (mode == LOOKUP_NODE_RA && i == level && level > 1) {
+		} else if ((mode == LOOKUP_NODE_RA ||
+				(mode == LOOKUP_NODE_PRECACHE))
+				&& i == level && level > 1) {
 			nfolio[i] = f2fs_get_node_folio_ra(parent, offset[i - 1]);
 			if (IS_ERR(nfolio[i])) {
 				err = PTR_ERR(nfolio[i]);
--
2.34.1
On 9/24/25 13:58, Yunji Kang wrote:
> [...]
> +	if (flag == F2FS_GET_BLOCK_PRECACHE)
> +		mode = LOOKUP_NODE_PRECACHE;

If the trigger conditions of LOOKUP_NODE_RA and LOOKUP_NODE_PRECACHE are
the same, what about:

	if (flag == F2FS_GET_BLOCK_PRECACHE)
		mode = LOOKUP_NODE_RA;

Thanks,
> On 9/24/25 13:58, Yunji Kang wrote:
>> [...]
>> +	if (flag == F2FS_GET_BLOCK_PRECACHE)
>> +		mode = LOOKUP_NODE_PRECACHE;
>
> If the trigger conditions of LOOKUP_NODE_RA and LOOKUP_NODE_PRECACHE are
> the same, what about:
>
>	if (flag == F2FS_GET_BLOCK_PRECACHE)
>		mode = LOOKUP_NODE_RA;
>
> Thanks,

Hi Yunji and Chao,

I think so, we can directly use LOOKUP_NODE_RA for precache extents.
BTW, maybe there are more cases where LOOKUP_NODE_RA would be better,
like the defragment/compress ioctls (which need to call
__f2fs_cluster_blocks()).
On 9/24/25 14:53, wangzijie wrote:
>> On 9/24/25 13:58, Yunji Kang wrote:
>>> [...]
>>> +	if (flag == F2FS_GET_BLOCK_PRECACHE)
>>> +		mode = LOOKUP_NODE_PRECACHE;
>>
>> If the trigger conditions of LOOKUP_NODE_RA and LOOKUP_NODE_PRECACHE are
>> the same, what about:
>>
>>	if (flag == F2FS_GET_BLOCK_PRECACHE)
>>		mode = LOOKUP_NODE_RA;
>>
>> Thanks,
>
> Hi Yunji and Chao,
>
> I think so, we can directly use LOOKUP_NODE_RA for precache extents.
> BTW, maybe there are more cases where LOOKUP_NODE_RA would be better,
> like the defragment/compress ioctls (which need to call
> __f2fs_cluster_blocks()).

Yeah, we can have a try to check the benefits. :)

Thanks,