fs/exfat/file.c | 75 +++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 64 insertions(+), 11 deletions(-)
This is part of my exFAT fragmentation optimisation project. As exFAT is fundamentally based on a linked-list structure, cluster allocation is not a cheap operation - it involves walking the cluster chain as well as the allocation bitmap, so it's not particularly easy on flash devices. The purpose of the valid data length (VDL) field is, in part, to address this issue by allowing users to clear the contents of files without changing the cluster layout: only the VDL is updated, which is an O(1) operation, i.e. reduced flash wear. The fallocate support should see applications in recording devices like dashcams, IP cameras and DVRs (digital video recorders). Such devices typically implement FIFO-style file rotation. Also, the technique could potentially be utilised to eliminate the chance of fragmentation if the device preallocates clusters for large media files. The `fallocate -d ...` (util-linux) and `cp --sparse=always ...` (GNU coreutils) commands can be used on files in exFAT to detect zeros leading up to EOF and update the VDL accordingly. David Timber (1): exfat: add limited FALLOC_FL_ZERO_RANGE support fs/exfat/file.c | 75 +++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 64 insertions(+), 11 deletions(-) -- 2.53.0.1.ga224b40d3f.dirty
syzbot ci has tested the following series
[v0] exfat: add limited FALLOC_FL_ZERO_RANGE support
https://lore.kernel.org/all/20260319043553.301185-1-dxdt@dev.snart.me
* [PATCH v0 1/1] exfat: add limited FALLOC_FL_ZERO_RANGE support
and found the following issue:
WARNING: lock held when returning to user space in exfat_fallocate
Full report is available here:
https://ci.syzbot.org/series/a58a79c9-a4e7-40b7-a5d3-8d4da244a00c
***
WARNING: lock held when returning to user space in exfat_fallocate
tree: linux-next
URL: https://kernel.googlesource.com/pub/scm/linux/kernel/git/next/linux-next
base: b5d083a3ed1e2798396d5e491432e887da8d4a06
arch: amd64
compiler: Debian clang version 21.1.8 (++20251221033036+2078da43e25a-1~exp1~20251221153213.50), Debian LLD 21.1.8
config: https://ci.syzbot.org/builds/6a43cfef-5b9a-4b39-a6b0-7d34bfb0f7b7/config
C repro: https://ci.syzbot.org/findings/87d27fd2-e5b7-4174-8faf-3b746f7aa18d/c_repro
syz repro: https://ci.syzbot.org/findings/87d27fd2-e5b7-4174-8faf-3b746f7aa18d/syz_repro
exFAT-fs (loop0): failed to load upcase table (idx : 0x0000fd4f, chksum : 0x395e47cf, utbl_chksum : 0xe619d30d)
exFAT-fs (loop0): valid_size(150994954) is greater than size(10)
================================================
WARNING: lock held when returning to user space!
syzkaller #0 Not tainted
------------------------------------------------
syz.0.17/5982 is leaving the kernel with locks still held!
1 lock held by syz.0.17/5982:
#0: ffff8881bc75c110 (&sb->s_type->i_mutex_key#24){+.+.}-{4:4}, at: inode_lock include/linux/fs.h:1028 [inline]
#0: ffff8881bc75c110 (&sb->s_type->i_mutex_key#24){+.+.}-{4:4}, at: exfat_fallocate+0x163/0x4e0 fs/exfat/file.c:131
***
If these findings have caused you to resend the series or submit a
separate fix, please add the following tag to your commit message:
Tested-by: syzbot@syzkaller.appspotmail.com
---
This report is generated by a bot. It may contain errors.
syzbot ci engineers can be reached at syzkaller@googlegroups.com.
#syz test
diff --git a/fs/exfat/file.c b/fs/exfat/file.c
index 2daf0dbabb24..dfa5fc89f77d 100644
--- a/fs/exfat/file.c
+++ b/fs/exfat/file.c
@@ -36,7 +36,8 @@ static int exfat_cont_expand(struct inode *inode, loff_t size)
num_clusters = EXFAT_B_TO_CLU(exfat_ondisk_size(inode), sbi);
new_num_clusters = EXFAT_B_TO_CLU_ROUND_UP(size, sbi);
- if (new_num_clusters == num_clusters)
+ WARN_ON(new_num_clusters < num_clusters);
+ if (new_num_clusters <= num_clusters)
goto out;
if (num_clusters) {
@@ -94,35 +95,87 @@ static int exfat_cont_expand(struct inode *inode, loff_t size)
/*
* Preallocate space for a file. This implements exfat's fallocate file
* operation, which gets called from sys_fallocate system call. User space
- * requests len bytes at offset. In contrary to fat, we only support
- * FALLOC_FL_ALLOCATE_RANGE because by leaving the valid data length(VDL)
- * field, it is unnecessary to zero out the newly allocated clusters.
+ * requests len bytes at offset.
+ *
+ * Contrary to fat, FALLOC_FL_ALLOCATE_RANGE can be done without zeroing out
+ * the newly allocated clusters by leaving the valid data length(VDL) field
+ * unchanged.
+ *
+ * Due to the inherent limitation of the VDL scheme, FALLOC_FL_ZERO_RANGE is
+ * only possible when the requested range covers EOF.
*/
static long exfat_fallocate(struct file *file, int mode,
loff_t offset, loff_t len)
{
struct inode *inode = file->f_mapping->host;
- loff_t newsize = offset + len;
+ loff_t newsize, isize;
int err = 0;
/* No support for other modes */
- if (mode != FALLOC_FL_ALLOCATE_RANGE)
+ switch (mode) {
+ case FALLOC_FL_ALLOCATE_RANGE:
+ case FALLOC_FL_ZERO_RANGE:
+ case FALLOC_FL_ZERO_RANGE|FALLOC_FL_KEEP_SIZE:
+ break;
+ default:
return -EOPNOTSUPP;
+ }
/* No support for dir */
if (!S_ISREG(inode->i_mode))
- return -EOPNOTSUPP;
+ return mode & FALLOC_FL_ZERO_RANGE ? -EINVAL : -EOPNOTSUPP;
if (unlikely(exfat_forced_shutdown(inode->i_sb)))
return -EIO;
inode_lock(inode);
- if (newsize <= i_size_read(inode))
- goto error;
+ newsize = offset + len;
+ isize = i_size_read(inode);
+
+ if (mode & FALLOC_FL_ZERO_RANGE) {
+ struct exfat_inode_info *ei = EXFAT_I(inode);
+ loff_t saved_validsize = ei->valid_size;
+
+ /* The requested range must span to or past EOF */
+ if (newsize < isize) {
+ err = -EOPNOTSUPP;
+ goto error;
+ }
+
+ /* valid_size can only be truncated */
+ if (offset < ei->valid_size)
+ ei->valid_size = offset;
+ /* If offset >= ei->valid_size, the range is already zeroed so that'd be no-op */
+
+ if (!(mode & FALLOC_FL_KEEP_SIZE) && isize < newsize)
+ err = exfat_cont_expand(inode, newsize);
+ /* inode invalidated in exfat_cont_expand() */
+ else {
+ /* update inode */
+ inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
+ mark_inode_dirty(inode);
+
+ if (IS_SYNC(inode))
+ err = write_inode_now(inode, 1);
+ }
+
+ if (err) {
+ /* inode unchanged - revert valid_size */
+ ei->valid_size = saved_validsize;
+ goto error;
+ }
+
+ /* drop cache after the new valid_size */
+ if (ei->valid_size != saved_validsize)
+ truncate_pagecache(inode, ei->valid_size);
+ } else { /* mode == FALLOC_FL_ALLOCATE_RANGE */
+ if (newsize <= isize)
+ goto error;
- /* This is just an expanding truncate */
- err = exfat_cont_expand(inode, newsize);
+ /* This is just an expanding truncate */
+ err = exfat_cont_expand(inode, newsize);
+ }
error:
inode_unlock(inode);
© 2016 - 2026 Red Hat, Inc.