[PATCH 1/2] f2fs: register inodes which is able to donate pages

Jaegeuk Kim posted 2 patches 11 months ago
There is a newer version of this series
[PATCH 1/2] f2fs: register inodes which is able to donate pages
Posted by Jaegeuk Kim 11 months ago
This patch introduces an inode list to keep the page cache ranges that users
can donate pages together.

 #define F2FS_IOC_DONATE_RANGE		_IOW(F2FS_IOCTL_MAGIC, 27,	\
						struct f2fs_donate_range)
 struct f2fs_donate_range {
	__u64 start;
	__u64 len;
 };

e.g., ioctl(F2FS_IOC_DONATE_RANGE, &range);

Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/debug.c           |  3 +++
 fs/f2fs/f2fs.h            | 12 ++++++++-
 fs/f2fs/file.c            | 52 +++++++++++++++++++++++++++++++++++++++
 fs/f2fs/inode.c           | 14 +++++++++++
 fs/f2fs/super.c           |  1 +
 include/uapi/linux/f2fs.h |  7 ++++++
 6 files changed, 88 insertions(+), 1 deletion(-)

diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index 468828288a4a..16c2dfb4f595 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -164,6 +164,7 @@ static void update_general_status(struct f2fs_sb_info *sbi)
 	si->ndirty_imeta = get_pages(sbi, F2FS_DIRTY_IMETA);
 	si->ndirty_dirs = sbi->ndirty_inode[DIR_INODE];
 	si->ndirty_files = sbi->ndirty_inode[FILE_INODE];
+	si->ndonate_files = sbi->donate_files;
 	si->nquota_files = sbi->nquota_files;
 	si->ndirty_all = sbi->ndirty_inode[DIRTY_META];
 	si->aw_cnt = atomic_read(&sbi->atomic_files);
@@ -501,6 +502,8 @@ static int stat_show(struct seq_file *s, void *v)
 			   si->compr_inode, si->compr_blocks);
 		seq_printf(s, "  - Swapfile Inode: %u\n",
 			   si->swapfile_inode);
+		seq_printf(s, "  - Donate Inode: %u\n",
+			   si->ndonate_files);
 		seq_printf(s, "  - Orphan/Append/Update Inode: %u, %u, %u\n",
 			   si->orphans, si->append, si->update);
 		seq_printf(s, "\nMain area: %d segs, %d secs %d zones\n",
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 4bfe162eefd3..951fbc3f94c7 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -850,6 +850,11 @@ struct f2fs_inode_info {
 #endif
 	struct list_head dirty_list;	/* dirty list for dirs and files */
 	struct list_head gdirty_list;	/* linked in global dirty list */
+
+	/* linked in global inode list for cache donation */
+	struct list_head gdonate_list;
+	loff_t donate_start, donate_end; /* inclusive */
+
 	struct task_struct *atomic_write_task;	/* store atomic write task */
 	struct extent_tree *extent_tree[NR_EXTENT_CACHES];
 					/* cached extent_tree entry */
@@ -1274,6 +1279,7 @@ enum inode_type {
 	DIR_INODE,			/* for dirty dir inode */
 	FILE_INODE,			/* for dirty regular/symlink inode */
 	DIRTY_META,			/* for all dirtied inode metadata */
+	DONATE_INODE,			/* for all inode to donate pages */
 	NR_INODE_TYPE,
 };
 
@@ -1629,6 +1635,9 @@ struct f2fs_sb_info {
 	unsigned int warm_data_age_threshold;
 	unsigned int last_age_weight;
 
+	/* control donate caches */
+	unsigned int donate_files;
+
 	/* basic filesystem units */
 	unsigned int log_sectors_per_block;	/* log2 sectors per block */
 	unsigned int log_blocksize;		/* log2 block size */
@@ -3984,7 +3993,8 @@ struct f2fs_stat_info {
 	unsigned long long allocated_data_blocks;
 	int ndirty_node, ndirty_dent, ndirty_meta, ndirty_imeta;
 	int ndirty_data, ndirty_qdata;
-	unsigned int ndirty_dirs, ndirty_files, nquota_files, ndirty_all;
+	unsigned int ndirty_dirs, ndirty_files, ndirty_all;
+	unsigned int nquota_files, ndonate_files;
 	int nats, dirty_nats, sits, dirty_sits;
 	int free_nids, avail_nids, alloc_nids;
 	int total_count, utilization;
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 81764b10840b..c43d64898d8b 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -2429,6 +2429,55 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
 	return ret;
 }
 
+static int f2fs_ioc_donate_range(struct file *filp, unsigned long arg)
+{
+	struct inode *inode = file_inode(filp);
+	struct mnt_idmap *idmap = file_mnt_idmap(filp);
+	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+	struct f2fs_donate_range range;
+	int ret;
+
+	if (copy_from_user(&range, (struct f2fs_donate_range __user *)arg,
+							sizeof(range)))
+		return -EFAULT;
+
+	if (!inode_owner_or_capable(idmap, inode))
+		return -EACCES;
+
+	if (!S_ISREG(inode->i_mode))
+		return -EINVAL;
+
+	if (unlikely((range.start + range.len) >> PAGE_SHIFT >
+					max_file_blocks(inode)))
+		return -EINVAL;
+
+	ret = mnt_want_write_file(filp);
+	if (ret)
+		return ret;
+
+	inode_lock(inode);
+
+	if (f2fs_is_atomic_file(inode))
+		goto out;
+
+	spin_lock(&sbi->inode_lock[DONATE_INODE]);
+	if (list_empty(&F2FS_I(inode)->gdonate_list)) {
+		list_add_tail(&F2FS_I(inode)->gdonate_list,
+				&sbi->inode_list[DONATE_INODE]);
+		sbi->donate_files++;
+	} else {
+		list_move_tail(&F2FS_I(inode)->gdonate_list,
+				&sbi->inode_list[DONATE_INODE]);
+	}
+	F2FS_I(inode)->donate_start = range.start;
+	F2FS_I(inode)->donate_end = range.start + range.len - 1;
+	spin_unlock(&sbi->inode_lock[DONATE_INODE]);
+out:
+	inode_unlock(inode);
+	mnt_drop_write_file(filp);
+	return ret;
+}
+
 static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg)
 {
 	struct inode *inode = file_inode(filp);
@@ -4458,6 +4507,8 @@ static long __f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 		return -EOPNOTSUPP;
 	case F2FS_IOC_SHUTDOWN:
 		return f2fs_ioc_shutdown(filp, arg);
+	case F2FS_IOC_DONATE_RANGE:
+		return f2fs_ioc_donate_range(filp, arg);
 	case FITRIM:
 		return f2fs_ioc_fitrim(filp, arg);
 	case FS_IOC_SET_ENCRYPTION_POLICY:
@@ -5209,6 +5260,7 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	case F2FS_IOC_RELEASE_VOLATILE_WRITE:
 	case F2FS_IOC_ABORT_ATOMIC_WRITE:
 	case F2FS_IOC_SHUTDOWN:
+	case F2FS_IOC_DONATE_RANGE:
 	case FITRIM:
 	case FS_IOC_SET_ENCRYPTION_POLICY:
 	case FS_IOC_GET_ENCRYPTION_PWSALT:
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 7de33da8b3ea..f9fc58f313f2 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -804,6 +804,19 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
 	return 0;
 }
 
+static void f2fs_remove_donate_inode(struct inode *inode)
+{
+	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+
+	if (list_empty(&F2FS_I(inode)->gdonate_list))
+		return;
+
+	spin_lock(&sbi->inode_lock[DONATE_INODE]);
+	list_del_init(&F2FS_I(inode)->gdonate_list);
+	sbi->donate_files--;
+	spin_unlock(&sbi->inode_lock[DONATE_INODE]);
+}
+
 /*
  * Called at the last iput() if i_nlink is zero
  */
@@ -838,6 +851,7 @@ void f2fs_evict_inode(struct inode *inode)
 
 	f2fs_bug_on(sbi, get_dirty_pages(inode));
 	f2fs_remove_dirty_inode(inode);
+	f2fs_remove_donate_inode(inode);
 
 	if (!IS_DEVICE_ALIASING(inode))
 		f2fs_destroy_extent_tree(inode);
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index fc7d463dee15..ef639a6d82e5 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1441,6 +1441,7 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
 	spin_lock_init(&fi->i_size_lock);
 	INIT_LIST_HEAD(&fi->dirty_list);
 	INIT_LIST_HEAD(&fi->gdirty_list);
+	INIT_LIST_HEAD(&fi->gdonate_list);
 	init_f2fs_rwsem(&fi->i_gc_rwsem[READ]);
 	init_f2fs_rwsem(&fi->i_gc_rwsem[WRITE]);
 	init_f2fs_rwsem(&fi->i_xattr_sem);
diff --git a/include/uapi/linux/f2fs.h b/include/uapi/linux/f2fs.h
index f7aaf8d23e20..cd38a7c166e6 100644
--- a/include/uapi/linux/f2fs.h
+++ b/include/uapi/linux/f2fs.h
@@ -44,6 +44,8 @@
 #define F2FS_IOC_COMPRESS_FILE		_IO(F2FS_IOCTL_MAGIC, 24)
 #define F2FS_IOC_START_ATOMIC_REPLACE	_IO(F2FS_IOCTL_MAGIC, 25)
 #define F2FS_IOC_GET_DEV_ALIAS_FILE	_IOR(F2FS_IOCTL_MAGIC, 26, __u32)
+#define F2FS_IOC_DONATE_RANGE		_IOW(F2FS_IOCTL_MAGIC, 27,	\
+						struct f2fs_donate_range)
 
 /*
  * should be same as XFS_IOC_GOINGDOWN.
@@ -97,4 +99,9 @@ struct f2fs_comp_option {
 	__u8 log_cluster_size;
 };
 
+struct f2fs_donate_range {
+	__u64 start;
+	__u64 len;
+};
+
 #endif /* _UAPI_LINUX_F2FS_H */
-- 
2.48.0.rc2.279.g1de40edade-goog
Re: [f2fs-dev] [PATCH 1/2] f2fs: register inodes which is able to donate pages
Posted by Chao Yu 11 months ago
On 1/15/25 06:39, Jaegeuk Kim via Linux-f2fs-devel wrote:
> This patch introduces an inode list to keep the page cache ranges that users
> can donate pages together.
> 
>   #define F2FS_IOC_DONATE_RANGE		_IOW(F2FS_IOCTL_MAGIC, 27,	\
> 						struct f2fs_donate_range)
>   struct f2fs_donate_range {
> 	__u64 start;
> 	__u64 len;
>   };
> 
> e.g., ioctl(F2FS_IOC_DONATE_RANGE, &range);
> 
> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> ---
>   fs/f2fs/debug.c           |  3 +++
>   fs/f2fs/f2fs.h            | 12 ++++++++-
>   fs/f2fs/file.c            | 52 +++++++++++++++++++++++++++++++++++++++
>   fs/f2fs/inode.c           | 14 +++++++++++
>   fs/f2fs/super.c           |  1 +
>   include/uapi/linux/f2fs.h |  7 ++++++
>   6 files changed, 88 insertions(+), 1 deletion(-)
> 
> diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
> index 468828288a4a..16c2dfb4f595 100644
> --- a/fs/f2fs/debug.c
> +++ b/fs/f2fs/debug.c
> @@ -164,6 +164,7 @@ static void update_general_status(struct f2fs_sb_info *sbi)
>   	si->ndirty_imeta = get_pages(sbi, F2FS_DIRTY_IMETA);
>   	si->ndirty_dirs = sbi->ndirty_inode[DIR_INODE];
>   	si->ndirty_files = sbi->ndirty_inode[FILE_INODE];
> +	si->ndonate_files = sbi->donate_files;
>   	si->nquota_files = sbi->nquota_files;
>   	si->ndirty_all = sbi->ndirty_inode[DIRTY_META];
>   	si->aw_cnt = atomic_read(&sbi->atomic_files);
> @@ -501,6 +502,8 @@ static int stat_show(struct seq_file *s, void *v)
>   			   si->compr_inode, si->compr_blocks);
>   		seq_printf(s, "  - Swapfile Inode: %u\n",
>   			   si->swapfile_inode);
> +		seq_printf(s, "  - Donate Inode: %u\n",
> +			   si->ndonate_files);
>   		seq_printf(s, "  - Orphan/Append/Update Inode: %u, %u, %u\n",
>   			   si->orphans, si->append, si->update);
>   		seq_printf(s, "\nMain area: %d segs, %d secs %d zones\n",
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index 4bfe162eefd3..951fbc3f94c7 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -850,6 +850,11 @@ struct f2fs_inode_info {
>   #endif
>   	struct list_head dirty_list;	/* dirty list for dirs and files */
>   	struct list_head gdirty_list;	/* linked in global dirty list */
> +
> +	/* linked in global inode list for cache donation */
> +	struct list_head gdonate_list;
> +	loff_t donate_start, donate_end; /* inclusive */
> +
>   	struct task_struct *atomic_write_task;	/* store atomic write task */
>   	struct extent_tree *extent_tree[NR_EXTENT_CACHES];
>   					/* cached extent_tree entry */
> @@ -1274,6 +1279,7 @@ enum inode_type {
>   	DIR_INODE,			/* for dirty dir inode */
>   	FILE_INODE,			/* for dirty regular/symlink inode */
>   	DIRTY_META,			/* for all dirtied inode metadata */
> +	DONATE_INODE,			/* for all inode to donate pages */
>   	NR_INODE_TYPE,
>   };
>   
> @@ -1629,6 +1635,9 @@ struct f2fs_sb_info {
>   	unsigned int warm_data_age_threshold;
>   	unsigned int last_age_weight;
>   
> +	/* control donate caches */
> +	unsigned int donate_files;
> +
>   	/* basic filesystem units */
>   	unsigned int log_sectors_per_block;	/* log2 sectors per block */
>   	unsigned int log_blocksize;		/* log2 block size */
> @@ -3984,7 +3993,8 @@ struct f2fs_stat_info {
>   	unsigned long long allocated_data_blocks;
>   	int ndirty_node, ndirty_dent, ndirty_meta, ndirty_imeta;
>   	int ndirty_data, ndirty_qdata;
> -	unsigned int ndirty_dirs, ndirty_files, nquota_files, ndirty_all;
> +	unsigned int ndirty_dirs, ndirty_files, ndirty_all;
> +	unsigned int nquota_files, ndonate_files;
>   	int nats, dirty_nats, sits, dirty_sits;
>   	int free_nids, avail_nids, alloc_nids;
>   	int total_count, utilization;
> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> index 81764b10840b..c43d64898d8b 100644
> --- a/fs/f2fs/file.c
> +++ b/fs/f2fs/file.c
> @@ -2429,6 +2429,55 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
>   	return ret;
>   }
>   
> +static int f2fs_ioc_donate_range(struct file *filp, unsigned long arg)
> +{
> +	struct inode *inode = file_inode(filp);
> +	struct mnt_idmap *idmap = file_mnt_idmap(filp);
> +	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> +	struct f2fs_donate_range range;
> +	int ret;
> +
> +	if (copy_from_user(&range, (struct f2fs_donate_range __user *)arg,
> +							sizeof(range)))
> +		return -EFAULT;
> +
> +	if (!inode_owner_or_capable(idmap, inode))
> +		return -EACCES;
> +
> +	if (!S_ISREG(inode->i_mode))
> +		return -EINVAL;
> +
> +	if (unlikely((range.start + range.len) >> PAGE_SHIFT >
> +					max_file_blocks(inode)))

What about below case?

range.start = ULLONG_MAX / 2;
range.len = ULLONG_MAX / 2 + 1;

Maybe this one?

if (unlikely(range.start >> PAGE_SHIFT >= max_file_blocks() ||
	range.len >> PAGE_SHIFT > max_file_blocks() ||
	(range.start + range.len) >> PAGE_SHIFT > max_file_blocks()))

Thanks,

> +		return -EINVAL;
> +
> +	ret = mnt_want_write_file(filp);
> +	if (ret)
> +		return ret;
> +
> +	inode_lock(inode);
> +
> +	if (f2fs_is_atomic_file(inode))
> +		goto out;
> +
> +	spin_lock(&sbi->inode_lock[DONATE_INODE]);
> +	if (list_empty(&F2FS_I(inode)->gdonate_list)) {
> +		list_add_tail(&F2FS_I(inode)->gdonate_list,
> +				&sbi->inode_list[DONATE_INODE]);
> +		sbi->donate_files++;
> +	} else {
> +		list_move_tail(&F2FS_I(inode)->gdonate_list,
> +				&sbi->inode_list[DONATE_INODE]);
> +	}
> +	F2FS_I(inode)->donate_start = range.start;
> +	F2FS_I(inode)->donate_end = range.start + range.len - 1;
> +	spin_unlock(&sbi->inode_lock[DONATE_INODE]);
> +out:
> +	inode_unlock(inode);
> +	mnt_drop_write_file(filp);
> +	return ret;
> +}
> +
>   static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg)
>   {
>   	struct inode *inode = file_inode(filp);
> @@ -4458,6 +4507,8 @@ static long __f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
>   		return -EOPNOTSUPP;
>   	case F2FS_IOC_SHUTDOWN:
>   		return f2fs_ioc_shutdown(filp, arg);
> +	case F2FS_IOC_DONATE_RANGE:
> +		return f2fs_ioc_donate_range(filp, arg);
>   	case FITRIM:
>   		return f2fs_ioc_fitrim(filp, arg);
>   	case FS_IOC_SET_ENCRYPTION_POLICY:
> @@ -5209,6 +5260,7 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
>   	case F2FS_IOC_RELEASE_VOLATILE_WRITE:
>   	case F2FS_IOC_ABORT_ATOMIC_WRITE:
>   	case F2FS_IOC_SHUTDOWN:
> +	case F2FS_IOC_DONATE_RANGE:
>   	case FITRIM:
>   	case FS_IOC_SET_ENCRYPTION_POLICY:
>   	case FS_IOC_GET_ENCRYPTION_PWSALT:
> diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
> index 7de33da8b3ea..f9fc58f313f2 100644
> --- a/fs/f2fs/inode.c
> +++ b/fs/f2fs/inode.c
> @@ -804,6 +804,19 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
>   	return 0;
>   }
>   
> +static void f2fs_remove_donate_inode(struct inode *inode)
> +{
> +	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> +
> +	if (list_empty(&F2FS_I(inode)->gdonate_list))
> +		return;
> +
> +	spin_lock(&sbi->inode_lock[DONATE_INODE]);
> +	list_del_init(&F2FS_I(inode)->gdonate_list);
> +	sbi->donate_files--;
> +	spin_unlock(&sbi->inode_lock[DONATE_INODE]);
> +}
> +
>   /*
>    * Called at the last iput() if i_nlink is zero
>    */
> @@ -838,6 +851,7 @@ void f2fs_evict_inode(struct inode *inode)
>   
>   	f2fs_bug_on(sbi, get_dirty_pages(inode));
>   	f2fs_remove_dirty_inode(inode);
> +	f2fs_remove_donate_inode(inode);
>   
>   	if (!IS_DEVICE_ALIASING(inode))
>   		f2fs_destroy_extent_tree(inode);
> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> index fc7d463dee15..ef639a6d82e5 100644
> --- a/fs/f2fs/super.c
> +++ b/fs/f2fs/super.c
> @@ -1441,6 +1441,7 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
>   	spin_lock_init(&fi->i_size_lock);
>   	INIT_LIST_HEAD(&fi->dirty_list);
>   	INIT_LIST_HEAD(&fi->gdirty_list);
> +	INIT_LIST_HEAD(&fi->gdonate_list);
>   	init_f2fs_rwsem(&fi->i_gc_rwsem[READ]);
>   	init_f2fs_rwsem(&fi->i_gc_rwsem[WRITE]);
>   	init_f2fs_rwsem(&fi->i_xattr_sem);
> diff --git a/include/uapi/linux/f2fs.h b/include/uapi/linux/f2fs.h
> index f7aaf8d23e20..cd38a7c166e6 100644
> --- a/include/uapi/linux/f2fs.h
> +++ b/include/uapi/linux/f2fs.h
> @@ -44,6 +44,8 @@
>   #define F2FS_IOC_COMPRESS_FILE		_IO(F2FS_IOCTL_MAGIC, 24)
>   #define F2FS_IOC_START_ATOMIC_REPLACE	_IO(F2FS_IOCTL_MAGIC, 25)
>   #define F2FS_IOC_GET_DEV_ALIAS_FILE	_IOR(F2FS_IOCTL_MAGIC, 26, __u32)
> +#define F2FS_IOC_DONATE_RANGE		_IOW(F2FS_IOCTL_MAGIC, 27,	\
> +						struct f2fs_donate_range)
>   
>   /*
>    * should be same as XFS_IOC_GOINGDOWN.
> @@ -97,4 +99,9 @@ struct f2fs_comp_option {
>   	__u8 log_cluster_size;
>   };
>   
> +struct f2fs_donate_range {
> +	__u64 start;
> +	__u64 len;
> +};
> +
>   #endif /* _UAPI_LINUX_F2FS_H */