[PATCH 1/2] f2fs: register inodes which is able to donate pages

Jaegeuk Kim posted 2 patches 1 year ago
There is a newer version of this series
[PATCH 1/2] f2fs: register inodes which is able to donate pages
Posted by Jaegeuk Kim 1 year ago
This patch introduces an inode list to keep the page cache ranges that users
can donate pages together.

 #define F2FS_IOC_DONATE_RANGE		_IOW(F2FS_IOCTL_MAGIC, 27,	\
						struct f2fs_donate_range)
 struct f2fs_donate_range {
	__u64 start;
	__u64 len;
 };

e.g., ioctl(F2FS_IOC_DONATE_RANGE, &range);

Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/debug.c           |  3 ++
 fs/f2fs/f2fs.h            | 12 +++++++-
 fs/f2fs/file.c            | 65 +++++++++++++++++++++++++++++++++++++++
 fs/f2fs/inode.c           | 14 +++++++++
 fs/f2fs/super.c           |  1 +
 include/uapi/linux/f2fs.h |  7 +++++
 6 files changed, 101 insertions(+), 1 deletion(-)

diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index 468828288a4a..16c2dfb4f595 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -164,6 +164,7 @@ static void update_general_status(struct f2fs_sb_info *sbi)
 	si->ndirty_imeta = get_pages(sbi, F2FS_DIRTY_IMETA);
 	si->ndirty_dirs = sbi->ndirty_inode[DIR_INODE];
 	si->ndirty_files = sbi->ndirty_inode[FILE_INODE];
+	si->ndonate_files = sbi->donate_files;
 	si->nquota_files = sbi->nquota_files;
 	si->ndirty_all = sbi->ndirty_inode[DIRTY_META];
 	si->aw_cnt = atomic_read(&sbi->atomic_files);
@@ -501,6 +502,8 @@ static int stat_show(struct seq_file *s, void *v)
 			   si->compr_inode, si->compr_blocks);
 		seq_printf(s, "  - Swapfile Inode: %u\n",
 			   si->swapfile_inode);
+		seq_printf(s, "  - Donate Inode: %u\n",
+			   si->ndonate_files);
 		seq_printf(s, "  - Orphan/Append/Update Inode: %u, %u, %u\n",
 			   si->orphans, si->append, si->update);
 		seq_printf(s, "\nMain area: %d segs, %d secs %d zones\n",
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 4bfe162eefd3..951fbc3f94c7 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -850,6 +850,11 @@ struct f2fs_inode_info {
 #endif
 	struct list_head dirty_list;	/* dirty list for dirs and files */
 	struct list_head gdirty_list;	/* linked in global dirty list */
+
+	/* linked in global inode list for cache donation */
+	struct list_head gdonate_list;
+	loff_t donate_start, donate_end; /* inclusive */
+
 	struct task_struct *atomic_write_task;	/* store atomic write task */
 	struct extent_tree *extent_tree[NR_EXTENT_CACHES];
 					/* cached extent_tree entry */
@@ -1274,6 +1279,7 @@ enum inode_type {
 	DIR_INODE,			/* for dirty dir inode */
 	FILE_INODE,			/* for dirty regular/symlink inode */
 	DIRTY_META,			/* for all dirtied inode metadata */
+	DONATE_INODE,			/* for all inode to donate pages */
 	NR_INODE_TYPE,
 };
 
@@ -1629,6 +1635,9 @@ struct f2fs_sb_info {
 	unsigned int warm_data_age_threshold;
 	unsigned int last_age_weight;
 
+	/* control donate caches */
+	unsigned int donate_files;
+
 	/* basic filesystem units */
 	unsigned int log_sectors_per_block;	/* log2 sectors per block */
 	unsigned int log_blocksize;		/* log2 block size */
@@ -3984,7 +3993,8 @@ struct f2fs_stat_info {
 	unsigned long long allocated_data_blocks;
 	int ndirty_node, ndirty_dent, ndirty_meta, ndirty_imeta;
 	int ndirty_data, ndirty_qdata;
-	unsigned int ndirty_dirs, ndirty_files, nquota_files, ndirty_all;
+	unsigned int ndirty_dirs, ndirty_files, ndirty_all;
+	unsigned int nquota_files, ndonate_files;
 	int nats, dirty_nats, sits, dirty_sits;
 	int free_nids, avail_nids, alloc_nids;
 	int total_count, utilization;
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 3e06fea9795c..6572970a988a 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -2450,6 +2450,68 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
 	return ret;
 }
 
+static int f2fs_ioc_donate_range(struct file *filp, unsigned long arg)
+{
+	struct inode *inode = file_inode(filp);
+	struct mnt_idmap *idmap = file_mnt_idmap(filp);
+	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+	struct f2fs_donate_range range;
+	u64 max_pages = F2FS_BLK_TO_BYTES(max_file_blocks(inode)) >> PAGE_SHIFT;
+	u64 start, end;
+	int ret;
+
+	if (copy_from_user(&range, (struct f2fs_donate_range __user *)arg,
+							sizeof(range)))
+		return -EFAULT;
+
+	if (!inode_owner_or_capable(idmap, inode))
+		return -EACCES;
+
+	if (!S_ISREG(inode->i_mode))
+		return -EINVAL;
+
+	if (range.start >= max_pages || range.len > max_pages ||
+	    (range.start + range.len) > max_pages)
+		return -EINVAL;
+
+	start = range.start >> PAGE_SHIFT;
+	end = DIV_ROUND_UP(range.start + range.len, PAGE_SIZE);
+
+	ret = mnt_want_write_file(filp);
+	if (ret)
+		return ret;
+
+	inode_lock(inode);
+
+	if (f2fs_is_atomic_file(inode))
+		goto out;
+
+	spin_lock(&sbi->inode_lock[DONATE_INODE]);
+	/* let's remove the range, if len = 0 */
+	if (!range.len) {
+		if (!list_empty(&F2FS_I(inode)->gdonate_list)) {
+			list_del_init(&F2FS_I(inode)->gdonate_list);
+			sbi->donate_files--;
+		}
+	} else {
+		if (list_empty(&F2FS_I(inode)->gdonate_list)) {
+			list_add_tail(&F2FS_I(inode)->gdonate_list,
+					&sbi->inode_list[DONATE_INODE]);
+			sbi->donate_files++;
+		} else {
+			list_move_tail(&F2FS_I(inode)->gdonate_list,
+					&sbi->inode_list[DONATE_INODE]);
+		}
+		F2FS_I(inode)->donate_start = start;
+		F2FS_I(inode)->donate_end = end - 1;
+	}
+	spin_unlock(&sbi->inode_lock[DONATE_INODE]);
+out:
+	inode_unlock(inode);
+	mnt_drop_write_file(filp);
+	return ret;
+}
+
 static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg)
 {
 	struct inode *inode = file_inode(filp);
@@ -4479,6 +4541,8 @@ static long __f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 		return -EOPNOTSUPP;
 	case F2FS_IOC_SHUTDOWN:
 		return f2fs_ioc_shutdown(filp, arg);
+	case F2FS_IOC_DONATE_RANGE:
+		return f2fs_ioc_donate_range(filp, arg);
 	case FITRIM:
 		return f2fs_ioc_fitrim(filp, arg);
 	case FS_IOC_SET_ENCRYPTION_POLICY:
@@ -5230,6 +5294,7 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	case F2FS_IOC_RELEASE_VOLATILE_WRITE:
 	case F2FS_IOC_ABORT_ATOMIC_WRITE:
 	case F2FS_IOC_SHUTDOWN:
+	case F2FS_IOC_DONATE_RANGE:
 	case FITRIM:
 	case FS_IOC_SET_ENCRYPTION_POLICY:
 	case FS_IOC_GET_ENCRYPTION_PWSALT:
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 7de33da8b3ea..f9fc58f313f2 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -804,6 +804,19 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
 	return 0;
 }
 
+static void f2fs_remove_donate_inode(struct inode *inode)
+{
+	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+
+	if (list_empty(&F2FS_I(inode)->gdonate_list))
+		return;
+
+	spin_lock(&sbi->inode_lock[DONATE_INODE]);
+	list_del_init(&F2FS_I(inode)->gdonate_list);
+	sbi->donate_files--;
+	spin_unlock(&sbi->inode_lock[DONATE_INODE]);
+}
+
 /*
  * Called at the last iput() if i_nlink is zero
  */
@@ -838,6 +851,7 @@ void f2fs_evict_inode(struct inode *inode)
 
 	f2fs_bug_on(sbi, get_dirty_pages(inode));
 	f2fs_remove_dirty_inode(inode);
+	f2fs_remove_donate_inode(inode);
 
 	if (!IS_DEVICE_ALIASING(inode))
 		f2fs_destroy_extent_tree(inode);
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index fc7d463dee15..ef639a6d82e5 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1441,6 +1441,7 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
 	spin_lock_init(&fi->i_size_lock);
 	INIT_LIST_HEAD(&fi->dirty_list);
 	INIT_LIST_HEAD(&fi->gdirty_list);
+	INIT_LIST_HEAD(&fi->gdonate_list);
 	init_f2fs_rwsem(&fi->i_gc_rwsem[READ]);
 	init_f2fs_rwsem(&fi->i_gc_rwsem[WRITE]);
 	init_f2fs_rwsem(&fi->i_xattr_sem);
diff --git a/include/uapi/linux/f2fs.h b/include/uapi/linux/f2fs.h
index f7aaf8d23e20..cd38a7c166e6 100644
--- a/include/uapi/linux/f2fs.h
+++ b/include/uapi/linux/f2fs.h
@@ -44,6 +44,8 @@
 #define F2FS_IOC_COMPRESS_FILE		_IO(F2FS_IOCTL_MAGIC, 24)
 #define F2FS_IOC_START_ATOMIC_REPLACE	_IO(F2FS_IOCTL_MAGIC, 25)
 #define F2FS_IOC_GET_DEV_ALIAS_FILE	_IOR(F2FS_IOCTL_MAGIC, 26, __u32)
+#define F2FS_IOC_DONATE_RANGE		_IOW(F2FS_IOCTL_MAGIC, 27,	\
+						struct f2fs_donate_range)
 
 /*
  * should be same as XFS_IOC_GOINGDOWN.
@@ -97,4 +99,9 @@ struct f2fs_comp_option {
 	__u8 log_cluster_size;
 };
 
+struct f2fs_donate_range {
+	__u64 start;
+	__u64 len;
+};
+
 #endif /* _UAPI_LINUX_F2FS_H */
-- 
2.48.0.rc2.279.g1de40edade-goog
Re: [f2fs-dev] [PATCH 1/2] f2fs: register inodes which is able to donate pages
Posted by Chao Yu 1 year ago
On 1/17/25 06:51, Jaegeuk Kim via Linux-f2fs-devel wrote:
> This patch introduces an inode list to keep the page cache ranges that users
> can donate pages together.
> 
>   #define F2FS_IOC_DONATE_RANGE		_IOW(F2FS_IOCTL_MAGIC, 27,	\
> 						struct f2fs_donate_range)
>   struct f2fs_donate_range {
> 	__u64 start;
> 	__u64 len;
>   };
> 
> e.g., ioctl(F2FS_IOC_DONATE_RANGE, &range);
> 
> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> ---
>   fs/f2fs/debug.c           |  3 ++
>   fs/f2fs/f2fs.h            | 12 +++++++-
>   fs/f2fs/file.c            | 65 +++++++++++++++++++++++++++++++++++++++
>   fs/f2fs/inode.c           | 14 +++++++++
>   fs/f2fs/super.c           |  1 +
>   include/uapi/linux/f2fs.h |  7 +++++
>   6 files changed, 101 insertions(+), 1 deletion(-)
> 
> diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
> index 468828288a4a..16c2dfb4f595 100644
> --- a/fs/f2fs/debug.c
> +++ b/fs/f2fs/debug.c
> @@ -164,6 +164,7 @@ static void update_general_status(struct f2fs_sb_info *sbi)
>   	si->ndirty_imeta = get_pages(sbi, F2FS_DIRTY_IMETA);
>   	si->ndirty_dirs = sbi->ndirty_inode[DIR_INODE];
>   	si->ndirty_files = sbi->ndirty_inode[FILE_INODE];
> +	si->ndonate_files = sbi->donate_files;
>   	si->nquota_files = sbi->nquota_files;
>   	si->ndirty_all = sbi->ndirty_inode[DIRTY_META];
>   	si->aw_cnt = atomic_read(&sbi->atomic_files);
> @@ -501,6 +502,8 @@ static int stat_show(struct seq_file *s, void *v)
>   			   si->compr_inode, si->compr_blocks);
>   		seq_printf(s, "  - Swapfile Inode: %u\n",
>   			   si->swapfile_inode);
> +		seq_printf(s, "  - Donate Inode: %u\n",
> +			   si->ndonate_files);
>   		seq_printf(s, "  - Orphan/Append/Update Inode: %u, %u, %u\n",
>   			   si->orphans, si->append, si->update);
>   		seq_printf(s, "\nMain area: %d segs, %d secs %d zones\n",
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index 4bfe162eefd3..951fbc3f94c7 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -850,6 +850,11 @@ struct f2fs_inode_info {
>   #endif
>   	struct list_head dirty_list;	/* dirty list for dirs and files */
>   	struct list_head gdirty_list;	/* linked in global dirty list */
> +
> +	/* linked in global inode list for cache donation */
> +	struct list_head gdonate_list;
> +	loff_t donate_start, donate_end; /* inclusive */
> +
>   	struct task_struct *atomic_write_task;	/* store atomic write task */
>   	struct extent_tree *extent_tree[NR_EXTENT_CACHES];
>   					/* cached extent_tree entry */
> @@ -1274,6 +1279,7 @@ enum inode_type {
>   	DIR_INODE,			/* for dirty dir inode */
>   	FILE_INODE,			/* for dirty regular/symlink inode */
>   	DIRTY_META,			/* for all dirtied inode metadata */
> +	DONATE_INODE,			/* for all inode to donate pages */
>   	NR_INODE_TYPE,
>   };
>   
> @@ -1629,6 +1635,9 @@ struct f2fs_sb_info {
>   	unsigned int warm_data_age_threshold;
>   	unsigned int last_age_weight;
>   
> +	/* control donate caches */
> +	unsigned int donate_files;
> +
>   	/* basic filesystem units */
>   	unsigned int log_sectors_per_block;	/* log2 sectors per block */
>   	unsigned int log_blocksize;		/* log2 block size */
> @@ -3984,7 +3993,8 @@ struct f2fs_stat_info {
>   	unsigned long long allocated_data_blocks;
>   	int ndirty_node, ndirty_dent, ndirty_meta, ndirty_imeta;
>   	int ndirty_data, ndirty_qdata;
> -	unsigned int ndirty_dirs, ndirty_files, nquota_files, ndirty_all;
> +	unsigned int ndirty_dirs, ndirty_files, ndirty_all;
> +	unsigned int nquota_files, ndonate_files;
>   	int nats, dirty_nats, sits, dirty_sits;
>   	int free_nids, avail_nids, alloc_nids;
>   	int total_count, utilization;
> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> index 3e06fea9795c..6572970a988a 100644
> --- a/fs/f2fs/file.c
> +++ b/fs/f2fs/file.c
> @@ -2450,6 +2450,68 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
>   	return ret;
>   }
>   
> +static int f2fs_ioc_donate_range(struct file *filp, unsigned long arg)
> +{
> +	struct inode *inode = file_inode(filp);
> +	struct mnt_idmap *idmap = file_mnt_idmap(filp);
> +	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> +	struct f2fs_donate_range range;
> +	u64 max_pages = F2FS_BLK_TO_BYTES(max_file_blocks(inode)) >> PAGE_SHIFT;

It should be u64 max_size = F2FS_BLK_TO_BYTES(max_file_blocks(inode));?

> +	u64 start, end;
> +	int ret;
> +
> +	if (copy_from_user(&range, (struct f2fs_donate_range __user *)arg,
> +							sizeof(range)))
> +		return -EFAULT;
> +
> +	if (!inode_owner_or_capable(idmap, inode))
> +		return -EACCES;
> +
> +	if (!S_ISREG(inode->i_mode))
> +		return -EINVAL;
> +
> +	if (range.start >= max_pages || range.len > max_pages ||
> +	    (range.start + range.len) > max_pages)
> +		return -EINVAL;

Use max_size instead of max_pages?

Thanks,

> +
> +	start = range.start >> PAGE_SHIFT;
> +	end = DIV_ROUND_UP(range.start + range.len, PAGE_SIZE);
> +
> +	ret = mnt_want_write_file(filp);
> +	if (ret)
> +		return ret;
> +
> +	inode_lock(inode);
> +
> +	if (f2fs_is_atomic_file(inode))
> +		goto out;
> +
> +	spin_lock(&sbi->inode_lock[DONATE_INODE]);
> +	/* let's remove the range, if len = 0 */
> +	if (!range.len) {
> +		if (!list_empty(&F2FS_I(inode)->gdonate_list)) {
> +			list_del_init(&F2FS_I(inode)->gdonate_list);
> +			sbi->donate_files--;
> +		}
> +	} else {
> +		if (list_empty(&F2FS_I(inode)->gdonate_list)) {
> +			list_add_tail(&F2FS_I(inode)->gdonate_list,
> +					&sbi->inode_list[DONATE_INODE]);
> +			sbi->donate_files++;
> +		} else {
> +			list_move_tail(&F2FS_I(inode)->gdonate_list,
> +					&sbi->inode_list[DONATE_INODE]);
> +		}
> +		F2FS_I(inode)->donate_start = start;
> +		F2FS_I(inode)->donate_end = end - 1;
> +	}
> +	spin_unlock(&sbi->inode_lock[DONATE_INODE]);
> +out:
> +	inode_unlock(inode);
> +	mnt_drop_write_file(filp);
> +	return ret;
> +}
> +
>   static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg)
>   {
>   	struct inode *inode = file_inode(filp);
> @@ -4479,6 +4541,8 @@ static long __f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
>   		return -EOPNOTSUPP;
>   	case F2FS_IOC_SHUTDOWN:
>   		return f2fs_ioc_shutdown(filp, arg);
> +	case F2FS_IOC_DONATE_RANGE:
> +		return f2fs_ioc_donate_range(filp, arg);
>   	case FITRIM:
>   		return f2fs_ioc_fitrim(filp, arg);
>   	case FS_IOC_SET_ENCRYPTION_POLICY:
> @@ -5230,6 +5294,7 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
>   	case F2FS_IOC_RELEASE_VOLATILE_WRITE:
>   	case F2FS_IOC_ABORT_ATOMIC_WRITE:
>   	case F2FS_IOC_SHUTDOWN:
> +	case F2FS_IOC_DONATE_RANGE:
>   	case FITRIM:
>   	case FS_IOC_SET_ENCRYPTION_POLICY:
>   	case FS_IOC_GET_ENCRYPTION_PWSALT:
> diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
> index 7de33da8b3ea..f9fc58f313f2 100644
> --- a/fs/f2fs/inode.c
> +++ b/fs/f2fs/inode.c
> @@ -804,6 +804,19 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
>   	return 0;
>   }
>   
> +static void f2fs_remove_donate_inode(struct inode *inode)
> +{
> +	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> +
> +	if (list_empty(&F2FS_I(inode)->gdonate_list))
> +		return;
> +
> +	spin_lock(&sbi->inode_lock[DONATE_INODE]);
> +	list_del_init(&F2FS_I(inode)->gdonate_list);
> +	sbi->donate_files--;
> +	spin_unlock(&sbi->inode_lock[DONATE_INODE]);
> +}
> +
>   /*
>    * Called at the last iput() if i_nlink is zero
>    */
> @@ -838,6 +851,7 @@ void f2fs_evict_inode(struct inode *inode)
>   
>   	f2fs_bug_on(sbi, get_dirty_pages(inode));
>   	f2fs_remove_dirty_inode(inode);
> +	f2fs_remove_donate_inode(inode);
>   
>   	if (!IS_DEVICE_ALIASING(inode))
>   		f2fs_destroy_extent_tree(inode);
> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> index fc7d463dee15..ef639a6d82e5 100644
> --- a/fs/f2fs/super.c
> +++ b/fs/f2fs/super.c
> @@ -1441,6 +1441,7 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
>   	spin_lock_init(&fi->i_size_lock);
>   	INIT_LIST_HEAD(&fi->dirty_list);
>   	INIT_LIST_HEAD(&fi->gdirty_list);
> +	INIT_LIST_HEAD(&fi->gdonate_list);
>   	init_f2fs_rwsem(&fi->i_gc_rwsem[READ]);
>   	init_f2fs_rwsem(&fi->i_gc_rwsem[WRITE]);
>   	init_f2fs_rwsem(&fi->i_xattr_sem);
> diff --git a/include/uapi/linux/f2fs.h b/include/uapi/linux/f2fs.h
> index f7aaf8d23e20..cd38a7c166e6 100644
> --- a/include/uapi/linux/f2fs.h
> +++ b/include/uapi/linux/f2fs.h
> @@ -44,6 +44,8 @@
>   #define F2FS_IOC_COMPRESS_FILE		_IO(F2FS_IOCTL_MAGIC, 24)
>   #define F2FS_IOC_START_ATOMIC_REPLACE	_IO(F2FS_IOCTL_MAGIC, 25)
>   #define F2FS_IOC_GET_DEV_ALIAS_FILE	_IOR(F2FS_IOCTL_MAGIC, 26, __u32)
> +#define F2FS_IOC_DONATE_RANGE		_IOW(F2FS_IOCTL_MAGIC, 27,	\
> +						struct f2fs_donate_range)
>   
>   /*
>    * should be same as XFS_IOC_GOINGDOWN.
> @@ -97,4 +99,9 @@ struct f2fs_comp_option {
>   	__u8 log_cluster_size;
>   };
>   
> +struct f2fs_donate_range {
> +	__u64 start;
> +	__u64 len;
> +};
> +
>   #endif /* _UAPI_LINUX_F2FS_H */