From: Chi Zhiling <chizhiling@kylinos.cn>
For non-extending writes, we only update the page cache and the extent
map. In this case, if the write falls within a single block — and
therefore within a single folio — we do not need an exclusive lock to
guarantee the atomicity of the write, because we already hold the folio lock.
Signed-off-by: Chi Zhiling <chizhiling@kylinos.cn>
---
fs/xfs/xfs_file.c | 34 +++++++++++++++++++++++++++++++++-
1 file changed, 33 insertions(+), 1 deletion(-)
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index a6f214f57238..8eaa98464328 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -914,6 +914,27 @@ xfs_file_dax_write(
return ret;
}
+#define offset_in_block(inode, p) ((unsigned long)(p) & (i_blocksize(inode) - 1))
+
+static inline bool xfs_allow_concurrent(
+ struct kiocb *iocb,
+ struct iov_iter *from)
+{
+ struct inode *inode = iocb->ki_filp->f_mapping->host;
+
+ /* Extending write? */
+ if (iocb->ki_flags & IOCB_APPEND ||
+ iocb->ki_pos >= i_size_read(inode))
+ return false;
+
+ /* Exceeds a block range? */
+ if (iov_iter_count(from) > i_blocksize(inode) ||
+ offset_in_block(inode, iocb->ki_pos) + iov_iter_count(from) > i_blocksize(inode))
+ return false;
+
+ return true;
+}
+
STATIC ssize_t
xfs_file_buffered_write(
struct kiocb *iocb,
@@ -925,8 +946,12 @@ xfs_file_buffered_write(
bool cleared_space = false;
unsigned int iolock;
+ if (xfs_allow_concurrent(iocb, from))
+ iolock = XFS_IOLOCK_SHARED;
+ else
+ iolock = XFS_IOLOCK_EXCL;
+
write_retry:
- iolock = XFS_IOLOCK_EXCL;
ret = xfs_ilock_iocb_for_write(iocb, &iolock, false);
if (ret)
return ret;
@@ -935,6 +960,13 @@ xfs_file_buffered_write(
if (ret)
goto out;
+ if (iolock == XFS_IOLOCK_SHARED &&
+ iocb->ki_pos + iov_iter_count(from) > i_size_read(inode)) {
+ xfs_iunlock(ip, iolock);
+ iolock = XFS_IOLOCK_EXCL;
+ goto write_retry;
+ }
+
trace_xfs_file_buffered_write(iocb, from);
ret = iomap_file_buffered_write(iocb, from,
&xfs_buffered_write_iomap_ops, NULL);
--
2.43.0
On Fri, Apr 25, 2025 at 06:38:41PM +0800, Chi Zhiling wrote:
> From: Chi Zhiling <chizhiling@kylinos.cn>
>
> For unextending writes, we will only update the pagecache and extent.
> In this case, if our write occurs within a single block, that is,
> within a single folio, we don't need an exclusive lock to ensure the
> atomicity of the write, because we already have the folio lock.
>
> Signed-off-by: Chi Zhiling <chizhiling@kylinos.cn>
> ---
> fs/xfs/xfs_file.c | 34 +++++++++++++++++++++++++++++++++-
> 1 file changed, 33 insertions(+), 1 deletion(-)
>
> diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
> index a6f214f57238..8eaa98464328 100644
> --- a/fs/xfs/xfs_file.c
> +++ b/fs/xfs/xfs_file.c
> @@ -914,6 +914,27 @@ xfs_file_dax_write(
> return ret;
> }
>
> +#define offset_in_block(inode, p) ((unsigned long)(p) & (i_blocksize(inode) - 1))
Is it correct to cast an loff_t (s64) to unsigned long (u32 on i386)
here?
> +
> +static inline bool xfs_allow_concurrent(
static inline bool
xfs_allow_concurrent(
(separate lines style nit)
> + struct kiocb *iocb,
> + struct iov_iter *from)
> +{
> + struct inode *inode = iocb->ki_filp->f_mapping->host;
> +
> + /* Extending write? */
> + if (iocb->ki_flags & IOCB_APPEND ||
> + iocb->ki_pos >= i_size_read(inode))
> + return false;
> +
> + /* Exceeds a block range? */
> + if (iov_iter_count(from) > i_blocksize(inode) ||
> + offset_in_block(inode, iocb->ki_pos) + iov_iter_count(from) > i_blocksize(inode))
> + return false;
> +
> + return true;
> +}
...and since this helper only has one caller, maybe it should be named
xfs_buffered_write_iolock_mode and return the lock mode directly?
> +
> STATIC ssize_t
> xfs_file_buffered_write(
> struct kiocb *iocb,
> @@ -925,8 +946,12 @@ xfs_file_buffered_write(
> bool cleared_space = false;
> unsigned int iolock;
>
> + if (xfs_allow_concurrent(iocb, from))
> + iolock = XFS_IOLOCK_SHARED;
> + else
> + iolock = XFS_IOLOCK_EXCL;
> +
> write_retry:
> - iolock = XFS_IOLOCK_EXCL;
> ret = xfs_ilock_iocb_for_write(iocb, &iolock, false);
> if (ret)
> return ret;
> @@ -935,6 +960,13 @@ xfs_file_buffered_write(
> if (ret)
> goto out;
>
> + if (iolock == XFS_IOLOCK_SHARED &&
> + iocb->ki_pos + iov_iter_count(from) > i_size_read(inode)) {
> + xfs_iunlock(ip, iolock);
> + iolock = XFS_IOLOCK_EXCL;
> + goto write_retry;
> + }
> +
> trace_xfs_file_buffered_write(iocb, from);
> ret = iomap_file_buffered_write(iocb, from,
> &xfs_buffered_write_iomap_ops, NULL);
> --
> 2.43.0
>
>
On 2025/4/25 23:15, Darrick J. Wong wrote:
> On Fri, Apr 25, 2025 at 06:38:41PM +0800, Chi Zhiling wrote:
>> From: Chi Zhiling <chizhiling@kylinos.cn>
>>
>> For unextending writes, we will only update the pagecache and extent.
>> In this case, if our write occurs within a single block, that is,
>> within a single folio, we don't need an exclusive lock to ensure the
>> atomicity of the write, because we already have the folio lock.
>>
>> Signed-off-by: Chi Zhiling <chizhiling@kylinos.cn>
>> ---
>> fs/xfs/xfs_file.c | 34 +++++++++++++++++++++++++++++++++-
>> 1 file changed, 33 insertions(+), 1 deletion(-)
>>
>> diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
>> index a6f214f57238..8eaa98464328 100644
>> --- a/fs/xfs/xfs_file.c
>> +++ b/fs/xfs/xfs_file.c
>> @@ -914,6 +914,27 @@ xfs_file_dax_write(
>> return ret;
>> }
>>
>> +#define offset_in_block(inode, p) ((unsigned long)(p) & (i_blocksize(inode) - 1))
>
> Is it correct to cast an loff_t (s64) to unsigned long (u32 on i386)
> here?
I'm not sure whether there is an issue here; although there is a type
cast, it shouldn't affect the final result of offset_in_block, since
only the low bits (below the block size) are kept by the mask.
>
>> +
>> +static inline bool xfs_allow_concurrent(
>
> static inline bool
> xfs_allow_concurrent(
>
> (separate lines style nit)
Okay
>
>> + struct kiocb *iocb,
>> + struct iov_iter *from)
>> +{
>> + struct inode *inode = iocb->ki_filp->f_mapping->host;
>> +
>> + /* Extending write? */
>> + if (iocb->ki_flags & IOCB_APPEND ||
>> + iocb->ki_pos >= i_size_read(inode))
>> + return false;
>> +
>> + /* Exceeds a block range? */
>> + if (iov_iter_count(from) > i_blocksize(inode) ||
>> + offset_in_block(inode, iocb->ki_pos) + iov_iter_count(from) > i_blocksize(inode))
>> + return false;
>> +
>> + return true;
>> +}
>
> ...and since this helper only has one caller, maybe it should be named
> xfs_buffered_write_iolock_mode and return the lock mode directly?
Yes, this is better. I will update it in the next patch.
Thanks
>
>> +
>> STATIC ssize_t
>> xfs_file_buffered_write(
>> struct kiocb *iocb,
>> @@ -925,8 +946,12 @@ xfs_file_buffered_write(
>> bool cleared_space = false;
>> unsigned int iolock;
>>
>> + if (xfs_allow_concurrent(iocb, from))
>> + iolock = XFS_IOLOCK_SHARED;
>> + else
>> + iolock = XFS_IOLOCK_EXCL;
>> +
>> write_retry:
>> - iolock = XFS_IOLOCK_EXCL;
>> ret = xfs_ilock_iocb_for_write(iocb, &iolock, false);
>> if (ret)
>> return ret;
>> @@ -935,6 +960,13 @@ xfs_file_buffered_write(
>> if (ret)
>> goto out;
>>
>> + if (iolock == XFS_IOLOCK_SHARED &&
>> + iocb->ki_pos + iov_iter_count(from) > i_size_read(inode)) {
>> + xfs_iunlock(ip, iolock);
>> + iolock = XFS_IOLOCK_EXCL;
>> + goto write_retry;
>> + }
>> +
>> trace_xfs_file_buffered_write(iocb, from);
>> ret = iomap_file_buffered_write(iocb, from,
>> &xfs_buffered_write_iomap_ops, NULL);
>> --
>> 2.43.0
>>
>>
© 2016 - 2026 Red Hat, Inc.