[PATCH v2 07/11] xfs: iomap CoW-based atomic write support

John Garry posted 11 patches 10 months, 1 week ago
There is a newer version of this series
[PATCH v2 07/11] xfs: iomap CoW-based atomic write support
Posted by John Garry 10 months, 1 week ago
In cases of an atomic write occurs for misaligned or discontiguous disk
blocks, we will use a CoW-based method to issue the atomic write.

So, for that case, return -EAGAIN to request that the write be issued in
CoW atomic write mode. The dio write path should detect this, similar to
how misaligned regalar DIO writes are handled.

For normal HW-based mode, when the range which we are atomic writing to
covers a shared data extent, try to allocate a new CoW fork. However, if
we find that what we allocated does not meet atomic write requirements
in terms of length and alignment, then fallback on the CoW-based mode
for the atomic write.

Signed-off-by: John Garry <john.g.garry@oracle.com>
---
 fs/xfs/xfs_iomap.c | 72 ++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 69 insertions(+), 3 deletions(-)

diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index ab79f0080288..c5ecfafbba60 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -795,6 +795,23 @@ imap_spans_range(
 	return true;
 }
 
+static bool
+imap_range_valid_for_atomic_write(
+	struct xfs_bmbt_irec	*imap,
+	xfs_fileoff_t		offset_fsb,
+	xfs_fileoff_t		end_fsb)
+{
+	/* Misaligned start block wrt size */
+	if (!IS_ALIGNED(imap->br_startblock, imap->br_blockcount))
+		return false;
+
+	/* Discontiguous or mixed extents */
+	if (!imap_spans_range(imap, offset_fsb, end_fsb))
+		return false;
+
+	return true;
+}
+
 static int
 xfs_direct_write_iomap_begin(
 	struct inode		*inode,
@@ -809,12 +826,20 @@ xfs_direct_write_iomap_begin(
 	struct xfs_bmbt_irec	imap, cmap;
 	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset);
 	xfs_fileoff_t		end_fsb = xfs_iomap_end_fsb(mp, offset, length);
+	bool			atomic_cow = flags & IOMAP_ATOMIC_COW;
+	bool			atomic_hw = flags & IOMAP_ATOMIC_HW;
 	int			nimaps = 1, error = 0;
 	bool			shared = false;
 	u16			iomap_flags = 0;
+	xfs_fileoff_t		orig_offset_fsb;
+	xfs_fileoff_t		orig_end_fsb;
+	bool			needs_alloc;
 	unsigned int		lockmode;
 	u64			seq;
 
+	orig_offset_fsb = offset_fsb;
+	orig_end_fsb = end_fsb;
+
 	ASSERT(flags & (IOMAP_WRITE | IOMAP_ZERO));
 
 	if (xfs_is_shutdown(mp))
@@ -832,7 +857,7 @@ xfs_direct_write_iomap_begin(
 	 * COW writes may allocate delalloc space or convert unwritten COW
 	 * extents, so we need to make sure to take the lock exclusively here.
 	 */
-	if (xfs_is_cow_inode(ip))
+	if (xfs_is_cow_inode(ip) || atomic_cow)
 		lockmode = XFS_ILOCK_EXCL;
 	else
 		lockmode = XFS_ILOCK_SHARED;
@@ -857,6 +882,22 @@ xfs_direct_write_iomap_begin(
 	if (error)
 		goto out_unlock;
 
+	if (flags & IOMAP_ATOMIC_COW) {
+		error = xfs_reflink_allocate_cow(ip, &imap, &cmap, &shared,
+				&lockmode,
+				(flags & IOMAP_DIRECT) || IS_DAX(inode), true);
+		/*
+		 * Don't check @shared. For atomic writes, we should error when
+		 * we don't get a CoW fork.
+		 */
+		if (error)
+			goto out_unlock;
+
+		end_fsb = imap.br_startoff + imap.br_blockcount;
+		length = XFS_FSB_TO_B(mp, end_fsb) - offset;
+		goto out_found_cow;
+	}
+
 	if (imap_needs_cow(ip, flags, &imap, nimaps)) {
 		error = -EAGAIN;
 		if (flags & IOMAP_NOWAIT)
@@ -868,13 +909,38 @@ xfs_direct_write_iomap_begin(
 				(flags & IOMAP_DIRECT) || IS_DAX(inode), false);
 		if (error)
 			goto out_unlock;
-		if (shared)
+		if (shared) {
+			if (atomic_hw &&
+			    !imap_range_valid_for_atomic_write(&cmap,
+					orig_offset_fsb, orig_end_fsb)) {
+				error = -EAGAIN;
+				goto out_unlock;
+			}
 			goto out_found_cow;
+		}
 		end_fsb = imap.br_startoff + imap.br_blockcount;
 		length = XFS_FSB_TO_B(mp, end_fsb) - offset;
 	}
 
-	if (imap_needs_alloc(inode, flags, &imap, nimaps))
+	needs_alloc = imap_needs_alloc(inode, flags, &imap, nimaps);
+
+	if (atomic_hw) {
+		error = -EAGAIN;
+		/*
+		 * Use CoW method for when we need to alloc > 1 block,
+		 * otherwise we might allocate less than what we need here and
+		 * have multiple mappings.
+		*/
+		if (needs_alloc && orig_end_fsb - orig_offset_fsb > 1)
+			goto out_unlock;
+
+		if (!imap_range_valid_for_atomic_write(&imap, orig_offset_fsb,
+						orig_end_fsb)) {
+			goto out_unlock;
+		}
+	}
+
+	if (needs_alloc)
 		goto allocate_blocks;
 
 	/*
-- 
2.31.1
Re: [PATCH v2 07/11] xfs: iomap CoW-based atomic write support
Posted by Darrick J. Wong 9 months, 3 weeks ago
On Thu, Feb 13, 2025 at 01:56:15PM +0000, John Garry wrote:
> In cases of an atomic write occurs for misaligned or discontiguous disk
> blocks, we will use a CoW-based method to issue the atomic write.
> 
> So, for that case, return -EAGAIN to request that the write be issued in
> CoW atomic write mode. The dio write path should detect this, similar to
> how misaligned regalar DIO writes are handled.
> 
> For normal HW-based mode, when the range which we are atomic writing to
> covers a shared data extent, try to allocate a new CoW fork. However, if
> we find that what we allocated does not meet atomic write requirements
> in terms of length and alignment, then fallback on the CoW-based mode
> for the atomic write.
> 
> Signed-off-by: John Garry <john.g.garry@oracle.com>
> ---
>  fs/xfs/xfs_iomap.c | 72 ++++++++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 69 insertions(+), 3 deletions(-)
> 
> diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
> index ab79f0080288..c5ecfafbba60 100644
> --- a/fs/xfs/xfs_iomap.c
> +++ b/fs/xfs/xfs_iomap.c
> @@ -795,6 +795,23 @@ imap_spans_range(
>  	return true;
>  }
>  
> +static bool
> +imap_range_valid_for_atomic_write(

xfs_bmap_valid_for_atomic_write()

> +	struct xfs_bmbt_irec	*imap,
> +	xfs_fileoff_t		offset_fsb,
> +	xfs_fileoff_t		end_fsb)
> +{
> +	/* Misaligned start block wrt size */
> +	if (!IS_ALIGNED(imap->br_startblock, imap->br_blockcount))
> +		return false;
> +
> +	/* Discontiguous or mixed extents */
> +	if (!imap_spans_range(imap, offset_fsb, end_fsb))
> +		return false;
> +
> +	return true;
> +}
> +
>  static int
>  xfs_direct_write_iomap_begin(
>  	struct inode		*inode,
> @@ -809,12 +826,20 @@ xfs_direct_write_iomap_begin(
>  	struct xfs_bmbt_irec	imap, cmap;
>  	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset);
>  	xfs_fileoff_t		end_fsb = xfs_iomap_end_fsb(mp, offset, length);
> +	bool			atomic_cow = flags & IOMAP_ATOMIC_COW;
> +	bool			atomic_hw = flags & IOMAP_ATOMIC_HW;
>  	int			nimaps = 1, error = 0;
>  	bool			shared = false;
>  	u16			iomap_flags = 0;
> +	xfs_fileoff_t		orig_offset_fsb;
> +	xfs_fileoff_t		orig_end_fsb;
> +	bool			needs_alloc;
>  	unsigned int		lockmode;
>  	u64			seq;
>  
> +	orig_offset_fsb = offset_fsb;

When does offset_fsb change?

> +	orig_end_fsb = end_fsb;

Set this in the variable declaration?

> +
>  	ASSERT(flags & (IOMAP_WRITE | IOMAP_ZERO));
>  
>  	if (xfs_is_shutdown(mp))
> @@ -832,7 +857,7 @@ xfs_direct_write_iomap_begin(
>  	 * COW writes may allocate delalloc space or convert unwritten COW
>  	 * extents, so we need to make sure to take the lock exclusively here.
>  	 */
> -	if (xfs_is_cow_inode(ip))
> +	if (xfs_is_cow_inode(ip) || atomic_cow)
>  		lockmode = XFS_ILOCK_EXCL;
>  	else
>  		lockmode = XFS_ILOCK_SHARED;
> @@ -857,6 +882,22 @@ xfs_direct_write_iomap_begin(
>  	if (error)
>  		goto out_unlock;
>  
> +	if (flags & IOMAP_ATOMIC_COW) {

	if (atomic_cow) ?

Or really, atomic_sw?

> +		error = xfs_reflink_allocate_cow(ip, &imap, &cmap, &shared,
> +				&lockmode,
> +				(flags & IOMAP_DIRECT) || IS_DAX(inode), true);
> +		/*
> +		 * Don't check @shared. For atomic writes, we should error when
> +		 * we don't get a CoW fork.

"Get a CoW fork"?  What does that mean?  The cow fork should be
automatically allocated when needed, right?  Or should this really read
"...when we don't get a COW mapping"?

> +		 */
> +		if (error)
> +			goto out_unlock;
> +
> +		end_fsb = imap.br_startoff + imap.br_blockcount;
> +		length = XFS_FSB_TO_B(mp, end_fsb) - offset;
> +		goto out_found_cow;
> +	}

Can this be a separate ->iomap_begin (and hence iomap ops)?  I am trying
to avoid the incohesion (still) plaguing most of the other iomap users.

> +
>  	if (imap_needs_cow(ip, flags, &imap, nimaps)) {
>  		error = -EAGAIN;
>  		if (flags & IOMAP_NOWAIT)
> @@ -868,13 +909,38 @@ xfs_direct_write_iomap_begin(
>  				(flags & IOMAP_DIRECT) || IS_DAX(inode), false);
>  		if (error)
>  			goto out_unlock;
> -		if (shared)
> +		if (shared) {
> +			if (atomic_hw &&
> +			    !imap_range_valid_for_atomic_write(&cmap,
> +					orig_offset_fsb, orig_end_fsb)) {
> +				error = -EAGAIN;
> +				goto out_unlock;
> +			}
>  			goto out_found_cow;
> +		}
>  		end_fsb = imap.br_startoff + imap.br_blockcount;
>  		length = XFS_FSB_TO_B(mp, end_fsb) - offset;
>  	}
>  
> -	if (imap_needs_alloc(inode, flags, &imap, nimaps))
> +	needs_alloc = imap_needs_alloc(inode, flags, &imap, nimaps);
> +
> +	if (atomic_hw) {
> +		error = -EAGAIN;
> +		/*
> +		 * Use CoW method for when we need to alloc > 1 block,
> +		 * otherwise we might allocate less than what we need here and
> +		 * have multiple mappings.
> +		*/
> +		if (needs_alloc && orig_end_fsb - orig_offset_fsb > 1)
> +			goto out_unlock;
> +
> +		if (!imap_range_valid_for_atomic_write(&imap, orig_offset_fsb,
> +						orig_end_fsb)) {

You only need to indent by two more tabs for a continuation line.

--D

> +			goto out_unlock;
> +		}
> +	}
> +
> +	if (needs_alloc)
>  		goto allocate_blocks;
>  
>  	/*
> -- 
> 2.31.1
> 
>
Re: [PATCH v2 07/11] xfs: iomap CoW-based atomic write support
Posted by John Garry 9 months, 3 weeks ago
On 24/02/2025 20:13, Darrick J. Wong wrote:
> On Thu, Feb 13, 2025 at 01:56:15PM +0000, John Garry wrote:
>> In cases of an atomic write occurs for misaligned or discontiguous disk
>> blocks, we will use a CoW-based method to issue the atomic write.
>>
>> So, for that case, return -EAGAIN to request that the write be issued in
>> CoW atomic write mode. The dio write path should detect this, similar to
>> how misaligned regalar DIO writes are handled.
>>
>> For normal HW-based mode, when the range which we are atomic writing to
>> covers a shared data extent, try to allocate a new CoW fork. However, if
>> we find that what we allocated does not meet atomic write requirements
>> in terms of length and alignment, then fallback on the CoW-based mode
>> for the atomic write.
>>
>> Signed-off-by: John Garry <john.g.garry@oracle.com>
>> ---
>>   fs/xfs/xfs_iomap.c | 72 ++++++++++++++++++++++++++++++++++++++++++++--
>>   1 file changed, 69 insertions(+), 3 deletions(-)
>>
>> diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
>> index ab79f0080288..c5ecfafbba60 100644
>> --- a/fs/xfs/xfs_iomap.c
>> +++ b/fs/xfs/xfs_iomap.c
>> @@ -795,6 +795,23 @@ imap_spans_range(
>>   	return true;
>>   }
>>   
>> +static bool
>> +imap_range_valid_for_atomic_write(
> 
> xfs_bmap_valid_for_atomic_write()

I'm ok with this.

But we do have other private functions without "xfs" prefix - like 
imap_needs_cow(), so a bit inconsistent to begin with.

> 
>> +	struct xfs_bmbt_irec	*imap,
>> +	xfs_fileoff_t		offset_fsb,
>> +	xfs_fileoff_t		end_fsb)
>> +{
>> +	/* Misaligned start block wrt size */
>> +	if (!IS_ALIGNED(imap->br_startblock, imap->br_blockcount))
>> +		return false;
>> +
>> +	/* Discontiguous or mixed extents */
>> +	if (!imap_spans_range(imap, offset_fsb, end_fsb))
>> +		return false;
>> +
>> +	return true;
>> +}
>> +
>>   static int
>>   xfs_direct_write_iomap_begin(
>>   	struct inode		*inode,
>> @@ -809,12 +826,20 @@ xfs_direct_write_iomap_begin(
>>   	struct xfs_bmbt_irec	imap, cmap;
>>   	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset);
>>   	xfs_fileoff_t		end_fsb = xfs_iomap_end_fsb(mp, offset, length);
>> +	bool			atomic_cow = flags & IOMAP_ATOMIC_COW;
>> +	bool			atomic_hw = flags & IOMAP_ATOMIC_HW;
>>   	int			nimaps = 1, error = 0;
>>   	bool			shared = false;
>>   	u16			iomap_flags = 0;
>> +	xfs_fileoff_t		orig_offset_fsb;
>> +	xfs_fileoff_t		orig_end_fsb;
>> +	bool			needs_alloc;
>>   	unsigned int		lockmode;
>>   	u64			seq;
>>   
>> +	orig_offset_fsb = offset_fsb;
> 
> When does offset_fsb change?

It doesn't, so this is not really required.

> 
>> +	orig_end_fsb = end_fsb;
> 
> Set this in the variable declaration?

ok

> 
>> +
>>   	ASSERT(flags & (IOMAP_WRITE | IOMAP_ZERO));
>>   
>>   	if (xfs_is_shutdown(mp))
>> @@ -832,7 +857,7 @@ xfs_direct_write_iomap_begin(
>>   	 * COW writes may allocate delalloc space or convert unwritten COW
>>   	 * extents, so we need to make sure to take the lock exclusively here.
>>   	 */
>> -	if (xfs_is_cow_inode(ip))
>> +	if (xfs_is_cow_inode(ip) || atomic_cow)
>>   		lockmode = XFS_ILOCK_EXCL;
>>   	else
>>   		lockmode = XFS_ILOCK_SHARED;
>> @@ -857,6 +882,22 @@ xfs_direct_write_iomap_begin(
>>   	if (error)
>>   		goto out_unlock;
>>   
>> +	if (flags & IOMAP_ATOMIC_COW) {
> 
> 	if (atomic_cow) ?
> 
> Or really, atomic_sw?

Yes, will change.

> 
>> +		error = xfs_reflink_allocate_cow(ip, &imap, &cmap, &shared,
>> +				&lockmode,
>> +				(flags & IOMAP_DIRECT) || IS_DAX(inode), true);
>> +		/*
>> +		 * Don't check @shared. For atomic writes, we should error when
>> +		 * we don't get a CoW fork.
> 
> "Get a CoW fork"?  What does that mean?  The cow fork should be
> automatically allocated when needed, right?  Or should this really read
> "...when we don't get a COW mapping"?

ok, I can change as you suggest

> 
>> +		 */
>> +		if (error)
>> +			goto out_unlock;
>> +
>> +		end_fsb = imap.br_startoff + imap.br_blockcount;
>> +		length = XFS_FSB_TO_B(mp, end_fsb) - offset;
>> +		goto out_found_cow;
>> +	}
> 
> Can this be a separate ->iomap_begin (and hence iomap ops)?  I am trying
> to avoid the incohesion (still) plaguing most of the other iomap users.

I can try, and would then need to try to factor out what would be much 
duplicated code.

> 
>> +
>>   	if (imap_needs_cow(ip, flags, &imap, nimaps)) {
>>   		error = -EAGAIN;
>>   		if (flags & IOMAP_NOWAIT)
>> @@ -868,13 +909,38 @@ xfs_direct_write_iomap_begin(
>>   				(flags & IOMAP_DIRECT) || IS_DAX(inode), false);
>>   		if (error)
>>   			goto out_unlock;
>> -		if (shared)
>> +		if (shared) {
>> +			if (atomic_hw &&
>> +			    !imap_range_valid_for_atomic_write(&cmap,
>> +					orig_offset_fsb, orig_end_fsb)) {
>> +				error = -EAGAIN;
>> +				goto out_unlock;
>> +			}
>>   			goto out_found_cow;
>> +		}
>>   		end_fsb = imap.br_startoff + imap.br_blockcount;
>>   		length = XFS_FSB_TO_B(mp, end_fsb) - offset;
>>   	}
>>   
>> -	if (imap_needs_alloc(inode, flags, &imap, nimaps))
>> +	needs_alloc = imap_needs_alloc(inode, flags, &imap, nimaps);
>> +
>> +	if (atomic_hw) {
>> +		error = -EAGAIN;
>> +		/*
>> +		 * Use CoW method for when we need to alloc > 1 block,
>> +		 * otherwise we might allocate less than what we need here and
>> +		 * have multiple mappings.
>> +		*/
>> +		if (needs_alloc && orig_end_fsb - orig_offset_fsb > 1)
>> +			goto out_unlock;
>> +
>> +		if (!imap_range_valid_for_atomic_write(&imap, orig_offset_fsb,
>> +						orig_end_fsb)) {
> 
> You only need to indent by two more tabs for a continuation line.

ok

Thanks,
John
Re: [PATCH v2 07/11] xfs: iomap CoW-based atomic write support
Posted by Darrick J. Wong 9 months, 3 weeks ago
On Tue, Feb 25, 2025 at 11:06:50AM +0000, John Garry wrote:
> On 24/02/2025 20:13, Darrick J. Wong wrote:
> > On Thu, Feb 13, 2025 at 01:56:15PM +0000, John Garry wrote:
> > > In cases of an atomic write occurs for misaligned or discontiguous disk
> > > blocks, we will use a CoW-based method to issue the atomic write.
> > > 
> > > So, for that case, return -EAGAIN to request that the write be issued in
> > > CoW atomic write mode. The dio write path should detect this, similar to
> > > how misaligned regalar DIO writes are handled.
> > > 
> > > For normal HW-based mode, when the range which we are atomic writing to
> > > covers a shared data extent, try to allocate a new CoW fork. However, if
> > > we find that what we allocated does not meet atomic write requirements
> > > in terms of length and alignment, then fallback on the CoW-based mode
> > > for the atomic write.
> > > 
> > > Signed-off-by: John Garry <john.g.garry@oracle.com>
> > > ---
> > >   fs/xfs/xfs_iomap.c | 72 ++++++++++++++++++++++++++++++++++++++++++++--
> > >   1 file changed, 69 insertions(+), 3 deletions(-)
> > > 
> > > diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
> > > index ab79f0080288..c5ecfafbba60 100644
> > > --- a/fs/xfs/xfs_iomap.c
> > > +++ b/fs/xfs/xfs_iomap.c
> > > @@ -795,6 +795,23 @@ imap_spans_range(
> > >   	return true;
> > >   }
> > > +static bool
> > > +imap_range_valid_for_atomic_write(
> > 
> > xfs_bmap_valid_for_atomic_write()
> 
> I'm ok with this.
> 
> But we do have other private functions without "xfs" prefix - like
> imap_needs_cow(), so a bit inconsistent to begin with.

Yeah, others prefer shorter names but I try at least to maintain
consistent prefixes for namespacing.

> > 
> > > +	struct xfs_bmbt_irec	*imap,
> > > +	xfs_fileoff_t		offset_fsb,
> > > +	xfs_fileoff_t		end_fsb)
> > > +{
> > > +	/* Misaligned start block wrt size */
> > > +	if (!IS_ALIGNED(imap->br_startblock, imap->br_blockcount))
> > > +		return false;
> > > +
> > > +	/* Discontiguous or mixed extents */
> > > +	if (!imap_spans_range(imap, offset_fsb, end_fsb))
> > > +		return false;
> > > +
> > > +	return true;
> > > +}
> > > +
> > >   static int
> > >   xfs_direct_write_iomap_begin(
> > >   	struct inode		*inode,
> > > @@ -809,12 +826,20 @@ xfs_direct_write_iomap_begin(
> > >   	struct xfs_bmbt_irec	imap, cmap;
> > >   	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset);
> > >   	xfs_fileoff_t		end_fsb = xfs_iomap_end_fsb(mp, offset, length);
> > > +	bool			atomic_cow = flags & IOMAP_ATOMIC_COW;
> > > +	bool			atomic_hw = flags & IOMAP_ATOMIC_HW;
> > >   	int			nimaps = 1, error = 0;
> > >   	bool			shared = false;
> > >   	u16			iomap_flags = 0;
> > > +	xfs_fileoff_t		orig_offset_fsb;
> > > +	xfs_fileoff_t		orig_end_fsb;
> > > +	bool			needs_alloc;
> > >   	unsigned int		lockmode;
> > >   	u64			seq;
> > > +	orig_offset_fsb = offset_fsb;
> > 
> > When does offset_fsb change?
> 
> It doesn't, so this is not really required.
> 
> > 
> > > +	orig_end_fsb = end_fsb;
> > 
> > Set this in the variable declaration?
> 
> ok
> 
> > 
> > > +
> > >   	ASSERT(flags & (IOMAP_WRITE | IOMAP_ZERO));
> > >   	if (xfs_is_shutdown(mp))
> > > @@ -832,7 +857,7 @@ xfs_direct_write_iomap_begin(
> > >   	 * COW writes may allocate delalloc space or convert unwritten COW
> > >   	 * extents, so we need to make sure to take the lock exclusively here.
> > >   	 */
> > > -	if (xfs_is_cow_inode(ip))
> > > +	if (xfs_is_cow_inode(ip) || atomic_cow)
> > >   		lockmode = XFS_ILOCK_EXCL;
> > >   	else
> > >   		lockmode = XFS_ILOCK_SHARED;
> > > @@ -857,6 +882,22 @@ xfs_direct_write_iomap_begin(
> > >   	if (error)
> > >   		goto out_unlock;
> > > +	if (flags & IOMAP_ATOMIC_COW) {
> > 
> > 	if (atomic_cow) ?
> > 
> > Or really, atomic_sw?
> 
> Yes, will change.
> 
> > 
> > > +		error = xfs_reflink_allocate_cow(ip, &imap, &cmap, &shared,
> > > +				&lockmode,
> > > +				(flags & IOMAP_DIRECT) || IS_DAX(inode), true);
> > > +		/*
> > > +		 * Don't check @shared. For atomic writes, we should error when
> > > +		 * we don't get a CoW fork.
> > 
> > "Get a CoW fork"?  What does that mean?  The cow fork should be
> > automatically allocated when needed, right?  Or should this really read
> > "...when we don't get a COW mapping"?
> 
> ok, I can change as you suggest
> 
> > 
> > > +		 */
> > > +		if (error)
> > > +			goto out_unlock;
> > > +
> > > +		end_fsb = imap.br_startoff + imap.br_blockcount;
> > > +		length = XFS_FSB_TO_B(mp, end_fsb) - offset;
> > > +		goto out_found_cow;
> > > +	}
> > 
> > Can this be a separate ->iomap_begin (and hence iomap ops)?  I am trying
> > to avoid the incohesion (still) plaguing most of the other iomap users.
> 
> I can try, and would then need to try to factor out what would be much
> duplicated code.

<nod> I think it's pretty straightforward:

xfs_direct_cow_write_iomap_begin()
{
	ASSERT(flags & IOMAP_WRITE);
	ASSERT(flags & IOMAP_DIRECT);
	ASSERT(flags & IOMAP_ATOMIC_SW);

	if (xfs_is_shutdown(mp))
		return -EIO;

	/*
	 * Writes that span EOF might trigger an IO size update on
	 * completion, so consider them to be dirty for the purposes of
	 * O_DSYNC even if there is no other metadata changes pending or
	 * have been made here.
	 */
	if (offset + length > i_size_read(inode))
		iomap_flags |= IOMAP_F_DIRTY;

	lockmode = XFS_ILOCK_EXCL;
	error = xfs_ilock_for_iomap(ip, flags, &lockmode);
	if (error)
		return error;

	error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb,
			&imap, &nimaps, 0);
	if (error)
		goto out_unlock;

	error = xfs_reflink_allocate_cow(ip, &imap, &cmap, &shared,
			&lockmode, true, true);
	if (error)
		goto out_unlock;

	endoff = XFS_FSB_TO_B(mp, cmap.br_startoff + cmap.br_blockcount);
	trace_xfs_iomap_found(ip, offset, endoff - offset, XFS_COW_FORK,
			&cmap);
	if (imap.br_startblock != HOLESTARTBLOCK) {
		seq = xfs_iomap_inode_sequence(ip, 0);
		error = xfs_bmbt_to_iomap(ip, srcmap, &imap, flags, 0,
				seq);
		if (error)
			goto out_unlock;
	}
	seq = xfs_iomap_inode_sequence(ip, IOMAP_F_SHARED);
	xfs_iunlock(ip, lockmode);
	return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags,
			IOMAP_F_SHARED, seq);

out_unlock:
	if (lockmode)
		xfs_iunlock(ip, lockmode);
	return error;
}

--D

> > > +
> > >   	if (imap_needs_cow(ip, flags, &imap, nimaps)) {
> > >   		error = -EAGAIN;
> > >   		if (flags & IOMAP_NOWAIT)
> > > @@ -868,13 +909,38 @@ xfs_direct_write_iomap_begin(
> > >   				(flags & IOMAP_DIRECT) || IS_DAX(inode), false);
> > >   		if (error)
> > >   			goto out_unlock;
> > > -		if (shared)
> > > +		if (shared) {
> > > +			if (atomic_hw &&
> > > +			    !imap_range_valid_for_atomic_write(&cmap,
> > > +					orig_offset_fsb, orig_end_fsb)) {
> > > +				error = -EAGAIN;
> > > +				goto out_unlock;
> > > +			}
> > >   			goto out_found_cow;
> > > +		}
> > >   		end_fsb = imap.br_startoff + imap.br_blockcount;
> > >   		length = XFS_FSB_TO_B(mp, end_fsb) - offset;
> > >   	}
> > > -	if (imap_needs_alloc(inode, flags, &imap, nimaps))
> > > +	needs_alloc = imap_needs_alloc(inode, flags, &imap, nimaps);
> > > +
> > > +	if (atomic_hw) {
> > > +		error = -EAGAIN;
> > > +		/*
> > > +		 * Use CoW method for when we need to alloc > 1 block,
> > > +		 * otherwise we might allocate less than what we need here and
> > > +		 * have multiple mappings.
> > > +		*/
> > > +		if (needs_alloc && orig_end_fsb - orig_offset_fsb > 1)
> > > +			goto out_unlock;
> > > +
> > > +		if (!imap_range_valid_for_atomic_write(&imap, orig_offset_fsb,
> > > +						orig_end_fsb)) {
> > 
> > You only need to indent by two more tabs for a continuation line.
> 
> ok
> 
> Thanks,
> John
>
Re: [PATCH v2 07/11] xfs: iomap CoW-based atomic write support
Posted by John Garry 9 months, 3 weeks ago
On 25/02/2025 17:47, Darrick J. Wong wrote:
>> I can try, and would then need to try to factor out what would be much
>> duplicated code.
> <nod> I think it's pretty straightforward:

Yeah, I already had done sometime like this since.

> 
> xfs_direct_cow_write_iomap_begin()
> {
> 	ASSERT(flags & IOMAP_WRITE);
> 	ASSERT(flags & IOMAP_DIRECT);
> 	ASSERT(flags & IOMAP_ATOMIC_SW);
> 
> 	if (xfs_is_shutdown(mp))
> 		return -EIO;
> 
> 	/*
> 	 * Writes that span EOF might trigger an IO size update on
> 	 * completion, so consider them to be dirty for the purposes of
> 	 * O_DSYNC even if there is no other metadata changes pending or
> 	 * have been made here.
> 	 */
> 	if (offset + length > i_size_read(inode))
> 		iomap_flags |= IOMAP_F_DIRTY;
> 
> 	lockmode = XFS_ILOCK_EXCL;
> 	error = xfs_ilock_for_iomap(ip, flags, &lockmode);
> 	if (error)
> 		return error;
> 
> 	error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb,
> 			&imap, &nimaps, 0);
> 	if (error)
> 		goto out_unlock;
> 
> 	error = xfs_reflink_allocate_cow(ip, &imap, &cmap, &shared,
> 			&lockmode, true, true);
> 	if (error)
> 		goto out_unlock;
> 
> 	endoff = XFS_FSB_TO_B(mp, cmap.br_startoff + cmap.br_blockcount);
> 	trace_xfs_iomap_found(ip, offset, endoff - offset, XFS_COW_FORK,
> 			&cmap);
> 	if (imap.br_startblock != HOLESTARTBLOCK) {

note: As you know, all this is common to xfs_direct_write_iomap_begin(), 
but unfortunately can't neatly be factored out due to the xfs_iunlock() 
calls.

> 		seq = xfs_iomap_inode_sequence(ip, 0);
> 		error = xfs_bmbt_to_iomap(ip, srcmap, &imap, flags, 0,
> 				seq);
> 		if (error)
> 			goto out_unlock;
> 	}
> 	seq = xfs_iomap_inode_sequence(ip, IOMAP_F_SHARED);
> 	xfs_iunlock(ip, lockmode);
> 	return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags,
> 			IOMAP_F_SHARED, seq);
> 
> out_unlock:
> 	if (lockmode)
> 		xfs_iunlock(ip, lockmode);
> 	return error;
> }


Cheers,
John