When completing a CoW-based write, each extent range mapping update is
covered by a separate transaction.
For a CoW-based atomic write, all mappings must be changed at once, so
change to use a single transaction.
Signed-off-by: John Garry <john.g.garry@oracle.com>
---
fs/xfs/xfs_file.c | 5 ++++-
fs/xfs/xfs_reflink.c | 45 ++++++++++++++++++++++++++++++++++++++++++++
fs/xfs/xfs_reflink.h | 3 +++
3 files changed, 52 insertions(+), 1 deletion(-)
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 9762fa503a41..243640fe4874 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -527,7 +527,10 @@ xfs_dio_write_end_io(
nofs_flag = memalloc_nofs_save();
if (flags & IOMAP_DIO_COW) {
- error = xfs_reflink_end_cow(ip, offset, size);
+ if (iocb->ki_flags & IOCB_ATOMIC)
+ error = xfs_reflink_end_atomic_cow(ip, offset, size);
+ else
+ error = xfs_reflink_end_cow(ip, offset, size);
if (error)
goto out;
}
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 3dab3ba900a3..d097d33dc000 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -986,6 +986,51 @@ xfs_reflink_end_cow(
trace_xfs_reflink_end_cow_error(ip, error, _RET_IP_);
return error;
}
+int
+xfs_reflink_end_atomic_cow(
+ struct xfs_inode *ip,
+ xfs_off_t offset,
+ xfs_off_t count)
+{
+ xfs_fileoff_t offset_fsb;
+ xfs_fileoff_t end_fsb;
+ int error = 0;
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_trans *tp;
+ unsigned int resblks;
+
+ trace_xfs_reflink_end_cow(ip, offset, count);
+
+ offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
+ end_fsb = XFS_B_TO_FSB(ip->i_mount, offset + count);
+
+ resblks = (end_fsb - offset_fsb) *
+ XFS_NEXTENTADD_SPACE_RES(mp, 1, XFS_DATA_FORK);
+
+ error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0,
+ XFS_TRANS_RESERVE, &tp);
+ if (error)
+ return error;
+
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+ xfs_trans_ijoin(tp, ip, 0);
+
+ while (end_fsb > offset_fsb && !error)
+ error = xfs_reflink_end_cow_extent_locked(tp, ip, &offset_fsb,
+ end_fsb);
+
+ if (error) {
+ trace_xfs_reflink_end_cow_error(ip, error, _RET_IP_);
+ goto out_cancel;
+ }
+ error = xfs_trans_commit(tp);
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ return 0;
+out_cancel:
+ xfs_trans_cancel(tp);
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ return error;
+}
/*
* Free all CoW staging blocks that are still referenced by the ondisk refcount
diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h
index 754d2bb692d3..ca945d98e823 100644
--- a/fs/xfs/xfs_reflink.h
+++ b/fs/xfs/xfs_reflink.h
@@ -43,6 +43,9 @@ extern int xfs_reflink_cancel_cow_range(struct xfs_inode *ip, xfs_off_t offset,
xfs_off_t count, bool cancel_real);
extern int xfs_reflink_end_cow(struct xfs_inode *ip, xfs_off_t offset,
xfs_off_t count);
+ int
+xfs_reflink_end_atomic_cow(struct xfs_inode *ip, xfs_off_t offset,
+ xfs_off_t count);
extern int xfs_reflink_recover_cow(struct xfs_mount *mp);
extern loff_t xfs_reflink_remap_range(struct file *file_in, loff_t pos_in,
struct file *file_out, loff_t pos_out, loff_t len,
--
2.31.1
On Thu, Feb 13, 2025 at 01:56:17PM +0000, John Garry wrote:
> When completing a CoW-based write, each extent range mapping update is
> covered by a separate transaction.
>
> For a CoW-based atomic write, all mappings must be changed at once, so
> change to use a single transaction.
>
> Signed-off-by: John Garry <john.g.garry@oracle.com>
> ---
> fs/xfs/xfs_file.c | 5 ++++-
> fs/xfs/xfs_reflink.c | 45 ++++++++++++++++++++++++++++++++++++++++++++
> fs/xfs/xfs_reflink.h | 3 +++
> 3 files changed, 52 insertions(+), 1 deletion(-)
>
> diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
> index 9762fa503a41..243640fe4874 100644
> --- a/fs/xfs/xfs_file.c
> +++ b/fs/xfs/xfs_file.c
> @@ -527,7 +527,10 @@ xfs_dio_write_end_io(
> nofs_flag = memalloc_nofs_save();
>
> if (flags & IOMAP_DIO_COW) {
> - error = xfs_reflink_end_cow(ip, offset, size);
> + if (iocb->ki_flags & IOCB_ATOMIC)
> + error = xfs_reflink_end_atomic_cow(ip, offset, size);
> + else
> + error = xfs_reflink_end_cow(ip, offset, size);
> if (error)
> goto out;
> }
> diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
> index 3dab3ba900a3..d097d33dc000 100644
> --- a/fs/xfs/xfs_reflink.c
> +++ b/fs/xfs/xfs_reflink.c
> @@ -986,6 +986,51 @@ xfs_reflink_end_cow(
> trace_xfs_reflink_end_cow_error(ip, error, _RET_IP_);
> return error;
> }
> +int
> +xfs_reflink_end_atomic_cow(
> + struct xfs_inode *ip,
> + xfs_off_t offset,
> + xfs_off_t count)
> +{
> + xfs_fileoff_t offset_fsb;
> + xfs_fileoff_t end_fsb;
> + int error = 0;
> + struct xfs_mount *mp = ip->i_mount;
> + struct xfs_trans *tp;
> + unsigned int resblks;
> +
> + trace_xfs_reflink_end_cow(ip, offset, count);
> +
> + offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
> + end_fsb = XFS_B_TO_FSB(ip->i_mount, offset + count);
Use @mp here instead of walking the pointer.
> +
> + resblks = (end_fsb - offset_fsb) *
> + XFS_NEXTENTADD_SPACE_RES(mp, 1, XFS_DATA_FORK);
How did you arrive at this computation? The "b" parameter to
XFS_NEXTENTADD_SPACE_RES is usually the worst case number of mappings
that you're going to change on this file. I think that quantity is
(end_fsb - offset_fsb)?
> +
> + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0,
> + XFS_TRANS_RESERVE, &tp);
> + if (error)
> + return error;
> +
> + xfs_ilock(ip, XFS_ILOCK_EXCL);
> + xfs_trans_ijoin(tp, ip, 0);
> +
> + while (end_fsb > offset_fsb && !error)
> + error = xfs_reflink_end_cow_extent_locked(tp, ip, &offset_fsb,
> + end_fsb);
Overly long line, and the continuation line only needs to be indented
two more tabs.
> +
> + if (error) {
> + trace_xfs_reflink_end_cow_error(ip, error, _RET_IP_);
> + goto out_cancel;
> + }
> + error = xfs_trans_commit(tp);
> + xfs_iunlock(ip, XFS_ILOCK_EXCL);
> + return 0;
Why is it ok to drop @error here? Shouldn't a transaction commit error
should be reported to the writer thread?
--D
> +out_cancel:
> + xfs_trans_cancel(tp);
> + xfs_iunlock(ip, XFS_ILOCK_EXCL);
> + return error;
> +}
>
> /*
> * Free all CoW staging blocks that are still referenced by the ondisk refcount
> diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h
> index 754d2bb692d3..ca945d98e823 100644
> --- a/fs/xfs/xfs_reflink.h
> +++ b/fs/xfs/xfs_reflink.h
> @@ -43,6 +43,9 @@ extern int xfs_reflink_cancel_cow_range(struct xfs_inode *ip, xfs_off_t offset,
> xfs_off_t count, bool cancel_real);
> extern int xfs_reflink_end_cow(struct xfs_inode *ip, xfs_off_t offset,
> xfs_off_t count);
> + int
> +xfs_reflink_end_atomic_cow(struct xfs_inode *ip, xfs_off_t offset,
> + xfs_off_t count);
> extern int xfs_reflink_recover_cow(struct xfs_mount *mp);
> extern loff_t xfs_reflink_remap_range(struct file *file_in, loff_t pos_in,
> struct file *file_out, loff_t pos_out, loff_t len,
> --
> 2.31.1
>
>
On 24/02/2025 20:20, Darrick J. Wong wrote:
> On Thu, Feb 13, 2025 at 01:56:17PM +0000, John Garry wrote:
>> When completing a CoW-based write, each extent range mapping update is
>> covered by a separate transaction.
>>
>> For a CoW-based atomic write, all mappings must be changed at once, so
>> change to use a single transaction.
>>
>> Signed-off-by: John Garry <john.g.garry@oracle.com>
>> ---
>> fs/xfs/xfs_file.c | 5 ++++-
>> fs/xfs/xfs_reflink.c | 45 ++++++++++++++++++++++++++++++++++++++++++++
>> fs/xfs/xfs_reflink.h | 3 +++
>> 3 files changed, 52 insertions(+), 1 deletion(-)
>>
>> diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
>> index 9762fa503a41..243640fe4874 100644
>> --- a/fs/xfs/xfs_file.c
>> +++ b/fs/xfs/xfs_file.c
>> @@ -527,7 +527,10 @@ xfs_dio_write_end_io(
>> nofs_flag = memalloc_nofs_save();
>>
>> if (flags & IOMAP_DIO_COW) {
>> - error = xfs_reflink_end_cow(ip, offset, size);
>> + if (iocb->ki_flags & IOCB_ATOMIC)
>> + error = xfs_reflink_end_atomic_cow(ip, offset, size);
>> + else
>> + error = xfs_reflink_end_cow(ip, offset, size);
>> if (error)
>> goto out;
>> }
>> diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
>> index 3dab3ba900a3..d097d33dc000 100644
>> --- a/fs/xfs/xfs_reflink.c
>> +++ b/fs/xfs/xfs_reflink.c
>> @@ -986,6 +986,51 @@ xfs_reflink_end_cow(
>> trace_xfs_reflink_end_cow_error(ip, error, _RET_IP_);
>> return error;
>> }
>> +int
>> +xfs_reflink_end_atomic_cow(
>> + struct xfs_inode *ip,
>> + xfs_off_t offset,
>> + xfs_off_t count)
>> +{
>> + xfs_fileoff_t offset_fsb;
>> + xfs_fileoff_t end_fsb;
>> + int error = 0;
>> + struct xfs_mount *mp = ip->i_mount;
>> + struct xfs_trans *tp;
>> + unsigned int resblks;
>> +
>> + trace_xfs_reflink_end_cow(ip, offset, count);
>> +
>> + offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
>> + end_fsb = XFS_B_TO_FSB(ip->i_mount, offset + count);
>
> Use @mp here instead of walking the pointer.
Yes
>
>> +
>> + resblks = (end_fsb - offset_fsb) *
>> + XFS_NEXTENTADD_SPACE_RES(mp, 1, XFS_DATA_FORK);
>
> How did you arrive at this computation?
hmmm... you suggested this, but maybe I picked it up incorrectly :)
> The "b" parameter to
> XFS_NEXTENTADD_SPACE_RES is usually the worst case number of mappings
> that you're going to change on this file. I think that quantity is
> (end_fsb - offset_fsb)?
Can you please check this versus what you suggested in
https://lore.kernel.org/linux-xfs/20250206215014.GX21808@frogsfrogsfrogs/#t
>
>> +
>> + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0,
>> + XFS_TRANS_RESERVE, &tp);
>> + if (error)
>> + return error;
>> +
>> + xfs_ilock(ip, XFS_ILOCK_EXCL);
>> + xfs_trans_ijoin(tp, ip, 0);
>> +
>> + while (end_fsb > offset_fsb && !error)
>> + error = xfs_reflink_end_cow_extent_locked(tp, ip, &offset_fsb,
>> + end_fsb);
>
> Overly long line, and the continuation line only needs to be indented
> two more tabs.
ok
>
>> +
>> + if (error) {
>> + trace_xfs_reflink_end_cow_error(ip, error, _RET_IP_);
>> + goto out_cancel;
>> + }
>> + error = xfs_trans_commit(tp);
>> + xfs_iunlock(ip, XFS_ILOCK_EXCL);
>> + return 0;
>
> Why is it ok to drop @error here? Shouldn't a transaction commit error
> should be reported to the writer thread?
>
I can fix that, as I should not ignore errors from xfs_trans_commit()
Thanks,
John
On Tue, Feb 25, 2025 at 11:11:45AM +0000, John Garry wrote:
> On 24/02/2025 20:20, Darrick J. Wong wrote:
> > On Thu, Feb 13, 2025 at 01:56:17PM +0000, John Garry wrote:
> > > When completing a CoW-based write, each extent range mapping update is
> > > covered by a separate transaction.
> > >
> > > For a CoW-based atomic write, all mappings must be changed at once, so
> > > change to use a single transaction.
> > >
> > > Signed-off-by: John Garry <john.g.garry@oracle.com>
> > > ---
> > > fs/xfs/xfs_file.c | 5 ++++-
> > > fs/xfs/xfs_reflink.c | 45 ++++++++++++++++++++++++++++++++++++++++++++
> > > fs/xfs/xfs_reflink.h | 3 +++
> > > 3 files changed, 52 insertions(+), 1 deletion(-)
> > >
> > > diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
> > > index 9762fa503a41..243640fe4874 100644
> > > --- a/fs/xfs/xfs_file.c
> > > +++ b/fs/xfs/xfs_file.c
> > > @@ -527,7 +527,10 @@ xfs_dio_write_end_io(
> > > nofs_flag = memalloc_nofs_save();
> > > if (flags & IOMAP_DIO_COW) {
> > > - error = xfs_reflink_end_cow(ip, offset, size);
> > > + if (iocb->ki_flags & IOCB_ATOMIC)
> > > + error = xfs_reflink_end_atomic_cow(ip, offset, size);
> > > + else
> > > + error = xfs_reflink_end_cow(ip, offset, size);
> > > if (error)
> > > goto out;
> > > }
> > > diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
> > > index 3dab3ba900a3..d097d33dc000 100644
> > > --- a/fs/xfs/xfs_reflink.c
> > > +++ b/fs/xfs/xfs_reflink.c
> > > @@ -986,6 +986,51 @@ xfs_reflink_end_cow(
> > > trace_xfs_reflink_end_cow_error(ip, error, _RET_IP_);
> > > return error;
> > > }
> > > +int
> > > +xfs_reflink_end_atomic_cow(
> > > + struct xfs_inode *ip,
> > > + xfs_off_t offset,
> > > + xfs_off_t count)
> > > +{
> > > + xfs_fileoff_t offset_fsb;
> > > + xfs_fileoff_t end_fsb;
> > > + int error = 0;
> > > + struct xfs_mount *mp = ip->i_mount;
> > > + struct xfs_trans *tp;
> > > + unsigned int resblks;
> > > +
> > > + trace_xfs_reflink_end_cow(ip, offset, count);
> > > +
> > > + offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
> > > + end_fsb = XFS_B_TO_FSB(ip->i_mount, offset + count);
> >
> > Use @mp here instead of walking the pointer.
>
> Yes
>
> >
> > > +
> > > + resblks = (end_fsb - offset_fsb) *
> > > + XFS_NEXTENTADD_SPACE_RES(mp, 1, XFS_DATA_FORK);
> >
> > How did you arrive at this computation?
>
> hmmm... you suggested this, but maybe I picked it up incorrectly :)
>
> > The "b" parameter to
> > XFS_NEXTENTADD_SPACE_RES is usually the worst case number of mappings
> > that you're going to change on this file. I think that quantity is
> > (end_fsb - offset_fsb)?
>
> Can you please check this versus what you suggested in
> https://lore.kernel.org/linux-xfs/20250206215014.GX21808@frogsfrogsfrogs/#t
Ah, yeah, that ^^ is correct. This needs a better comment then:
/*
* Each remapping operation could cause a btree split, so in
* the worst case that's one for each block.
*/
resblks = (end_fsb - offset_fsb) *
XFS_NEXTENTADD_SPACE_RES(mp, 1, XFS_DATA_FORK);
--D
> >
> > > +
> > > + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0,
> > > + XFS_TRANS_RESERVE, &tp);
> > > + if (error)
> > > + return error;
> > > +
> > > + xfs_ilock(ip, XFS_ILOCK_EXCL);
> > > + xfs_trans_ijoin(tp, ip, 0);
> > > +
> > > + while (end_fsb > offset_fsb && !error)
> > > + error = xfs_reflink_end_cow_extent_locked(tp, ip, &offset_fsb,
> > > + end_fsb);
> >
> > Overly long line, and the continuation line only needs to be indented
> > two more tabs.
>
> ok
>
> >
> > > +
> > > + if (error) {
> > > + trace_xfs_reflink_end_cow_error(ip, error, _RET_IP_);
> > > + goto out_cancel;
> > > + }
> > > + error = xfs_trans_commit(tp);
> > > + xfs_iunlock(ip, XFS_ILOCK_EXCL);
> > > + return 0;
> >
> > Why is it ok to drop @error here? Shouldn't a transaction commit error
> > should be reported to the writer thread?
> >
>
> I can fix that, as I should not ignore errors from xfs_trans_commit()
>
> Thanks,
> John
>
On 25/02/2025 17:50, Darrick J. Wong wrote: >> Can you please check this versus what you suggested in >> https://lore.kernel.org/linux- >> xfs/20250206215014.GX21808@frogsfrogsfrogs/#t > Ah, yeah, that ^^ is correct. This needs a better comment then: > > /* > * Each remapping operation could cause a btree split, so in > * the worst case that's one for each block. > */ > resblks = (end_fsb - offset_fsb) * > XFS_NEXTENTADD_SPACE_RES(mp, 1, XFS_DATA_FORK); ok, fine. Cheers
© 2016 - 2025 Red Hat, Inc.