When issuing an atomic write by the CoW method, give the block allocator a
hint to naturally align the data blocks.
This means that we have a better chance of issuing the atomic write via
HW offload next time.
Signed-off-by: John Garry <john.g.garry@oracle.com>
---
fs/xfs/libxfs/xfs_bmap.c | 7 ++++++-
fs/xfs/libxfs/xfs_bmap.h | 6 +++++-
fs/xfs/xfs_reflink.c | 8 ++++++--
3 files changed, 17 insertions(+), 4 deletions(-)
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 40ad22fb808b..7a3910018dee 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -3454,6 +3454,12 @@ xfs_bmap_compute_alignments(
align = xfs_get_cowextsz_hint(ap->ip);
else if (ap->datatype & XFS_ALLOC_USERDATA)
align = xfs_get_extsz_hint(ap->ip);
+
+ if (align > 1 && ap->flags & XFS_BMAPI_NALIGN)
+ args->alignment = align;
+ else
+ args->alignment = 1;
+
if (align) {
if (xfs_bmap_extsize_align(mp, &ap->got, &ap->prev, align, 0,
ap->eof, 0, ap->conv, &ap->offset,
@@ -3781,7 +3787,6 @@ xfs_bmap_btalloc(
.wasdel = ap->wasdel,
.resv = XFS_AG_RESV_NONE,
.datatype = ap->datatype,
- .alignment = 1,
.minalignslop = 0,
};
xfs_fileoff_t orig_offset;
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
index 4b721d935994..d68b594c3fa2 100644
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -87,6 +87,9 @@ struct xfs_bmalloca {
/* Do not update the rmap btree. Used for reconstructing bmbt from rmapbt. */
#define XFS_BMAPI_NORMAP (1u << 10)
+/* Try to naturally align allocations */
+#define XFS_BMAPI_NALIGN (1u << 11)
+
#define XFS_BMAPI_FLAGS \
{ XFS_BMAPI_ENTIRE, "ENTIRE" }, \
{ XFS_BMAPI_METADATA, "METADATA" }, \
@@ -98,7 +101,8 @@ struct xfs_bmalloca {
{ XFS_BMAPI_REMAP, "REMAP" }, \
{ XFS_BMAPI_COWFORK, "COWFORK" }, \
{ XFS_BMAPI_NODISCARD, "NODISCARD" }, \
- { XFS_BMAPI_NORMAP, "NORMAP" }
+ { XFS_BMAPI_NORMAP, "NORMAP" },\
+ { XFS_BMAPI_NALIGN, "NALIGN" }
static inline int xfs_bmapi_aflag(int w)
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 60c986300faa..198fb5372f10 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -445,6 +445,11 @@ xfs_reflink_fill_cow_hole(
int nimaps;
int error;
bool found;
+ uint32_t bmapi_flags = XFS_BMAPI_COWFORK |
+ XFS_BMAPI_PREALLOC;
+
+ if (atomic)
+ bmapi_flags |= XFS_BMAPI_NALIGN;
resaligned = xfs_aligned_fsb_count(imap->br_startoff,
imap->br_blockcount, xfs_get_cowextsz_hint(ip));
@@ -478,8 +483,7 @@ xfs_reflink_fill_cow_hole(
/* Allocate the entire reservation as unwritten blocks. */
nimaps = 1;
error = xfs_bmapi_write(tp, ip, imap->br_startoff, imap->br_blockcount,
- XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC, 0, cmap,
- &nimaps);
+ bmapi_flags, 0, cmap, &nimaps);
if (error)
goto out_trans_cancel;
--
2.31.1
On Tue, Feb 04, 2025 at 12:01:27PM +0000, John Garry wrote:
> When issuing an atomic write by the CoW method, give the block allocator a
> hint to naturally align the data blocks.
>
> This means that we have a better chance to issuing the atomic write via
> HW offload next time.
>
> Signed-off-by: John Garry <john.g.garry@oracle.com>
> ---
> fs/xfs/libxfs/xfs_bmap.c | 7 ++++++-
> fs/xfs/libxfs/xfs_bmap.h | 6 +++++-
> fs/xfs/xfs_reflink.c | 8 ++++++--
> 3 files changed, 17 insertions(+), 4 deletions(-)
>
> diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
> index 40ad22fb808b..7a3910018dee 100644
> --- a/fs/xfs/libxfs/xfs_bmap.c
> +++ b/fs/xfs/libxfs/xfs_bmap.c
> @@ -3454,6 +3454,12 @@ xfs_bmap_compute_alignments(
> align = xfs_get_cowextsz_hint(ap->ip);
> else if (ap->datatype & XFS_ALLOC_USERDATA)
> align = xfs_get_extsz_hint(ap->ip);
> +
> + if (align > 1 && ap->flags & XFS_BMAPI_NALIGN)
> + args->alignment = align;
> + else
> + args->alignment = 1;
> +
> if (align) {
> if (xfs_bmap_extsize_align(mp, &ap->got, &ap->prev, align, 0,
> ap->eof, 0, ap->conv, &ap->offset,
> @@ -3781,7 +3787,6 @@ xfs_bmap_btalloc(
> .wasdel = ap->wasdel,
> .resv = XFS_AG_RESV_NONE,
> .datatype = ap->datatype,
> - .alignment = 1,
> .minalignslop = 0,
> };
> xfs_fileoff_t orig_offset;
> diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
> index 4b721d935994..d68b594c3fa2 100644
> --- a/fs/xfs/libxfs/xfs_bmap.h
> +++ b/fs/xfs/libxfs/xfs_bmap.h
> @@ -87,6 +87,9 @@ struct xfs_bmalloca {
> /* Do not update the rmap btree. Used for reconstructing bmbt from rmapbt. */
> #define XFS_BMAPI_NORMAP (1u << 10)
>
> +/* Try to naturally align allocations */
> +#define XFS_BMAPI_NALIGN (1u << 11)
> +
> #define XFS_BMAPI_FLAGS \
> { XFS_BMAPI_ENTIRE, "ENTIRE" }, \
> { XFS_BMAPI_METADATA, "METADATA" }, \
> @@ -98,7 +101,8 @@ struct xfs_bmalloca {
> { XFS_BMAPI_REMAP, "REMAP" }, \
> { XFS_BMAPI_COWFORK, "COWFORK" }, \
> { XFS_BMAPI_NODISCARD, "NODISCARD" }, \
> - { XFS_BMAPI_NORMAP, "NORMAP" }
> + { XFS_BMAPI_NORMAP, "NORMAP" },\
> + { XFS_BMAPI_NALIGN, "NALIGN" }
This isn't really "naturally" aligned, is it? It really means "try to
align allocations to the extent size hint", which isn't required to be a
power of two.
--D
>
>
> static inline int xfs_bmapi_aflag(int w)
> diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
> index 60c986300faa..198fb5372f10 100644
> --- a/fs/xfs/xfs_reflink.c
> +++ b/fs/xfs/xfs_reflink.c
> @@ -445,6 +445,11 @@ xfs_reflink_fill_cow_hole(
> int nimaps;
> int error;
> bool found;
> + uint32_t bmapi_flags = XFS_BMAPI_COWFORK |
> + XFS_BMAPI_PREALLOC;
> +
> + if (atomic)
> + bmapi_flags |= XFS_BMAPI_NALIGN;
>
> resaligned = xfs_aligned_fsb_count(imap->br_startoff,
> imap->br_blockcount, xfs_get_cowextsz_hint(ip));
> @@ -478,8 +483,7 @@ xfs_reflink_fill_cow_hole(
> /* Allocate the entire reservation as unwritten blocks. */
> nimaps = 1;
> error = xfs_bmapi_write(tp, ip, imap->br_startoff, imap->br_blockcount,
> - XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC, 0, cmap,
> - &nimaps);
> + bmapi_flags, 0, cmap, &nimaps);
> if (error)
> goto out_trans_cancel;
>
> --
> 2.31.1
>
>
On 05/02/2025 19:20, Darrick J. Wong wrote:
> On Tue, Feb 04, 2025 at 12:01:27PM +0000, John Garry wrote:
>> When issuing an atomic write by the CoW method, give the block allocator a
>> hint to naturally align the data blocks.
>>
>> This means that we have a better chance to issuing the atomic write via
>> HW offload next time.
>>
>> Signed-off-by: John Garry <john.g.garry@oracle.com>
>> ---
>> fs/xfs/libxfs/xfs_bmap.c | 7 ++++++-
>> fs/xfs/libxfs/xfs_bmap.h | 6 +++++-
>> fs/xfs/xfs_reflink.c | 8 ++++++--
>> 3 files changed, 17 insertions(+), 4 deletions(-)
>>
>> diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
>> index 40ad22fb808b..7a3910018dee 100644
>> --- a/fs/xfs/libxfs/xfs_bmap.c
>> +++ b/fs/xfs/libxfs/xfs_bmap.c
>> @@ -3454,6 +3454,12 @@ xfs_bmap_compute_alignments(
>> align = xfs_get_cowextsz_hint(ap->ip);
>> else if (ap->datatype & XFS_ALLOC_USERDATA)
>> align = xfs_get_extsz_hint(ap->ip);
>> +
>> + if (align > 1 && ap->flags & XFS_BMAPI_NALIGN)
>> + args->alignment = align;
>> + else
>> + args->alignment = 1;
>> +
>> if (align) {
>> if (xfs_bmap_extsize_align(mp, &ap->got, &ap->prev, align, 0,
>> ap->eof, 0, ap->conv, &ap->offset,
>> @@ -3781,7 +3787,6 @@ xfs_bmap_btalloc(
>> .wasdel = ap->wasdel,
>> .resv = XFS_AG_RESV_NONE,
>> .datatype = ap->datatype,
>> - .alignment = 1,
>> .minalignslop = 0,
>> };
>> xfs_fileoff_t orig_offset;
>> diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
>> index 4b721d935994..d68b594c3fa2 100644
>> --- a/fs/xfs/libxfs/xfs_bmap.h
>> +++ b/fs/xfs/libxfs/xfs_bmap.h
>> @@ -87,6 +87,9 @@ struct xfs_bmalloca {
>> /* Do not update the rmap btree. Used for reconstructing bmbt from rmapbt. */
>> #define XFS_BMAPI_NORMAP (1u << 10)
>>
>> +/* Try to naturally align allocations */
>> +#define XFS_BMAPI_NALIGN (1u << 11)
>> +
>> #define XFS_BMAPI_FLAGS \
>> { XFS_BMAPI_ENTIRE, "ENTIRE" }, \
>> { XFS_BMAPI_METADATA, "METADATA" }, \
>> @@ -98,7 +101,8 @@ struct xfs_bmalloca {
>> { XFS_BMAPI_REMAP, "REMAP" }, \
>> { XFS_BMAPI_COWFORK, "COWFORK" }, \
>> { XFS_BMAPI_NODISCARD, "NODISCARD" }, \
>> - { XFS_BMAPI_NORMAP, "NORMAP" }
>> + { XFS_BMAPI_NORMAP, "NORMAP" },\
>> + { XFS_BMAPI_NALIGN, "NALIGN" }
>
> This isn't really "naturally" aligned, is it? It really means "try to
> align allocations to the extent size hint", which isn't required to be a
> power of two.
Sure, so I would expect that the user will set extsize/cowextsize
according to the size that we want to do atomics for, and we can align
to that. I don't think that it makes a difference that extsize
isn't mandated to be a power-of-2 either.
So then I should rename to XFS_BMAPI_EXTSZALIGN or something like that - ok?
Thanks,
John
>
> --D
>
>>
>>
>> static inline int xfs_bmapi_aflag(int w)
>> diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
>> index 60c986300faa..198fb5372f10 100644
>> --- a/fs/xfs/xfs_reflink.c
>> +++ b/fs/xfs/xfs_reflink.c
>> @@ -445,6 +445,11 @@ xfs_reflink_fill_cow_hole(
>> int nimaps;
>> int error;
>> bool found;
>> + uint32_t bmapi_flags = XFS_BMAPI_COWFORK |
>> + XFS_BMAPI_PREALLOC;
>> +
>> + if (atomic)
>> + bmapi_flags |= XFS_BMAPI_NALIGN;
>>
>> resaligned = xfs_aligned_fsb_count(imap->br_startoff,
>> imap->br_blockcount, xfs_get_cowextsz_hint(ip));
>> @@ -478,8 +483,7 @@ xfs_reflink_fill_cow_hole(
>> /* Allocate the entire reservation as unwritten blocks. */
>> nimaps = 1;
>> error = xfs_bmapi_write(tp, ip, imap->br_startoff, imap->br_blockcount,
>> - XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC, 0, cmap,
>> - &nimaps);
>> + bmapi_flags, 0, cmap, &nimaps);
>> if (error)
>> goto out_trans_cancel;
>>
>> --
>> 2.31.1
>>
>>
On Thu, Feb 06, 2025 at 08:10:24AM +0000, John Garry wrote:
> On 05/02/2025 19:20, Darrick J. Wong wrote:
> > On Tue, Feb 04, 2025 at 12:01:27PM +0000, John Garry wrote:
> > > When issuing an atomic write by the CoW method, give the block allocator a
> > > hint to naturally align the data blocks.
> > >
> > > This means that we have a better chance to issuing the atomic write via
> > > HW offload next time.
> > >
> > > Signed-off-by: John Garry <john.g.garry@oracle.com>
> > > ---
> > > fs/xfs/libxfs/xfs_bmap.c | 7 ++++++-
> > > fs/xfs/libxfs/xfs_bmap.h | 6 +++++-
> > > fs/xfs/xfs_reflink.c | 8 ++++++--
> > > 3 files changed, 17 insertions(+), 4 deletions(-)
> > >
> > > diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
> > > index 40ad22fb808b..7a3910018dee 100644
> > > --- a/fs/xfs/libxfs/xfs_bmap.c
> > > +++ b/fs/xfs/libxfs/xfs_bmap.c
> > > @@ -3454,6 +3454,12 @@ xfs_bmap_compute_alignments(
> > > align = xfs_get_cowextsz_hint(ap->ip);
> > > else if (ap->datatype & XFS_ALLOC_USERDATA)
> > > align = xfs_get_extsz_hint(ap->ip);
> > > +
> > > + if (align > 1 && ap->flags & XFS_BMAPI_NALIGN)
> > > + args->alignment = align;
> > > + else
> > > + args->alignment = 1;
> > > +
> > > if (align) {
> > > if (xfs_bmap_extsize_align(mp, &ap->got, &ap->prev, align, 0,
> > > ap->eof, 0, ap->conv, &ap->offset,
> > > @@ -3781,7 +3787,6 @@ xfs_bmap_btalloc(
> > > .wasdel = ap->wasdel,
> > > .resv = XFS_AG_RESV_NONE,
> > > .datatype = ap->datatype,
> > > - .alignment = 1,
> > > .minalignslop = 0,
> > > };
> > > xfs_fileoff_t orig_offset;
> > > diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
> > > index 4b721d935994..d68b594c3fa2 100644
> > > --- a/fs/xfs/libxfs/xfs_bmap.h
> > > +++ b/fs/xfs/libxfs/xfs_bmap.h
> > > @@ -87,6 +87,9 @@ struct xfs_bmalloca {
> > > /* Do not update the rmap btree. Used for reconstructing bmbt from rmapbt. */
> > > #define XFS_BMAPI_NORMAP (1u << 10)
> > > +/* Try to naturally align allocations */
> > > +#define XFS_BMAPI_NALIGN (1u << 11)
> > > +
> > > #define XFS_BMAPI_FLAGS \
> > > { XFS_BMAPI_ENTIRE, "ENTIRE" }, \
> > > { XFS_BMAPI_METADATA, "METADATA" }, \
> > > @@ -98,7 +101,8 @@ struct xfs_bmalloca {
> > > { XFS_BMAPI_REMAP, "REMAP" }, \
> > > { XFS_BMAPI_COWFORK, "COWFORK" }, \
> > > { XFS_BMAPI_NODISCARD, "NODISCARD" }, \
> > > - { XFS_BMAPI_NORMAP, "NORMAP" }
> > > + { XFS_BMAPI_NORMAP, "NORMAP" },\
> > > + { XFS_BMAPI_NALIGN, "NALIGN" }
> >
> > This isn't really "naturally" aligned, is it? It really means "try to
> > align allocations to the extent size hint", which isn't required to be a
> > power of two.
>
> Sure, so I would expect that the user will set extsize/cowextsize according
> to the size what we want to do atomics for, and we can align to that. I
> don't think that it makes a difference that either extsize isn't mandated to
> be a power-of-2.
>
> So then I should rename to XFS_BMAPI_EXTSZALIGN or something like that - ok?
Yep.
--D
> Thanks,
> John
>
>
> >
> > --D
> >
> > > static inline int xfs_bmapi_aflag(int w)
> > > diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
> > > index 60c986300faa..198fb5372f10 100644
> > > --- a/fs/xfs/xfs_reflink.c
> > > +++ b/fs/xfs/xfs_reflink.c
> > > @@ -445,6 +445,11 @@ xfs_reflink_fill_cow_hole(
> > > int nimaps;
> > > int error;
> > > bool found;
> > > + uint32_t bmapi_flags = XFS_BMAPI_COWFORK |
> > > + XFS_BMAPI_PREALLOC;
> > > +
> > > + if (atomic)
> > > + bmapi_flags |= XFS_BMAPI_NALIGN;
> > > resaligned = xfs_aligned_fsb_count(imap->br_startoff,
> > > imap->br_blockcount, xfs_get_cowextsz_hint(ip));
> > > @@ -478,8 +483,7 @@ xfs_reflink_fill_cow_hole(
> > > /* Allocate the entire reservation as unwritten blocks. */
> > > nimaps = 1;
> > > error = xfs_bmapi_write(tp, ip, imap->br_startoff, imap->br_blockcount,
> > > - XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC, 0, cmap,
> > > - &nimaps);
> > > + bmapi_flags, 0, cmap, &nimaps);
> > > if (error)
> > > goto out_trans_cancel;
> > > --
> > > 2.31.1
> > >
> > >
>
>
© 2016 - 2025 Red Hat, Inc.