[PATCH RFC 10/10] xfs: Allow block allocator to take an alignment hint

John Garry posted 10 patches 10 months, 1 week ago
There is a newer version of this series
[PATCH RFC 10/10] xfs: Allow block allocator to take an alignment hint
Posted by John Garry 10 months, 1 week ago
When issuing an atomic write by the CoW method, give the block allocator a
hint to naturally align the data blocks.

This means that we have a better chance of issuing the atomic write via
HW offload next time.

Signed-off-by: John Garry <john.g.garry@oracle.com>
---
 fs/xfs/libxfs/xfs_bmap.c | 7 ++++++-
 fs/xfs/libxfs/xfs_bmap.h | 6 +++++-
 fs/xfs/xfs_reflink.c     | 8 ++++++--
 3 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 40ad22fb808b..7a3910018dee 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -3454,6 +3454,12 @@ xfs_bmap_compute_alignments(
 		align = xfs_get_cowextsz_hint(ap->ip);
 	else if (ap->datatype & XFS_ALLOC_USERDATA)
 		align = xfs_get_extsz_hint(ap->ip);
+
+	if (align > 1 && ap->flags & XFS_BMAPI_NALIGN)
+		args->alignment = align;
+	else
+		args->alignment = 1;
+
 	if (align) {
 		if (xfs_bmap_extsize_align(mp, &ap->got, &ap->prev, align, 0,
 					ap->eof, 0, ap->conv, &ap->offset,
@@ -3781,7 +3787,6 @@ xfs_bmap_btalloc(
 		.wasdel		= ap->wasdel,
 		.resv		= XFS_AG_RESV_NONE,
 		.datatype	= ap->datatype,
-		.alignment	= 1,
 		.minalignslop	= 0,
 	};
 	xfs_fileoff_t		orig_offset;
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
index 4b721d935994..d68b594c3fa2 100644
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -87,6 +87,9 @@ struct xfs_bmalloca {
 /* Do not update the rmap btree.  Used for reconstructing bmbt from rmapbt. */
 #define XFS_BMAPI_NORMAP	(1u << 10)
 
+/* Try to naturally align allocations */
+#define XFS_BMAPI_NALIGN	(1u << 11)
+
 #define XFS_BMAPI_FLAGS \
 	{ XFS_BMAPI_ENTIRE,	"ENTIRE" }, \
 	{ XFS_BMAPI_METADATA,	"METADATA" }, \
@@ -98,7 +101,8 @@ struct xfs_bmalloca {
 	{ XFS_BMAPI_REMAP,	"REMAP" }, \
 	{ XFS_BMAPI_COWFORK,	"COWFORK" }, \
 	{ XFS_BMAPI_NODISCARD,	"NODISCARD" }, \
-	{ XFS_BMAPI_NORMAP,	"NORMAP" }
+	{ XFS_BMAPI_NORMAP,	"NORMAP" },\
+	{ XFS_BMAPI_NALIGN,	"NALIGN" }
 
 
 static inline int xfs_bmapi_aflag(int w)
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 60c986300faa..198fb5372f10 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -445,6 +445,11 @@ xfs_reflink_fill_cow_hole(
 	int			nimaps;
 	int			error;
 	bool			found;
+	uint32_t		bmapi_flags = XFS_BMAPI_COWFORK |
+					XFS_BMAPI_PREALLOC;
+
+	if (atomic)
+		bmapi_flags |= XFS_BMAPI_NALIGN;
 
 	resaligned = xfs_aligned_fsb_count(imap->br_startoff,
 		imap->br_blockcount, xfs_get_cowextsz_hint(ip));
@@ -478,8 +483,7 @@ xfs_reflink_fill_cow_hole(
 	/* Allocate the entire reservation as unwritten blocks. */
 	nimaps = 1;
 	error = xfs_bmapi_write(tp, ip, imap->br_startoff, imap->br_blockcount,
-			XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC, 0, cmap,
-			&nimaps);
+			bmapi_flags, 0, cmap, &nimaps);
 	if (error)
 		goto out_trans_cancel;
 
-- 
2.31.1
Re: [PATCH RFC 10/10] xfs: Allow block allocator to take an alignment hint
Posted by Darrick J. Wong 10 months, 1 week ago
On Tue, Feb 04, 2025 at 12:01:27PM +0000, John Garry wrote:
> When issuing an atomic write by the CoW method, give the block allocator a
> hint to naturally align the data blocks.
> 
> This means that we have a better chance to issuing the atomic write via
> HW offload next time.
> 
> Signed-off-by: John Garry <john.g.garry@oracle.com>
> ---
>  fs/xfs/libxfs/xfs_bmap.c | 7 ++++++-
>  fs/xfs/libxfs/xfs_bmap.h | 6 +++++-
>  fs/xfs/xfs_reflink.c     | 8 ++++++--
>  3 files changed, 17 insertions(+), 4 deletions(-)
> 
> diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
> index 40ad22fb808b..7a3910018dee 100644
> --- a/fs/xfs/libxfs/xfs_bmap.c
> +++ b/fs/xfs/libxfs/xfs_bmap.c
> @@ -3454,6 +3454,12 @@ xfs_bmap_compute_alignments(
>  		align = xfs_get_cowextsz_hint(ap->ip);
>  	else if (ap->datatype & XFS_ALLOC_USERDATA)
>  		align = xfs_get_extsz_hint(ap->ip);
> +
> +	if (align > 1 && ap->flags & XFS_BMAPI_NALIGN)
> +		args->alignment = align;
> +	else
> +		args->alignment = 1;
> +
>  	if (align) {
>  		if (xfs_bmap_extsize_align(mp, &ap->got, &ap->prev, align, 0,
>  					ap->eof, 0, ap->conv, &ap->offset,
> @@ -3781,7 +3787,6 @@ xfs_bmap_btalloc(
>  		.wasdel		= ap->wasdel,
>  		.resv		= XFS_AG_RESV_NONE,
>  		.datatype	= ap->datatype,
> -		.alignment	= 1,
>  		.minalignslop	= 0,
>  	};
>  	xfs_fileoff_t		orig_offset;
> diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
> index 4b721d935994..d68b594c3fa2 100644
> --- a/fs/xfs/libxfs/xfs_bmap.h
> +++ b/fs/xfs/libxfs/xfs_bmap.h
> @@ -87,6 +87,9 @@ struct xfs_bmalloca {
>  /* Do not update the rmap btree.  Used for reconstructing bmbt from rmapbt. */
>  #define XFS_BMAPI_NORMAP	(1u << 10)
>  
> +/* Try to naturally align allocations */
> +#define XFS_BMAPI_NALIGN	(1u << 11)
> +
>  #define XFS_BMAPI_FLAGS \
>  	{ XFS_BMAPI_ENTIRE,	"ENTIRE" }, \
>  	{ XFS_BMAPI_METADATA,	"METADATA" }, \
> @@ -98,7 +101,8 @@ struct xfs_bmalloca {
>  	{ XFS_BMAPI_REMAP,	"REMAP" }, \
>  	{ XFS_BMAPI_COWFORK,	"COWFORK" }, \
>  	{ XFS_BMAPI_NODISCARD,	"NODISCARD" }, \
> -	{ XFS_BMAPI_NORMAP,	"NORMAP" }
> +	{ XFS_BMAPI_NORMAP,	"NORMAP" },\
> +	{ XFS_BMAPI_NALIGN,	"NALIGN" }

This isn't really "naturally" aligned, is it?  It really means "try to
align allocations to the extent size hint", which isn't required to be a
power of two.

--D

>  
>  
>  static inline int xfs_bmapi_aflag(int w)
> diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
> index 60c986300faa..198fb5372f10 100644
> --- a/fs/xfs/xfs_reflink.c
> +++ b/fs/xfs/xfs_reflink.c
> @@ -445,6 +445,11 @@ xfs_reflink_fill_cow_hole(
>  	int			nimaps;
>  	int			error;
>  	bool			found;
> +	uint32_t		bmapi_flags = XFS_BMAPI_COWFORK |
> +					XFS_BMAPI_PREALLOC;
> +
> +	if (atomic)
> +		bmapi_flags |= XFS_BMAPI_NALIGN;
>  
>  	resaligned = xfs_aligned_fsb_count(imap->br_startoff,
>  		imap->br_blockcount, xfs_get_cowextsz_hint(ip));
> @@ -478,8 +483,7 @@ xfs_reflink_fill_cow_hole(
>  	/* Allocate the entire reservation as unwritten blocks. */
>  	nimaps = 1;
>  	error = xfs_bmapi_write(tp, ip, imap->br_startoff, imap->br_blockcount,
> -			XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC, 0, cmap,
> -			&nimaps);
> +			bmapi_flags, 0, cmap, &nimaps);
>  	if (error)
>  		goto out_trans_cancel;
>  
> -- 
> 2.31.1
> 
>
Re: [PATCH RFC 10/10] xfs: Allow block allocator to take an alignment hint
Posted by John Garry 10 months, 1 week ago
On 05/02/2025 19:20, Darrick J. Wong wrote:
> On Tue, Feb 04, 2025 at 12:01:27PM +0000, John Garry wrote:
>> When issuing an atomic write by the CoW method, give the block allocator a
>> hint to naturally align the data blocks.
>>
>> This means that we have a better chance to issuing the atomic write via
>> HW offload next time.
>>
>> Signed-off-by: John Garry <john.g.garry@oracle.com>
>> ---
>>   fs/xfs/libxfs/xfs_bmap.c | 7 ++++++-
>>   fs/xfs/libxfs/xfs_bmap.h | 6 +++++-
>>   fs/xfs/xfs_reflink.c     | 8 ++++++--
>>   3 files changed, 17 insertions(+), 4 deletions(-)
>>
>> diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
>> index 40ad22fb808b..7a3910018dee 100644
>> --- a/fs/xfs/libxfs/xfs_bmap.c
>> +++ b/fs/xfs/libxfs/xfs_bmap.c
>> @@ -3454,6 +3454,12 @@ xfs_bmap_compute_alignments(
>>   		align = xfs_get_cowextsz_hint(ap->ip);
>>   	else if (ap->datatype & XFS_ALLOC_USERDATA)
>>   		align = xfs_get_extsz_hint(ap->ip);
>> +
>> +	if (align > 1 && ap->flags & XFS_BMAPI_NALIGN)
>> +		args->alignment = align;
>> +	else
>> +		args->alignment = 1;
>> +
>>   	if (align) {
>>   		if (xfs_bmap_extsize_align(mp, &ap->got, &ap->prev, align, 0,
>>   					ap->eof, 0, ap->conv, &ap->offset,
>> @@ -3781,7 +3787,6 @@ xfs_bmap_btalloc(
>>   		.wasdel		= ap->wasdel,
>>   		.resv		= XFS_AG_RESV_NONE,
>>   		.datatype	= ap->datatype,
>> -		.alignment	= 1,
>>   		.minalignslop	= 0,
>>   	};
>>   	xfs_fileoff_t		orig_offset;
>> diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
>> index 4b721d935994..d68b594c3fa2 100644
>> --- a/fs/xfs/libxfs/xfs_bmap.h
>> +++ b/fs/xfs/libxfs/xfs_bmap.h
>> @@ -87,6 +87,9 @@ struct xfs_bmalloca {
>>   /* Do not update the rmap btree.  Used for reconstructing bmbt from rmapbt. */
>>   #define XFS_BMAPI_NORMAP	(1u << 10)
>>   
>> +/* Try to naturally align allocations */
>> +#define XFS_BMAPI_NALIGN	(1u << 11)
>> +
>>   #define XFS_BMAPI_FLAGS \
>>   	{ XFS_BMAPI_ENTIRE,	"ENTIRE" }, \
>>   	{ XFS_BMAPI_METADATA,	"METADATA" }, \
>> @@ -98,7 +101,8 @@ struct xfs_bmalloca {
>>   	{ XFS_BMAPI_REMAP,	"REMAP" }, \
>>   	{ XFS_BMAPI_COWFORK,	"COWFORK" }, \
>>   	{ XFS_BMAPI_NODISCARD,	"NODISCARD" }, \
>> -	{ XFS_BMAPI_NORMAP,	"NORMAP" }
>> +	{ XFS_BMAPI_NORMAP,	"NORMAP" },\
>> +	{ XFS_BMAPI_NALIGN,	"NALIGN" }
> 
> Tihs isn't really "naturally" aligned, is it?  It really means "try to
> align allocations to the extent size hint", which isn't required to be a
> power of two.

Sure, so I would expect that the user will set extsize/cowextsize
according to the size that we want to do atomics for, and we can align
to that. I don't think that it makes a difference that extsize isn't
mandated to be a power-of-2 either.

So then I should rename to XFS_BMAPI_EXTSZALIGN or something like that - ok?

Thanks,
John


> 
> --D
> 
>>   
>>   
>>   static inline int xfs_bmapi_aflag(int w)
>> diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
>> index 60c986300faa..198fb5372f10 100644
>> --- a/fs/xfs/xfs_reflink.c
>> +++ b/fs/xfs/xfs_reflink.c
>> @@ -445,6 +445,11 @@ xfs_reflink_fill_cow_hole(
>>   	int			nimaps;
>>   	int			error;
>>   	bool			found;
>> +	uint32_t		bmapi_flags = XFS_BMAPI_COWFORK |
>> +					XFS_BMAPI_PREALLOC;
>> +
>> +	if (atomic)
>> +		bmapi_flags |= XFS_BMAPI_NALIGN;
>>   
>>   	resaligned = xfs_aligned_fsb_count(imap->br_startoff,
>>   		imap->br_blockcount, xfs_get_cowextsz_hint(ip));
>> @@ -478,8 +483,7 @@ xfs_reflink_fill_cow_hole(
>>   	/* Allocate the entire reservation as unwritten blocks. */
>>   	nimaps = 1;
>>   	error = xfs_bmapi_write(tp, ip, imap->br_startoff, imap->br_blockcount,
>> -			XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC, 0, cmap,
>> -			&nimaps);
>> +			bmapi_flags, 0, cmap, &nimaps);
>>   	if (error)
>>   		goto out_trans_cancel;
>>   
>> -- 
>> 2.31.1
>>
>>
Re: [PATCH RFC 10/10] xfs: Allow block allocator to take an alignment hint
Posted by Darrick J. Wong 10 months, 1 week ago
On Thu, Feb 06, 2025 at 08:10:24AM +0000, John Garry wrote:
> On 05/02/2025 19:20, Darrick J. Wong wrote:
> > On Tue, Feb 04, 2025 at 12:01:27PM +0000, John Garry wrote:
> > > When issuing an atomic write by the CoW method, give the block allocator a
> > > hint to naturally align the data blocks.
> > > 
> > > This means that we have a better chance to issuing the atomic write via
> > > HW offload next time.
> > > 
> > > Signed-off-by: John Garry <john.g.garry@oracle.com>
> > > ---
> > >   fs/xfs/libxfs/xfs_bmap.c | 7 ++++++-
> > >   fs/xfs/libxfs/xfs_bmap.h | 6 +++++-
> > >   fs/xfs/xfs_reflink.c     | 8 ++++++--
> > >   3 files changed, 17 insertions(+), 4 deletions(-)
> > > 
> > > diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
> > > index 40ad22fb808b..7a3910018dee 100644
> > > --- a/fs/xfs/libxfs/xfs_bmap.c
> > > +++ b/fs/xfs/libxfs/xfs_bmap.c
> > > @@ -3454,6 +3454,12 @@ xfs_bmap_compute_alignments(
> > >   		align = xfs_get_cowextsz_hint(ap->ip);
> > >   	else if (ap->datatype & XFS_ALLOC_USERDATA)
> > >   		align = xfs_get_extsz_hint(ap->ip);
> > > +
> > > +	if (align > 1 && ap->flags & XFS_BMAPI_NALIGN)
> > > +		args->alignment = align;
> > > +	else
> > > +		args->alignment = 1;
> > > +
> > >   	if (align) {
> > >   		if (xfs_bmap_extsize_align(mp, &ap->got, &ap->prev, align, 0,
> > >   					ap->eof, 0, ap->conv, &ap->offset,
> > > @@ -3781,7 +3787,6 @@ xfs_bmap_btalloc(
> > >   		.wasdel		= ap->wasdel,
> > >   		.resv		= XFS_AG_RESV_NONE,
> > >   		.datatype	= ap->datatype,
> > > -		.alignment	= 1,
> > >   		.minalignslop	= 0,
> > >   	};
> > >   	xfs_fileoff_t		orig_offset;
> > > diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
> > > index 4b721d935994..d68b594c3fa2 100644
> > > --- a/fs/xfs/libxfs/xfs_bmap.h
> > > +++ b/fs/xfs/libxfs/xfs_bmap.h
> > > @@ -87,6 +87,9 @@ struct xfs_bmalloca {
> > >   /* Do not update the rmap btree.  Used for reconstructing bmbt from rmapbt. */
> > >   #define XFS_BMAPI_NORMAP	(1u << 10)
> > > +/* Try to naturally align allocations */
> > > +#define XFS_BMAPI_NALIGN	(1u << 11)
> > > +
> > >   #define XFS_BMAPI_FLAGS \
> > >   	{ XFS_BMAPI_ENTIRE,	"ENTIRE" }, \
> > >   	{ XFS_BMAPI_METADATA,	"METADATA" }, \
> > > @@ -98,7 +101,8 @@ struct xfs_bmalloca {
> > >   	{ XFS_BMAPI_REMAP,	"REMAP" }, \
> > >   	{ XFS_BMAPI_COWFORK,	"COWFORK" }, \
> > >   	{ XFS_BMAPI_NODISCARD,	"NODISCARD" }, \
> > > -	{ XFS_BMAPI_NORMAP,	"NORMAP" }
> > > +	{ XFS_BMAPI_NORMAP,	"NORMAP" },\
> > > +	{ XFS_BMAPI_NALIGN,	"NALIGN" }
> > 
> > Tihs isn't really "naturally" aligned, is it?  It really means "try to
> > align allocations to the extent size hint", which isn't required to be a
> > power of two.
> 
> Sure, so I would expect that the user will set extsize/cowextsize according
> to the size what we want to do atomics for, and we can align to that. I
> don't think that it makes a difference that either extsize isn't mandated to
> be a power-of-2.
> 
> So then I should rename to XFS_BMAPI_EXTSZALIGN or something like that - ok?

Yep.

--D

> Thanks,
> John
> 
> 
> > 
> > --D
> > 
> > >   static inline int xfs_bmapi_aflag(int w)
> > > diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
> > > index 60c986300faa..198fb5372f10 100644
> > > --- a/fs/xfs/xfs_reflink.c
> > > +++ b/fs/xfs/xfs_reflink.c
> > > @@ -445,6 +445,11 @@ xfs_reflink_fill_cow_hole(
> > >   	int			nimaps;
> > >   	int			error;
> > >   	bool			found;
> > > +	uint32_t		bmapi_flags = XFS_BMAPI_COWFORK |
> > > +					XFS_BMAPI_PREALLOC;
> > > +
> > > +	if (atomic)
> > > +		bmapi_flags |= XFS_BMAPI_NALIGN;
> > >   	resaligned = xfs_aligned_fsb_count(imap->br_startoff,
> > >   		imap->br_blockcount, xfs_get_cowextsz_hint(ip));
> > > @@ -478,8 +483,7 @@ xfs_reflink_fill_cow_hole(
> > >   	/* Allocate the entire reservation as unwritten blocks. */
> > >   	nimaps = 1;
> > >   	error = xfs_bmapi_write(tp, ip, imap->br_startoff, imap->br_blockcount,
> > > -			XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC, 0, cmap,
> > > -			&nimaps);
> > > +			bmapi_flags, 0, cmap, &nimaps);
> > >   	if (error)
> > >   		goto out_trans_cancel;
> > > -- 
> > > 2.31.1
> > > 
> > > 
> 
>