[v7] enable bs > ps in XFS

[PATCH v7 03/11] filemap: allocate mapping_min_order folios in the page cache

Posted by Pankaj Raghav (Samsung) 1 year, 8 months ago

From: Pankaj Raghav <p.raghav@samsung.com>

filemap_create_folio() and do_read_cache_folio() were always allocating
a folio of order 0. __filemap_get_folio() was trying to allocate higher
order folios when fgp_flags had a higher order hint set, but it would
fall back to an order 0 folio if the higher order allocation failed.

Supporting mapping_min_order implies that we guarantee each folio in the
page cache has at least an order of mapping_min_order. When adding new
folios to the page cache we must also ensure the index used is aligned to
the mapping_min_order as the page cache requires the index to be aligned
to the order of the folio.

Co-developed-by: Luis Chamberlain <mcgrof@kernel.org>
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
Signed-off-by: Pankaj Raghav <p.raghav@samsung.com>
---
 include/linux/pagemap.h | 20 ++++++++++++++++++++
 mm/filemap.c            | 26 ++++++++++++++++++--------
 2 files changed, 38 insertions(+), 8 deletions(-)

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 228275e7049f..899b8d751768 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -439,6 +439,26 @@ unsigned int mapping_min_folio_order(const struct address_space *mapping)
 	return (mapping->flags & AS_FOLIO_ORDER_MIN_MASK) >> AS_FOLIO_ORDER_MIN;
 }
 
+static inline unsigned long mapping_min_folio_nrpages(struct address_space *mapping)
+{
+	return 1UL << mapping_min_folio_order(mapping);
+}
+
+/**
+ * mapping_align_start_index() - Align starting index based on the min
+ * folio order of the page cache.
+ * @mapping: The address_space.
+ *
+ * Ensure the index used is aligned to the minimum folio order when adding
+ * new folios to the page cache by rounding down to the nearest minimum
+ * folio number of pages.
+ */
+static inline pgoff_t mapping_align_start_index(struct address_space *mapping,
+						pgoff_t index)
+{
+	return round_down(index, mapping_min_folio_nrpages(mapping));
+}
+
 /*
  * Large folio support currently depends on THP.  These dependencies are
  * being worked on but are not yet fixed.
diff --git a/mm/filemap.c b/mm/filemap.c
index 46c7a6f59788..8bb0d2bc93c5 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -859,6 +859,8 @@ noinline int __filemap_add_folio(struct address_space *mapping,
 
 	VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
 	VM_BUG_ON_FOLIO(folio_test_swapbacked(folio), folio);
+	VM_BUG_ON_FOLIO(folio_order(folio) < mapping_min_folio_order(mapping),
+			folio);
 	mapping_set_update(&xas, mapping);
 
 	VM_BUG_ON_FOLIO(index & (folio_nr_pages(folio) - 1), folio);
@@ -1919,8 +1921,10 @@ struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
 		folio_wait_stable(folio);
 no_page:
 	if (!folio && (fgp_flags & FGP_CREAT)) {
-		unsigned order = FGF_GET_ORDER(fgp_flags);
+		unsigned int min_order = mapping_min_folio_order(mapping);
+		unsigned int order = max(min_order, FGF_GET_ORDER(fgp_flags));
 		int err;
+		index = mapping_align_start_index(mapping, index);
 
 		if ((fgp_flags & FGP_WRITE) && mapping_can_writeback(mapping))
 			gfp |= __GFP_WRITE;
@@ -1943,7 +1947,7 @@ struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
 			gfp_t alloc_gfp = gfp;
 
 			err = -ENOMEM;
-			if (order > 0)
+			if (order > min_order)
 				alloc_gfp |= __GFP_NORETRY | __GFP_NOWARN;
 			folio = filemap_alloc_folio(alloc_gfp, order);
 			if (!folio)
@@ -1958,7 +1962,7 @@ struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
 				break;
 			folio_put(folio);
 			folio = NULL;
-		} while (order-- > 0);
+		} while (order-- > min_order);
 
 		if (err == -EEXIST)
 			goto repeat;
@@ -2447,13 +2451,16 @@ static int filemap_update_page(struct kiocb *iocb,
 }
 
 static int filemap_create_folio(struct file *file,
-		struct address_space *mapping, pgoff_t index,
+		struct address_space *mapping, loff_t pos,
 		struct folio_batch *fbatch)
 {
 	struct folio *folio;
 	int error;
+	unsigned int min_order = mapping_min_folio_order(mapping);
+	pgoff_t index;
 
-	folio = filemap_alloc_folio(mapping_gfp_mask(mapping), 0);
+	folio = filemap_alloc_folio(mapping_gfp_mask(mapping),
+				    min_order);
 	if (!folio)
 		return -ENOMEM;
 
@@ -2471,6 +2478,8 @@ static int filemap_create_folio(struct file *file,
 	 * well to keep locking rules simple.
 	 */
 	filemap_invalidate_lock_shared(mapping);
+	/* index in PAGE units but aligned to min_order number of pages. */
+	index = (pos >> (PAGE_SHIFT + min_order)) << min_order;
 	error = filemap_add_folio(mapping, folio, index,
 			mapping_gfp_constraint(mapping, GFP_KERNEL));
 	if (error == -EEXIST)
@@ -2531,8 +2540,7 @@ static int filemap_get_pages(struct kiocb *iocb, size_t count,
 	if (!folio_batch_count(fbatch)) {
 		if (iocb->ki_flags & (IOCB_NOWAIT | IOCB_WAITQ))
 			return -EAGAIN;
-		err = filemap_create_folio(filp, mapping,
-				iocb->ki_pos >> PAGE_SHIFT, fbatch);
+		err = filemap_create_folio(filp, mapping, iocb->ki_pos, fbatch);
 		if (err == AOP_TRUNCATED_PAGE)
 			goto retry;
 		return err;
@@ -3748,9 +3756,11 @@ static struct folio *do_read_cache_folio(struct address_space *mapping,
 repeat:
 	folio = filemap_get_folio(mapping, index);
 	if (IS_ERR(folio)) {
-		folio = filemap_alloc_folio(gfp, 0);
+		folio = filemap_alloc_folio(gfp,
+					    mapping_min_folio_order(mapping));
 		if (!folio)
 			return ERR_PTR(-ENOMEM);
+		index = mapping_align_start_index(mapping, index);
 		err = filemap_add_folio(mapping, folio, index, gfp);
 		if (unlikely(err)) {
 			folio_put(folio);
-- 
2.44.1

Re: [PATCH v7 03/11] filemap: allocate mapping_min_order folios in the page cache

Posted by Christoph Hellwig 1 year, 8 months ago

On Fri, Jun 07, 2024 at 02:58:54PM +0000, Pankaj Raghav (Samsung) wrote:
> +static inline unsigned long mapping_min_folio_nrpages(struct address_space *mapping)
> +{
> +	return 1UL << mapping_min_folio_order(mapping);
> +}

Overly long line here, just line break after the return type.

Then again it only has a single user just below and no documentation
so maybe just fold it into the caller?

>  no_page:
>  	if (!folio && (fgp_flags & FGP_CREAT)) {
> -		unsigned order = FGF_GET_ORDER(fgp_flags);
> +		unsigned int min_order = mapping_min_folio_order(mapping);
> +		unsigned int order = max(min_order, FGF_GET_ORDER(fgp_flags));
>  		int err;
> +		index = mapping_align_start_index(mapping, index);

I wonder if at some point splitting this block that actually allocates
a new folio into a separate helper would be nice.  It just keeps growing
in size and complexity.

> -	folio = filemap_alloc_folio(mapping_gfp_mask(mapping), 0);
> +	folio = filemap_alloc_folio(mapping_gfp_mask(mapping),
> +				    min_order);

Nit: no need to split this into multiple lines.

>  	if (!folio)
>  		return -ENOMEM;
>  
> @@ -2471,6 +2478,8 @@ static int filemap_create_folio(struct file *file,
>  	 * well to keep locking rules simple.
>  	 */
>  	filemap_invalidate_lock_shared(mapping);
> +	/* index in PAGE units but aligned to min_order number of pages. */

in PAGE_SIZE units?  Maybe also make this a complete sentence?

Re: [PATCH v7 03/11] filemap: allocate mapping_min_order folios in the page cache

Posted by Pankaj Raghav (Samsung) 1 year, 8 months ago

On Thu, Jun 13, 2024 at 10:44:10AM +0200, Christoph Hellwig wrote:
> On Fri, Jun 07, 2024 at 02:58:54PM +0000, Pankaj Raghav (Samsung) wrote:
> > +static inline unsigned long mapping_min_folio_nrpages(struct address_space *mapping)
> > +{
> > +	return 1UL << mapping_min_folio_order(mapping);
> > +}
> 
> Overly long line here, just line break after the return type.
> 
> Then again it only has a single user just below and no documentation
> so maybe just fold it into the caller?

I do use it in later patches. I will adjust the long line here :)

> 
> >  no_page:
> >  	if (!folio && (fgp_flags & FGP_CREAT)) {
> > -		unsigned order = FGF_GET_ORDER(fgp_flags);
> > +		unsigned int min_order = mapping_min_folio_order(mapping);
> > +		unsigned int order = max(min_order, FGF_GET_ORDER(fgp_flags));
> >  		int err;
> > +		index = mapping_align_start_index(mapping, index);
> 
> I wonder if at some point splitting this block that actually allocates
> a new folio into a separate helper would be nice.  It just keeps growing
> in size and complexity.
> 

I agree with that. I will put it in my future todo backlog.

> > -	folio = filemap_alloc_folio(mapping_gfp_mask(mapping), 0);
> > +	folio = filemap_alloc_folio(mapping_gfp_mask(mapping),
> > +				    min_order);
> 
> Nit: no need to split this into multiple lines.

Ok.
> 
> >  	if (!folio)
> >  		return -ENOMEM;
> >  
> > @@ -2471,6 +2478,8 @@ static int filemap_create_folio(struct file *file,
> >  	 * well to keep locking rules simple.
> >  	 */
> >  	filemap_invalidate_lock_shared(mapping);
> > +	/* index in PAGE units but aligned to min_order number of pages. */
> 
> in PAGE_SIZE units?  Maybe also make this a complete sentence?
Yes, will do.
>

Re: [PATCH v7 03/11] filemap: allocate mapping_min_order folios in the page cache

Posted by Matthew Wilcox 1 year, 8 months ago

On Mon, Jun 17, 2024 at 09:58:37AM +0000, Pankaj Raghav (Samsung) wrote:
> > > @@ -2471,6 +2478,8 @@ static int filemap_create_folio(struct file *file,
> > >  	 * well to keep locking rules simple.
> > >  	 */
> > >  	filemap_invalidate_lock_shared(mapping);
> > > +	/* index in PAGE units but aligned to min_order number of pages. */
> > 
> > in PAGE_SIZE units?  Maybe also make this a complete sentence?
> Yes, will do.

I'd delete the comment entirely.  Anyone working on this code should
already know that folio indices are in units of PAGE_SIZE and must
be aligned, so I'm not sure what value this comment adds.

Re: [PATCH v7 03/11] filemap: allocate mapping_min_order folios in the page cache

Posted by Matthew Wilcox 1 year, 8 months ago

On Fri, Jun 07, 2024 at 02:58:54PM +0000, Pankaj Raghav (Samsung) wrote:
> +/**
> + * mapping_align_start_index() - Align starting index based on the min
> + * folio order of the page cache.

_short_ description.  "Align index appropriately for this mapping".
And maybe that means we should call it "mapping_align_index" instead
of mapping_align_start_index?

> + * @mapping: The address_space.
> + *
> + * Ensure the index used is aligned to the minimum folio order when adding
> + * new folios to the page cache by rounding down to the nearest minimum
> + * folio number of pages.

How about:

 * The index of a folio must be naturally aligned.  If you are adding a
 * new folio to the page cache and need to know what index to give it,
 * call this function.

Re: [PATCH v7 03/11] filemap: allocate mapping_min_order folios in the page cache

Posted by Darrick J. Wong 1 year, 8 months ago

On Fri, Jun 07, 2024 at 02:58:54PM +0000, Pankaj Raghav (Samsung) wrote:
> From: Pankaj Raghav <p.raghav@samsung.com>
> 
> filemap_create_folio() and do_read_cache_folio() were always allocating
> a folio of order 0. __filemap_get_folio() was trying to allocate higher
> order folios when fgp_flags had a higher order hint set, but it would
> fall back to an order 0 folio if the higher order allocation failed.
> 
> Supporting mapping_min_order implies that we guarantee each folio in the
> page cache has at least an order of mapping_min_order. When adding new
> folios to the page cache we must also ensure the index used is aligned to
> the mapping_min_order as the page cache requires the index to be aligned
> to the order of the folio.
> 
> Co-developed-by: Luis Chamberlain <mcgrof@kernel.org>
> Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
> Signed-off-by: Pankaj Raghav <p.raghav@samsung.com>

Seems pretty straightforward, so
Reviewed-by: Darrick J. Wong <djwong@kernel.org>

--D

> ---
>  include/linux/pagemap.h | 20 ++++++++++++++++++++
>  mm/filemap.c            | 26 ++++++++++++++++++--------
>  2 files changed, 38 insertions(+), 8 deletions(-)
> 
> diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
> index 228275e7049f..899b8d751768 100644
> --- a/include/linux/pagemap.h
> +++ b/include/linux/pagemap.h
> @@ -439,6 +439,26 @@ unsigned int mapping_min_folio_order(const struct address_space *mapping)
>  	return (mapping->flags & AS_FOLIO_ORDER_MIN_MASK) >> AS_FOLIO_ORDER_MIN;
>  }
>  
> +static inline unsigned long mapping_min_folio_nrpages(struct address_space *mapping)
> +{
> +	return 1UL << mapping_min_folio_order(mapping);
> +}
> +
> +/**
> + * mapping_align_start_index() - Align starting index based on the min
> + * folio order of the page cache.
> + * @mapping: The address_space.
> + *
> + * Ensure the index used is aligned to the minimum folio order when adding
> + * new folios to the page cache by rounding down to the nearest minimum
> + * folio number of pages.
> + */
> +static inline pgoff_t mapping_align_start_index(struct address_space *mapping,
> +						pgoff_t index)
> +{
> +	return round_down(index, mapping_min_folio_nrpages(mapping));
> +}
> +
>  /*
>   * Large folio support currently depends on THP.  These dependencies are
>   * being worked on but are not yet fixed.
> diff --git a/mm/filemap.c b/mm/filemap.c
> index 46c7a6f59788..8bb0d2bc93c5 100644
> --- a/mm/filemap.c
> +++ b/mm/filemap.c
> @@ -859,6 +859,8 @@ noinline int __filemap_add_folio(struct address_space *mapping,
>  
>  	VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
>  	VM_BUG_ON_FOLIO(folio_test_swapbacked(folio), folio);
> +	VM_BUG_ON_FOLIO(folio_order(folio) < mapping_min_folio_order(mapping),
> +			folio);
>  	mapping_set_update(&xas, mapping);
>  
>  	VM_BUG_ON_FOLIO(index & (folio_nr_pages(folio) - 1), folio);
> @@ -1919,8 +1921,10 @@ struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
>  		folio_wait_stable(folio);
>  no_page:
>  	if (!folio && (fgp_flags & FGP_CREAT)) {
> -		unsigned order = FGF_GET_ORDER(fgp_flags);
> +		unsigned int min_order = mapping_min_folio_order(mapping);
> +		unsigned int order = max(min_order, FGF_GET_ORDER(fgp_flags));
>  		int err;
> +		index = mapping_align_start_index(mapping, index);
>  
>  		if ((fgp_flags & FGP_WRITE) && mapping_can_writeback(mapping))
>  			gfp |= __GFP_WRITE;
> @@ -1943,7 +1947,7 @@ struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
>  			gfp_t alloc_gfp = gfp;
>  
>  			err = -ENOMEM;
> -			if (order > 0)
> +			if (order > min_order)
>  				alloc_gfp |= __GFP_NORETRY | __GFP_NOWARN;
>  			folio = filemap_alloc_folio(alloc_gfp, order);
>  			if (!folio)
> @@ -1958,7 +1962,7 @@ struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
>  				break;
>  			folio_put(folio);
>  			folio = NULL;
> -		} while (order-- > 0);
> +		} while (order-- > min_order);
>  
>  		if (err == -EEXIST)
>  			goto repeat;
> @@ -2447,13 +2451,16 @@ static int filemap_update_page(struct kiocb *iocb,
>  }
>  
>  static int filemap_create_folio(struct file *file,
> -		struct address_space *mapping, pgoff_t index,
> +		struct address_space *mapping, loff_t pos,
>  		struct folio_batch *fbatch)
>  {
>  	struct folio *folio;
>  	int error;
> +	unsigned int min_order = mapping_min_folio_order(mapping);
> +	pgoff_t index;
>  
> -	folio = filemap_alloc_folio(mapping_gfp_mask(mapping), 0);
> +	folio = filemap_alloc_folio(mapping_gfp_mask(mapping),
> +				    min_order);
>  	if (!folio)
>  		return -ENOMEM;
>  
> @@ -2471,6 +2478,8 @@ static int filemap_create_folio(struct file *file,
>  	 * well to keep locking rules simple.
>  	 */
>  	filemap_invalidate_lock_shared(mapping);
> +	/* index in PAGE units but aligned to min_order number of pages. */
> +	index = (pos >> (PAGE_SHIFT + min_order)) << min_order;
>  	error = filemap_add_folio(mapping, folio, index,
>  			mapping_gfp_constraint(mapping, GFP_KERNEL));
>  	if (error == -EEXIST)
> @@ -2531,8 +2540,7 @@ static int filemap_get_pages(struct kiocb *iocb, size_t count,
>  	if (!folio_batch_count(fbatch)) {
>  		if (iocb->ki_flags & (IOCB_NOWAIT | IOCB_WAITQ))
>  			return -EAGAIN;
> -		err = filemap_create_folio(filp, mapping,
> -				iocb->ki_pos >> PAGE_SHIFT, fbatch);
> +		err = filemap_create_folio(filp, mapping, iocb->ki_pos, fbatch);
>  		if (err == AOP_TRUNCATED_PAGE)
>  			goto retry;
>  		return err;
> @@ -3748,9 +3756,11 @@ static struct folio *do_read_cache_folio(struct address_space *mapping,
>  repeat:
>  	folio = filemap_get_folio(mapping, index);
>  	if (IS_ERR(folio)) {
> -		folio = filemap_alloc_folio(gfp, 0);
> +		folio = filemap_alloc_folio(gfp,
> +					    mapping_min_folio_order(mapping));
>  		if (!folio)
>  			return ERR_PTR(-ENOMEM);
> +		index = mapping_align_start_index(mapping, index);
>  		err = filemap_add_folio(mapping, folio, index, gfp);
>  		if (unlikely(err)) {
>  			folio_put(folio);
> -- 
> 2.44.1
> 
>

Re: [PATCH v7 03/11] filemap: allocate mapping_min_order folios in the page cache

Posted by Hannes Reinecke 1 year, 8 months ago

On 6/7/24 16:58, Pankaj Raghav (Samsung) wrote:
> From: Pankaj Raghav <p.raghav@samsung.com>
> 
> filemap_create_folio() and do_read_cache_folio() were always allocating
> a folio of order 0. __filemap_get_folio() was trying to allocate higher
> order folios when fgp_flags had a higher order hint set, but it would
> fall back to an order 0 folio if the higher order allocation failed.
> 
> Supporting mapping_min_order implies that we guarantee each folio in the
> page cache has at least an order of mapping_min_order. When adding new
> folios to the page cache we must also ensure the index used is aligned to
> the mapping_min_order as the page cache requires the index to be aligned
> to the order of the folio.
> 
> Co-developed-by: Luis Chamberlain <mcgrof@kernel.org>
> Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
> Signed-off-by: Pankaj Raghav <p.raghav@samsung.com>
> ---
>   include/linux/pagemap.h | 20 ++++++++++++++++++++
>   mm/filemap.c            | 26 ++++++++++++++++++--------
>   2 files changed, 38 insertions(+), 8 deletions(-)
> 
Reviewed-by: Hannes Reinecke <hare@suse.de>

Cheers,

Hannes
-- 
Dr. Hannes Reinecke                  Kernel Storage Architect
hare@suse.de                                +49 911 74053 688
SUSE Software Solutions GmbH, Frankenstr. 146, 90461 Nürnberg
HRB 36809 (AG Nürnberg), GF: I. Totev, A. McDonald, W. Knoblich