From: Pankaj Raghav <p.raghav@samsung.com>
filemap_create_folio() and do_read_cache_folio() were always allocating
folios of order 0. __filemap_get_folio() was trying to allocate higher
order folios when fgp_flags had a higher order hint set, but it would fall
back to an order 0 folio if the higher order memory allocation failed.
Supporting mapping_min_order implies that we guarantee each folio in the
page cache has at least an order of mapping_min_order. When adding new
folios to the page cache we must also ensure the index used is aligned to
the mapping_min_order as the page cache requires the index to be aligned
to the order of the folio.
Co-developed-by: Luis Chamberlain <mcgrof@kernel.org>
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
Signed-off-by: Pankaj Raghav <p.raghav@samsung.com>
---
include/linux/pagemap.h | 20 ++++++++++++++++++++
mm/filemap.c | 26 ++++++++++++++++++--------
2 files changed, 38 insertions(+), 8 deletions(-)
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 228275e7049f..899b8d751768 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -439,6 +439,26 @@ unsigned int mapping_min_folio_order(const struct address_space *mapping)
return (mapping->flags & AS_FOLIO_ORDER_MIN_MASK) >> AS_FOLIO_ORDER_MIN;
}
+static inline unsigned long mapping_min_folio_nrpages(struct address_space *mapping)
+{
+ return 1UL << mapping_min_folio_order(mapping);
+}
+
+/**
+ * mapping_align_start_index() - Align starting index based on the min
+ * folio order of the page cache.
+ * @mapping: The address_space.
+ *
+ * Ensure the index used is aligned to the minimum folio order when adding
+ * new folios to the page cache by rounding down to the nearest minimum
+ * folio number of pages.
+ */
+static inline pgoff_t mapping_align_start_index(struct address_space *mapping,
+ pgoff_t index)
+{
+ return round_down(index, mapping_min_folio_nrpages(mapping));
+}
+
/*
* Large folio support currently depends on THP. These dependencies are
* being worked on but are not yet fixed.
diff --git a/mm/filemap.c b/mm/filemap.c
index 46c7a6f59788..8bb0d2bc93c5 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -859,6 +859,8 @@ noinline int __filemap_add_folio(struct address_space *mapping,
VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
VM_BUG_ON_FOLIO(folio_test_swapbacked(folio), folio);
+ VM_BUG_ON_FOLIO(folio_order(folio) < mapping_min_folio_order(mapping),
+ folio);
mapping_set_update(&xas, mapping);
VM_BUG_ON_FOLIO(index & (folio_nr_pages(folio) - 1), folio);
@@ -1919,8 +1921,10 @@ struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
folio_wait_stable(folio);
no_page:
if (!folio && (fgp_flags & FGP_CREAT)) {
- unsigned order = FGF_GET_ORDER(fgp_flags);
+ unsigned int min_order = mapping_min_folio_order(mapping);
+ unsigned int order = max(min_order, FGF_GET_ORDER(fgp_flags));
int err;
+ index = mapping_align_start_index(mapping, index);
if ((fgp_flags & FGP_WRITE) && mapping_can_writeback(mapping))
gfp |= __GFP_WRITE;
@@ -1943,7 +1947,7 @@ struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
gfp_t alloc_gfp = gfp;
err = -ENOMEM;
- if (order > 0)
+ if (order > min_order)
alloc_gfp |= __GFP_NORETRY | __GFP_NOWARN;
folio = filemap_alloc_folio(alloc_gfp, order);
if (!folio)
@@ -1958,7 +1962,7 @@ struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
break;
folio_put(folio);
folio = NULL;
- } while (order-- > 0);
+ } while (order-- > min_order);
if (err == -EEXIST)
goto repeat;
@@ -2447,13 +2451,16 @@ static int filemap_update_page(struct kiocb *iocb,
}
static int filemap_create_folio(struct file *file,
- struct address_space *mapping, pgoff_t index,
+ struct address_space *mapping, loff_t pos,
struct folio_batch *fbatch)
{
struct folio *folio;
int error;
+ unsigned int min_order = mapping_min_folio_order(mapping);
+ pgoff_t index;
- folio = filemap_alloc_folio(mapping_gfp_mask(mapping), 0);
+ folio = filemap_alloc_folio(mapping_gfp_mask(mapping),
+ min_order);
if (!folio)
return -ENOMEM;
@@ -2471,6 +2478,8 @@ static int filemap_create_folio(struct file *file,
* well to keep locking rules simple.
*/
filemap_invalidate_lock_shared(mapping);
+ /* index in PAGE units but aligned to min_order number of pages. */
+ index = (pos >> (PAGE_SHIFT + min_order)) << min_order;
error = filemap_add_folio(mapping, folio, index,
mapping_gfp_constraint(mapping, GFP_KERNEL));
if (error == -EEXIST)
@@ -2531,8 +2540,7 @@ static int filemap_get_pages(struct kiocb *iocb, size_t count,
if (!folio_batch_count(fbatch)) {
if (iocb->ki_flags & (IOCB_NOWAIT | IOCB_WAITQ))
return -EAGAIN;
- err = filemap_create_folio(filp, mapping,
- iocb->ki_pos >> PAGE_SHIFT, fbatch);
+ err = filemap_create_folio(filp, mapping, iocb->ki_pos, fbatch);
if (err == AOP_TRUNCATED_PAGE)
goto retry;
return err;
@@ -3748,9 +3756,11 @@ static struct folio *do_read_cache_folio(struct address_space *mapping,
repeat:
folio = filemap_get_folio(mapping, index);
if (IS_ERR(folio)) {
- folio = filemap_alloc_folio(gfp, 0);
+ folio = filemap_alloc_folio(gfp,
+ mapping_min_folio_order(mapping));
if (!folio)
return ERR_PTR(-ENOMEM);
+ index = mapping_align_start_index(mapping, index);
err = filemap_add_folio(mapping, folio, index, gfp);
if (unlikely(err)) {
folio_put(folio);
--
2.44.1
On Fri, Jun 07, 2024 at 02:58:54PM +0000, Pankaj Raghav (Samsung) wrote:
> +static inline unsigned long mapping_min_folio_nrpages(struct address_space *mapping)
> +{
> + return 1UL << mapping_min_folio_order(mapping);
> +}
Overly long line here, just line break after the return type.
Then again it only has a single user just below and no documentation
so maybe just fold it into the caller?
> no_page:
> if (!folio && (fgp_flags & FGP_CREAT)) {
> - unsigned order = FGF_GET_ORDER(fgp_flags);
> + unsigned int min_order = mapping_min_folio_order(mapping);
> + unsigned int order = max(min_order, FGF_GET_ORDER(fgp_flags));
> int err;
> + index = mapping_align_start_index(mapping, index);
I wonder if at some point splitting this block that actually allocates
a new folio into a separate helper would be nice. It just keeps growing
in size and complexity.
> - folio = filemap_alloc_folio(mapping_gfp_mask(mapping), 0);
> + folio = filemap_alloc_folio(mapping_gfp_mask(mapping),
> + min_order);
Nit: no need to split this into multiple lines.
> if (!folio)
> return -ENOMEM;
>
> @@ -2471,6 +2478,8 @@ static int filemap_create_folio(struct file *file,
> * well to keep locking rules simple.
> */
> filemap_invalidate_lock_shared(mapping);
> + /* index in PAGE units but aligned to min_order number of pages. */
in PAGE_SIZE units? Maybe also make this a complete sentence?
On Thu, Jun 13, 2024 at 10:44:10AM +0200, Christoph Hellwig wrote:
> On Fri, Jun 07, 2024 at 02:58:54PM +0000, Pankaj Raghav (Samsung) wrote:
> > +static inline unsigned long mapping_min_folio_nrpages(struct address_space *mapping)
> > +{
> > + return 1UL << mapping_min_folio_order(mapping);
> > +}
>
> Overly long line here, just line break after the return type.
>
> Then again it only has a single user just below and no documentation
> so maybe just fold it into the caller?
I do use it in later patches. I will adjust the long line here :)
>
> > no_page:
> > if (!folio && (fgp_flags & FGP_CREAT)) {
> > - unsigned order = FGF_GET_ORDER(fgp_flags);
> > + unsigned int min_order = mapping_min_folio_order(mapping);
> > + unsigned int order = max(min_order, FGF_GET_ORDER(fgp_flags));
> > int err;
> > + index = mapping_align_start_index(mapping, index);
>
> I wonder if at some point splitting this block that actually allocates
> a new folio into a separate helper would be nice. It just keep growing
> in size and complexity.
>
I agree with that. I will put it in my future todo backlog.
> > - folio = filemap_alloc_folio(mapping_gfp_mask(mapping), 0);
> > + folio = filemap_alloc_folio(mapping_gfp_mask(mapping),
> > + min_order);
>
> Nit: no need to split this into multiple lines.
Ok.
>
> > if (!folio)
> > return -ENOMEM;
> >
> > @@ -2471,6 +2478,8 @@ static int filemap_create_folio(struct file *file,
> > * well to keep locking rules simple.
> > */
> > filemap_invalidate_lock_shared(mapping);
> > + /* index in PAGE units but aligned to min_order number of pages. */
>
> in PAGE_SIZE units? Maybe also make this a complete sentence?
Yes, will do.
>
On Mon, Jun 17, 2024 at 09:58:37AM +0000, Pankaj Raghav (Samsung) wrote:
> > > @@ -2471,6 +2478,8 @@ static int filemap_create_folio(struct file *file,
> > > * well to keep locking rules simple.
> > > */
> > > filemap_invalidate_lock_shared(mapping);
> > > + /* index in PAGE units but aligned to min_order number of pages. */
> >
> > in PAGE_SIZE units? Maybe also make this a complete sentence?
> Yes, will do.
I'd delete the comment entirely. Anyone working on this code should
already know that folio indices are in units of PAGE_SIZE and must be
aligned, so I'm not sure what value this comment adds.
On Fri, Jun 07, 2024 at 02:58:54PM +0000, Pankaj Raghav (Samsung) wrote:
> +/**
> + * mapping_align_start_index() - Align starting index based on the min
> + * folio order of the page cache.
_short_ description. "Align index appropriately for this mapping". And
maybe that means we should call it "mapping_align_index" instead of
mapping_align_start_index?
> + * @mapping: The address_space.
> + *
> + * Ensure the index used is aligned to the minimum folio order when adding
> + * new folios to the page cache by rounding down to the nearest minimum
> + * folio number of pages.
How about:
 * The index of a folio must be naturally aligned. If you are adding a
 * new folio to the page cache and need to know what index to give it,
 * call this function.
On Fri, Jun 07, 2024 at 02:58:54PM +0000, Pankaj Raghav (Samsung) wrote:
> From: Pankaj Raghav <p.raghav@samsung.com>
>
> filemap_create_folio() and do_read_cache_folio() were always allocating
> folio of order 0. __filemap_get_folio was trying to allocate higher
> order folios when fgp_flags had higher order hint set but it will default
> to order 0 folio if higher order memory allocation fails.
>
> Supporting mapping_min_order implies that we guarantee each folio in the
> page cache has at least an order of mapping_min_order. When adding new
> folios to the page cache we must also ensure the index used is aligned to
> the mapping_min_order as the page cache requires the index to be aligned
> to the order of the folio.
>
> Co-developed-by: Luis Chamberlain <mcgrof@kernel.org>
> Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
> Signed-off-by: Pankaj Raghav <p.raghav@samsung.com>
Seems pretty straightforward, so
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
--D
> ---
> include/linux/pagemap.h | 20 ++++++++++++++++++++
> mm/filemap.c | 26 ++++++++++++++++++--------
> 2 files changed, 38 insertions(+), 8 deletions(-)
>
> diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
> index 228275e7049f..899b8d751768 100644
> --- a/include/linux/pagemap.h
> +++ b/include/linux/pagemap.h
> @@ -439,6 +439,26 @@ unsigned int mapping_min_folio_order(const struct address_space *mapping)
> return (mapping->flags & AS_FOLIO_ORDER_MIN_MASK) >> AS_FOLIO_ORDER_MIN;
> }
>
> +static inline unsigned long mapping_min_folio_nrpages(struct address_space *mapping)
> +{
> + return 1UL << mapping_min_folio_order(mapping);
> +}
> +
> +/**
> + * mapping_align_start_index() - Align starting index based on the min
> + * folio order of the page cache.
> + * @mapping: The address_space.
> + *
> + * Ensure the index used is aligned to the minimum folio order when adding
> + * new folios to the page cache by rounding down to the nearest minimum
> + * folio number of pages.
> + */
> +static inline pgoff_t mapping_align_start_index(struct address_space *mapping,
> + pgoff_t index)
> +{
> + return round_down(index, mapping_min_folio_nrpages(mapping));
> +}
> +
> /*
> * Large folio support currently depends on THP. These dependencies are
> * being worked on but are not yet fixed.
> diff --git a/mm/filemap.c b/mm/filemap.c
> index 46c7a6f59788..8bb0d2bc93c5 100644
> --- a/mm/filemap.c
> +++ b/mm/filemap.c
> @@ -859,6 +859,8 @@ noinline int __filemap_add_folio(struct address_space *mapping,
>
> VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
> VM_BUG_ON_FOLIO(folio_test_swapbacked(folio), folio);
> + VM_BUG_ON_FOLIO(folio_order(folio) < mapping_min_folio_order(mapping),
> + folio);
> mapping_set_update(&xas, mapping);
>
> VM_BUG_ON_FOLIO(index & (folio_nr_pages(folio) - 1), folio);
> @@ -1919,8 +1921,10 @@ struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
> folio_wait_stable(folio);
> no_page:
> if (!folio && (fgp_flags & FGP_CREAT)) {
> - unsigned order = FGF_GET_ORDER(fgp_flags);
> + unsigned int min_order = mapping_min_folio_order(mapping);
> + unsigned int order = max(min_order, FGF_GET_ORDER(fgp_flags));
> int err;
> + index = mapping_align_start_index(mapping, index);
>
> if ((fgp_flags & FGP_WRITE) && mapping_can_writeback(mapping))
> gfp |= __GFP_WRITE;
> @@ -1943,7 +1947,7 @@ struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
> gfp_t alloc_gfp = gfp;
>
> err = -ENOMEM;
> - if (order > 0)
> + if (order > min_order)
> alloc_gfp |= __GFP_NORETRY | __GFP_NOWARN;
> folio = filemap_alloc_folio(alloc_gfp, order);
> if (!folio)
> @@ -1958,7 +1962,7 @@ struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
> break;
> folio_put(folio);
> folio = NULL;
> - } while (order-- > 0);
> + } while (order-- > min_order);
>
> if (err == -EEXIST)
> goto repeat;
> @@ -2447,13 +2451,16 @@ static int filemap_update_page(struct kiocb *iocb,
> }
>
> static int filemap_create_folio(struct file *file,
> - struct address_space *mapping, pgoff_t index,
> + struct address_space *mapping, loff_t pos,
> struct folio_batch *fbatch)
> {
> struct folio *folio;
> int error;
> + unsigned int min_order = mapping_min_folio_order(mapping);
> + pgoff_t index;
>
> - folio = filemap_alloc_folio(mapping_gfp_mask(mapping), 0);
> + folio = filemap_alloc_folio(mapping_gfp_mask(mapping),
> + min_order);
> if (!folio)
> return -ENOMEM;
>
> @@ -2471,6 +2478,8 @@ static int filemap_create_folio(struct file *file,
> * well to keep locking rules simple.
> */
> filemap_invalidate_lock_shared(mapping);
> + /* index in PAGE units but aligned to min_order number of pages. */
> + index = (pos >> (PAGE_SHIFT + min_order)) << min_order;
> error = filemap_add_folio(mapping, folio, index,
> mapping_gfp_constraint(mapping, GFP_KERNEL));
> if (error == -EEXIST)
> @@ -2531,8 +2540,7 @@ static int filemap_get_pages(struct kiocb *iocb, size_t count,
> if (!folio_batch_count(fbatch)) {
> if (iocb->ki_flags & (IOCB_NOWAIT | IOCB_WAITQ))
> return -EAGAIN;
> - err = filemap_create_folio(filp, mapping,
> - iocb->ki_pos >> PAGE_SHIFT, fbatch);
> + err = filemap_create_folio(filp, mapping, iocb->ki_pos, fbatch);
> if (err == AOP_TRUNCATED_PAGE)
> goto retry;
> return err;
> @@ -3748,9 +3756,11 @@ static struct folio *do_read_cache_folio(struct address_space *mapping,
> repeat:
> folio = filemap_get_folio(mapping, index);
> if (IS_ERR(folio)) {
> - folio = filemap_alloc_folio(gfp, 0);
> + folio = filemap_alloc_folio(gfp,
> + mapping_min_folio_order(mapping));
> if (!folio)
> return ERR_PTR(-ENOMEM);
> + index = mapping_align_start_index(mapping, index);
> err = filemap_add_folio(mapping, folio, index, gfp);
> if (unlikely(err)) {
> folio_put(folio);
> --
> 2.44.1
>
>
On 6/7/24 16:58, Pankaj Raghav (Samsung) wrote:
> From: Pankaj Raghav <p.raghav@samsung.com>
>
> filemap_create_folio() and do_read_cache_folio() were always allocating
> folio of order 0. __filemap_get_folio was trying to allocate higher
> order folios when fgp_flags had higher order hint set but it will default
> to order 0 folio if higher order memory allocation fails.
>
> Supporting mapping_min_order implies that we guarantee each folio in the
> page cache has at least an order of mapping_min_order. When adding new
> folios to the page cache we must also ensure the index used is aligned to
> the mapping_min_order as the page cache requires the index to be aligned
> to the order of the folio.
>
> Co-developed-by: Luis Chamberlain <mcgrof@kernel.org>
> Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
> Signed-off-by: Pankaj Raghav <p.raghav@samsung.com>
> ---
> include/linux/pagemap.h | 20 ++++++++++++++++++++
> mm/filemap.c | 26 ++++++++++++++++++--------
> 2 files changed, 38 insertions(+), 8 deletions(-)
>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Cheers,
Hannes
--
Dr. Hannes Reinecke Kernel Storage Architect
hare@suse.de +49 911 74053 688
SUSE Software Solutions GmbH, Frankenstr. 146, 90461 Nürnberg
HRB 36809 (AG Nürnberg), GF: I. Totev, A. McDonald, W. Knoblich
© 2016 - 2026 Red Hat, Inc.