Let's disallow handing out PFN ranges with non-contiguous pages, so we
can remove the nth-page usage in __cma_alloc(), and so any callers don't
have to worry about that either when wanting to blindly iterate pages.
This is really only a problem in configs with SPARSEMEM but without
SPARSEMEM_VMEMMAP, and only when we would cross memory sections in some
cases.
Will this cause harm? Probably not, because it's mostly 32bit that does
not support SPARSEMEM_VMEMMAP. If this ever becomes a problem we could
look into allocating the memmap for the memory sections spanned by a
single CMA region in one go from memblock.
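
To make that corner case concrete, here is a tiny standalone userspace model
(illustrative only: the names, the two-section layout and the PAGES_PER_SECTION
value are made up, and the simplified struct page is not the kernel one). It
mimics a memmap that is allocated per memory section, which is why plain
"page + n" arithmetic may leave the correct memmap chunk once a range crosses a
section boundary, while PFN-based nth_page() arithmetic stays correct:

/*
 * Illustrative userspace model only -- not kernel code. It mimics
 * SPARSEMEM without SPARSEMEM_VMEMMAP, where the memmap (the array of
 * struct page) is allocated per memory section, so the per-section
 * arrays are not guaranteed to be virtually contiguous.
 */
#include <stdio.h>

#define PAGES_PER_SECTION 4	/* tiny sections, just for the demo */

struct page { unsigned long pfn; };	/* stand-in, not the real thing */

/* Backing storage with a deliberate hole between the two "memmaps". */
static struct page backing[3 * PAGES_PER_SECTION];
static struct page *section_memmap[2] = {
	&backing[0],				/* memmap of section 0 */
	&backing[2 * PAGES_PER_SECTION],	/* memmap of section 1, not adjacent */
};

static struct page *pfn_to_page(unsigned long pfn)
{
	return &section_memmap[pfn / PAGES_PER_SECTION][pfn % PAGES_PER_SECTION];
}

/* What nth_page() effectively does in this config: go via the PFN. */
static struct page *nth_page(struct page *page, unsigned long n)
{
	return pfn_to_page(page->pfn + n);
}

int main(void)
{
	unsigned long pfn, i;

	for (pfn = 0; pfn < 2 * PAGES_PER_SECTION; pfn++)
		pfn_to_page(pfn)->pfn = pfn;

	/* PFN range 2..5 crosses the section boundary at PFN 4. */
	struct page *start = pfn_to_page(2);

	for (i = 0; i < 4; i++) {
		struct page *naive = start + i;		/* plain iteration */
		struct page *safe = nth_page(start, i);	/* PFN-based lookup */

		printf("pfn %lu: naive iteration is %s\n", 2 + i,
		       naive == safe ? "fine" : "WRONG");
	}
	return 0;
}

With the hole in place, the model prints "fine" for the first two pages and
"WRONG" once the range enters the second section, which is exactly the
situation that nth_page() -- and the page_range_contiguous() check added
below -- guards against.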
Reviewed-by: Alexandru Elisei <alexandru.elisei@arm.com>
Signed-off-by: David Hildenbrand <david@redhat.com>
---
include/linux/mm.h | 6 ++++++
mm/cma.c | 39 ++++++++++++++++++++++++---------------
mm/util.c | 33 +++++++++++++++++++++++++++++++++
3 files changed, 63 insertions(+), 15 deletions(-)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index f6880e3225c5c..2ca1eb2db63ec 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -209,9 +209,15 @@ extern unsigned long sysctl_user_reserve_kbytes;
extern unsigned long sysctl_admin_reserve_kbytes;
#if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)
+bool page_range_contiguous(const struct page *page, unsigned long nr_pages);
#define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n))
#else
#define nth_page(page,n) ((page) + (n))
+static inline bool page_range_contiguous(const struct page *page,
+ unsigned long nr_pages)
+{
+ return true;
+}
#endif
/* to align the pointer to the (next) page boundary */
diff --git a/mm/cma.c b/mm/cma.c
index e56ec64d0567e..813e6dc7b0954 100644
--- a/mm/cma.c
+++ b/mm/cma.c
@@ -780,10 +780,8 @@ static int cma_range_alloc(struct cma *cma, struct cma_memrange *cmr,
unsigned long count, unsigned int align,
struct page **pagep, gfp_t gfp)
{
- unsigned long mask, offset;
- unsigned long pfn = -1;
- unsigned long start = 0;
unsigned long bitmap_maxno, bitmap_no, bitmap_count;
+ unsigned long start, pfn, mask, offset;
int ret = -EBUSY;
struct page *page = NULL;
@@ -795,7 +793,7 @@ static int cma_range_alloc(struct cma *cma, struct cma_memrange *cmr,
if (bitmap_count > bitmap_maxno)
goto out;
- for (;;) {
+ for (start = 0; ; start = bitmap_no + mask + 1) {
spin_lock_irq(&cma->lock);
/*
* If the request is larger than the available number
@@ -812,6 +810,22 @@ static int cma_range_alloc(struct cma *cma, struct cma_memrange *cmr,
spin_unlock_irq(&cma->lock);
break;
}
+
+ pfn = cmr->base_pfn + (bitmap_no << cma->order_per_bit);
+ page = pfn_to_page(pfn);
+
+ /*
+ * Do not hand out page ranges that are not contiguous, so
+ * callers can just iterate the pages without having to worry
+ * about these corner cases.
+ */
+ if (!page_range_contiguous(page, count)) {
+ spin_unlock_irq(&cma->lock);
+ pr_warn_ratelimited("%s: %s: skipping incompatible area [0x%lx-0x%lx]",
+ __func__, cma->name, pfn, pfn + count - 1);
+ continue;
+ }
+
bitmap_set(cmr->bitmap, bitmap_no, bitmap_count);
cma->available_count -= count;
/*
@@ -821,29 +835,24 @@ static int cma_range_alloc(struct cma *cma, struct cma_memrange *cmr,
*/
spin_unlock_irq(&cma->lock);
- pfn = cmr->base_pfn + (bitmap_no << cma->order_per_bit);
mutex_lock(&cma->alloc_mutex);
ret = alloc_contig_range(pfn, pfn + count, ACR_FLAGS_CMA, gfp);
mutex_unlock(&cma->alloc_mutex);
- if (ret == 0) {
- page = pfn_to_page(pfn);
+ if (!ret)
break;
- }
cma_clear_bitmap(cma, cmr, pfn, count);
if (ret != -EBUSY)
break;
pr_debug("%s(): memory range at pfn 0x%lx %p is busy, retrying\n",
- __func__, pfn, pfn_to_page(pfn));
+ __func__, pfn, page);
- trace_cma_alloc_busy_retry(cma->name, pfn, pfn_to_page(pfn),
- count, align);
- /* try again with a bit different memory target */
- start = bitmap_no + mask + 1;
+ trace_cma_alloc_busy_retry(cma->name, pfn, page, count, align);
}
out:
- *pagep = page;
+ if (!ret)
+ *pagep = page;
return ret;
}
@@ -882,7 +891,7 @@ static struct page *__cma_alloc(struct cma *cma, unsigned long count,
*/
if (page) {
for (i = 0; i < count; i++)
- page_kasan_tag_reset(nth_page(page, i));
+ page_kasan_tag_reset(page + i);
}
if (ret && !(gfp & __GFP_NOWARN)) {
diff --git a/mm/util.c b/mm/util.c
index d235b74f7aff7..0bf349b19b652 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -1280,4 +1280,37 @@ unsigned int folio_pte_batch(struct folio *folio, pte_t *ptep, pte_t pte,
{
return folio_pte_batch_flags(folio, NULL, ptep, &pte, max_nr, 0);
}
+
+#if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)
+/**
+ * page_range_contiguous - test whether the page range is contiguous
+ * @page: the start of the page range.
+ * @nr_pages: the number of pages in the range.
+ *
+ * Test whether the page range is contiguous, such that they can be iterated
+ * naively, corresponding to iterating a contiguous PFN range.
+ *
+ * This function should primarily only be used for debug checks, or when
+ * working with page ranges that are not naturally contiguous (e.g., pages
+ * within a folio are).
+ *
+ * Returns true if contiguous, otherwise false.
+ */
+bool page_range_contiguous(const struct page *page, unsigned long nr_pages)
+{
+ const unsigned long start_pfn = page_to_pfn(page);
+ const unsigned long end_pfn = start_pfn + nr_pages;
+ unsigned long pfn;
+
+ /*
+ * The memmap is allocated per memory section. We need to check
+ * each involved memory section once.
+ */
+ for (pfn = ALIGN(start_pfn, PAGES_PER_SECTION);
+ pfn < end_pfn; pfn += PAGES_PER_SECTION)
+ if (unlikely(page + (pfn - start_pfn) != pfn_to_page(pfn)))
+ return false;
+ return true;
+}
+#endif
#endif /* CONFIG_MMU */
--
2.50.1
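
As an aside on what this buys callers: with contiguity now guaranteed by
cma_range_alloc(), a CMA user can walk the returned range with plain pointer
arithmetic. The helper below is a hypothetical sketch (the function, its use of
clear_highpage() and the VM_WARN_ON_ONCE() are illustrative, not taken from
this patch); the page_range_contiguous() call is shown only as an optional
debug assertion, since __cma_alloc() already refuses non-contiguous ranges:

/* Hypothetical caller sketch, for illustration only. */
#include <linux/cma.h>
#include <linux/highmem.h>
#include <linux/mm.h>

static void zero_cma_allocation(struct cma *cma, unsigned long count)
{
	struct page *page = cma_alloc(cma, count, 0, false);
	unsigned long i;

	if (!page)
		return;

	/* Optional sanity check; __cma_alloc() already guarantees this. */
	VM_WARN_ON_ONCE(!page_range_contiguous(page, count));

	for (i = 0; i < count; i++)
		clear_highpage(page + i);	/* no nth_page() needed anymore */

	cma_release(cma, page, count);
}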
On Thu, Aug 28, 2025 at 12:01:25AM +0200, David Hildenbrand wrote:
> Let's disallow handing out PFN ranges with non-contiguous pages, so we
> can remove the nth-page usage in __cma_alloc(), and so any callers don't
> have to worry about that either when wanting to blindly iterate pages.
>
> This is really only a problem in configs with SPARSEMEM but without
> SPARSEMEM_VMEMMAP, and only when we would cross memory sections in some
> cases.

I'm guessing this is something that we don't need to worry about in
reality?

> Will this cause harm? Probably not, because it's mostly 32bit that does
> not support SPARSEMEM_VMEMMAP. If this ever becomes a problem we could
> look into allocating the memmap for the memory sections spanned by a
> single CMA region in one go from memblock.
>
> Reviewed-by: Alexandru Elisei <alexandru.elisei@arm.com>
> Signed-off-by: David Hildenbrand <david@redhat.com>

LGTM other than the refactoring point below.

CMA stuff looks fine afaict after staring at it for a while, on the proviso
that handing out ranges within the same section is always going to be the
case.

Anyway, overall LGTM, so:

Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>

[snip the include/linux/mm.h and mm/cma.c hunks]

> +bool page_range_contiguous(const struct page *page, unsigned long nr_pages)
> +{
> +	const unsigned long start_pfn = page_to_pfn(page);
> +	const unsigned long end_pfn = start_pfn + nr_pages;
> +	unsigned long pfn;
> +
> +	/*
> +	 * The memmap is allocated per memory section. We need to check
> +	 * each involved memory section once.
> +	 */
> +	for (pfn = ALIGN(start_pfn, PAGES_PER_SECTION);
> +	     pfn < end_pfn; pfn += PAGES_PER_SECTION)
> +		if (unlikely(page + (pfn - start_pfn) != pfn_to_page(pfn)))
> +			return false;

I find this pretty confusing; my test for this is how many times I have to
read the code to understand what it's doing :)

So we have something like:

  (pfn of page)
   start_pfn          pfn = align UP
       |               |
       v               v
       |    section    |
       <--------------->
        pfn - start_pfn

Then check page + (pfn - start_pfn) == pfn_to_page(pfn)

And loop such that:

  (pfn of page)
   start_pfn                              pfn
       |                                   |
       v                                   v
       |    section    |     section      |
       <----------------------------------->
                 pfn - start_pfn

Again check page + (pfn - start_pfn) == pfn_to_page(pfn)

And so on.

So the logic looks good, but it's just... that took me a hot second to
parse :)

I think a few simple fixups help:

bool page_range_contiguous(const struct page *page, unsigned long nr_pages)
{
	const unsigned long start_pfn = page_to_pfn(page);
	const unsigned long end_pfn = start_pfn + nr_pages;
	/* The PFN of the start of the next section. */
	unsigned long pfn = ALIGN(start_pfn, PAGES_PER_SECTION);
	/* The page we'd expect to see if the range were contiguous. */
	struct page *expected = page + (pfn - start_pfn);

	/*
	 * The memmap is allocated per memory section. We need to check
	 * each involved memory section once.
	 */
	for (; pfn < end_pfn; pfn += PAGES_PER_SECTION, expected += PAGES_PER_SECTION)
		if (unlikely(expected != pfn_to_page(pfn)))
			return false;
	return true;
}

> +	return true;
> +}
> +#endif
>  #endif /* CONFIG_MMU */
> --
> 2.50.1
>
On 28.08.25 19:28, Lorenzo Stoakes wrote:
> On Thu, Aug 28, 2025 at 12:01:25AM +0200, David Hildenbrand wrote:
>> This is really only a problem in configs with SPARSEMEM but without
>> SPARSEMEM_VMEMMAP, and only when we would cross memory sections in some
>> cases.
>
> I'm guessing this is something that we don't need to worry about in
> reality?

That's my theory, yes.

> LGTM other than the refactoring point below.
>
> CMA stuff looks fine afaict after staring at it for a while, on the proviso
> that handing out ranges within the same section is always going to be the
> case.
>
> Anyway, overall LGTM, so:
>
> Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>

[snip quoted patch]

> I find this pretty confusing; my test for this is how many times I have to
> read the code to understand what it's doing :)
>
> [snip diagrams]
>
> So the logic looks good, but it's just... that took me a hot second to
> parse :)
>
> I think a few simple fixups help:
>
> bool page_range_contiguous(const struct page *page, unsigned long nr_pages)
> {
> 	[snip]
> }

Hm, I prefer my variant, especially where the pfn is calculated in the for
loop. Likely a matter of personal taste.

But I can see why skipping the first section might be a surprise when not
having the semantics of ALIGN() in the cache.

So I'll add the following on top:

diff --git a/mm/util.c b/mm/util.c
index 0bf349b19b652..fbdb73aaf35fe 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -1303,8 +1303,10 @@ bool page_range_contiguous(const struct page *page, unsigned long nr_pages)
 	unsigned long pfn;
 
 	/*
-	 * The memmap is allocated per memory section. We need to check
-	 * each involved memory section once.
+	 * The memmap is allocated per memory section, so no need to check
+	 * within the first section. However, we need to check each other
+	 * spanned memory section once, making sure the first page in a
+	 * section could similarly be reached by just iterating pages.
 	 */
 	for (pfn = ALIGN(start_pfn, PAGES_PER_SECTION);
 	     pfn < end_pfn; pfn += PAGES_PER_SECTION)

Thanks!

--
Cheers

David / dhildenb
On Fri, Aug 29, 2025 at 04:34:54PM +0200, David Hildenbrand wrote:
> On 28.08.25 19:28, Lorenzo Stoakes wrote:
> > I'm guessing this is something that we don't need to worry about in
> > reality?
>
> That's my theory, yes.

Let's hope correct haha, but seems reasonable.

[snip]

> Hm, I prefer my variant, especially where the pfn is calculated in the for
> loop. Likely a matter of personal taste.

Sure, this is always a factor in code :)

> But I can see why skipping the first section might be a surprise when not
> having the semantics of ALIGN() in the cache.

Yup!

> So I'll add the following on top:
>
> [snip fixup diff]

Cool, this helps clarify things, that'll do fine!

Cheers, Lorenzo