Let's disallow handing out PFN ranges with non-contiguous pages, so we
can remove the nth-page usage in __cma_alloc(), and so any callers don't
have to worry about that either when wanting to blindly iterate pages.

This is really only a problem in configs with SPARSEMEM but without
SPARSEMEM_VMEMMAP, and only when we would cross memory sections in some
cases.

Will this cause harm? Probably not, because it's mostly 32bit that does
not support SPARSEMEM_VMEMMAP. If this ever becomes a problem we could
look into allocating the memmap for the memory sections spanned by a
single CMA region in one go from memblock.

Reviewed-by: Alexandru Elisei <alexandru.elisei@arm.com>
Signed-off-by: David Hildenbrand <david@redhat.com>
---
 include/linux/mm.h |  6 ++++++
 mm/cma.c           | 39 ++++++++++++++++++++++++---------------
 mm/util.c          | 33 +++++++++++++++++++++++++++++++++
 3 files changed, 63 insertions(+), 15 deletions(-)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index f6880e3225c5c..2ca1eb2db63ec 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -209,9 +209,15 @@ extern unsigned long sysctl_user_reserve_kbytes;
extern unsigned long sysctl_admin_reserve_kbytes;
#if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)
+bool page_range_contiguous(const struct page *page, unsigned long nr_pages);
#define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n))
#else
#define nth_page(page,n) ((page) + (n))
+static inline bool page_range_contiguous(const struct page *page,
+ unsigned long nr_pages)
+{
+ return true;
+}
#endif
/* to align the pointer to the (next) page boundary */
diff --git a/mm/cma.c b/mm/cma.c
index e56ec64d0567e..813e6dc7b0954 100644
--- a/mm/cma.c
+++ b/mm/cma.c
@@ -780,10 +780,8 @@ static int cma_range_alloc(struct cma *cma, struct cma_memrange *cmr,
unsigned long count, unsigned int align,
struct page **pagep, gfp_t gfp)
{
- unsigned long mask, offset;
- unsigned long pfn = -1;
- unsigned long start = 0;
unsigned long bitmap_maxno, bitmap_no, bitmap_count;
+ unsigned long start, pfn, mask, offset;
int ret = -EBUSY;
struct page *page = NULL;
@@ -795,7 +793,7 @@ static int cma_range_alloc(struct cma *cma, struct cma_memrange *cmr,
if (bitmap_count > bitmap_maxno)
goto out;
- for (;;) {
+ for (start = 0; ; start = bitmap_no + mask + 1) {
spin_lock_irq(&cma->lock);
/*
* If the request is larger than the available number
@@ -812,6 +810,22 @@ static int cma_range_alloc(struct cma *cma, struct cma_memrange *cmr,
spin_unlock_irq(&cma->lock);
break;
}
+
+ pfn = cmr->base_pfn + (bitmap_no << cma->order_per_bit);
+ page = pfn_to_page(pfn);
+
+ /*
+ * Do not hand out page ranges that are not contiguous, so
+ * callers can just iterate the pages without having to worry
+ * about these corner cases.
+ */
+ if (!page_range_contiguous(page, count)) {
+ spin_unlock_irq(&cma->lock);
+ pr_warn_ratelimited("%s: %s: skipping incompatible area [0x%lx-0x%lx]",
+ __func__, cma->name, pfn, pfn + count - 1);
+ continue;
+ }
+
bitmap_set(cmr->bitmap, bitmap_no, bitmap_count);
cma->available_count -= count;
/*
@@ -821,29 +835,24 @@ static int cma_range_alloc(struct cma *cma, struct cma_memrange *cmr,
*/
spin_unlock_irq(&cma->lock);
- pfn = cmr->base_pfn + (bitmap_no << cma->order_per_bit);
mutex_lock(&cma->alloc_mutex);
ret = alloc_contig_range(pfn, pfn + count, ACR_FLAGS_CMA, gfp);
mutex_unlock(&cma->alloc_mutex);
- if (ret == 0) {
- page = pfn_to_page(pfn);
+ if (!ret)
break;
- }
cma_clear_bitmap(cma, cmr, pfn, count);
if (ret != -EBUSY)
break;
pr_debug("%s(): memory range at pfn 0x%lx %p is busy, retrying\n",
- __func__, pfn, pfn_to_page(pfn));
+ __func__, pfn, page);
- trace_cma_alloc_busy_retry(cma->name, pfn, pfn_to_page(pfn),
- count, align);
- /* try again with a bit different memory target */
- start = bitmap_no + mask + 1;
+ trace_cma_alloc_busy_retry(cma->name, pfn, page, count, align);
}
out:
- *pagep = page;
+ if (!ret)
+ *pagep = page;
return ret;
}
@@ -882,7 +891,7 @@ static struct page *__cma_alloc(struct cma *cma, unsigned long count,
*/
if (page) {
for (i = 0; i < count; i++)
- page_kasan_tag_reset(nth_page(page, i));
+ page_kasan_tag_reset(page + i);
}
if (ret && !(gfp & __GFP_NOWARN)) {
diff --git a/mm/util.c b/mm/util.c
index d235b74f7aff7..0bf349b19b652 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -1280,4 +1280,37 @@ unsigned int folio_pte_batch(struct folio *folio, pte_t *ptep, pte_t pte,
{
return folio_pte_batch_flags(folio, NULL, ptep, &pte, max_nr, 0);
}
+
+#if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)
+/**
+ * page_range_contiguous - test whether the page range is contiguous
+ * @page: the start of the page range.
+ * @nr_pages: the number of pages in the range.
+ *
+ * Test whether the page range is contiguous, such that they can be iterated
+ * naively, corresponding to iterating a contiguous PFN range.
+ *
+ * This function should primarily only be used for debug checks, or when
+ * working with page ranges that are not naturally contiguous (e.g., pages
+ * within a folio are).
+ *
+ * Returns true if contiguous, otherwise false.
+ */
+bool page_range_contiguous(const struct page *page, unsigned long nr_pages)
+{
+ const unsigned long start_pfn = page_to_pfn(page);
+ const unsigned long end_pfn = start_pfn + nr_pages;
+ unsigned long pfn;
+
+ /*
+ * The memmap is allocated per memory section. We need to check
+ * each involved memory section once.
+ */
+ for (pfn = ALIGN(start_pfn, PAGES_PER_SECTION);
+ pfn < end_pfn; pfn += PAGES_PER_SECTION)
+ if (unlikely(page + (pfn - start_pfn) != pfn_to_page(pfn)))
+ return false;
+ return true;
+}
+#endif
#endif /* CONFIG_MMU */
--
2.50.1
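
The corner case the patch guards against can be modelled entirely in
userspace. The sketch below is illustrative only (every name in it is made up
for the demo, and the real kernel definitions differ), but it shows why, with
one separately allocated memmap chunk per "section", a PFN range that crosses
a section boundary may not be safe to walk with plain pointer arithmetic,
which is exactly what the new page_range_contiguous() check refuses to hand
out:

/*
 * Userspace model (not kernel code) of SPARSEMEM without SPARSEMEM_VMEMMAP:
 * each memory section gets its own separately allocated memmap chunk, so the
 * struct pages of neighbouring sections need not be adjacent in memory.
 */
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

#define PAGES_PER_SECTION	16UL	/* tiny value, just for the demo */
#define NR_SECTIONS		4UL
#define ALIGN(x, a)		(((x) + (a) - 1) & ~((a) - 1))

struct demo_page { unsigned long pfn; };

/* One separately malloc()ed memmap chunk per section, as with SPARSEMEM. */
static struct demo_page *section_memmap[NR_SECTIONS];

static struct demo_page *pfn_to_page(unsigned long pfn)
{
	return &section_memmap[pfn / PAGES_PER_SECTION][pfn % PAGES_PER_SECTION];
}

static unsigned long page_to_pfn(const struct demo_page *page)
{
	return page->pfn;
}

/* Same shape as the helper the patch adds: only section starts can break. */
static bool page_range_contiguous(const struct demo_page *page, unsigned long nr_pages)
{
	const unsigned long start_pfn = page_to_pfn(page);
	const unsigned long end_pfn = start_pfn + nr_pages;
	unsigned long pfn;

	for (pfn = ALIGN(start_pfn, PAGES_PER_SECTION);
	     pfn < end_pfn; pfn += PAGES_PER_SECTION)
		if (page + (pfn - start_pfn) != pfn_to_page(pfn))
			return false;
	return true;
}

int main(void)
{
	unsigned long i, j;

	for (i = 0; i < NR_SECTIONS; i++) {
		section_memmap[i] = malloc(PAGES_PER_SECTION * sizeof(struct demo_page));
		for (j = 0; j < PAGES_PER_SECTION; j++)
			section_memmap[i][j].pfn = i * PAGES_PER_SECTION + j;
	}

	/* Entirely within one section: always contiguous. */
	printf("within section  : %d\n", page_range_contiguous(pfn_to_page(2), 8));
	/* Crossing a section boundary: depends on where malloc() put the chunks. */
	printf("crossing section: %d\n",
	       page_range_contiguous(pfn_to_page(PAGES_PER_SECTION - 4), 8));
	return 0;
}
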
On Thu, Aug 28, 2025 at 12:01:25AM +0200, David Hildenbrand wrote:
> Let's disallow handing out PFN ranges with non-contiguous pages, so we
> can remove the nth-page usage in __cma_alloc(), and so any callers don't
> have to worry about that either when wanting to blindly iterate pages.
>
> This is really only a problem in configs with SPARSEMEM but without
> SPARSEMEM_VMEMMAP, and only when we would cross memory sections in some
> cases.
I'm guessing this is something that we don't need to worry about in
reality?
>
> Will this cause harm? Probably not, because it's mostly 32bit that does
> not support SPARSEMEM_VMEMMAP. If this ever becomes a problem we could
> look into allocating the memmap for the memory sections spanned by a
> single CMA region in one go from memblock.
>
> Reviewed-by: Alexandru Elisei <alexandru.elisei@arm.com>
> Signed-off-by: David Hildenbrand <david@redhat.com>
LGTM other than the refactoring point below.

CMA stuff looks fine afaict after staring at it for a while, on the proviso
that handing out ranges within the same section is always going to be the
case.

Anyway overall,

LGTM, so:

Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>

> +bool page_range_contiguous(const struct page *page, unsigned long nr_pages)
> +{
> + const unsigned long start_pfn = page_to_pfn(page);
> + const unsigned long end_pfn = start_pfn + nr_pages;
> + unsigned long pfn;
> +
> + /*
> + * The memmap is allocated per memory section. We need to check
> + * each involved memory section once.
> + */
> + for (pfn = ALIGN(start_pfn, PAGES_PER_SECTION);
> + pfn < end_pfn; pfn += PAGES_PER_SECTION)
> + if (unlikely(page + (pfn - start_pfn) != pfn_to_page(pfn)))
> + return false;
I find this pretty confusing, my test for this is how many times I have to read
the code to understand what it's doing :)
So we have something like:

 (pfn of page)
  start_pfn           pfn = align UP
      |                |
      v                v
      |     section    |
      <---------------->
         pfn - start_pfn

Then check page + (pfn - start_pfn) == pfn_to_page(pfn)

And loop such that:

 (pfn of page)
  start_pfn                               pfn
      |                                    |
      v                                    v
      |     section    |     section       |
      <------------------------------------>
                 pfn - start_pfn

Again check page + (pfn - start_pfn) == pfn_to_page(pfn)

And so on.

So the logic looks good, but it's just... that took me a hot second to
parse :)
I think a few simple fixups

bool page_range_contiguous(const struct page *page, unsigned long nr_pages)
{
	const unsigned long start_pfn = page_to_pfn(page);
	const unsigned long end_pfn = start_pfn + nr_pages;
	/* The PFN of the start of the next section. */
	unsigned long pfn = ALIGN(start_pfn, PAGES_PER_SECTION);
	/* The page we'd expected to see if the range were contiguous. */
	struct page *expected = page + (pfn - start_pfn);

	/*
	 * The memmap is allocated per memory section. We need to check
	 * each involved memory section once.
	 */
	for (; pfn < end_pfn; pfn += PAGES_PER_SECTION, expected += PAGES_PER_SECTION)
		if (unlikely(expected != pfn_to_page(pfn)))
			return false;
	return true;
}
> + return true;
> +}
> +#endif
> #endif /* CONFIG_MMU */
> --
> 2.50.1
>
On 28.08.25 19:28, Lorenzo Stoakes wrote:
> On Thu, Aug 28, 2025 at 12:01:25AM +0200, David Hildenbrand wrote:
>> Let's disallow handing out PFN ranges with non-contiguous pages, so we
>> can remove the nth-page usage in __cma_alloc(), and so any callers don't
>> have to worry about that either when wanting to blindly iterate pages.
>>
>> This is really only a problem in configs with SPARSEMEM but without
>> SPARSEMEM_VMEMMAP, and only when we would cross memory sections in some
>> cases.
>
> I'm guessing this is something that we don't need to worry about in
> reality?
That's my theory, yes.
>> +bool page_range_contiguous(const struct page *page, unsigned long nr_pages)
>> +{
>> + const unsigned long start_pfn = page_to_pfn(page);
>> + const unsigned long end_pfn = start_pfn + nr_pages;
>> + unsigned long pfn;
>> +
>> + /*
>> + * The memmap is allocated per memory section. We need to check
>> + * each involved memory section once.
>> + */
>> + for (pfn = ALIGN(start_pfn, PAGES_PER_SECTION);
>> + pfn < end_pfn; pfn += PAGES_PER_SECTION)
>> + if (unlikely(page + (pfn - start_pfn) != pfn_to_page(pfn)))
>> + return false;
>
> I find this pretty confusing, my test for this is how many times I have to read
> the code to understand what it's doing :)
>
> So the logic looks good, but it's just... that took me a hot second to
> parse :)
>
> I think a few simple fixups
>
> bool page_range_contiguous(const struct page *page, unsigned long nr_pages)
> {
> 	const unsigned long start_pfn = page_to_pfn(page);
> 	const unsigned long end_pfn = start_pfn + nr_pages;
> 	/* The PFN of the start of the next section. */
> 	unsigned long pfn = ALIGN(start_pfn, PAGES_PER_SECTION);
> 	/* The page we'd expected to see if the range were contiguous. */
> 	struct page *expected = page + (pfn - start_pfn);
>
> 	/*
> 	 * The memmap is allocated per memory section. We need to check
> 	 * each involved memory section once.
> 	 */
> 	for (; pfn < end_pfn; pfn += PAGES_PER_SECTION, expected += PAGES_PER_SECTION)
> 		if (unlikely(expected != pfn_to_page(pfn)))
> 			return false;
> 	return true;
> }
>
Hm, I prefer my variant, especially where the pfn is calculated in the for
loop. Likely a matter of personal taste.

But I can see why skipping the first section might be a surprise when not
having the semantics of ALIGN() in the cache.

So I'll add the following on top:

diff --git a/mm/util.c b/mm/util.c
index 0bf349b19b652..fbdb73aaf35fe 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -1303,8 +1303,10 @@ bool page_range_contiguous(const struct page *page, unsigned long nr_pages)
 	unsigned long pfn;
 
 	/*
-	 * The memmap is allocated per memory section. We need to check
-	 * each involved memory section once.
+	 * The memmap is allocated per memory section, so no need to check
+	 * within the first section. However, we need to check each other
+	 * spanned memory section once, making sure the first page in a
+	 * section could similarly be reached by just iterating pages.
 	 */
 	for (pfn = ALIGN(start_pfn, PAGES_PER_SECTION);
 	     pfn < end_pfn; pfn += PAGES_PER_SECTION)

Thanks!
--
Cheers
David / dhildenb
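
To make the ALIGN() point above concrete: assuming for the sake of the example
that PAGES_PER_SECTION is 32768 (the real value is architecture and config
dependent), the loop only probes section-aligned PFNs inside the range, i.e.
the first page of every section after the one containing start_pfn (and
start_pfn itself when it happens to be section aligned, where the check is
trivially true). A small standalone sketch of which PFNs get checked:

#include <stdio.h>

#define PAGES_PER_SECTION	32768UL		/* assumed example value */
#define ALIGN(x, a)		(((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
	const unsigned long start_pfn = 40000;	/* somewhere inside section 1 */
	const unsigned long nr_pages = 100000;
	const unsigned long end_pfn = start_pfn + nr_pages;
	unsigned long pfn;

	/*
	 * ALIGN() rounds up, so the first probed PFN is the start of the
	 * next section; every later section start inside the range gets
	 * probed exactly once.
	 */
	for (pfn = ALIGN(start_pfn, PAGES_PER_SECTION);
	     pfn < end_pfn; pfn += PAGES_PER_SECTION)
		printf("check pfn %lu (offset %lu into the range)\n",
		       pfn, pfn - start_pfn);
	return 0;
}
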
On Fri, Aug 29, 2025 at 04:34:54PM +0200, David Hildenbrand wrote:
> On 28.08.25 19:28, Lorenzo Stoakes wrote:
> > On Thu, Aug 28, 2025 at 12:01:25AM +0200, David Hildenbrand wrote:
> > > Let's disallow handing out PFN ranges with non-contiguous pages, so we
> > > can remove the nth-page usage in __cma_alloc(), and so any callers don't
> > > have to worry about that either when wanting to blindly iterate pages.
> > >
> > > This is really only a problem in configs with SPARSEMEM but without
> > > SPARSEMEM_VMEMMAP, and only when we would cross memory sections in some
> > > cases.
> >
> > I'm guessing this is something that we don't need to worry about in
> > reality?
>
> That's my theory, yes.
Let's hope that's correct, haha, but it seems reasonable.
> Hm, I prefer my variant, especially where the pfn is calculated in the for loop. Likely a
> matter of personal taste.
Sure this is always a factor in code :)
>
> But I can see why skipping the first section might be a surprise when not
> having the semantics of ALIGN() in the cache.
Yup!
>
> So I'll add the following on top:
>
> diff --git a/mm/util.c b/mm/util.c
> index 0bf349b19b652..fbdb73aaf35fe 100644
> --- a/mm/util.c
> +++ b/mm/util.c
> @@ -1303,8 +1303,10 @@ bool page_range_contiguous(const struct page *page, unsigned long nr_pages)
>  	unsigned long pfn;
> 
>  	/*
> -	 * The memmap is allocated per memory section. We need to check
> -	 * each involved memory section once.
> +	 * The memmap is allocated per memory section, so no need to check
> +	 * within the first section. However, we need to check each other
> +	 * spanned memory section once, making sure the first page in a
> +	 * section could similarly be reached by just iterating pages.
>  	 */
>  	for (pfn = ALIGN(start_pfn, PAGES_PER_SECTION);
>  	     pfn < end_pfn; pfn += PAGES_PER_SECTION)
Cool this helps clarify things, that'll do fine!
>
> Thanks!
Cheers, Lorenzo