Today, cma_alloc() is used to allocate a contiguous memory region. The
function allows the caller to specify the number of pages to allocate, but
not the starting address. cma_alloc() will walk over the entire CMA region
trying to allocate the first available range of the specified size.
Introduce cma_alloc_range(), which makes CMA more versatile by allowing the
caller to specify a particular range in the CMA region, defined by the
start pfn and the size.
arm64 will make use of this function when tag storage management will be
implemented: cma_alloc_range() will be used to reserve the tag storage
associated with a tagged page.
Signed-off-by: Alexandru Elisei <alexandru.elisei@arm.com>
---
Changes since rfc v2:
* New patch.
include/linux/cma.h | 2 +
include/trace/events/cma.h | 59 ++++++++++++++++++++++++++
mm/cma.c | 86 ++++++++++++++++++++++++++++++++++++++
3 files changed, 147 insertions(+)
diff --git a/include/linux/cma.h b/include/linux/cma.h
index 63873b93deaa..e32559da6942 100644
--- a/include/linux/cma.h
+++ b/include/linux/cma.h
@@ -50,6 +50,8 @@ extern int cma_init_reserved_mem(phys_addr_t base, phys_addr_t size,
struct cma **res_cma);
extern struct page *cma_alloc(struct cma *cma, unsigned long count, unsigned int align,
bool no_warn);
+extern int cma_alloc_range(struct cma *cma, unsigned long start, unsigned long count,
+ unsigned tries, gfp_t gfp);
extern bool cma_pages_valid(struct cma *cma, const struct page *pages, unsigned long count);
extern bool cma_release(struct cma *cma, const struct page *pages, unsigned long count);
diff --git a/include/trace/events/cma.h b/include/trace/events/cma.h
index 25103e67737c..a89af313a572 100644
--- a/include/trace/events/cma.h
+++ b/include/trace/events/cma.h
@@ -36,6 +36,65 @@ TRACE_EVENT(cma_release,
__entry->count)
);
+TRACE_EVENT(cma_alloc_range_start,
+
+ TP_PROTO(const char *name, unsigned long start, unsigned long count,
+ unsigned tries),
+
+ TP_ARGS(name, start, count, tries),
+
+ TP_STRUCT__entry(
+ __string(name, name)
+ __field(unsigned long, start)
+ __field(unsigned long, count)
+ __field(unsigned, tries)
+ ),
+
+ TP_fast_assign(
+ __assign_str(name, name);
+ __entry->start = start;
+ __entry->count = count;
+ __entry->tries = tries;
+ ),
+
+ TP_printk("name=%s start=%lx count=%lu tries=%u",
+ __get_str(name),
+ __entry->start,
+ __entry->count,
+ __entry->tries)
+);
+
+TRACE_EVENT(cma_alloc_range_finish,
+
+ TP_PROTO(const char *name, unsigned long start, unsigned long count,
+ unsigned attempts, int err),
+
+ TP_ARGS(name, start, count, attempts, err),
+
+ TP_STRUCT__entry(
+ __string(name, name)
+ __field(unsigned long, start)
+ __field(unsigned long, count)
+ __field(unsigned, attempts)
+ __field(int, err)
+ ),
+
+ TP_fast_assign(
+ __assign_str(name, name);
+ __entry->start = start;
+ __entry->count = count;
+ __entry->attempts = attempts;
+ __entry->err = err;
+ ),
+
+ TP_printk("name=%s start=%lx count=%lu attempts=%u err=%d",
+ __get_str(name),
+ __entry->start,
+ __entry->count,
+ __entry->attempts,
+ __entry->err)
+);
+
TRACE_EVENT(cma_alloc_start,
TP_PROTO(const char *name, unsigned long count, unsigned int align),
diff --git a/mm/cma.c b/mm/cma.c
index 543bb6b3be8e..4a0f68b9443b 100644
--- a/mm/cma.c
+++ b/mm/cma.c
@@ -416,6 +416,92 @@ static void cma_debug_show_areas(struct cma *cma)
static inline void cma_debug_show_areas(struct cma *cma) { }
#endif
+/**
+ * cma_alloc_range() - allocate pages in a specific range
+ * @cma: Contiguous memory region for which the allocation is performed.
+ * @start: Starting pfn of the allocation.
+ * @count: Requested number of pages
+ * @tries: Number of tries if the range is busy
+ * @no_warn: Avoid printing message about failed allocation
+ *
+ * This function allocates part of contiguous memory from a specific contiguous
+ * memory area, from the specified starting address. The 'start' pfn and the the
+ * 'count' number of pages must be aligned to the CMA bitmap order per bit.
+ */
+int cma_alloc_range(struct cma *cma, unsigned long start, unsigned long count,
+ unsigned tries, gfp_t gfp)
+{
+ unsigned long bitmap_maxno, bitmap_no, bitmap_start, bitmap_count;
+ unsigned long i = 0;
+ struct page *page;
+ int err = -EINVAL;
+
+ if (!cma || !cma->count || !cma->bitmap)
+ goto out_stats;
+
+ trace_cma_alloc_range_start(cma->name, start, count, tries);
+
+ if (!count || start < cma->base_pfn ||
+ start + count > cma->base_pfn + cma->count)
+ goto out_stats;
+
+ if (!IS_ALIGNED(start | count, 1 << cma->order_per_bit))
+ goto out_stats;
+
+ bitmap_start = (start - cma->base_pfn) >> cma->order_per_bit;
+ bitmap_maxno = cma_bitmap_maxno(cma);
+ bitmap_count = cma_bitmap_pages_to_bits(cma, count);
+
+ spin_lock_irq(&cma->lock);
+ bitmap_no = bitmap_find_next_zero_area(cma->bitmap, bitmap_maxno,
+ bitmap_start, bitmap_count, 0);
+ if (bitmap_no != bitmap_start) {
+ spin_unlock_irq(&cma->lock);
+ err = -EEXIST;
+ goto out_stats;
+ }
+ bitmap_set(cma->bitmap, bitmap_start, bitmap_count);
+ spin_unlock_irq(&cma->lock);
+
+ for (i = 0; i < tries; i++) {
+ mutex_lock(&cma_mutex);
+ err = alloc_contig_range(start, start + count, MIGRATE_CMA, gfp);
+ mutex_unlock(&cma_mutex);
+
+ if (err != -EBUSY)
+ break;
+ }
+
+ if (err) {
+ cma_clear_bitmap(cma, start, count);
+ } else {
+ page = pfn_to_page(start);
+
+ /*
+ * CMA can allocate multiple page blocks, which results in
+ * different blocks being marked with different tags. Reset the
+ * tags to ignore those page blocks.
+ */
+ for (i = 0; i < count; i++)
+ page_kasan_tag_reset(nth_page(page, i));
+ }
+
+out_stats:
+ trace_cma_alloc_range_finish(cma->name, start, count, i, err);
+
+ if (err) {
+ count_vm_events(CMA_ALLOC_FAIL, count);
+ if (cma)
+ cma_sysfs_account_fail_pages(cma, count);
+ } else {
+ count_vm_events(CMA_ALLOC_SUCCESS, count);
+ cma_sysfs_account_success_pages(cma, count);
+ }
+
+ return err;
+}
+
+
/**
* cma_alloc() - allocate pages from contiguous area
* @cma: Contiguous memory region for which the allocation is performed.
--
2.43.0
On 1/25/24 22:12, Alexandru Elisei wrote:
> Today, cma_alloc() is used to allocate a contiguous memory region. The
> function allows the caller to specify the number of pages to allocate, but
> not the starting address. cma_alloc() will walk over the entire CMA region
> trying to allocate the first available range of the specified size.
>
> Introduce cma_alloc_range(), which makes CMA more versatile by allowing the
> caller to specify a particular range in the CMA region, defined by the
> start pfn and the size.
>
> arm64 will make use of this function when tag storage management will be
> implemented: cma_alloc_range() will be used to reserve the tag storage
> associated with a tagged page.
Basically, you would like to pass on a preferred start address and the
allocation could just fail if a contig range is not available from such
a starting address ?
Then why not just change cma_alloc() to take a new argument 'start_pfn'.
Why create a new but almost similar allocator ?
But then I am wondering why this could not be done in the arm64 platform
code itself operating on a CMA area reserved just for tag storage. Unless
this new allocator has other usage beyond MTE, this could be implemented
in the platform itself.
>
> Signed-off-by: Alexandru Elisei <alexandru.elisei@arm.com>
> ---
>
> Changes since rfc v2:
>
> * New patch.
>
> include/linux/cma.h | 2 +
> include/trace/events/cma.h | 59 ++++++++++++++++++++++++++
> mm/cma.c | 86 ++++++++++++++++++++++++++++++++++++++
> 3 files changed, 147 insertions(+)
>
> diff --git a/include/linux/cma.h b/include/linux/cma.h
> index 63873b93deaa..e32559da6942 100644
> --- a/include/linux/cma.h
> +++ b/include/linux/cma.h
> @@ -50,6 +50,8 @@ extern int cma_init_reserved_mem(phys_addr_t base, phys_addr_t size,
> struct cma **res_cma);
> extern struct page *cma_alloc(struct cma *cma, unsigned long count, unsigned int align,
> bool no_warn);
> +extern int cma_alloc_range(struct cma *cma, unsigned long start, unsigned long count,
> + unsigned tries, gfp_t gfp);
> extern bool cma_pages_valid(struct cma *cma, const struct page *pages, unsigned long count);
> extern bool cma_release(struct cma *cma, const struct page *pages, unsigned long count);
>
> diff --git a/include/trace/events/cma.h b/include/trace/events/cma.h
> index 25103e67737c..a89af313a572 100644
> --- a/include/trace/events/cma.h
> +++ b/include/trace/events/cma.h
> @@ -36,6 +36,65 @@ TRACE_EVENT(cma_release,
> __entry->count)
> );
>
> +TRACE_EVENT(cma_alloc_range_start,
> +
> + TP_PROTO(const char *name, unsigned long start, unsigned long count,
> + unsigned tries),
> +
> + TP_ARGS(name, start, count, tries),
> +
> + TP_STRUCT__entry(
> + __string(name, name)
> + __field(unsigned long, start)
> + __field(unsigned long, count)
> + __field(unsigned, tries)
> + ),
> +
> + TP_fast_assign(
> + __assign_str(name, name);
> + __entry->start = start;
> + __entry->count = count;
> + __entry->tries = tries;
> + ),
> +
> + TP_printk("name=%s start=%lx count=%lu tries=%u",
> + __get_str(name),
> + __entry->start,
> + __entry->count,
> + __entry->tries)
> +);
> +
> +TRACE_EVENT(cma_alloc_range_finish,
> +
> + TP_PROTO(const char *name, unsigned long start, unsigned long count,
> + unsigned attempts, int err),
> +
> + TP_ARGS(name, start, count, attempts, err),
> +
> + TP_STRUCT__entry(
> + __string(name, name)
> + __field(unsigned long, start)
> + __field(unsigned long, count)
> + __field(unsigned, attempts)
> + __field(int, err)
> + ),
> +
> + TP_fast_assign(
> + __assign_str(name, name);
> + __entry->start = start;
> + __entry->count = count;
> + __entry->attempts = attempts;
> + __entry->err = err;
> + ),
> +
> + TP_printk("name=%s start=%lx count=%lu attempts=%u err=%d",
> + __get_str(name),
> + __entry->start,
> + __entry->count,
> + __entry->attempts,
> + __entry->err)
> +);
> +
> TRACE_EVENT(cma_alloc_start,
>
> TP_PROTO(const char *name, unsigned long count, unsigned int align),
> diff --git a/mm/cma.c b/mm/cma.c
> index 543bb6b3be8e..4a0f68b9443b 100644
> --- a/mm/cma.c
> +++ b/mm/cma.c
> @@ -416,6 +416,92 @@ static void cma_debug_show_areas(struct cma *cma)
> static inline void cma_debug_show_areas(struct cma *cma) { }
> #endif
>
> +/**
> + * cma_alloc_range() - allocate pages in a specific range
> + * @cma: Contiguous memory region for which the allocation is performed.
> + * @start: Starting pfn of the allocation.
> + * @count: Requested number of pages
> + * @tries: Number of tries if the range is busy
> + * @no_warn: Avoid printing message about failed allocation
> + *
> + * This function allocates part of contiguous memory from a specific contiguous
> + * memory area, from the specified starting address. The 'start' pfn and the the
> + * 'count' number of pages must be aligned to the CMA bitmap order per bit.
> + */
> +int cma_alloc_range(struct cma *cma, unsigned long start, unsigned long count,
> + unsigned tries, gfp_t gfp)
> +{
> + unsigned long bitmap_maxno, bitmap_no, bitmap_start, bitmap_count;
> + unsigned long i = 0;
> + struct page *page;
> + int err = -EINVAL;
> +
> + if (!cma || !cma->count || !cma->bitmap)
> + goto out_stats;
> +
> + trace_cma_alloc_range_start(cma->name, start, count, tries);
> +
> + if (!count || start < cma->base_pfn ||
> + start + count > cma->base_pfn + cma->count)
> + goto out_stats;
> +
> + if (!IS_ALIGNED(start | count, 1 << cma->order_per_bit))
> + goto out_stats;
> +
> + bitmap_start = (start - cma->base_pfn) >> cma->order_per_bit;
> + bitmap_maxno = cma_bitmap_maxno(cma);
> + bitmap_count = cma_bitmap_pages_to_bits(cma, count);
> +
> + spin_lock_irq(&cma->lock);
> + bitmap_no = bitmap_find_next_zero_area(cma->bitmap, bitmap_maxno,
> + bitmap_start, bitmap_count, 0);
> + if (bitmap_no != bitmap_start) {
> + spin_unlock_irq(&cma->lock);
> + err = -EEXIST;
> + goto out_stats;
> + }
> + bitmap_set(cma->bitmap, bitmap_start, bitmap_count);
> + spin_unlock_irq(&cma->lock);
> +
> + for (i = 0; i < tries; i++) {
> + mutex_lock(&cma_mutex);
> + err = alloc_contig_range(start, start + count, MIGRATE_CMA, gfp);
> + mutex_unlock(&cma_mutex);
> +
> + if (err != -EBUSY)
> + break;
> + }
> +
> + if (err) {
> + cma_clear_bitmap(cma, start, count);
> + } else {
> + page = pfn_to_page(start);
> +
> + /*
> + * CMA can allocate multiple page blocks, which results in
> + * different blocks being marked with different tags. Reset the
> + * tags to ignore those page blocks.
> + */
> + for (i = 0; i < count; i++)
> + page_kasan_tag_reset(nth_page(page, i));
> + }
> +
> +out_stats:
> + trace_cma_alloc_range_finish(cma->name, start, count, i, err);
> +
> + if (err) {
> + count_vm_events(CMA_ALLOC_FAIL, count);
> + if (cma)
> + cma_sysfs_account_fail_pages(cma, count);
> + } else {
> + count_vm_events(CMA_ALLOC_SUCCESS, count);
> + cma_sysfs_account_success_pages(cma, count);
> + }
> +
> + return err;
> +}
> +
> +
> /**
> * cma_alloc() - allocate pages from contiguous area
> * @cma: Contiguous memory region for which the allocation is performed.
Hi,
On Tue, Jan 30, 2024 at 10:50:00AM +0530, Anshuman Khandual wrote:
>
>
> On 1/25/24 22:12, Alexandru Elisei wrote:
> > Today, cma_alloc() is used to allocate a contiguous memory region. The
> > function allows the caller to specify the number of pages to allocate, but
> > not the starting address. cma_alloc() will walk over the entire CMA region
> > trying to allocate the first available range of the specified size.
> >
> > Introduce cma_alloc_range(), which makes CMA more versatile by allowing the
> > caller to specify a particular range in the CMA region, defined by the
> > start pfn and the size.
> >
> > arm64 will make use of this function when tag storage management will be
> > implemented: cma_alloc_range() will be used to reserve the tag storage
> > associated with a tagged page.
>
> Basically, you would like to pass on a preferred start address and the
> allocation could just fail if a contig range is not available from such
> a starting address ?
>
> Then why not just change cma_alloc() to take a new argument 'start_pfn'.
> Why create a new but almost similar allocator ?
I tried doing that, and I gave up because:
- It made cma_alloc() even more complex and hard to follow.
- What value should 'start_pfn' be to tell cma_alloc() that it should be
ignored? Or, to put it another way, what pfn number is invalid on **all**
platforms that Linux supports?
I can give it another go if we can come up with an invalid value for
'start_pfn'.
>
> But then I am wondering why this could not be done in the arm64 platform
> code itself operating on a CMA area reserved just for tag storage. Unless
> this new allocator has other usage beyond MTE, this could be implemented
> in the platform itself.
I had the same idea in the previous iteration, David Hildenbrand suggested
this approach [1].
[1] https://lore.kernel.org/linux-fsdevel/2aafd53f-af1f-45f3-a08c-d11962254315@redhat.com/
Thanks,
Alex
>
> >
> > Signed-off-by: Alexandru Elisei <alexandru.elisei@arm.com>
> > ---
> >
> > Changes since rfc v2:
> >
> > * New patch.
> >
> > include/linux/cma.h | 2 +
> > include/trace/events/cma.h | 59 ++++++++++++++++++++++++++
> > mm/cma.c | 86 ++++++++++++++++++++++++++++++++++++++
> > 3 files changed, 147 insertions(+)
> >
> > diff --git a/include/linux/cma.h b/include/linux/cma.h
> > index 63873b93deaa..e32559da6942 100644
> > --- a/include/linux/cma.h
> > +++ b/include/linux/cma.h
> > @@ -50,6 +50,8 @@ extern int cma_init_reserved_mem(phys_addr_t base, phys_addr_t size,
> > struct cma **res_cma);
> > extern struct page *cma_alloc(struct cma *cma, unsigned long count, unsigned int align,
> > bool no_warn);
> > +extern int cma_alloc_range(struct cma *cma, unsigned long start, unsigned long count,
> > + unsigned tries, gfp_t gfp);
> > extern bool cma_pages_valid(struct cma *cma, const struct page *pages, unsigned long count);
> > extern bool cma_release(struct cma *cma, const struct page *pages, unsigned long count);
> >
> > diff --git a/include/trace/events/cma.h b/include/trace/events/cma.h
> > index 25103e67737c..a89af313a572 100644
> > --- a/include/trace/events/cma.h
> > +++ b/include/trace/events/cma.h
> > @@ -36,6 +36,65 @@ TRACE_EVENT(cma_release,
> > __entry->count)
> > );
> >
> > +TRACE_EVENT(cma_alloc_range_start,
> > +
> > + TP_PROTO(const char *name, unsigned long start, unsigned long count,
> > + unsigned tries),
> > +
> > + TP_ARGS(name, start, count, tries),
> > +
> > + TP_STRUCT__entry(
> > + __string(name, name)
> > + __field(unsigned long, start)
> > + __field(unsigned long, count)
> > + __field(unsigned, tries)
> > + ),
> > +
> > + TP_fast_assign(
> > + __assign_str(name, name);
> > + __entry->start = start;
> > + __entry->count = count;
> > + __entry->tries = tries;
> > + ),
> > +
> > + TP_printk("name=%s start=%lx count=%lu tries=%u",
> > + __get_str(name),
> > + __entry->start,
> > + __entry->count,
> > + __entry->tries)
> > +);
> > +
> > +TRACE_EVENT(cma_alloc_range_finish,
> > +
> > + TP_PROTO(const char *name, unsigned long start, unsigned long count,
> > + unsigned attempts, int err),
> > +
> > + TP_ARGS(name, start, count, attempts, err),
> > +
> > + TP_STRUCT__entry(
> > + __string(name, name)
> > + __field(unsigned long, start)
> > + __field(unsigned long, count)
> > + __field(unsigned, attempts)
> > + __field(int, err)
> > + ),
> > +
> > + TP_fast_assign(
> > + __assign_str(name, name);
> > + __entry->start = start;
> > + __entry->count = count;
> > + __entry->attempts = attempts;
> > + __entry->err = err;
> > + ),
> > +
> > + TP_printk("name=%s start=%lx count=%lu attempts=%u err=%d",
> > + __get_str(name),
> > + __entry->start,
> > + __entry->count,
> > + __entry->attempts,
> > + __entry->err)
> > +);
> > +
> > TRACE_EVENT(cma_alloc_start,
> >
> > TP_PROTO(const char *name, unsigned long count, unsigned int align),
> > diff --git a/mm/cma.c b/mm/cma.c
> > index 543bb6b3be8e..4a0f68b9443b 100644
> > --- a/mm/cma.c
> > +++ b/mm/cma.c
> > @@ -416,6 +416,92 @@ static void cma_debug_show_areas(struct cma *cma)
> > static inline void cma_debug_show_areas(struct cma *cma) { }
> > #endif
> >
> > +/**
> > + * cma_alloc_range() - allocate pages in a specific range
> > + * @cma: Contiguous memory region for which the allocation is performed.
> > + * @start: Starting pfn of the allocation.
> > + * @count: Requested number of pages
> > + * @tries: Number of tries if the range is busy
> > + * @no_warn: Avoid printing message about failed allocation
> > + *
> > + * This function allocates part of contiguous memory from a specific contiguous
> > + * memory area, from the specified starting address. The 'start' pfn and the the
> > + * 'count' number of pages must be aligned to the CMA bitmap order per bit.
> > + */
> > +int cma_alloc_range(struct cma *cma, unsigned long start, unsigned long count,
> > + unsigned tries, gfp_t gfp)
> > +{
> > + unsigned long bitmap_maxno, bitmap_no, bitmap_start, bitmap_count;
> > + unsigned long i = 0;
> > + struct page *page;
> > + int err = -EINVAL;
> > +
> > + if (!cma || !cma->count || !cma->bitmap)
> > + goto out_stats;
> > +
> > + trace_cma_alloc_range_start(cma->name, start, count, tries);
> > +
> > + if (!count || start < cma->base_pfn ||
> > + start + count > cma->base_pfn + cma->count)
> > + goto out_stats;
> > +
> > + if (!IS_ALIGNED(start | count, 1 << cma->order_per_bit))
> > + goto out_stats;
> > +
> > + bitmap_start = (start - cma->base_pfn) >> cma->order_per_bit;
> > + bitmap_maxno = cma_bitmap_maxno(cma);
> > + bitmap_count = cma_bitmap_pages_to_bits(cma, count);
> > +
> > + spin_lock_irq(&cma->lock);
> > + bitmap_no = bitmap_find_next_zero_area(cma->bitmap, bitmap_maxno,
> > + bitmap_start, bitmap_count, 0);
> > + if (bitmap_no != bitmap_start) {
> > + spin_unlock_irq(&cma->lock);
> > + err = -EEXIST;
> > + goto out_stats;
> > + }
> > + bitmap_set(cma->bitmap, bitmap_start, bitmap_count);
> > + spin_unlock_irq(&cma->lock);
> > +
> > + for (i = 0; i < tries; i++) {
> > + mutex_lock(&cma_mutex);
> > + err = alloc_contig_range(start, start + count, MIGRATE_CMA, gfp);
> > + mutex_unlock(&cma_mutex);
> > +
> > + if (err != -EBUSY)
> > + break;
> > + }
> > +
> > + if (err) {
> > + cma_clear_bitmap(cma, start, count);
> > + } else {
> > + page = pfn_to_page(start);
> > +
> > + /*
> > + * CMA can allocate multiple page blocks, which results in
> > + * different blocks being marked with different tags. Reset the
> > + * tags to ignore those page blocks.
> > + */
> > + for (i = 0; i < count; i++)
> > + page_kasan_tag_reset(nth_page(page, i));
> > + }
> > +
> > +out_stats:
> > + trace_cma_alloc_range_finish(cma->name, start, count, i, err);
> > +
> > + if (err) {
> > + count_vm_events(CMA_ALLOC_FAIL, count);
> > + if (cma)
> > + cma_sysfs_account_fail_pages(cma, count);
> > + } else {
> > + count_vm_events(CMA_ALLOC_SUCCESS, count);
> > + cma_sysfs_account_success_pages(cma, count);
> > + }
> > +
> > + return err;
> > +}
> > +
> > +
> > /**
> > * cma_alloc() - allocate pages from contiguous area
> > * @cma: Contiguous memory region for which the allocation is performed.
>
On 1/30/24 17:05, Alexandru Elisei wrote:
> Hi,
>
> On Tue, Jan 30, 2024 at 10:50:00AM +0530, Anshuman Khandual wrote:
>>
>> On 1/25/24 22:12, Alexandru Elisei wrote:
>>> Today, cma_alloc() is used to allocate a contiguous memory region. The
>>> function allows the caller to specify the number of pages to allocate, but
>>> not the starting address. cma_alloc() will walk over the entire CMA region
>>> trying to allocate the first available range of the specified size.
>>>
>>> Introduce cma_alloc_range(), which makes CMA more versatile by allowing the
>>> caller to specify a particular range in the CMA region, defined by the
>>> start pfn and the size.
>>>
>>> arm64 will make use of this function when tag storage management will be
>>> implemented: cma_alloc_range() will be used to reserve the tag storage
>>> associated with a tagged page.
>> Basically, you would like to pass on a preferred start address and the
>> allocation could just fail if a contig range is not available from such
>> a starting address ?
>>
>> Then why not just change cma_alloc() to take a new argument 'start_pfn'.
>> Why create a new but almost similar allocator ?
> I tried doing that, and I gave up because:
>
> - It made cma_alloc() even more complex and hard to follow.
>
> - What value should 'start_pfn' be to tell cma_alloc() that it should be
> ignored? Or, to put it another way, what pfn number is invalid on **all**
> platforms that Linux supports?
>
> I can give it another go if we can come up with an invalid value for
> 'start_pfn'.
Something negative might work. How about -1/-1UL ? A quick search gives
some instances such as ...
git grep "pfn == -1"
mm/mm_init.c: if (*start_pfn == -1UL)
mm/vmscan.c: if (pfn == -1)
mm/vmscan.c: if (pfn == -1)
mm/vmscan.c: if (pfn == -1)
tools/testing/selftests/mm/hugepage-vmemmap.c: if (pfn == -1UL) {
Could not -1UL be abstracted as common macro MM_INVALID_PFN to be used in
such scenarios including here ?
>
>> But then I am wondering why this could not be done in the arm64 platform
>> code itself operating on a CMA area reserved just for tag storage. Unless
>> this new allocator has other usage beyond MTE, this could be implemented
>> in the platform itself.
> I had the same idea in the previous iteration, David Hildenbrand suggested
> this approach [1].
>
> [1] https://lore.kernel.org/linux-fsdevel/2aafd53f-af1f-45f3-a08c-d11962254315@redhat.com/
There are two different cma_alloc() proposals here - including the next
patch i.e mm: cma: Fast track allocating memory when the pages are free
1) Augment cma_alloc() or add cma_alloc_range() with start_pfn parameter
2) Speed up cma_alloc() for small allocation requests when pages are free
The second one if separated out from this series could be considered on
its own as it will help all existing cma_alloc() callers. The first one
definitely needs an use case as provided in this series.
Hi,
On Wed, Jan 31, 2024 at 11:54:17AM +0530, Anshuman Khandual wrote:
>
>
> On 1/30/24 17:05, Alexandru Elisei wrote:
> > Hi,
> >
> > On Tue, Jan 30, 2024 at 10:50:00AM +0530, Anshuman Khandual wrote:
> >>
> >> On 1/25/24 22:12, Alexandru Elisei wrote:
> >>> Today, cma_alloc() is used to allocate a contiguous memory region. The
> >>> function allows the caller to specify the number of pages to allocate, but
> >>> not the starting address. cma_alloc() will walk over the entire CMA region
> >>> trying to allocate the first available range of the specified size.
> >>>
> >>> Introduce cma_alloc_range(), which makes CMA more versatile by allowing the
> >>> caller to specify a particular range in the CMA region, defined by the
> >>> start pfn and the size.
> >>>
> >>> arm64 will make use of this function when tag storage management will be
> >>> implemented: cma_alloc_range() will be used to reserve the tag storage
> >>> associated with a tagged page.
> >> Basically, you would like to pass on a preferred start address and the
> >> allocation could just fail if a contig range is not available from such
> >> a starting address ?
> >>
> >> Then why not just change cma_alloc() to take a new argument 'start_pfn'.
> >> Why create a new but almost similar allocator ?
> > I tried doing that, and I gave up because:
> >
> > - It made cma_alloc() even more complex and hard to follow.
> >
> > - What value should 'start_pfn' be to tell cma_alloc() that it should be
> > ignored? Or, to put it another way, what pfn number is invalid on **all**
> > platforms that Linux supports?
> >
> > I can give it another go if we can come up with an invalid value for
> > 'start_pfn'.
>
> Something negative might work. How about -1/-1UL ? A quick search gives
> some instances such as ...
>
> git grep "pfn == -1"
>
> mm/mm_init.c: if (*start_pfn == -1UL)
> mm/vmscan.c: if (pfn == -1)
> mm/vmscan.c: if (pfn == -1)
> mm/vmscan.c: if (pfn == -1)
> tools/testing/selftests/mm/hugepage-vmemmap.c: if (pfn == -1UL) {
>
> Could not -1UL be abstracted as common macro MM_INVALID_PFN to be used in
> such scenarios including here ?
Ah yes, you are right, get_pte_pfn() already uses -1 as an invalid pfn, so
I can just use that.
Will definitely give it a go on the next iteration, thanks for the
suggestion!
>
> >
> >> But then I am wondering why this could not be done in the arm64 platform
> >> code itself operating on a CMA area reserved just for tag storage. Unless
> >> this new allocator has other usage beyond MTE, this could be implemented
> >> in the platform itself.
> > I had the same idea in the previous iteration, David Hildenbrand suggested
> > this approach [1].
> >
> > [1] https://lore.kernel.org/linux-fsdevel/2aafd53f-af1f-45f3-a08c-d11962254315@redhat.com/
>
> There are two different cma_alloc() proposals here - including the next
> patch i.e mm: cma: Fast track allocating memory when the pages are free
>
> 1) Augment cma_alloc() or add cma_alloc_range() with start_pfn parameter
> 2) Speed up cma_alloc() for small allocation requests when pages are free
>
> The second one if separated out from this series could be considered on
> its own as it will help all existing cma_alloc() callers. The first one
> definitely needs an use case as provided in this series.
I understand, thanks for the input!
Alex
© 2016 - 2025 Red Hat, Inc.