[PATCH] mm/cma: print total and used pages in cma_alloc()
From: gaoxiang17 <gaoxiang17@xiaomi.com>
This makes the CMA info more intuitive during debugging: besides the request size, cma_alloc() now also prints the area's total and currently used page counts.
before:
[ 24.407814] cma: cma_alloc(cma (____ptrval____), name: reserved, count 1, align 0)
[ 24.413397] cma: cma_alloc(cma (____ptrval____), name: reserved, count 1, align 0)
[ 24.415886] cma: cma_alloc(cma (____ptrval____), name: reserved, count 1, align 0)
after:
[ 24.069738] cma: cma_alloc(cma (____ptrval____), name: reserved, total pages: 16384, used pages: 64, request pages: 1, align 0)
[ 24.075317] cma: cma_alloc(cma (____ptrval____), name: reserved, total pages: 16384, used pages: 65, request pages: 1, align 0)
[ 24.078455] cma: cma_alloc(cma (____ptrval____), name: reserved, total pages: 16384, used pages: 66, request pages: 1, align 0)
Signed-off-by: gaoxiang17 <gaoxiang17@xiaomi.com>
---
mm/cma.c | 15 +++++++++++++--
1 file changed, 13 insertions(+), 2 deletions(-)
diff --git a/mm/cma.c b/mm/cma.c
index 2ffa4befb99a..46cc98e7f587 100644
--- a/mm/cma.c
+++ b/mm/cma.c
@@ -776,6 +776,17 @@ static void cma_debug_show_areas(struct cma *cma)
spin_unlock_irq(&cma->lock);
}
+static unsigned long cma_get_used_pages(struct cma *cma)
+{
+ unsigned long used;
+
+ spin_lock_irq(&cma->lock);
+ used = bitmap_weight(cma->bitmap, (int)cma_bitmap_maxno(cma));
+ spin_unlock_irq(&cma->lock);
+
+ return used << cma->order_per_bit;
+}
+
static int cma_range_alloc(struct cma *cma, struct cma_memrange *cmr,
unsigned long count, unsigned int align,
struct page **pagep, gfp_t gfp)
@@ -858,8 +869,8 @@ static struct page *__cma_alloc(struct cma *cma, unsigned long count,
if (!cma || !cma->count)
return page;
- pr_debug("%s(cma %p, name: %s, count %lu, align %d)\n", __func__,
- (void *)cma, cma->name, count, align);
+ pr_debug("%s(cma %p, name: %s, total pages: %lu, used pages: %lu, request pages: %lu, align %d)\n",
+ __func__, (void *)cma, cma->name, cma->count, cma_get_used_pages(cma), count, align);
if (!count)
return page;
--
2.34.1
Hi Xiang,

kernel test robot noticed the following build errors:

[auto build test ERROR on akpm-mm/mm-everything]

url:    https://github.com/intel-lab-lkp/linux/commits/Xiang-Gao/mm-cma-print-total-and-used-pages-in-cma_alloc/20250816-122940
base:   https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything
patch link:    https://lore.kernel.org/r/20250816042842.3959315-1-gxxa03070307%40gmail.com
patch subject: [PATCH] mm/cma: print total and used pages in cma_alloc()
config: arm-randconfig-002-20250816 (https://download.01.org/0day-ci/archive/20250817/202508170014.PK57XSd7-lkp@intel.com/config)
compiler: arm-linux-gnueabi-gcc (GCC) 10.5.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250817/202508170014.PK57XSd7-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202508170014.PK57XSd7-lkp@intel.com/

All errors (new ones prefixed by >>):

   mm/cma.c: In function 'cma_get_used_pages':
>> mm/cma.c:784:26: error: 'struct cma' has no member named 'bitmap'
     784 |  used = bitmap_weight(cma->bitmap, (int)cma_bitmap_maxno(cma));
         |                          ^~
>> mm/cma.c:784:41: error: too few arguments to function 'cma_bitmap_maxno'
     784 |  used = bitmap_weight(cma->bitmap, (int)cma_bitmap_maxno(cma));
         |                                         ^~~~~~~~~~~~~~~~
   In file included from mm/cma.c:34:
   mm/cma.h:77:29: note: declared here
      77 | static inline unsigned long cma_bitmap_maxno(struct cma *cma,
         |                             ^~~~~~~~~~~~~~~~

vim +784 mm/cma.c

   778	
   779	static unsigned long cma_get_used_pages(struct cma *cma)
   780	{
   781		unsigned long used;
   782	
   783		spin_lock_irq(&cma->lock);
 > 784		used = bitmap_weight(cma->bitmap, (int)cma_bitmap_maxno(cma));
   785		spin_unlock_irq(&cma->lock);
   786	
   787		return used << cma->order_per_bit;
   788	}
   789	

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
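For reference, a variant of the helper that would at least match the multi-range layout the robot is building against might look roughly like the sketch below. This is only a sketch, not a tested replacement: it assumes the fields mm/cma.h declares for that layout (cma->nranges, cma->ranges[], a per-range cmr->bitmap) and the two-argument cma_bitmap_maxno(cma, cmr) the error output points at, and it still pays the extra lock cycle discussed below.

static unsigned long cma_get_used_pages(struct cma *cma)
{
	unsigned long used = 0;
	int i;

	spin_lock_irq(&cma->lock);
	for (i = 0; i < cma->nranges; i++) {
		struct cma_memrange *cmr = &cma->ranges[i];

		/* Count the allocated bits in this range's bitmap. */
		used += bitmap_weight(cmr->bitmap, cma_bitmap_maxno(cma, cmr));
	}
	spin_unlock_irq(&cma->lock);

	/* Each bitmap bit covers 1 << order_per_bit pages. */
	return used << cma->order_per_bit;
}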
On 16.08.25 06:28, Xiang Gao wrote:
> From: gaoxiang17 <gaoxiang17@xiaomi.com>
> 
> This makes cma info more intuitive during debugging.
> 
> before:
> [ 24.407814] cma: cma_alloc(cma (____ptrval____), name: reserved, count 1, align 0)
> [ 24.413397] cma: cma_alloc(cma (____ptrval____), name: reserved, count 1, align 0)
> [ 24.415886] cma: cma_alloc(cma (____ptrval____), name: reserved, count 1, align 0)
> 
> after:
> [ 24.069738] cma: cma_alloc(cma (____ptrval____), name: reserved, total pages: 16384, used pages: 64, request pages: 1, align 0)
> [ 24.075317] cma: cma_alloc(cma (____ptrval____), name: reserved, total pages: 16384, used pages: 65, request pages: 1, align 0)
> [ 24.078455] cma: cma_alloc(cma (____ptrval____), name: reserved, total pages: 16384, used pages: 66, request pages: 1, align 0)
> 
> Signed-off-by: gaoxiang17 <gaoxiang17@xiaomi.com>
> ---
> mm/cma.c | 15 +++++++++++++--
> 1 file changed, 13 insertions(+), 2 deletions(-)
> 
> diff --git a/mm/cma.c b/mm/cma.c
> index 2ffa4befb99a..46cc98e7f587 100644
> --- a/mm/cma.c
> +++ b/mm/cma.c
> @@ -776,6 +776,17 @@ static void cma_debug_show_areas(struct cma *cma)
> spin_unlock_irq(&cma->lock);
> }
> 
> +static unsigned long cma_get_used_pages(struct cma *cma)
> +{
> + unsigned long used;
> +
> + spin_lock_irq(&cma->lock);
> + used = bitmap_weight(cma->bitmap, (int)cma_bitmap_maxno(cma));
> + spin_unlock_irq(&cma->lock);
> +
> + return used << cma->order_per_bit;
> +}
> +
> static int cma_range_alloc(struct cma *cma, struct cma_memrange *cmr,
> unsigned long count, unsigned int align,
> struct page **pagep, gfp_t gfp)
> @@ -858,8 +869,8 @@ static struct page *__cma_alloc(struct cma *cma, unsigned long count,
> if (!cma || !cma->count)
> return page;
> 
> - pr_debug("%s(cma %p, name: %s, count %lu, align %d)\n", __func__,
> - (void *)cma, cma->name, count, align);
> + pr_debug("%s(cma %p, name: %s, total pages: %lu, used pages: %lu, request pages: %lu, align %d)\n",
> + __func__, (void *)cma, cma->name, cma->count, cma_get_used_pages(cma), count, align);

^ one space missing for proper indentation.

But doing another spinlock cycle just for debugging purposes? That does
not feel right, sorry.

-- 
Cheers

David / dhildenb
What about using tracepoints?
Add a trace_cma_alloc() event that only runs when tracing is enabled. No
lock unless someone is actively monitoring

On 8/16/2025 10:27 AM, David Hildenbrand wrote:
> On 16.08.25 06:28, Xiang Gao wrote:
>> From: gaoxiang17 <gaoxiang17@xiaomi.com>
>>
>> This makes cma info more intuitive during debugging.
>>
>> before:
>> [ 24.407814] cma: cma_alloc(cma (____ptrval____), name: reserved,
>> count 1, align 0)
>> [ 24.413397] cma: cma_alloc(cma (____ptrval____), name: reserved,
>> count 1, align 0)
>> [ 24.415886] cma: cma_alloc(cma (____ptrval____), name: reserved,
>> count 1, align 0)
>>
>> after:
>> [ 24.069738] cma: cma_alloc(cma (____ptrval____), name: reserved,
>> total pages: 16384, used pages: 64, request pages: 1, align 0)
>> [ 24.075317] cma: cma_alloc(cma (____ptrval____), name: reserved,
>> total pages: 16384, used pages: 65, request pages: 1, align 0)
>> [ 24.078455] cma: cma_alloc(cma (____ptrval____), name: reserved,
>> total pages: 16384, used pages: 66, request pages: 1, align 0)
>>
>> Signed-off-by: gaoxiang17 <gaoxiang17@xiaomi.com>
>> ---
>> mm/cma.c | 15 +++++++++++++--
>> 1 file changed, 13 insertions(+), 2 deletions(-)
>>
>> diff --git a/mm/cma.c b/mm/cma.c
>> index 2ffa4befb99a..46cc98e7f587 100644
>> --- a/mm/cma.c
>> +++ b/mm/cma.c
>> @@ -776,6 +776,17 @@ static void cma_debug_show_areas(struct cma *cma)
>> spin_unlock_irq(&cma->lock);
>> }
>> +static unsigned long cma_get_used_pages(struct cma *cma)
>> +{
>> + unsigned long used;
>> +
>> + spin_lock_irq(&cma->lock);
>> + used = bitmap_weight(cma->bitmap, (int)cma_bitmap_maxno(cma));
>> + spin_unlock_irq(&cma->lock);
>> +
>> + return used << cma->order_per_bit;
>> +}
>> +
>> static int cma_range_alloc(struct cma *cma, struct cma_memrange *cmr,
>> unsigned long count, unsigned int align,
>> struct page **pagep, gfp_t gfp)
>> @@ -858,8 +869,8 @@ static struct page *__cma_alloc(struct cma *cma,
>> unsigned long count,
>> if (!cma || !cma->count)
>> return page;
>> - pr_debug("%s(cma %p, name: %s, count %lu, align %d)\n", __func__,
>> - (void *)cma, cma->name, count, align);
>> + pr_debug("%s(cma %p, name: %s, total pages: %lu, used pages: %lu,
>> request pages: %lu, align %d)\n",
>> + __func__, (void *)cma, cma->name, cma->count,
>> cma_get_used_pages(cma), count, align);
>
> ^ one space missing for proper indentation.
>
> But doing another spinlock cycle just for debugging purposes? That does
> not feel right, sorry.
>
On 16.08.25 08:42, Giorgi Tchankvetadze wrote:
> What about using tracepoints?
> Add a trace_cma_alloc() event that only runs when tracing is enabled. No
> lock unless someone is actively monitoring

Tracing in general sounds like a much better approach here than the two
pr_debug() in this function.

-- 
Cheers

David / dhildenb
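For illustration, the kind of event being suggested could look roughly like the sketch below. The event name and field list are made up for this example (mainline already has cma_alloc_start/cma_alloc_finish in include/trace/events/cma.h, which a real patch would more likely extend), and the call site would only need to compute the used-page count when someone is actually recording, e.g. by guarding it with the generated trace_cma_alloc_info_enabled() helper.

/* Sketch only: would live in include/trace/events/cma.h. */
TRACE_EVENT(cma_alloc_info,

	TP_PROTO(const char *name, unsigned long total_pages,
		 unsigned long used_pages, unsigned long req_pages,
		 unsigned int align),

	TP_ARGS(name, total_pages, used_pages, req_pages, align),

	TP_STRUCT__entry(
		__string(name, name)
		__field(unsigned long, total_pages)
		__field(unsigned long, used_pages)
		__field(unsigned long, req_pages)
		__field(unsigned int, align)
	),

	TP_fast_assign(
		/* single-argument __assign_str() per current tracing macros */
		__assign_str(name);
		__entry->total_pages = total_pages;
		__entry->used_pages = used_pages;
		__entry->req_pages = req_pages;
		__entry->align = align;
	),

	TP_printk("name=%s total_pages=%lu used_pages=%lu req_pages=%lu align=%u",
		  __get_str(name), __entry->total_pages, __entry->used_pages,
		  __entry->req_pages, __entry->align)
);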
On Sat, 16 Aug 2025 08:27:39 +0200 David Hildenbrand <david@redhat.com> wrote:

> > @@ -858,8 +869,8 @@ static struct page *__cma_alloc(struct cma *cma, unsigned long count,
> > if (!cma || !cma->count)
> > return page;
> >
> > - pr_debug("%s(cma %p, name: %s, count %lu, align %d)\n", __func__,
> > - (void *)cma, cma->name, count, align);
> > + pr_debug("%s(cma %p, name: %s, total pages: %lu, used pages: %lu, request pages: %lu, align %d)\n",
> > + __func__, (void *)cma, cma->name, cma->count, cma_get_used_pages(cma), count, align);
>
> ^ one space missing for proper indentation.
>
> But doing another spinlock cycle just for debugging purposes? That does
> not feel right, sorry.

If we're calling pr_debug() frequently enough for this to matter, we
have other problems!
On 16.08.25 08:45, Andrew Morton wrote:
> On Sat, 16 Aug 2025 08:27:39 +0200 David Hildenbrand <david@redhat.com> wrote:
> 
>>> @@ -858,8 +869,8 @@ static struct page *__cma_alloc(struct cma *cma, unsigned long count,
>>> if (!cma || !cma->count)
>>> return page;
>>>
>>> - pr_debug("%s(cma %p, name: %s, count %lu, align %d)\n", __func__,
>>> - (void *)cma, cma->name, count, align);
>>> + pr_debug("%s(cma %p, name: %s, total pages: %lu, used pages: %lu, request pages: %lu, align %d)\n",
>>> + __func__, (void *)cma, cma->name, cma->count, cma_get_used_pages(cma), count, align);
>>
>> ^ one space missing for proper indentation.
>>
>> But doing another spinlock cycle just for debugging purposes? That does
>> not feel right, sorry.
> 
> If we're calling pr_debug() frequently enough for this to matter, we
> have other problems!

We call it for each and every actual CMA allocation? I really don't see
why we want to just randomly make CMA allocation latency worse.

Is the existing pr_debug() a problem? Maybe. But who actually has debug
messages enabled in any sane setup?

-- 
Cheers

David / dhildenb
On Sat, 16 Aug 2025 08:56:47 +0200 David Hildenbrand <david@redhat.com> wrote:

> On 16.08.25 08:45, Andrew Morton wrote:
>> On Sat, 16 Aug 2025 08:27:39 +0200 David Hildenbrand <david@redhat.com> wrote:
>>
>>>> @@ -858,8 +869,8 @@ static struct page *__cma_alloc(struct cma *cma, unsigned long count,
>>>> if (!cma || !cma->count)
>>>> return page;
>>>>
>>>> - pr_debug("%s(cma %p, name: %s, count %lu, align %d)\n", __func__,
>>>> - (void *)cma, cma->name, count, align);
>>>> + pr_debug("%s(cma %p, name: %s, total pages: %lu, used pages: %lu, request pages: %lu, align %d)\n",
>>>> + __func__, (void *)cma, cma->name, cma->count, cma_get_used_pages(cma), count, align);
>>>
>>> ^ one space missing for proper indentation.
>>>
>>> But doing another spinlock cycle just for debugging purposes? That does
>>> not feel right, sorry.
>>
>> If we're calling pr_debug() frequently enough for this to matter, we
>> have other problems!
>
> We call it for each and every actual CMA allocation? I really don't see
> why we want to just randomly make CMA allocation latency worse.

pr_debug() is 12 million times more expensive than a spin_lock()!

> Is the existing pr_debug() a problem? Maybe. But who actually has debug
> messages enabled in any sane setup?

Nobody, clearly. If anyone enabled pr_debug() in here, they'd
immediately have to remove those statements to get any work done. Kill
it.
On 16.08.25 09:34, Andrew Morton wrote:
> On Sat, 16 Aug 2025 08:56:47 +0200 David Hildenbrand <david@redhat.com> wrote:
> 
>> On 16.08.25 08:45, Andrew Morton wrote:
>>> On Sat, 16 Aug 2025 08:27:39 +0200 David Hildenbrand <david@redhat.com> wrote:
>>>
>>>>> @@ -858,8 +869,8 @@ static struct page *__cma_alloc(struct cma *cma, unsigned long count,
>>>>> if (!cma || !cma->count)
>>>>> return page;
>>>>>
>>>>> - pr_debug("%s(cma %p, name: %s, count %lu, align %d)\n", __func__,
>>>>> - (void *)cma, cma->name, count, align);
>>>>> + pr_debug("%s(cma %p, name: %s, total pages: %lu, used pages: %lu, request pages: %lu, align %d)\n",
>>>>> + __func__, (void *)cma, cma->name, cma->count, cma_get_used_pages(cma), count, align);
>>>>
>>>> ^ one space missing for proper indentation.
>>>>
>>>> But doing another spinlock cycle just for debugging purposes? That does
>>>> not feel right, sorry.
>>>
>>> If we're calling pr_debug() frequently enough for this to matter, we
>>> have other problems!
>>
>> We call it for each and every actual CMA allocation? I really don't see
>> why we want to just randomly make CMA allocation latency worse.
> 
> pr_debug() is 12 million times more expensive than a spin_lock()!
> 
>> Is the existing pr_debug() a problem? Maybe. But who actually has debug
>> messages enabled in any sane setup?
> 
> Nobody, clearly. If anyone enabled pr_debug() in here, they'd
> immediately have to remove those statements to get any work done. Kill
> it.

I just learned that pr_debug() on a !CONFIG_DEBUG kernel translates to
no_printk(), which is just a mostly-empty macro that doesn't really use
any of the parameters.

I would assume the cma_get_used_pages() would get completely optimized
out in that case.

So, I don't care, but ... moving to tracing seems much more reasonable.

-- 
Cheers

David / dhildenb
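For context, the observation above comes down to roughly the pattern below (simplified from include/linux/printk.h; it applies when DEBUG is not defined for the file and CONFIG_DYNAMIC_DEBUG is off, otherwise pr_debug() is routed through dynamic debug and its arguments are evaluated whenever the site is enabled).

/* Simplified sketch of the non-debug pr_debug() path. */
#define no_printk(fmt, ...)				\
({							\
	if (0)						\
		printk(fmt, ##__VA_ARGS__);		\
	0;						\
})

#define pr_debug(fmt, ...) \
	no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__)

The arguments only appear inside the never-taken if (0) branch, so they still get format/type checking but the compiler can drop the whole call, including a cma_get_used_pages(cma) argument, as dead code.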