[PATCH 1/4] mm/mm_init: use deferred_init_memmap_chunk() in deferred_grow_zone()

Posted by Mike Rapoport 1 month, 2 weeks ago
From: "Mike Rapoport (Microsoft)" <rppt@kernel.org>

deferred_grow_zone() initializes one or more sections in the memory map
if buddy runs out of initialized struct pages when
CONFIG_DEFERRED_STRUCT_PAGE_INIT is enabled.

It loops through memblock regions and initializes and frees pages in
MAX_ORDER_NR_PAGES chunks.

Essentially the same loop is implemented in deferred_init_memmap_chunk();
the only actual difference is that deferred_init_memmap_chunk() does not
count initialized pages.

Make deferred_init_memmap_chunk() count the initialized pages and return
their number, wrap it with deferred_init_memmap_job() for multithreaded
initialization with padata_do_multithreaded() and replace open-coded
initialization of struct pages in deferred_grow_zone() with a call to
deferred_init_memmap_chunk().
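
The wrapper is needed because padata's multithreaded callback returns
void. For reference, the callback type that padata_do_multithreaded()
expects (struct padata_mt_job in include/linux/padata.h, modulo exact
formatting) is:

	void (*thread_fn)(unsigned long start, unsigned long end, void *arg);

so the counting deferred_init_memmap_chunk() cannot be passed directly;
deferred_init_memmap_job() simply discards its return value.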

Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
---
 mm/mm_init.c | 65 ++++++++++++++++++++++++++--------------------------
 1 file changed, 32 insertions(+), 33 deletions(-)

diff --git a/mm/mm_init.c b/mm/mm_init.c
index 5c21b3af216b..81809b83814b 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -2134,12 +2134,12 @@ deferred_init_maxorder(u64 *i, struct zone *zone, unsigned long *start_pfn,
 	return nr_pages;
 }
 
-static void __init
+static unsigned long __init
 deferred_init_memmap_chunk(unsigned long start_pfn, unsigned long end_pfn,
-			   void *arg)
+			   struct zone *zone)
 {
+	unsigned long nr_pages = 0;
 	unsigned long spfn, epfn;
-	struct zone *zone = arg;
 	u64 i = 0;
 
 	deferred_init_mem_pfn_range_in_zone(&i, zone, &spfn, &epfn, start_pfn);
@@ -2149,9 +2149,20 @@ deferred_init_memmap_chunk(unsigned long start_pfn, unsigned long end_pfn,
 	 * we can avoid introducing any issues with the buddy allocator.
 	 */
 	while (spfn < end_pfn) {
-		deferred_init_maxorder(&i, zone, &spfn, &epfn);
+		nr_pages += deferred_init_maxorder(&i, zone, &spfn, &epfn);
 		cond_resched();
 	}
+
+	return nr_pages;
+}
+
+static void __init
+deferred_init_memmap_job(unsigned long start_pfn, unsigned long end_pfn,
+			 void *arg)
+{
+	struct zone *zone = arg;
+
+	deferred_init_memmap_chunk(start_pfn, end_pfn, zone);
 }
 
 static unsigned int __init
@@ -2204,7 +2215,7 @@ static int __init deferred_init_memmap(void *data)
 	while (deferred_init_mem_pfn_range_in_zone(&i, zone, &spfn, &epfn, first_init_pfn)) {
 		first_init_pfn = ALIGN(epfn, PAGES_PER_SECTION);
 		struct padata_mt_job job = {
-			.thread_fn   = deferred_init_memmap_chunk,
+			.thread_fn   = deferred_init_memmap_job,
 			.fn_arg      = zone,
 			.start       = spfn,
 			.size        = first_init_pfn - spfn,
@@ -2240,12 +2251,11 @@ static int __init deferred_init_memmap(void *data)
  */
 bool __init deferred_grow_zone(struct zone *zone, unsigned int order)
 {
-	unsigned long nr_pages_needed = ALIGN(1 << order, PAGES_PER_SECTION);
+	unsigned long nr_pages_needed = SECTION_ALIGN_UP(1 << order);
 	pg_data_t *pgdat = zone->zone_pgdat;
 	unsigned long first_deferred_pfn = pgdat->first_deferred_pfn;
 	unsigned long spfn, epfn, flags;
 	unsigned long nr_pages = 0;
-	u64 i = 0;
 
 	/* Only the last zone may have deferred pages */
 	if (zone_end_pfn(zone) != pgdat_end_pfn(pgdat))
@@ -2262,37 +2272,26 @@ bool __init deferred_grow_zone(struct zone *zone, unsigned int order)
 		return true;
 	}
 
-	/* If the zone is empty somebody else may have cleared out the zone */
-	if (!deferred_init_mem_pfn_range_in_zone(&i, zone, &spfn, &epfn,
-						 first_deferred_pfn)) {
-		pgdat->first_deferred_pfn = ULONG_MAX;
-		pgdat_resize_unlock(pgdat, &flags);
-		/* Retry only once. */
-		return first_deferred_pfn != ULONG_MAX;
+	/*
+	 * Initialize at least nr_pages_needed in section chunks.
+	 * If a section has less free memory than nr_pages_needed, the next
+	 * section will be also initalized.
+	 * Note, that it still does not guarantee that allocation of order can
+	 * be satisfied if the sections are fragmented because of memblock
+	 * allocations.
+	 */
+	for (spfn = first_deferred_pfn, epfn = SECTION_ALIGN_UP(spfn + 1);
+	     nr_pages < nr_pages_needed && spfn < zone_end_pfn(zone);
+	     spfn = epfn, epfn += PAGES_PER_SECTION) {
+		nr_pages += deferred_init_memmap_chunk(spfn, epfn, zone);
 	}
 
 	/*
-	 * Initialize and free pages in MAX_PAGE_ORDER sized increments so
-	 * that we can avoid introducing any issues with the buddy
-	 * allocator.
+	 * There were no pages to initialize and free which means the zone's
+	 * memory map is completely initialized.
 	 */
-	while (spfn < epfn) {
-		/* update our first deferred PFN for this section */
-		first_deferred_pfn = spfn;
-
-		nr_pages += deferred_init_maxorder(&i, zone, &spfn, &epfn);
-		touch_nmi_watchdog();
-
-		/* We should only stop along section boundaries */
-		if ((first_deferred_pfn ^ spfn) < PAGES_PER_SECTION)
-			continue;
-
-		/* If our quota has been met we can stop here */
-		if (nr_pages >= nr_pages_needed)
-			break;
-	}
+	pgdat->first_deferred_pfn = nr_pages ? spfn : ULONG_MAX;
 
-	pgdat->first_deferred_pfn = spfn;
 	pgdat_resize_unlock(pgdat, &flags);
 
 	return nr_pages > 0;
-- 
2.50.1
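
Putting the hunks together, the rewritten deferred_grow_zone() ends up
looking roughly like this sketch (context lines the diff does not show
are paraphrased from the quoted replies below; comments abridged):

	bool __init deferred_grow_zone(struct zone *zone, unsigned int order)
	{
		unsigned long nr_pages_needed = SECTION_ALIGN_UP(1 << order);
		pg_data_t *pgdat = zone->zone_pgdat;
		unsigned long first_deferred_pfn = pgdat->first_deferred_pfn;
		unsigned long spfn, epfn, flags;
		unsigned long nr_pages = 0;

		/* Only the last zone may have deferred pages */
		if (zone_end_pfn(zone) != pgdat_end_pfn(pgdat))
			return false;

		pgdat_resize_lock(pgdat, &flags);

		/* Someone else grew the zone while we waited for the lock */
		if (first_deferred_pfn != pgdat->first_deferred_pfn) {
			pgdat_resize_unlock(pgdat, &flags);
			return true;
		}

		/* Initialize at least nr_pages_needed in section chunks */
		for (spfn = first_deferred_pfn, epfn = SECTION_ALIGN_UP(spfn + 1);
		     nr_pages < nr_pages_needed && spfn < zone_end_pfn(zone);
		     spfn = epfn, epfn += PAGES_PER_SECTION) {
			nr_pages += deferred_init_memmap_chunk(spfn, epfn, zone);
		}

		/* Nothing initialized: the zone's memmap is already complete */
		pgdat->first_deferred_pfn = nr_pages ? spfn : ULONG_MAX;

		pgdat_resize_unlock(pgdat, &flags);

		return nr_pages > 0;
	}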
Re: [PATCH 1/4] mm/mm_init: use deferred_init_memmap_chunk() in deferred_grow_zone()
Posted by Wei Yang 1 month, 2 weeks ago
Hi, Mike

After going through the code again, I have some trivial thoughts to discuss
with you. If I am not right, please let me know.

On Mon, Aug 18, 2025 at 09:46:12AM +0300, Mike Rapoport wrote:
[...]
> bool __init deferred_grow_zone(struct zone *zone, unsigned int order)
> {
>-	unsigned long nr_pages_needed = ALIGN(1 << order, PAGES_PER_SECTION);
>+	unsigned long nr_pages_needed = SECTION_ALIGN_UP(1 << order);
> 	pg_data_t *pgdat = zone->zone_pgdat;
> 	unsigned long first_deferred_pfn = pgdat->first_deferred_pfn;
> 	unsigned long spfn, epfn, flags;
> 	unsigned long nr_pages = 0;
>-	u64 i = 0;
> 
> 	/* Only the last zone may have deferred pages */
> 	if (zone_end_pfn(zone) != pgdat_end_pfn(pgdat))
>@@ -2262,37 +2272,26 @@ bool __init deferred_grow_zone(struct zone *zone, unsigned int order)
> 		return true;
> 	}

In the file above this line, there is a comparison between first_deferred_pfn
and its original value after grabbing pgdat_resize_lock.

I am thinking of comparing first_deferred_pfn with ULONG_MAX, as is done in
deferred_init_memmap(). This would indicate that this zone has already been
completely initialized.

The current code guards this with spfn < zone_end_pfn(zone). Maybe a check
ahead would be clearer?

> 
>-	/* If the zone is empty somebody else may have cleared out the zone */
>-	if (!deferred_init_mem_pfn_range_in_zone(&i, zone, &spfn, &epfn,
>-						 first_deferred_pfn)) {
>-		pgdat->first_deferred_pfn = ULONG_MAX;
>-		pgdat_resize_unlock(pgdat, &flags);
>-		/* Retry only once. */
>-		return first_deferred_pfn != ULONG_MAX;
>+	/*
>+	 * Initialize at least nr_pages_needed in section chunks.
>+	 * If a section has less free memory than nr_pages_needed, the next
>+	 * section will be also initalized.
>+	 * Note, that it still does not guarantee that allocation of order can
>+	 * be satisfied if the sections are fragmented because of memblock
>+	 * allocations.
>+	 */
>+	for (spfn = first_deferred_pfn, epfn = SECTION_ALIGN_UP(spfn + 1);

I expect first_deferred_pfn to be section aligned, so epfn += PAGES_PER_SECTION
is fine?

Maybe I missed something.

>+	     nr_pages < nr_pages_needed && spfn < zone_end_pfn(zone);
>+	     spfn = epfn, epfn += PAGES_PER_SECTION) {
>+		nr_pages += deferred_init_memmap_chunk(spfn, epfn, zone);
> 	}
> 
> 	/*
>-	 * Initialize and free pages in MAX_PAGE_ORDER sized increments so
>-	 * that we can avoid introducing any issues with the buddy
>-	 * allocator.
>+	 * There were no pages to initialize and free which means the zone's
>+	 * memory map is completely initialized.
> 	 */
>-	while (spfn < epfn) {
>-		/* update our first deferred PFN for this section */
>-		first_deferred_pfn = spfn;
>-
>-		nr_pages += deferred_init_maxorder(&i, zone, &spfn, &epfn);
>-		touch_nmi_watchdog();
>-
>-		/* We should only stop along section boundaries */
>-		if ((first_deferred_pfn ^ spfn) < PAGES_PER_SECTION)
>-			continue;
>-
>-		/* If our quota has been met we can stop here */
>-		if (nr_pages >= nr_pages_needed)
>-			break;
>-	}
>+	pgdat->first_deferred_pfn = nr_pages ? spfn : ULONG_MAX;

If we come here because spfn >= zone_end_pfn(zone), first_deferred_pfn is left
at a "valid" value and deferred_init_memmap() will try to do its job. But
actually there is nothing left to initialize.

For this case, I suggest setting it to ULONG_MAX too. But this is really a
corner case.

> 
>-	pgdat->first_deferred_pfn = spfn;
> 	pgdat_resize_unlock(pgdat, &flags);
> 
> 	return nr_pages > 0;
>-- 
>2.50.1
>

-- 
Wei Yang
Help you, Help me
Re: [PATCH 1/4] mm/mm_init: use deferred_init_memmap_chunk() in deferred_grow_zone()
Posted by Mike Rapoport 1 month, 2 weeks ago
On Tue, Aug 19, 2025 at 09:52:23AM +0000, Wei Yang wrote:
> Hi, Mike
> 
> After going through the code again, I have some trivial thoughts to discuss
> with you. If I am not right, please let me know.
> 
> On Mon, Aug 18, 2025 at 09:46:12AM +0300, Mike Rapoport wrote:
> [...]
> > bool __init deferred_grow_zone(struct zone *zone, unsigned int order)
> > {
> >-	unsigned long nr_pages_needed = ALIGN(1 << order, PAGES_PER_SECTION);
> >+	unsigned long nr_pages_needed = SECTION_ALIGN_UP(1 << order);
> > 	pg_data_t *pgdat = zone->zone_pgdat;
> > 	unsigned long first_deferred_pfn = pgdat->first_deferred_pfn;
> > 	unsigned long spfn, epfn, flags;
> > 	unsigned long nr_pages = 0;
> >-	u64 i = 0;
> > 
> > 	/* Only the last zone may have deferred pages */
> > 	if (zone_end_pfn(zone) != pgdat_end_pfn(pgdat))
> >@@ -2262,37 +2272,26 @@ bool __init deferred_grow_zone(struct zone *zone, unsigned int order)
> > 		return true;
> > 	}
> 
> In the file above this line, there is a comparison between first_deferred_pfn
> and its original value after grabbing pgdat_resize_lock.

Do you mean this one:

	if (first_deferred_pfn != pgdat->first_deferred_pfn) {
		pgdat_resize_unlock(pgdat, &flags);
		return true;
	}
 
> I am thinking of comparing first_deferred_pfn with ULONG_MAX, as is done in
> deferred_init_memmap(). This would indicate that this zone has already been
> completely initialized.

It may be that another CPU ran deferred_grow_zone() and won the race for the
resize lock. Then pgdat->first_deferred_pfn will be larger than
first_deferred_pfn, but still the entire zone would not be initialized.
 
> The current code guards this with spfn < zone_end_pfn(zone). Maybe a check
> ahead would be clearer?

Not sure I follow you here. The check that we don't pass zone_end_pfn is
inside the loop for every section we initialize.
 
> > 
> >-	/* If the zone is empty somebody else may have cleared out the zone */
> >-	if (!deferred_init_mem_pfn_range_in_zone(&i, zone, &spfn, &epfn,
> >-						 first_deferred_pfn)) {
> >-		pgdat->first_deferred_pfn = ULONG_MAX;
> >-		pgdat_resize_unlock(pgdat, &flags);
> >-		/* Retry only once. */
> >-		return first_deferred_pfn != ULONG_MAX;
> >+	/*
> >+	 * Initialize at least nr_pages_needed in section chunks.
> >+	 * If a section has less free memory than nr_pages_needed, the next
> >+	 * section will be also initalized.
> >+	 * Note, that it still does not guarantee that allocation of order can
> >+	 * be satisfied if the sections are fragmented because of memblock
> >+	 * allocations.
> >+	 */
> >+	for (spfn = first_deferred_pfn, epfn = SECTION_ALIGN_UP(spfn + 1);
> 
> I expect first_deferred_pfn to be section aligned, so epfn += PAGES_PER_SECTION
> is fine?

It should be, but I'd prefer to be on the safe side and keep it this way.
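
For illustration: SECTION_ALIGN_UP() is defined in include/linux/mmzone.h
(modulo exact formatting) as

	#define SECTION_ALIGN_UP(pfn) \
		(((pfn) + PAGES_PER_SECTION - 1) & PAGE_SECTION_MASK)

so for a section-aligned spfn the two forms agree:
SECTION_ALIGN_UP(spfn + 1) == spfn + PAGES_PER_SECTION. For an unaligned
spfn, SECTION_ALIGN_UP(spfn + 1) still rounds up to the next section
boundary, whereas spfn + PAGES_PER_SECTION would yield an unaligned epfn;
that is the "safe side" referred to above.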
 
> Maybe I missed something.
> 
> >+	     nr_pages < nr_pages_needed && spfn < zone_end_pfn(zone);
> >+	     spfn = epfn, epfn += PAGES_PER_SECTION) {
> >+		nr_pages += deferred_init_memmap_chunk(spfn, epfn, zone);
> > 	}
> > 
> > 	/*
> >-	 * Initialize and free pages in MAX_PAGE_ORDER sized increments so
> >-	 * that we can avoid introducing any issues with the buddy
> >-	 * allocator.
> >+	 * There were no pages to initialize and free which means the zone's
> >+	 * memory map is completely initialized.
> > 	 */
> >-	while (spfn < epfn) {
> >-		/* update our first deferred PFN for this section */
> >-		first_deferred_pfn = spfn;
> >-
> >-		nr_pages += deferred_init_maxorder(&i, zone, &spfn, &epfn);
> >-		touch_nmi_watchdog();
> >-
> >-		/* We should only stop along section boundaries */
> >-		if ((first_deferred_pfn ^ spfn) < PAGES_PER_SECTION)
> >-			continue;
> >-
> >-		/* If our quota has been met we can stop here */
> >-		if (nr_pages >= nr_pages_needed)
> >-			break;
> >-	}
> >+	pgdat->first_deferred_pfn = nr_pages ? spfn : ULONG_MAX;
> 
> If we come here because spfn >= zone_end_pfn(zone), first_deferred_pfn is left
> at a "valid" value and deferred_init_memmap() will try to do its job. But
> actually there is nothing left to initialize.

We run a thread for each node with memory anyway. In the very unlikely case
that we've completely initialized a deferred zone, that thread will finish
much faster :)
 
> For this case, I suggest setting it to ULONG_MAX too. But this is really a
> corner case.

-- 
Sincerely yours,
Mike.
Re: [PATCH 1/4] mm/mm_init: use deferred_init_memmap_chunk() in deferred_grow_zone()
Posted by Wei Yang 1 month, 2 weeks ago
On Tue, Aug 19, 2025 at 01:54:46PM +0300, Mike Rapoport wrote:
>On Tue, Aug 19, 2025 at 09:52:23AM +0000, Wei Yang wrote:
>> Hi, Mike
>> 
>> After going through the code again, I have some trivial thoughts to discuss
>> with you. If I am not right, please let me know.
>> 
>> On Mon, Aug 18, 2025 at 09:46:12AM +0300, Mike Rapoport wrote:
>> [...]
>> > bool __init deferred_grow_zone(struct zone *zone, unsigned int order)
>> > {
>> >-	unsigned long nr_pages_needed = ALIGN(1 << order, PAGES_PER_SECTION);
>> >+	unsigned long nr_pages_needed = SECTION_ALIGN_UP(1 << order);
>> > 	pg_data_t *pgdat = zone->zone_pgdat;
>> > 	unsigned long first_deferred_pfn = pgdat->first_deferred_pfn;
>> > 	unsigned long spfn, epfn, flags;
>> > 	unsigned long nr_pages = 0;
>> >-	u64 i = 0;
>> > 
>> > 	/* Only the last zone may have deferred pages */
>> > 	if (zone_end_pfn(zone) != pgdat_end_pfn(pgdat))
>> >@@ -2262,37 +2272,26 @@ bool __init deferred_grow_zone(struct zone *zone, unsigned int order)
>> > 		return true;
>> > 	}
>> 
>> In the file above this line, there is a compare between first_deferred_pfn and
>> its original value after grab pgdat_resize_lock.
>
>Do you mean this one:
>
>	if (first_deferred_pfn != pgdat->first_deferred_pfn) {
>		pgdat_resize_unlock(pgdat, &flags);
>		return true;
>	}
> 

Yes.

I am thinking something like this:

 	if (first_deferred_pfn != pgdat->first_deferred_pfn || 
	    first_deferred_pfn == ULONG_MAX)

This means

  * someone else has grown the zone before we grabbed the lock
  * or the whole zone has already been initialized

>> I am thinking of comparing first_deferred_pfn with ULONG_MAX, as is done in
>> deferred_init_memmap(). This would indicate that this zone has already been
>> completely initialized.
>
>It may be that another CPU ran deferred_grow_zone() and won the race for the
>resize lock. Then pgdat->first_deferred_pfn will be larger than
>first_deferred_pfn, but still the entire zone would not be initialized.
> 
>> The current code guards this with spfn < zone_end_pfn(zone). Maybe a check
>> ahead would be clearer?
>
>Not sure I follow you here. The check that we don't pass zone_end_pfn is
>inside the loop for every section we initialize.
> 

In case the zone has been totally initialized, first_deferred_pfn = ULONG_MAX.

Then we come to the loop with the initial state:

    spfn = ULONG_MAX
    epfn = 0 (which wraps around)

And the loop condition check (spfn < zone_end_pfn(zone)) is false, so the loop
is skipped. This is how we handle a fully initialized zone now.

Would this be a little uncommon?
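
The wrap-around is easy to verify in a stand-alone user-space sketch (the
macros mirror mmzone.h in spirit; the PAGES_PER_SECTION value here is an
arbitrary power of two chosen for the example):

	#include <limits.h>
	#include <stdio.h>

	#define PAGES_PER_SECTION	(1UL << 15)	/* stand-in value */
	#define PAGE_SECTION_MASK	(~(PAGES_PER_SECTION - 1))
	#define SECTION_ALIGN_UP(pfn) \
		(((pfn) + PAGES_PER_SECTION - 1) & PAGE_SECTION_MASK)

	int main(void)
	{
		unsigned long spfn = ULONG_MAX;
		/* spfn + 1 wraps to 0, and SECTION_ALIGN_UP(0) is 0 */
		unsigned long epfn = SECTION_ALIGN_UP(spfn + 1);

		printf("spfn=%lu epfn=%lu\n", spfn, epfn);	/* epfn=0 */
		return 0;
	}

so the loop body is indeed never entered in that case.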

>> > 
>> >-	/* If the zone is empty somebody else may have cleared out the zone */
>> >-	if (!deferred_init_mem_pfn_range_in_zone(&i, zone, &spfn, &epfn,
>> >-						 first_deferred_pfn)) {
>> >-		pgdat->first_deferred_pfn = ULONG_MAX;
>> >-		pgdat_resize_unlock(pgdat, &flags);
>> >-		/* Retry only once. */
>> >-		return first_deferred_pfn != ULONG_MAX;
>> >+	/*
>> >+	 * Initialize at least nr_pages_needed in section chunks.
>> >+	 * If a section has less free memory than nr_pages_needed, the next
>> >+	 * section will be also initalized.

Nit, one typo here. s/initalized/initialized/

>> >+	 * Note, that it still does not guarantee that allocation of order can
>> >+	 * be satisfied if the sections are fragmented because of memblock
>> >+	 * allocations.
>> >+	 */
>> >+	for (spfn = first_deferred_pfn, epfn = SECTION_ALIGN_UP(spfn + 1);

-- 
Wei Yang
Help you, Help me
Re: [PATCH 1/4] mm/mm_init: use deferred_init_memmap_chunk() in deferred_grow_zone()
Posted by Mike Rapoport 1 month, 2 weeks ago
On Tue, Aug 19, 2025 at 11:51:58PM +0000, Wei Yang wrote:
> On Tue, Aug 19, 2025 at 01:54:46PM +0300, Mike Rapoport wrote:
> >On Tue, Aug 19, 2025 at 09:52:23AM +0000, Wei Yang wrote:
> >> Hi, Mike
> >> 
> >> After going through the code again, I have some trivial thoughts to discuss
> >> with you. If I am not right, please let me know.
> >> 
> >> On Mon, Aug 18, 2025 at 09:46:12AM +0300, Mike Rapoport wrote:
> >> 
> >> In the file above this line, there is a comparison between first_deferred_pfn
> >> and its original value after grabbing pgdat_resize_lock.
> >
> >Do you mean this one:
> >
> >	if (first_deferred_pfn != pgdat->first_deferred_pfn) {
> >		pgdat_resize_unlock(pgdat, &flags);
> >		return true;
> >	}
> > 
> 
> Yes.
> 
> I am thinking something like this:
> 
>  	if (first_deferred_pfn != pgdat->first_deferred_pfn || 
> 	    first_deferred_pfn == ULONG_MAX)
> 
> This means
> 
>   * someone else has grown the zone before we grabbed the lock
>   * or the whole zone has already been initialized

deferred_grow_zone() can be called only before deferred_init_memmap(), so
it's very unlikely that a zone will be completely initialized here. We
start with at least one section in each deferred zone, and every call to
deferred_grow_zone() adds a section.

And even if that were the case and first_deferred_pfn is ULONG_MAX, the loop
below will end immediately, so I don't think an additional condition here
would be helpful.
 
> >> I am thinking of comparing first_deferred_pfn with ULONG_MAX, as is done in
> >> deferred_init_memmap(). This would indicate that this zone has already been
> >> completely initialized.
> >
> >It may be that another CPU ran deferred_grow_zone() and won the race for the
> >resize lock. Then pgdat->first_deferred_pfn will be larger than
> >first_deferred_pfn, but still the entire zone would not be initialized.
> > 
> >> The current code guards this with spfn < zone_end_pfn(zone). Maybe a check
> >> ahead would be clearer?
> >
> >Not sure I follow you here. The check that we don't pass zone_end_pfn is
> >inside the loop for every section we initialize.
> > 
> 
> In case the zone has been totally initialized, first_deferred_pfn = ULONG_MAX.
> 
> Then we come to the loop with the initial state:
> 
>     spfn = ULONG_MAX
>     epfn = 0 (which wraps around)
> 
> And the loop condition check (spfn < zone_end_pfn(zone)) is false, so the loop
> is skipped. This is how we handle a fully initialized zone now.
> 
> Would this be a little uncommon?

Why? The important thing is (spfn < zone_end_pfn(zone)) is false, and I
think that's good enough.
 
> >> > 
> >> >-	/* If the zone is empty somebody else may have cleared out the zone */
> >> >-	if (!deferred_init_mem_pfn_range_in_zone(&i, zone, &spfn, &epfn,
> >> >-						 first_deferred_pfn)) {
> >> >-		pgdat->first_deferred_pfn = ULONG_MAX;
> >> >-		pgdat_resize_unlock(pgdat, &flags);
> >> >-		/* Retry only once. */
> >> >-		return first_deferred_pfn != ULONG_MAX;
> >> >+	/*
> >> >+	 * Initialize at least nr_pages_needed in section chunks.
> >> >+	 * If a section has less free memory than nr_pages_needed, the next
> >> >+	 * section will be also initalized.
> 
> Nit, one typo here. s/initalized/initialized/

Thanks, will fix.
 
-- 
Sincerely yours,
Mike.
Re: [PATCH 1/4] mm/mm_init: use deferred_init_memmap_chunk() in deferred_grow_zone()
Posted by Wei Yang 1 month, 2 weeks ago
On Wed, Aug 20, 2025 at 12:20:10PM +0300, Mike Rapoport wrote:
>On Tue, Aug 19, 2025 at 11:51:58PM +0000, Wei Yang wrote:
>> On Tue, Aug 19, 2025 at 01:54:46PM +0300, Mike Rapoport wrote:
>> >On Tue, Aug 19, 2025 at 09:52:23AM +0000, Wei Yang wrote:
>> >> Hi, Mike
>> >> 
>> >> After going through the code again, I have some trivial thoughts to discuss
>> >> with you. If I am not right, please let me know.
>> >> 
>> >> On Mon, Aug 18, 2025 at 09:46:12AM +0300, Mike Rapoport wrote:
>> >> 
>> >> In the file above this line, there is a comparison between first_deferred_pfn
>> >> and its original value after grabbing pgdat_resize_lock.
>> >
>> >Do you mean this one:
>> >
>> >	if (first_deferred_pfn != pgdat->first_deferred_pfn) {
>> >		pgdat_resize_unlock(pgdat, &flags);
>> >		return true;
>> >	}
>> > 
>> 
>> Yes.
>> 
>> I am thinking something like this:
>> 
>>  	if (first_deferred_pfn != pgdat->first_deferred_pfn || 
>> 	    first_deferred_pfn == ULONG_MAX)
>> 
>> This means
>> 
>>   * someone else has grown the zone before we grabbed the lock
>>   * or the whole zone has already been initialized
>
>deferred_grow_zone() can be called only before deferred_init_memmap(), so
>it's very unlikely that a zone will be completely initialized here. We
>start with at least one section in each deferred zone, and every call to
>deferred_grow_zone() adds a section.
>
>And even if that were the case and first_deferred_pfn is ULONG_MAX, the loop
>below will end immediately, so I don't think an additional condition here
>would be helpful.
> 

I think you are right.

>> >> I am thinking of comparing first_deferred_pfn with ULONG_MAX, as is done in
>> >> deferred_init_memmap(). This would indicate that this zone has already been
>> >> completely initialized.
>> >
>> >It may be that another CPU ran deferred_grow_zone() and won the race for the
>> >resize lock. Then pgdat->first_deferred_pfn will be larger than
>> >first_deferred_pfn, but still the entire zone would not be initialized.
>> > 
>> >> The current code guards this with spfn < zone_end_pfn(zone). Maybe a check
>> >> ahead would be clearer?
>> >
>> >Not sure I follow you here. The check that we don't pass zone_end_pfn is
>> >inside the loop for every section we initialize.
>> > 
>> 
>> In case the zone has been totally initialized, first_deferred_pfn = ULONG_MAX.
>> 
>> Then we come to the loop with the initial state:
>> 
>>     spfn = ULONG_MAX
>>     epfn = 0 (which wraps around)
>> 
>> And the loop condition check (spfn < zone_end_pfn(zone)) is false, so the loop
>> is skipped. This is how we handle a fully initialized zone now.
>> 
>> Would this be a little uncommon?
>
>Why? The important thing is (spfn < zone_end_pfn(zone)) is false, and I
>think that's good enough.
> 

Well, nothing else from me.

>> >> > 
>> >> >-	/* If the zone is empty somebody else may have cleared out the zone */
>> >> >-	if (!deferred_init_mem_pfn_range_in_zone(&i, zone, &spfn, &epfn,
>> >> >-						 first_deferred_pfn)) {
>> >> >-		pgdat->first_deferred_pfn = ULONG_MAX;
>> >> >-		pgdat_resize_unlock(pgdat, &flags);
>> >> >-		/* Retry only once. */
>> >> >-		return first_deferred_pfn != ULONG_MAX;
>> >> >+	/*
>> >> >+	 * Initialize at least nr_pages_needed in section chunks.
>> >> >+	 * If a section has less free memory than nr_pages_needed, the next
>> >> >+	 * section will be also initalized.
>> 
>> Nit, one typo here. s/initalized/initialized/
>
>Thanks, will fix.
> 
>-- 
>Sincerely yours,
>Mike.

-- 
Wei Yang
Help you, Help me
Re: [PATCH 1/4] mm/mm_init: use deferred_init_memmap_chunk() in deferred_grow_zone()
Posted by David Hildenbrand 1 month, 2 weeks ago
On 18.08.25 08:46, Mike Rapoport wrote:
> From: "Mike Rapoport (Microsoft)" <rppt@kernel.org>
> 
> deferred_grow_zone() initializes one or more sections in the memory map
> if buddy runs out of initialized struct pages when
> CONFIG_DEFERRED_STRUCT_PAGE_INIT is enabled.
> 
> It loops through memblock regions and initializes and frees pages in
> MAX_ORDER_NR_PAGES chunks.
> 
> Essentially the same loop is implemented in deferred_init_memmap_chunk();
> the only actual difference is that deferred_init_memmap_chunk() does not
> count initialized pages.
> 
> Make deferred_init_memmap_chunk() count the initialized pages and return
> their number, wrap it with deferred_init_memmap_job() for multithreaded
> initialization with padata_do_multithreaded() and replace open-coded
> initialization of struct pages in deferred_grow_zone() with a call to
> deferred_init_memmap_chunk().
> 
> Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org>

Reviewed-by: David Hildenbrand <david@redhat.com>

-- 
Cheers

David / dhildenb