From nobody Mon May 25 00:09:00 2026 Received: from mgamail.intel.com (mgamail.intel.com [198.175.65.11]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 611A43B27E3 for ; Wed, 20 May 2026 09:35:16 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=198.175.65.11 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1779269717; cv=none; b=UH+DUGG0aNOm0Wn+d3zN1J+UJFie+JRLWi28IfXTqjgEMjfnwYafMpJJ2jCRbN39ef8HmgPYURPDhOzCeqSavQmz3OGThixjLTjMmuB13N58uLdKiX7EXPE9iHoqSyLzUetlfg5+gfKbcdXXOkGUzWwPD1ObW9j6b8BgTwMKUWY= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1779269717; c=relaxed/simple; bh=H6pG8TzQarYorWbLMhJac6ahvD81snEqzPj7eqL2S7s=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=p6RJNu3Bo8+o3mAAg9CrgZJLZZyjjGu5/CJdYHJhtHtkAyav7h8ouaMF6UcCLvKJyjdujHNEUYaCE1dHtKuAmW4kaTlTKHYPxlz6NxFQtx4ae0jHX7q1Kaq9QjTO0vpdORwpa3tq30i/7ARXUbO0dXEnx7myz0znk3tc0LHSifo= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=intel.com; spf=pass smtp.mailfrom=intel.com; dkim=pass (2048-bit key) header.d=intel.com header.i=@intel.com header.b=Qztdi6D5; arc=none smtp.client-ip=198.175.65.11 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=intel.com Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=intel.com Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=intel.com header.i=@intel.com header.b="Qztdi6D5" DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=intel.com; i=@intel.com; q=dns/txt; s=Intel; t=1779269716; x=1810805716; h=from:to:cc:subject:date:message-id:in-reply-to: references:mime-version:content-transfer-encoding; bh=H6pG8TzQarYorWbLMhJac6ahvD81snEqzPj7eqL2S7s=; 
b=Qztdi6D5tef/IEJ+dwBY+zn1++smYy12cLubJGwNm+FiJhoor/McMSVw yB89oBbVT70owOg4YbJOeCCW2eZLrVoGcmRblzGDY/e2Z4i6lzdc6udOk ZwtugSc/yp/vqAKno4P/dyUOmATSQjP0NEonTT+Yh5+wLaPUkS7dlBw5z 1YafHYOh6DC1jFk9cXNqQPgeCN+AZndbABbK+oiyR/JP/yEvTCEuiPepp 84IGm79+/q6xVNwuT4qzZ50CeQ8d2IyMBy3gv07DCqldfIpzL1dXkZp1J 0Gc6TCyjdlrq7lCAkdPM72U4vcxp7XabbdMhrP4F+mMdS9feGl2HPGp/O w==; X-CSE-ConnectionGUID: rHDBKwGnSdaqysrGMsQFrg== X-CSE-MsgGUID: SgW3aLi8QNyuifGGgq/oIA== X-IronPort-AV: E=McAfee;i="6800,10657,11791"; a="90461007" X-IronPort-AV: E=Sophos;i="6.23,244,1770624000"; d="scan'208";a="90461007" Received: from fmviesa004.fm.intel.com ([10.60.135.144]) by orvoesa103.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 20 May 2026 02:35:16 -0700 X-CSE-ConnectionGUID: XgoZLKfWQZmCeiLlpJwhWg== X-CSE-MsgGUID: e9a92kdZRBiXWkJqqNKp7Q== X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="6.23,244,1770624000"; d="scan'208";a="241925189" Received: from spr10.sh.intel.com (HELO localhost) ([10.239.23.75]) by fmviesa004.fm.intel.com with ESMTP; 20 May 2026 02:35:13 -0700 From: Yuan Liu To: David Hildenbrand , Oscar Salvador , Mike Rapoport , Wei Yang Cc: linux-mm@kvack.org, Yong Hu , Nanhai Zou , Yuan Liu , Tim Chen , Qiuxu Zhuo , Yu C Chen , Pan Deng , Tianyou Li , Chen Zhang , Jason Zeng , linux-kernel@vger.kernel.org Subject: [PATCH v5 1/5] mm: move mirrored memory overlap checking to the outer loop Date: Wed, 20 May 2026 05:34:53 -0400 Message-ID: <20260520093457.3719960-2-yuan1.liu@intel.com> X-Mailer: git-send-email 2.47.3 In-Reply-To: <20260520093457.3719960-1-yuan1.liu@intel.com> References: <20260520093457.3719960-1-yuan1.liu@intel.com> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Move the overlap memmap initialization check from memmap_init_range() to memmap_init(), and replace the per-PFN check with a memblock-based check. 
Reviewed-by: Wei Yang Reviewed-by: Jason Zeng Signed-off-by: Yuan Liu --- mm/mm_init.c | 29 +++++------------------------ 1 file changed, 5 insertions(+), 24 deletions(-) diff --git a/mm/mm_init.c b/mm/mm_init.c index f9f8e1af921c..24e103a402b0 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -783,28 +783,6 @@ void __meminit init_deferred_page(unsigned long pfn, i= nt nid) __init_deferred_page(pfn, nid); } =20 -/* If zone is ZONE_MOVABLE but memory is mirrored, it is an overlapped ini= t */ -static bool __meminit -overlap_memmap_init(unsigned long zone, unsigned long *pfn) -{ - static struct memblock_region *r __meminitdata; - - if (mirrored_kernelcore && zone =3D=3D ZONE_MOVABLE) { - if (!r || *pfn >=3D memblock_region_memory_end_pfn(r)) { - for_each_mem_region(r) { - if (*pfn < memblock_region_memory_end_pfn(r)) - break; - } - } - if (*pfn >=3D memblock_region_memory_base_pfn(r) && - memblock_is_mirror(r)) { - *pfn =3D memblock_region_memory_end_pfn(r); - return true; - } - } - return false; -} - /* * Only struct pages that correspond to ranges defined by memblock.memory * are zeroed and initialized by going through __init_single_page() during @@ -891,8 +869,6 @@ void __meminit memmap_init_range(unsigned long size, in= t nid, unsigned long zone * function. They do not exist on hotplugged memory. 
*/ if (context =3D=3D MEMINIT_EARLY) { - if (overlap_memmap_init(zone, &pfn)) - continue; if (defer_init(nid, pfn, zone_end_pfn)) { deferred_struct_pages =3D true; break; @@ -956,6 +932,7 @@ static void __init memmap_init(void) int i, j, zone_id =3D 0, nid; =20 for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) { + struct memblock_region *r =3D &memblock.memory.regions[i]; struct pglist_data *node =3D NODE_DATA(nid); =20 for (j =3D 0; j < MAX_NR_ZONES; j++) { @@ -964,6 +941,10 @@ static void __init memmap_init(void) if (!populated_zone(zone)) continue; =20 + if (mirrored_kernelcore && j =3D=3D ZONE_MOVABLE && + memblock_is_mirror(r)) + continue; + memmap_init_zone_range(zone, start_pfn, end_pfn, &hole_pfn); zone_id =3D j; --=20 2.47.3 From nobody Mon May 25 00:09:00 2026 Received: from mgamail.intel.com (mgamail.intel.com [192.198.163.19]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id E7A8A3C7E19 for ; Wed, 20 May 2026 09:35:21 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=192.198.163.19 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1779269724; cv=none; b=Metuw5PXgAs4M/+Nyr8wLdYLfVQ+rUv6dn54X8EZr95ca3ngW4Rkh5SsX3pXH8UBKgbacqdA6bxCbjJ2/tUwSmw/M401PGOn5deLik8kFheykGTGJLaZMK7xkvEEnT7lLMbMVJgqU7iQMVaFgp7/arwuhGh9YACzq+rwRAuv/zg= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1779269724; c=relaxed/simple; bh=6ohoYsGOhjBP3A4hNjgXYU3ZasHnjBPl+8YWmnesUfo=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=jDMZaqi042dnMNcKHFv6DVkPh2VddgGxXSdxEpXglby1pwpEtunDd18uwdT3MN5K0a4hBNeVjKFhsJtsTgswyV1ETzN2dhcAURU0PbJds+UvRFAuMvS3ULbYqe7JREYHhLi2ZwrC1h4Wf16aGJvMFpvSPUewyt7kbCd8w7/dZ0U= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=intel.com; spf=pass smtp.mailfrom=intel.com; 
dkim=pass (2048-bit key) header.d=intel.com header.i=@intel.com header.b=mu+OIuz9; arc=none smtp.client-ip=192.198.163.19 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=intel.com Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=intel.com Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=intel.com header.i=@intel.com header.b="mu+OIuz9" DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=intel.com; i=@intel.com; q=dns/txt; s=Intel; t=1779269722; x=1810805722; h=from:to:cc:subject:date:message-id:in-reply-to: references:mime-version:content-transfer-encoding; bh=6ohoYsGOhjBP3A4hNjgXYU3ZasHnjBPl+8YWmnesUfo=; b=mu+OIuz9IT27iqgkiXc8LvFCp3bT5i6vU2qpiMIgzeWDN2O3w82sIKRz Z63I9pvySq8/zpXyMeATZA3eUq38VX0xEuG4vEi1bzw5zCesOE/u3M7eA lJumVcu56bm+Fr7SL3QHRkiip78ghVHz4m+mwNqTAb4LQqDlT6SZDQZev vPFyJBvTETRdr6rIFZnBNn/hA7BRG9N6tucynovMawBn/XH5aE5VBdhGU qb4Nu3kQjFFUVsEkfeoSuUledx07vPz+7rQfKRS9JC44O7jbuRHFs13px bnTyrZzG6pXXhQO85apx0Am/MP1P0fTBmtxspmdmG4ANFOJOzELS2JFYH A==; X-CSE-ConnectionGUID: YtjP8j67QPmLIGEryhdQGw== X-CSE-MsgGUID: M4cQ6O8iTJeq3z8tymoQWA== X-IronPort-AV: E=McAfee;i="6800,10657,11791"; a="79187298" X-IronPort-AV: E=Sophos;i="6.23,244,1770624000"; d="scan'208";a="79187298" Received: from orviesa001.jf.intel.com ([10.64.159.141]) by fmvoesa113.fm.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 20 May 2026 02:35:21 -0700 X-CSE-ConnectionGUID: vfvG5KIgSrK0SiSF4N0twQ== X-CSE-MsgGUID: JQszQG3USpew1dD8Mt0OkQ== X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="6.23,244,1770624000"; d="scan'208";a="278207294" Received: from spr10.sh.intel.com (HELO localhost) ([10.239.23.75]) by orviesa001.jf.intel.com with ESMTP; 20 May 2026 02:35:18 -0700 From: Yuan Liu To: David Hildenbrand , Oscar Salvador , Mike Rapoport , Wei Yang Cc: linux-mm@kvack.org, Yong Hu , Nanhai Zou , Yuan Liu , Tim Chen , Qiuxu Zhuo , Yu C Chen , Pan Deng , Tianyou Li , Chen Zhang , Jason Zeng , 
linux-kernel@vger.kernel.org Subject: [PATCH v5 2/5] mm: skip non-mirrored ZONE_NORMAL memory map init when kernelcore=mirror Date: Wed, 20 May 2026 05:34:54 -0400 Message-ID: <20260520093457.3719960-3-yuan1.liu@intel.com> X-Mailer: git-send-email 2.47.3 In-Reply-To: <20260520093457.3719960-1-yuan1.liu@intel.com> References: <20260520093457.3719960-1-yuan1.liu@intel.com> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Mirrored regions are already skipped when initializing ZONE_MOVABLE, but overlapping PFNs can still be initialized from the ZONE_NORMAL path when ZONE_MOVABLE is present on the node. When zone_movable_pfn[nid] is set, skip ZONE_NORMAL initialization for non-mirrored regions and keep skipping mirrored regions for ZONE_MOVABLE. Reviewed-by: Wei Yang Reviewed-by: Jason Zeng Co-developed-by: Wei Yang Signed-off-by: Wei Yang Signed-off-by: Yuan Liu --- mm/mm_init.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/mm/mm_init.c b/mm/mm_init.c index 24e103a402b0..2a5ac175d5dd 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -941,9 +941,18 @@ static void __init memmap_init(void) if (!populated_zone(zone)) continue; =20 - if (mirrored_kernelcore && j =3D=3D ZONE_MOVABLE && - memblock_is_mirror(r)) - continue; + if (mirrored_kernelcore) { + /* + * Avoid double initialization of PFNs that overlap + * between Normal and Movable zones. 
+ */ + if (j =3D=3D ZONE_NORMAL && !memblock_is_mirror(r) && + zone_movable_pfn[nid]) + continue; + + if (j =3D=3D ZONE_MOVABLE && memblock_is_mirror(r)) + continue; + } =20 memmap_init_zone_range(zone, start_pfn, end_pfn, &hole_pfn); --=20 2.47.3 From nobody Mon May 25 00:09:00 2026 Received: from mgamail.intel.com (mgamail.intel.com [198.175.65.11]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 620D63C9EF3 for ; Wed, 20 May 2026 09:35:26 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=198.175.65.11 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1779269727; cv=none; b=ghP+SgRctr+IKqKnWRaRBRKHP1iSD9JFsu5NeT6vZ1ubsW8ZR8SFTk0LmrkwSArXjuk6GOzisQxnaQJYP8h04YjtKrF+B56mQk81KbU4eqh5zt0Rx8FD9pavWOdMgvh7iOAZEUucZl2syY1D5GF8iRPBzDtaUUwPhMYq6eczzQ8= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1779269727; c=relaxed/simple; bh=MqIJN0Fy32kCmW8Oj4ICCcOYTdwWCmyNlkmwYInBsYw=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=Xp5R9Ied/hmeYFK2KAL4GhVd1C/uvlgicLb1A9RZPAjx6+RkDpDoVnlNPgbNVmzwusQS/duPn7zsC8IHrlMIqyTHolWsZyKHCFY7wF8HNs9ww8GhVGsBvsHDajpmcV6YefaJ+WQRdN0ID/ZqmYQtOd3HDYo6WsKCxzBeqAhYCHk= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=intel.com; spf=pass smtp.mailfrom=intel.com; dkim=pass (2048-bit key) header.d=intel.com header.i=@intel.com header.b=n3ft4QHB; arc=none smtp.client-ip=198.175.65.11 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=intel.com Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=intel.com Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=intel.com header.i=@intel.com header.b="n3ft4QHB" DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=intel.com; i=@intel.com; 
q=dns/txt; s=Intel; t=1779269726; x=1810805726; h=from:to:cc:subject:date:message-id:in-reply-to: references:mime-version:content-transfer-encoding; bh=MqIJN0Fy32kCmW8Oj4ICCcOYTdwWCmyNlkmwYInBsYw=; b=n3ft4QHBNppf+YbaMK+h8sewuUSIJRUXzfSE0razyy/ocu/AO5sljWyg w0RL4iN7nzAVhH07x0a71T9988m7wihwk2VaIwOQZPOm31NZ6DGZX482A 8kSMbJcUDn7ngUxBrUQg2hB7RYq+fiTta3UB7sQpxQCWM3xisDfoC/XRW xseQrWNDr7q1bmFVYRjvVzRkIzt+3LBZqOa9lsVsAcgEoeTsNuDLzbgmG ef4K3foN2narDdHU1SkrWUrsSq50WghIQ2/IclYVz2lOMWcFtv2/Qjd/+ KfNN62fNZolwAGcdNrfLenLqHIGFBawqbdiTx3/I0K0N9JFtRzoXpcGgn A==; X-CSE-ConnectionGUID: PT1QMGoiSXOp3VhU0LZdow== X-CSE-MsgGUID: w4kRk2ARTsOZw6OQ1xXTaQ== X-IronPort-AV: E=McAfee;i="6800,10657,11791"; a="90461032" X-IronPort-AV: E=Sophos;i="6.23,244,1770624000"; d="scan'208";a="90461032" Received: from fmviesa004.fm.intel.com ([10.60.135.144]) by orvoesa103.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 20 May 2026 02:35:26 -0700 X-CSE-ConnectionGUID: 3NbMSwL1QkG0TB/48wOTEQ== X-CSE-MsgGUID: J2pJTsYSS1exCHtEuEvOGA== X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="6.23,244,1770624000"; d="scan'208";a="241925198" Received: from spr10.sh.intel.com (HELO localhost) ([10.239.23.75]) by fmviesa004.fm.intel.com with ESMTP; 20 May 2026 02:35:22 -0700 From: Yuan Liu To: David Hildenbrand , Oscar Salvador , Mike Rapoport , Wei Yang Cc: linux-mm@kvack.org, Yong Hu , Nanhai Zou , Yuan Liu , Tim Chen , Qiuxu Zhuo , Yu C Chen , Pan Deng , Tianyou Li , Chen Zhang , Jason Zeng , linux-kernel@vger.kernel.org Subject: [PATCH v5 3/5] mm: remove the special early-section handling from pfn_valid() and for_each_valid_pfn() Date: Wed, 20 May 2026 05:34:55 -0400 Message-ID: <20260520093457.3719960-4-yuan1.liu@intel.com> X-Mailer: git-send-email 2.47.3 In-Reply-To: <20260520093457.3719960-1-yuan1.liu@intel.com> References: <20260520093457.3719960-1-yuan1.liu@intel.com> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 
Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Make pfn_valid() return 0 for PFNs that fall into invalid subsections in early sections. Make for_each_valid_pfn() skip PFNs that fall into invalid subsections in early sections. This change is in preparation for optimizing zone contiguity checks based on pages_with_online_memmap. Reviewed-by: Wei Yang Reviewed-by: Jason Zeng Signed-off-by: Yuan Liu --- include/linux/mmzone.h | 13 ++++++------- mm/sparse-vmemmap.c | 4 ++-- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 9adb2ad21da5..783084f8bbfe 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -2259,6 +2259,10 @@ void sparse_init_early_section(int nid, struct page = *map, unsigned long pnum, * there is actual usable memory at that @pfn. The struct page may * represent a hole or an unusable page frame. * + * Note that this function returns 0 for PFNs that fall into + * invalid subsections as part of early sections, even though there would + * currently be a memmap allocated (that should not be touched). + * * Return: 1 for PFNs that have memory map entries and 0 otherwise */ static inline int pfn_valid(unsigned long pfn) @@ -2283,11 +2287,7 @@ static inline int pfn_valid(unsigned long pfn) rcu_read_unlock_sched(); return 0; } - /* - * Traditionally early sections always returned pfn_valid() for - * the entire section-sized span. 
- */ - ret =3D early_section(ms) || pfn_section_valid(ms, pfn); + ret =3D pfn_section_valid(ms, pfn); rcu_read_unlock_sched(); =20 return ret; @@ -2303,8 +2303,7 @@ static inline unsigned long first_valid_pfn(unsigned = long pfn, unsigned long end while (nr <=3D __highest_present_section_nr && pfn < end_pfn) { struct mem_section *ms =3D __pfn_to_section(pfn); =20 - if (valid_section(ms) && - (early_section(ms) || pfn_section_first_valid(ms, &pfn))) { + if (valid_section(ms) && pfn_section_first_valid(ms, &pfn)) { rcu_read_unlock_sched(); return pfn; } diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c index 6eadb9d116e4..c6eefbb6013f 100644 --- a/mm/sparse-vmemmap.c +++ b/mm/sparse-vmemmap.c @@ -771,8 +771,8 @@ static void section_deactivate(unsigned long pfn, unsig= ned long nr_pages, } =20 /* - * The memmap of early sections is always fully populated. See - * section_activate() and pfn_valid() . + * The memmap of early sections is currently always fully populated. See + * section_activate(). 
*/ if (!section_is_early) { memmap_pages_add(-1L * (DIV_ROUND_UP(nr_pages * sizeof(struct page), PAG= E_SIZE))); --=20 2.47.3 From nobody Mon May 25 00:09:00 2026 Received: from mgamail.intel.com (mgamail.intel.com [198.175.65.11]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 7C4553CA4B9 for ; Wed, 20 May 2026 09:35:30 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=198.175.65.11 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1779269734; cv=none; b=IX9XRQz3okfrHiacN4epiR85RPKCW0QpoL/tkHk1Qr1Ke1olfQFMzoMuSQwq/lGWj9SpSpx42btAxi5kzGtGGnGiF5zsj23iwG/Qh0VDuJpHd7C0E5b1ovTVhupAyX1Ly47KDenrKO3CZkXGhbalY6UZ77il1bQhh1bLzfYSEzo= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1779269734; c=relaxed/simple; bh=yiegu5SCV3ey1svPySDj2kdNA8INhudP17CCZrO2NTk=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=htwGHhGXtOS2m9Ig8MAgCHoKMpGr/cT7QMLh6AEzmxHtQtBYfuWWOp+y92Ai4uGqHuPn3Dua8vbLRptzCk4NOQKj1Ygji9GIdT5H1GbjdmWjH0mbeF3FPHrOyGhfmU1jJ9EaPxptVDcJaxdKxqa04wpB+EQ866giAwv8HEcZ4nI= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=intel.com; spf=pass smtp.mailfrom=intel.com; dkim=pass (2048-bit key) header.d=intel.com header.i=@intel.com header.b=FiCMUN3r; arc=none smtp.client-ip=198.175.65.11 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=intel.com Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=intel.com Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=intel.com header.i=@intel.com header.b="FiCMUN3r" DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=intel.com; i=@intel.com; q=dns/txt; s=Intel; t=1779269731; x=1810805731; h=from:to:cc:subject:date:message-id:in-reply-to: 
references:mime-version:content-transfer-encoding; bh=yiegu5SCV3ey1svPySDj2kdNA8INhudP17CCZrO2NTk=; b=FiCMUN3rrmBrqYehmN5OP3sBxroaxL60D+lcTl3yubnJfSw9tavOE12q RleKYM/dgGAGes5rkfv4yE8SEc2FvqmenvbEyZl4JJE/HDh1JM9jDbU+H Omw7jndXxvFOdFwn6oJgYInHr22ylQRZfSZvuVlZP+P/azRkWyjmFk96N OC7RoIANkGeJQ4kaj6jTaKGfRjQkr4bTzbcVcNqTnarn/mA6M7+npC97D dg5svgRgnppDj6tb6H64Xf7esMssduxqpDJVQs2WXBOrnXBGgLSGnmGoc VvD1bwkf3YekAcqS/2RPgx2Bzwz8boT1Qhl/ATbDTOweWBZTBuP59xUVu Q==; X-CSE-ConnectionGUID: 2lczr1kJTiSRn8GQesQhYw== X-CSE-MsgGUID: Sg74ZcgxTEKvMk/4GzVDCg== X-IronPort-AV: E=McAfee;i="6800,10657,11791"; a="90461042" X-IronPort-AV: E=Sophos;i="6.23,244,1770624000"; d="scan'208";a="90461042" Received: from fmviesa004.fm.intel.com ([10.60.135.144]) by orvoesa103.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 20 May 2026 02:35:30 -0700 X-CSE-ConnectionGUID: UKiBz0riS7CuNJRsPTpnXg== X-CSE-MsgGUID: WVYgMd/9RqGITW+w5ybs9Q== X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="6.23,244,1770624000"; d="scan'208";a="241925219" Received: from spr10.sh.intel.com (HELO localhost) ([10.239.23.75]) by fmviesa004.fm.intel.com with ESMTP; 20 May 2026 02:35:26 -0700 From: Yuan Liu To: David Hildenbrand , Oscar Salvador , Mike Rapoport , Wei Yang Cc: linux-mm@kvack.org, Yong Hu , Nanhai Zou , Yuan Liu , Tim Chen , Qiuxu Zhuo , Yu C Chen , Pan Deng , Tianyou Li , Chen Zhang , Jason Zeng , linux-kernel@vger.kernel.org Subject: [PATCH v5 4/5] mm/memory_hotplug: optimize zone contiguous check when changing pfn range Date: Wed, 20 May 2026 05:34:56 -0400 Message-ID: <20260520093457.3719960-5-yuan1.liu@intel.com> X-Mailer: git-send-email 2.47.3 In-Reply-To: <20260520093457.3719960-1-yuan1.liu@intel.com> References: <20260520093457.3719960-1-yuan1.liu@intel.com> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" When move_pfn_range_to_zone() or 
remove_pfn_range_from_zone() updates a zone, set_zone_contiguous() rescans the entire zone pageblock-by-pageblock to rebuild zone->contiguous. For large zones this is a significant cost during memory hotplug and hot-unplug. Add a new zone member pages_with_online_memmap that tracks the number of pages within the zone span that have an online memory map (including present pages and memory holes whose memory map has been initialized). When spanned_pages =3D=3D pages_with_online_memmap the zone is contiguous a= nd pfn_to_page() can be called on any PFN in the zone span without further pfn_valid() checks. Only pages that fall within the current zone span are accounted towards pages_with_online_memmap. A "too small" value is safe: it merely prevents detecting a contiguous zone. The following test cases of memory hotplug for a VM [1], tested in the environment [2], show that this optimization can significantly reduce the memory hotplug time [3]. +----------------+------+---------------+--------------+----------------+ | | Size | Time (before) | Time (after) | Time Reduction | | +------+---------------+--------------+----------------+ | Plug Memory | 256G | 10s | 3s | 70% | | +------+---------------+--------------+----------------+ | | 512G | 36s | 7s | 81% | +----------------+------+---------------+--------------+----------------+ +----------------+------+---------------+--------------+----------------+ | | Size | Time (before) | Time (after) | Time Reduction | | +------+---------------+--------------+----------------+ | Unplug Memory | 256G | 11s | 4s | 64% | | +------+---------------+--------------+----------------+ | | 512G | 36s | 9s | 75% | +----------------+------+---------------+--------------+----------------+ [1] Qemu commands to hotplug 256G/512G memory for a VM: object_add memory-backend-ram,id=3Dhotmem0,size=3D256G/512G,share=3Don device_add virtio-mem-pci,id=3Dvmem1,memdev=3Dhotmem0,bus=3Dport1 qom-set vmem1 requested-size 256G/512G (Plug Memory) qom-set 
vmem1 requested-size 0G (Unplug Memory) [2] Hardware : Intel Icelake server Guest Kernel : v7.0-rc4 Qemu : v9.0.0 Launch VM : qemu-system-x86_64 -accel kvm -cpu host \ -drive file=3D./Centos10_cloud.qcow2,format=3Dqcow2,if=3Dvirtio \ -drive file=3D./seed.img,format=3Draw,if=3Dvirtio \ -smp 3,cores=3D3,threads=3D1,sockets=3D1,maxcpus=3D3 \ -m 2G,slots=3D10,maxmem=3D2052472M \ -device pcie-root-port,id=3Dport1,bus=3Dpcie.0,slot=3D1,multifunction= =3Don \ -device pcie-root-port,id=3Dport2,bus=3Dpcie.0,slot=3D2 \ -nographic -machine q35 \ -nic user,hostfwd=3Dtcp::3000-:22 Guest kernel auto-onlines newly added memory blocks: echo online > /sys/devices/system/memory/auto_online_blocks [3] The time from typing the QEMU commands in [1] to when the output of 'grep MemTotal /proc/meminfo' on Guest reflects that all hotplugged memory is recognized. Reported-by: Nanhai Zou Reported-by: Chen Zhang Tested-by: Yuan Liu Reviewed-by: Tim Chen Reviewed-by: Qiuxu Zhuo Reviewed-by: Yu C Chen Reviewed-by: Pan Deng Reviewed-by: Nanhai Zou Reviewed-by: Wei Yang Co-developed-by: Tianyou Li Signed-off-by: Tianyou Li Signed-off-by: Yuan Liu Acked-by: David Hildenbrand (Arm) --- Documentation/mm/physical_memory.rst | 13 ++++++++ drivers/base/memory.c | 6 ++++ include/linux/mmzone.h | 47 ++++++++++++++++++++++++++++ mm/internal.h | 8 +---- mm/memory_hotplug.c | 12 ++----- mm/mm_init.c | 45 +++++++++++--------------- 6 files changed, 87 insertions(+), 44 deletions(-) diff --git a/Documentation/mm/physical_memory.rst b/Documentation/mm/physic= al_memory.rst index b76183545e5b..0aa65e6b5499 100644 --- a/Documentation/mm/physical_memory.rst +++ b/Documentation/mm/physical_memory.rst @@ -483,6 +483,19 @@ General ``present_pages`` should use ``get_online_mems()`` to get a stable value= . It is initialized by ``calculate_node_totalpages()``. 
=20 +``pages_with_online_memmap`` + Tracks pages within the zone that have an online memory map (present pag= es + and memory holes whose memory map has been initialized). When + ``spanned_pages`` =3D=3D ``pages_with_online_memmap``, ``pfn_to_page()``= can be + performed without further checks on any PFN within the zone span. + + Note: this counter may temporarily undercount when pages with an online + memory map exist outside the current zone span. This can only happen dur= ing + boot, when initializing the memory map of pages that do not fall into any + zone span. Growing the zone to cover such pages and later shrinking it b= ack + may result in a "too small" value. This is safe: it merely prevents + detecting a contiguous zone. + ``present_early_pages`` The present pages existing within the zone located on memory available s= ince early boot, excluding hotplugged memory. Defined only when diff --git a/drivers/base/memory.c b/drivers/base/memory.c index f806a683b767..e029699d89a6 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -246,6 +246,7 @@ static int memory_block_online(struct memory_block *mem) nr_vmemmap_pages =3D mem->altmap->free; =20 mem_hotplug_begin(); + clear_zone_contiguous(zone); if (nr_vmemmap_pages) { ret =3D mhp_init_memmap_on_memory(start_pfn, nr_vmemmap_pages, zone); if (ret) @@ -270,6 +271,7 @@ static int memory_block_online(struct memory_block *mem) =20 mem->zone =3D zone; out: + set_zone_contiguous(zone); mem_hotplug_done(); return ret; } @@ -282,6 +284,7 @@ static int memory_block_offline(struct memory_block *me= m) unsigned long start_pfn =3D section_nr_to_pfn(mem->start_section_nr); unsigned long nr_pages =3D PAGES_PER_SECTION * sections_per_block; unsigned long nr_vmemmap_pages =3D 0; + struct zone *zone; int ret; =20 if (!mem->zone) @@ -294,7 +297,9 @@ static int memory_block_offline(struct memory_block *me= m) if (mem->altmap) nr_vmemmap_pages =3D mem->altmap->free; =20 + zone =3D mem->zone; mem_hotplug_begin(); + 
clear_zone_contiguous(zone); if (nr_vmemmap_pages) adjust_present_page_count(pfn_to_page(start_pfn), mem->group, -nr_vmemmap_pages); @@ -314,6 +319,7 @@ static int memory_block_offline(struct memory_block *me= m) =20 mem->zone =3D NULL; out: + set_zone_contiguous(zone); mem_hotplug_done(); return ret; } diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 783084f8bbfe..374e73ec1356 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -1031,6 +1031,20 @@ struct zone { * cma pages is present pages that are assigned for CMA use * (MIGRATE_CMA). * + * pages_with_online_memmap tracks pages within the zone that have + * an online memory map (present pages and memory holes whose memory + * map has been initialized). When spanned_pages =3D=3D + * pages_with_online_memmap, pfn_to_page() can be performed without + * further checks on any PFN within the zone span. + * + * Note: this counter may temporarily undercount when pages with an + * online memory map exist outside the current zone span. This can + * only happen during boot, when initializing the memory map of + * pages that do not fall into any zone span. Growing the zone to + * cover such pages and later shrinking it back may result in a + * "too small" value. This is safe: it merely prevents detecting a + * contiguous zone. + * * So present_pages may be used by memory hotplug or memory power * management logic to figure out unmanaged pages by checking * (present_pages - managed_pages). And managed_pages should be used @@ -1055,6 +1069,7 @@ struct zone { atomic_long_t managed_pages; unsigned long spanned_pages; unsigned long present_pages; + unsigned long pages_with_online_memmap; #if defined(CONFIG_MEMORY_HOTPLUG) unsigned long present_early_pages; #endif @@ -1692,6 +1707,38 @@ static inline bool zone_is_zone_device(const struct = zone *zone) } #endif =20 +/** + * zone_is_contiguous - test whether a zone is contiguous + * @zone: the zone to test. 
+ * + * In a contiguous zone, it is valid to call pfn_to_page() on any PFN in t= he + * spanned zone without requiring pfn_valid() or pfn_to_online_page() chec= ks. + * + * Note that missing synchronization with memory offlining makes any PFN + * traversal prone to races. + * + * ZONE_DEVICE zones are always marked non-contiguous. + * + * Return: true if contiguous, otherwise false. + */ +static inline bool zone_is_contiguous(const struct zone *zone) +{ + return zone->contiguous; +} + +static inline void set_zone_contiguous(struct zone *zone) +{ + if (zone_is_zone_device(zone)) + return; + if (zone->spanned_pages =3D=3D zone->pages_with_online_memmap) + zone->contiguous =3D true; +} + +static inline void clear_zone_contiguous(struct zone *zone) +{ + zone->contiguous =3D false; +} + /* * Returns true if a zone has pages managed by the buddy allocator. * All the reclaim decisions have to use this function rather than diff --git a/mm/internal.h b/mm/internal.h index 5a2ddcf68e0b..a047c7caef6f 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -814,21 +814,15 @@ extern struct page *__pageblock_pfn_to_page(unsigned = long start_pfn, static inline struct page *pageblock_pfn_to_page(unsigned long start_pfn, unsigned long end_pfn, struct zone *zone) { - if (zone->contiguous) + if (zone_is_contiguous(zone)) return pfn_to_page(start_pfn); =20 return __pageblock_pfn_to_page(start_pfn, end_pfn, zone); } =20 -void set_zone_contiguous(struct zone *zone); bool pfn_range_intersects_zones(int nid, unsigned long start_pfn, unsigned long nr_pages); =20 -static inline void clear_zone_contiguous(struct zone *zone) -{ - zone->contiguous =3D false; -} - extern int __isolate_free_page(struct page *page, unsigned int order); extern void __putback_isolated_page(struct page *page, unsigned int order, int mt); diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 2a943ec57c85..fbe863441761 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -557,18 +557,13 @@ void 
remove_pfn_range_from_zone(struct zone *zone, =20 /* * Zone shrinking code cannot properly deal with ZONE_DEVICE. So - * we will not try to shrink the zones - which is okay as - * set_zone_contiguous() cannot deal with ZONE_DEVICE either way. + * we will not try to shrink it. */ if (zone_is_zone_device(zone)) return; =20 - clear_zone_contiguous(zone); - shrink_zone_span(zone, start_pfn, start_pfn + nr_pages); update_pgdat_span(pgdat); - - set_zone_contiguous(zone); } =20 /** @@ -745,8 +740,6 @@ void move_pfn_range_to_zone(struct zone *zone, unsigned= long start_pfn, struct pglist_data *pgdat =3D zone->zone_pgdat; int nid =3D pgdat->node_id; =20 - clear_zone_contiguous(zone); - if (zone_is_empty(zone)) init_currently_empty_zone(zone, start_pfn, nr_pages); resize_zone_range(zone, start_pfn, nr_pages); @@ -774,8 +767,6 @@ void move_pfn_range_to_zone(struct zone *zone, unsigned= long start_pfn, memmap_init_range(nr_pages, nid, zone_idx(zone), start_pfn, 0, MEMINIT_HOTPLUG, altmap, migratetype, isolate_pageblock); - - set_zone_contiguous(zone); } =20 struct auto_movable_stats { @@ -1071,6 +1062,7 @@ void adjust_present_page_count(struct page *page, str= uct memory_group *group, if (early_section(__pfn_to_section(page_to_pfn(page)))) zone->present_early_pages +=3D nr_pages; zone->present_pages +=3D nr_pages; + zone->pages_with_online_memmap +=3D nr_pages; zone->zone_pgdat->node_present_pages +=3D nr_pages; =20 if (group && movable) diff --git a/mm/mm_init.c b/mm/mm_init.c index 2a5ac175d5dd..05c616c857ec 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -806,9 +806,9 @@ void __meminit init_deferred_page(unsigned long pfn, in= t nid) * zone/node above the hole except for the trailing pages in the last * section that will be appended to the zone/node below. 
*/ -static void __init init_unavailable_range(unsigned long spfn, - unsigned long epfn, - int zone, int node) +static unsigned long __init init_unavailable_range(unsigned long spfn, + unsigned long epfn, + int zone, int node) { unsigned long pfn; u64 pgcnt =3D 0; @@ -822,6 +822,7 @@ static void __init init_unavailable_range(unsigned long= spfn, if (pgcnt) pr_info("On node %d, zone %s: %lld pages in unavailable ranges\n", node, zone_names[zone], pgcnt); + return pgcnt; } =20 /* @@ -918,9 +919,21 @@ static void __init memmap_init_zone_range(struct zone = *zone, memmap_init_range(end_pfn - start_pfn, nid, zone_id, start_pfn, zone_end_pfn, MEMINIT_EARLY, NULL, MIGRATE_MOVABLE, false); + zone->pages_with_online_memmap +=3D end_pfn - start_pfn; =20 - if (*hole_pfn < start_pfn) - init_unavailable_range(*hole_pfn, start_pfn, zone_id, nid); + if (*hole_pfn < start_pfn) { + unsigned long hole_start_pfn =3D *hole_pfn; + unsigned long pgcnt; + + if (hole_start_pfn < zone_start_pfn) { + init_unavailable_range(hole_start_pfn, zone_start_pfn, + zone_id, nid); + hole_start_pfn =3D zone_start_pfn; + } + pgcnt =3D init_unavailable_range(hole_start_pfn, start_pfn, + zone_id, nid); + zone->pages_with_online_memmap +=3D pgcnt; + } =20 *hole_pfn =3D end_pfn; } @@ -2237,28 +2250,6 @@ void __init init_cma_pageblock(struct page *page) } #endif =20 -void set_zone_contiguous(struct zone *zone) -{ - unsigned long block_start_pfn =3D zone->zone_start_pfn; - unsigned long block_end_pfn; - - block_end_pfn =3D pageblock_end_pfn(block_start_pfn); - for (; block_start_pfn < zone_end_pfn(zone); - block_start_pfn =3D block_end_pfn, - block_end_pfn +=3D pageblock_nr_pages) { - - block_end_pfn =3D min(block_end_pfn, zone_end_pfn(zone)); - - if (!__pageblock_pfn_to_page(block_start_pfn, - block_end_pfn, zone)) - return; - cond_resched(); - } - - /* We confirm that there is no hole */ - zone->contiguous =3D true; -} - /* * Check if a PFN range intersects multiple zones on one or more * NUMA nodes. 
Specify the @nid argument if it is known that this --=20 2.47.3 From nobody Mon May 25 00:09:00 2026 Received: from mgamail.intel.com (mgamail.intel.com [192.198.163.14]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 52D963C8731 for ; Wed, 20 May 2026 09:35:35 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=192.198.163.14 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1779269737; cv=none; b=nQs58jcRB5Y7kC1uF0k4eOUa/3NBephf0XfvSZOR8EUw9UkW8Qr+Ch3O5P0dCPbwbmVulE8pJPLRJJKqYCkFSa6L7Pu8Bl+gvZ7pUqyyMJA0/tQRy17NxKsBfD9EOdLwaIMum7B/gLHZCPQjjfcyOna3O5guqHzqBmw8i8+BZ1E= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1779269737; c=relaxed/simple; bh=XavP1vUJllc7NA9zCR5LarxqZxkiJyUBCYCui0GExNc=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=mUdlxqDd9YeHZj5gYWzhRXVKtjIYYt2W3AJ/5AIDsYax7OSNIAnHLVF/Dj9Y4V7KXg85zUEUe45Z6VJxhQNCdFBIoJsAHgEe/yFhkQdVH1GpEaG27GqfyLfnF24tEJbvb0vi3gfrJj+evQWcDvPXRsv2WdE/jZ8fr/FoSx3MwQg= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=intel.com; spf=pass smtp.mailfrom=intel.com; dkim=pass (2048-bit key) header.d=intel.com header.i=@intel.com header.b=cteyFSB2; arc=none smtp.client-ip=192.198.163.14 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=intel.com Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=intel.com Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=intel.com header.i=@intel.com header.b="cteyFSB2" DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=intel.com; i=@intel.com; q=dns/txt; s=Intel; t=1779269735; x=1810805735; h=from:to:cc:subject:date:message-id:in-reply-to: references:mime-version:content-transfer-encoding; 
bh=XavP1vUJllc7NA9zCR5LarxqZxkiJyUBCYCui0GExNc=; b=cteyFSB2mQzheUdnPvotq97eYX045WCL081VHTxxLL8TSsxx+mdU7weq F3W56Kue8Sfwk8tbSsw6GE6TbqgahO2ItYHZiR8u0qnK2bC9KfSueVNHT 34clyOvCZjaVwS7LEy5J0JhJHTov8YchEccKSDYuN2SJCryletl86zeEX 6VOMwxWWuxfXP7BRbEqFTLMEAAlk9T4tfaJfEcELW9IDJDEBIykh7WL66 eLwJ4DSEg0tSwOwkHuOSI2mbqMh9TMhQUS59/Mnpr0ffRhtqZNPuZDYE3 X91P3cXDRme9RQ98rWknDrlqYFvVn8WWw2oXx8ERHgUTmDTBusfPiM8OU w==; X-CSE-ConnectionGUID: lmkmFEpkSCiTMXeVceGOdA== X-CSE-MsgGUID: CVjqcBzuT4OZusOOSpMFzg== X-IronPort-AV: E=McAfee;i="6800,10657,11791"; a="80196886" X-IronPort-AV: E=Sophos;i="6.23,244,1770624000"; d="scan'208";a="80196886" Received: from fmviesa006.fm.intel.com ([10.60.135.146]) by fmvoesa108.fm.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 20 May 2026 02:35:35 -0700 X-CSE-ConnectionGUID: Vm3WiHyLQSmXacs/f4mnbQ== X-CSE-MsgGUID: RAgbtuazSsGhzDvr0km2mg== X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="6.23,244,1770624000"; d="scan'208";a="235672601" Received: from spr10.sh.intel.com (HELO localhost) ([10.239.23.75]) by fmviesa006.fm.intel.com with ESMTP; 20 May 2026 02:35:31 -0700 From: Yuan Liu To: David Hildenbrand , Oscar Salvador , Mike Rapoport , Wei Yang Cc: linux-mm@kvack.org, Yong Hu , Nanhai Zou , Yuan Liu , Tim Chen , Qiuxu Zhuo , Yu C Chen , Pan Deng , Tianyou Li , Chen Zhang , Jason Zeng , linux-kernel@vger.kernel.org Subject: [PATCH v5 5/5] mm/memory_hotplug: improve shrink_zone_span() subsection boundary checks Date: Wed, 20 May 2026 05:34:57 -0400 Message-ID: <20260520093457.3719960-6-yuan1.liu@intel.com> X-Mailer: git-send-email 2.47.3 In-Reply-To: <20260520093457.3719960-1-yuan1.liu@intel.com> References: <20260520093457.3719960-1-yuan1.liu@intel.com> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" When shrinking a zone span after removing a PFN range, find_smallest_section_pfn() and 
find_biggest_section_pfn() only checked one edge PFN in each subsection for nid/zone matching. If a memory or hole boundary falls in the middle of a subsection, that edge PFN may belong to a different nid/zone, causing the helpers to miss a valid PFN within that subsection. Fix this by checking both subsection edge PFNs for nid/zone matching. Keep a single pfn_to_online_page() check per subsection, since online state is the same for all PFNs in a subsection. Reviewed-by: Wei Yang Reviewed-by: Jason Zeng Signed-off-by: Yuan Liu --- mm/memory_hotplug.c | 42 +++++++++++++++++++++++++++--------------- 1 file changed, 27 insertions(+), 15 deletions(-) diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index fbe863441761..20b61f70cd81 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -427,17 +427,24 @@ static unsigned long find_smallest_section_pfn(int ni= d, struct zone *zone, unsigned long start_pfn, unsigned long end_pfn) { - for (; start_pfn < end_pfn; start_pfn +=3D PAGES_PER_SUBSECTION) { - if (unlikely(!pfn_to_online_page(start_pfn))) - continue; + unsigned long next_pfn; =20 - if (unlikely(pfn_to_nid(start_pfn) !=3D nid)) - continue; + for (; start_pfn < end_pfn; start_pfn =3D next_pfn) { + unsigned long tail_pfn; =20 - if (zone !=3D page_zone(pfn_to_page(start_pfn))) + next_pfn =3D start_pfn + PAGES_PER_SUBSECTION; + tail_pfn =3D next_pfn - 1; + + if (unlikely(!pfn_to_online_page(start_pfn))) continue; =20 - return start_pfn; + if (likely(pfn_to_nid(start_pfn) =3D=3D nid) && + zone =3D=3D page_zone(pfn_to_page(start_pfn))) + return start_pfn; + + if (likely(pfn_to_nid(tail_pfn) =3D=3D nid) && + zone =3D=3D page_zone(pfn_to_page(tail_pfn))) + return start_pfn; } =20 return 0; @@ -448,21 +455,26 @@ static unsigned long find_biggest_section_pfn(int nid= , struct zone *zone, unsigned long start_pfn, unsigned long end_pfn) { - unsigned long pfn; + unsigned long pfn, prev_pfn; =20 /* pfn is the end pfn of a memory section. 
*/ pfn =3D end_pfn - 1; - for (; pfn >=3D start_pfn; pfn -=3D PAGES_PER_SUBSECTION) { - if (unlikely(!pfn_to_online_page(pfn))) - continue; + for (; pfn >=3D start_pfn; pfn =3D prev_pfn) { + unsigned long head_pfn; =20 - if (unlikely(pfn_to_nid(pfn) !=3D nid)) - continue; + prev_pfn =3D pfn - PAGES_PER_SUBSECTION; + head_pfn =3D prev_pfn + 1; =20 - if (zone !=3D page_zone(pfn_to_page(pfn))) + if (unlikely(!pfn_to_online_page(pfn))) continue; =20 - return pfn; + if (likely(pfn_to_nid(pfn) =3D=3D nid) && + zone =3D=3D page_zone(pfn_to_page(pfn))) + return pfn; + + if (likely(pfn_to_nid(head_pfn) =3D=3D nid) && + zone =3D=3D page_zone(pfn_to_page(head_pfn))) + return pfn; } =20 return 0; --=20 2.47.3