From nobody Sat Mar 22 09:01:10 2025
From: Ryan Roberts
To: Catalin Marinas, Will Deacon, Mark Rutland, Ard Biesheuvel, David Hildenbrand, Donald Dutile, Eric Chanudet
Cc: Ryan Roberts, linux-arm-kernel@lists.infradead.org, linux-kernel@vger.kernel.org, Itaru Kitayama
Subject: [PATCH v3 1/3] arm64: mm: Don't remap pgtables per-cont(pte|pmd) block
Date: Fri, 12 Apr 2024 14:19:06 +0100
Message-Id: <20240412131908.433043-2-ryan.roberts@arm.com>
X-Mailer: git-send-email 2.25.1
In-Reply-To: <20240412131908.433043-1-ryan.roberts@arm.com>
References: <20240412131908.433043-1-ryan.roberts@arm.com>

A large part of the kernel boot time is creating the kernel linear map
page tables. When rodata=full, all memory is mapped by pte. And when
there is lots of physical RAM, there are lots of pte tables to populate.

The primary cost associated with this is mapping and unmapping the pte
table memory in the fixmap; at unmap time, the TLB entry must be
invalidated and this is expensive.

Previously, each pmd and pte table was fixmapped/fixunmapped for each
cont(pte|pmd) block of mappings (16 entries with 4K granule). This
means we ended up issuing 32 TLBIs per (pmd|pte) table during the
population phase. Let's fix that, and fixmap/fixunmap each page once
per population, for a saving of 31 TLBIs per (pmd|pte) table. This
gives a significant boot speedup.
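To make the claimed saving concrete, here is a hypothetical userspace
model of the arithmetic (not kernel code; PTRS_PER_PTE and CONT_PTES
are the arm64 4K-granule values, and the counter stands in for the
TLBI that each fixunmap costs):

  #include <stdio.h>

  #define PTRS_PER_PTE 512        /* pte entries per table, 4K granule */
  #define CONT_PTES     16        /* ptes covered by one contpte block */

  int main(void)
  {
          /* Before: one fixmap/fixunmap (hence one TLBI) per block. */
          int tlbi_before = PTRS_PER_PTE / CONT_PTES;

          /* After: the table is fixmapped once per population. */
          int tlbi_after = 1;

          printf("TLBIs per pte table: before=%d after=%d saved=%d\n",
                 tlbi_before, tlbi_after, tlbi_before - tlbi_after);
          return 0;
  }

This prints before=32 after=1 saved=31, matching the numbers above.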
Execution time of map_mem(), which creates the kernel linear map page
tables, was measured on different machines with different RAM configs:

               | Apple M2 VM | Ampere Altra| Ampere Altra| Ampere Altra
               | VM, 16G     | VM, 64G     | VM, 256G    | Metal, 512G
---------------|-------------|-------------|-------------|-------------
               |   ms    (%) |   ms    (%) |   ms    (%) |    ms    (%)
---------------|-------------|-------------|-------------|-------------
before         |  168   (0%) | 2198   (0%) | 8644   (0%) | 17447   (0%)
after          |   78 (-53%) |  435 (-80%) | 1723 (-80%) |  3779 (-78%)

Reviewed-by: Ard Biesheuvel
Reviewed-by: Mark Rutland
Tested-by: Itaru Kitayama
Tested-by: Eric Chanudet
Signed-off-by: Ryan Roberts
---
 arch/arm64/mm/mmu.c | 27 ++++++++++++++-------------
 1 file changed, 14 insertions(+), 13 deletions(-)

diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 495b732d5af3..9f1d69b7b494 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -172,12 +172,9 @@ bool pgattr_change_is_safe(u64 old, u64 new)
 	return ((old ^ new) & ~mask) == 0;
 }
 
-static void init_pte(pmd_t *pmdp, unsigned long addr, unsigned long end,
+static void init_pte(pte_t *ptep, unsigned long addr, unsigned long end,
 		     phys_addr_t phys, pgprot_t prot)
 {
-	pte_t *ptep;
-
-	ptep = pte_set_fixmap_offset(pmdp, addr);
 	do {
 		pte_t old_pte = __ptep_get(ptep);
 
@@ -192,8 +189,6 @@ static void init_pte(pmd_t *pmdp, unsigned long addr, unsigned long end,
 
 		phys += PAGE_SIZE;
 	} while (ptep++, addr += PAGE_SIZE, addr != end);
-
-	pte_clear_fixmap();
 }
 
 static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr,
@@ -204,6 +199,7 @@ static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr,
 {
 	unsigned long next;
 	pmd_t pmd = READ_ONCE(*pmdp);
+	pte_t *ptep;
 
 	BUG_ON(pmd_sect(pmd));
 	if (pmd_none(pmd)) {
@@ -219,6 +215,7 @@ static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr,
 	}
 	BUG_ON(pmd_bad(pmd));
 
+	ptep = pte_set_fixmap_offset(pmdp, addr);
 	do {
 		pgprot_t __prot = prot;
 
@@ -229,20 +226,21 @@ static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr,
 		    (flags & NO_CONT_MAPPINGS) == 0)
 			__prot = __pgprot(pgprot_val(prot) | PTE_CONT);
 
-		init_pte(pmdp, addr, next, phys, __prot);
+		init_pte(ptep, addr, next, phys, __prot);
 
+		ptep += pte_index(next) - pte_index(addr);
 		phys += next - addr;
 	} while (addr = next, addr != end);
+
+	pte_clear_fixmap();
 }
 
-static void init_pmd(pud_t *pudp, unsigned long addr, unsigned long end,
+static void init_pmd(pmd_t *pmdp, unsigned long addr, unsigned long end,
 		     phys_addr_t phys, pgprot_t prot,
 		     phys_addr_t (*pgtable_alloc)(int), int flags)
 {
 	unsigned long next;
-	pmd_t *pmdp;
 
-	pmdp = pmd_set_fixmap_offset(pudp, addr);
 	do {
 		pmd_t old_pmd = READ_ONCE(*pmdp);
 
@@ -268,8 +266,6 @@ static void init_pmd(pud_t *pudp, unsigned long addr, unsigned long end,
 		}
 		phys += next - addr;
 	} while (pmdp++, addr = next, addr != end);
-
-	pmd_clear_fixmap();
 }
 
 static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr,
@@ -279,6 +275,7 @@ static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr,
 {
 	unsigned long next;
 	pud_t pud = READ_ONCE(*pudp);
+	pmd_t *pmdp;
 
 	/*
 	 * Check for initial section mappings in the pgd/pud.
@@ -297,6 +294,7 @@ static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr,
 	}
 	BUG_ON(pud_bad(pud));
 
+	pmdp = pmd_set_fixmap_offset(pudp, addr);
 	do {
 		pgprot_t __prot = prot;
 
@@ -307,10 +305,13 @@ static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr,
 		    (flags & NO_CONT_MAPPINGS) == 0)
 			__prot = __pgprot(pgprot_val(prot) | PTE_CONT);
 
-		init_pmd(pudp, addr, next, phys, __prot, pgtable_alloc, flags);
+		init_pmd(pmdp, addr, next, phys, __prot, pgtable_alloc, flags);
 
+		pmdp += pmd_index(next) - pmd_index(addr);
 		phys += next - addr;
 	} while (addr = next, addr != end);
+
+	pmd_clear_fixmap();
 }
 
 static void alloc_init_pud(p4d_t *p4dp, unsigned long addr, unsigned long end,
-- 
2.25.1

From nobody Sat Mar 22 09:01:10 2025
From: Ryan Roberts
To: Catalin Marinas, Will Deacon, Mark Rutland, Ard Biesheuvel, David Hildenbrand, Donald Dutile, Eric Chanudet
Cc: Ryan Roberts, linux-arm-kernel@lists.infradead.org, linux-kernel@vger.kernel.org, Itaru Kitayama
Subject: [PATCH v3 2/3] arm64: mm: Batch dsb and isb when populating pgtables
Date: Fri, 12 Apr 2024 14:19:07 +0100
Message-Id: <20240412131908.433043-3-ryan.roberts@arm.com>
X-Mailer: git-send-email 2.25.1
In-Reply-To: <20240412131908.433043-1-ryan.roberts@arm.com>
References: <20240412131908.433043-1-ryan.roberts@arm.com>

After removing unnecessary TLBIs, the next bottleneck when creating the
page tables for the linear map is the DSB and ISB, which were
previously issued per-pte in __set_pte(). Since we are writing multiple
ptes in a given pte table, we can elide these barriers and insert them
once we have finished writing to the table.
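As a minimal sketch of the pattern, assuming made-up userspace names
(set_pte_nosync()/populate_table() are illustrative only, and the
single release fence stands in for the DSB/ISB pair issued once the
table is fully written):

  #include <stdint.h>
  #include <stddef.h>

  typedef uint64_t pte_t;

  /* Plain store: no per-entry barrier, mirroring __set_pte_nosync(). */
  static void set_pte_nosync(pte_t *ptep, pte_t pte)
  {
          __atomic_store_n(ptep, pte, __ATOMIC_RELAXED);
  }

  static void populate_table(pte_t *table, size_t nr, uint64_t phys,
                             uint64_t prot)
  {
          for (size_t i = 0; i < nr; i++, phys += 4096)
                  set_pte_nosync(&table[i], phys | prot);

          /* One barrier for the whole table instead of one per pte. */
          __atomic_thread_fence(__ATOMIC_RELEASE);
  }

  int main(void)
  {
          static pte_t table[512];

          populate_table(table, 512, 0x40000000, 0x3);
          return (int)(table[0] & 1);
  }

The design point is simply that the ordering guarantee is only needed
once the walker can see the whole table, so one barrier suffices.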
Execution time of map_mem(), which creates the kernel linear map page
tables, was measured on different machines with different RAM configs:

               | Apple M2 VM | Ampere Altra| Ampere Altra| Ampere Altra
               | VM, 16G     | VM, 64G     | VM, 256G    | Metal, 512G
---------------|-------------|-------------|-------------|-------------
               |   ms    (%) |   ms    (%) |   ms    (%) |    ms    (%)
---------------|-------------|-------------|-------------|-------------
before         |   78   (0%) |  435   (0%) | 1723   (0%) |  3779   (0%)
after          |   11 (-86%) |  161 (-63%) |  656 (-62%) |  1654 (-56%)

Reviewed-by: Ard Biesheuvel
Reviewed-by: Mark Rutland
Tested-by: Itaru Kitayama
Tested-by: Eric Chanudet
Signed-off-by: Ryan Roberts
---
 arch/arm64/include/asm/pgtable.h |  7 ++++++-
 arch/arm64/mm/mmu.c              | 11 ++++++++++-
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index afdd56d26ad7..105a95a8845c 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -271,9 +271,14 @@ static inline pte_t pte_mkdevmap(pte_t pte)
 	return set_pte_bit(pte, __pgprot(PTE_DEVMAP | PTE_SPECIAL));
 }
 
-static inline void __set_pte(pte_t *ptep, pte_t pte)
+static inline void __set_pte_nosync(pte_t *ptep, pte_t pte)
 {
 	WRITE_ONCE(*ptep, pte);
+}
+
+static inline void __set_pte(pte_t *ptep, pte_t pte)
+{
+	__set_pte_nosync(ptep, pte);
 
 	/*
 	 * Only if the new pte is valid and kernel, otherwise TLB maintenance
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 9f1d69b7b494..ac88b89770a6 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -178,7 +178,11 @@ static void init_pte(pte_t *ptep, unsigned long addr, unsigned long end,
 	do {
 		pte_t old_pte = __ptep_get(ptep);
 
-		__set_pte(ptep, pfn_pte(__phys_to_pfn(phys), prot));
+		/*
+		 * Required barriers to make this visible to the table walker
+		 * are deferred to the end of alloc_init_cont_pte().
+		 */
+		__set_pte_nosync(ptep, pfn_pte(__phys_to_pfn(phys), prot));
 
 		/*
 		 * After the PTE entry has been populated once, we
@@ -232,6 +236,11 @@ static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr,
 		phys += next - addr;
 	} while (addr = next, addr != end);
 
+	/*
+	 * Note: barriers and maintenance necessary to clear the fixmap slot
+	 * ensure that all previous pgtable writes are visible to the table
+	 * walker.
+	 */
 	pte_clear_fixmap();
 }
 
-- 
2.25.1

From nobody Sat Mar 22 09:01:10 2025
From: Ryan Roberts
To: Catalin Marinas, Will Deacon, Mark Rutland, Ard Biesheuvel, David Hildenbrand, Donald Dutile, Eric Chanudet
Cc: Ryan Roberts, linux-arm-kernel@lists.infradead.org, linux-kernel@vger.kernel.org, Itaru Kitayama
Subject: [PATCH v3 3/3] arm64: mm: Don't remap pgtables for allocate vs populate
Date: Fri, 12 Apr 2024 14:19:08 +0100
Message-Id: <20240412131908.433043-4-ryan.roberts@arm.com>
X-Mailer: git-send-email 2.25.1
In-Reply-To: <20240412131908.433043-1-ryan.roberts@arm.com>
References: <20240412131908.433043-1-ryan.roberts@arm.com>

During linear map pgtable creation, each pgtable is fixmapped /
fixunmapped twice; once during allocation to zero the memory, and again
during population to write the entries. This means each table has 2 TLB
invalidations issued against it. Let's fix this so that each table is
only fixmapped/fixunmapped once, halving the number of TLBIs, and
improving performance.

Achieve this by separating allocation and initialization (zeroing) of
the page. The allocated page is now fixmapped directly by the walker
and initialized, before being populated and finally fixunmapped.

This approach keeps the change small, but has the side effect that late
allocations (using __get_free_page()) must also go through the generic
memory clearing routine. So let's tell __get_free_page() not to zero
the memory to avoid duplication.

Additionally, this approach means that fixmap/fixunmap is still used
for late pgtable modifications. That's not technically needed since the
memory is all mapped in the linear map by that point. That's left as a
possible future optimization if found to be needed.
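The before/after fixmap traffic, modelled as a hypothetical userspace
program (fixmap()/fixunmap() are stand-ins, not kernel API; the counter
models the TLBI each fixunmap would cost):

  #include <stdio.h>
  #include <string.h>

  static int tlbis;               /* models TLBIs issued by fixunmap */
  static char page[4096];         /* models one pgtable page */

  static void *fixmap(void)  { return page; }
  static void fixunmap(void) { tlbis++; }

  static void table_before(void)
  {
          memset(fixmap(), 0, sizeof(page));  /* zero at allocation */
          fixunmap();
          fixmap();                           /* remap to populate */
          fixunmap();
  }

  static void table_after(void)
  {
          void *t = fixmap();                 /* map once */
          memset(t, 0, sizeof(page));         /* zero, then populate */
          fixunmap();
  }

  int main(void)
  {
          table_before();
          int before = tlbis;

          tlbis = 0;
          table_after();
          printf("TLBIs per table: before=%d after=%d\n", before, tlbis);
          return 0;
  }

This prints before=2 after=1, i.e. the halving described above.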
Execution time of map_mem(), which creates the kernel linear map page
tables, was measured on different machines with different RAM configs:

               | Apple M2 VM | Ampere Altra| Ampere Altra| Ampere Altra
               | VM, 16G     | VM, 64G     | VM, 256G    | Metal, 512G
---------------|-------------|-------------|-------------|-------------
               |   ms    (%) |   ms    (%) |   ms    (%) |    ms    (%)
---------------|-------------|-------------|-------------|-------------
before         |   11   (0%) |  161   (0%) |  656   (0%) |  1654   (0%)
after          |   10 (-11%) |  104 (-35%) |  438 (-33%) |  1223 (-26%)

Suggested-by: Mark Rutland
Reviewed-by: Ard Biesheuvel
Reviewed-by: Mark Rutland
Tested-by: Itaru Kitayama
Tested-by: Eric Chanudet
Signed-off-by: Ryan Roberts
---
 arch/arm64/include/asm/pgtable.h |  2 +
 arch/arm64/mm/mmu.c              | 67 +++++++++++++++++---------------
 2 files changed, 37 insertions(+), 32 deletions(-)

diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 105a95a8845c..92c9aed5e7af 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -1010,6 +1010,8 @@ static inline p4d_t *p4d_offset_kimg(pgd_t *pgdp, u64 addr)
 
 static inline bool pgtable_l5_enabled(void) { return false; }
 
+#define p4d_index(addr)	(((addr) >> P4D_SHIFT) & (PTRS_PER_P4D - 1))
+
 /* Match p4d_offset folding in */
 #define p4d_set_fixmap(addr)		NULL
 #define p4d_set_fixmap_offset(p4dp, addr)	((p4d_t *)p4dp)
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index ac88b89770a6..c927e9312f10 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -109,28 +109,12 @@ EXPORT_SYMBOL(phys_mem_access_prot);
 static phys_addr_t __init early_pgtable_alloc(int shift)
 {
 	phys_addr_t phys;
-	void *ptr;
 
 	phys = memblock_phys_alloc_range(PAGE_SIZE, PAGE_SIZE, 0,
 					 MEMBLOCK_ALLOC_NOLEAKTRACE);
 	if (!phys)
 		panic("Failed to allocate page table page\n");
 
-	/*
-	 * The FIX_{PGD,PUD,PMD} slots may be in active use, but the FIX_PTE
-	 * slot will be free, so we can (ab)use the FIX_PTE slot to initialise
-	 * any level of table.
-	 */
-	ptr = pte_set_fixmap(phys);
-
-	memset(ptr, 0, PAGE_SIZE);
-
-	/*
-	 * Implicit barriers also ensure the zeroed page is visible to the page
-	 * table walker
-	 */
-	pte_clear_fixmap();
-
 	return phys;
 }
 
@@ -172,6 +156,14 @@ bool pgattr_change_is_safe(u64 old, u64 new)
 	return ((old ^ new) & ~mask) == 0;
 }
 
+static void init_clear_pgtable(void *table)
+{
+	clear_page(table);
+
+	/* Ensure the zeroing is observed by page table walks. */
+	dsb(ishst);
+}
+
 static void init_pte(pte_t *ptep, unsigned long addr, unsigned long end,
 		     phys_addr_t phys, pgprot_t prot)
 {
@@ -214,12 +206,15 @@ static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr,
 			pmdval |= PMD_TABLE_PXN;
 		BUG_ON(!pgtable_alloc);
 		pte_phys = pgtable_alloc(PAGE_SHIFT);
+		ptep = pte_set_fixmap(pte_phys);
+		init_clear_pgtable(ptep);
+		ptep += pte_index(addr);
 		__pmd_populate(pmdp, pte_phys, pmdval);
-		pmd = READ_ONCE(*pmdp);
+	} else {
+		BUG_ON(pmd_bad(pmd));
+		ptep = pte_set_fixmap_offset(pmdp, addr);
 	}
-	BUG_ON(pmd_bad(pmd));
 
-	ptep = pte_set_fixmap_offset(pmdp, addr);
 	do {
 		pgprot_t __prot = prot;
 
@@ -298,12 +293,15 @@ static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr,
 			pudval |= PUD_TABLE_PXN;
 		BUG_ON(!pgtable_alloc);
 		pmd_phys = pgtable_alloc(PMD_SHIFT);
+		pmdp = pmd_set_fixmap(pmd_phys);
+		init_clear_pgtable(pmdp);
+		pmdp += pmd_index(addr);
 		__pud_populate(pudp, pmd_phys, pudval);
-		pud = READ_ONCE(*pudp);
+	} else {
+		BUG_ON(pud_bad(pud));
+		pmdp = pmd_set_fixmap_offset(pudp, addr);
 	}
-	BUG_ON(pud_bad(pud));
 
-	pmdp = pmd_set_fixmap_offset(pudp, addr);
 	do {
 		pgprot_t __prot = prot;
 
@@ -340,12 +338,15 @@ static void alloc_init_pud(p4d_t *p4dp, unsigned long addr, unsigned long end,
 			p4dval |= P4D_TABLE_PXN;
 		BUG_ON(!pgtable_alloc);
 		pud_phys = pgtable_alloc(PUD_SHIFT);
+		pudp = pud_set_fixmap(pud_phys);
+		init_clear_pgtable(pudp);
+		pudp += pud_index(addr);
 		__p4d_populate(p4dp, pud_phys, p4dval);
-		p4d = READ_ONCE(*p4dp);
+	} else {
+		BUG_ON(p4d_bad(p4d));
+		pudp = pud_set_fixmap_offset(p4dp, addr);
 	}
-	BUG_ON(p4d_bad(p4d));
 
-	pudp = pud_set_fixmap_offset(p4dp, addr);
 	do {
 		pud_t old_pud = READ_ONCE(*pudp);
 
@@ -395,12 +396,15 @@ static void alloc_init_p4d(pgd_t *pgdp, unsigned long addr, unsigned long end,
 			pgdval |= PGD_TABLE_PXN;
 		BUG_ON(!pgtable_alloc);
 		p4d_phys = pgtable_alloc(P4D_SHIFT);
+		p4dp = p4d_set_fixmap(p4d_phys);
+		init_clear_pgtable(p4dp);
+		p4dp += p4d_index(addr);
 		__pgd_populate(pgdp, p4d_phys, pgdval);
-		pgd = READ_ONCE(*pgdp);
+	} else {
+		BUG_ON(pgd_bad(pgd));
+		p4dp = p4d_set_fixmap_offset(pgdp, addr);
 	}
-	BUG_ON(pgd_bad(pgd));
 
-	p4dp = p4d_set_fixmap_offset(pgdp, addr);
 	do {
 		p4d_t old_p4d = READ_ONCE(*p4dp);
 
@@ -467,11 +471,10 @@ void create_kpti_ng_temp_pgd(pgd_t *pgdir, phys_addr_t phys, unsigned long virt,
 
 static phys_addr_t __pgd_pgtable_alloc(int shift)
 {
-	void *ptr = (void *)__get_free_page(GFP_PGTABLE_KERNEL);
-	BUG_ON(!ptr);
+	/* Page is zeroed by init_clear_pgtable() so don't duplicate effort. */
+	void *ptr = (void *)__get_free_page(GFP_PGTABLE_KERNEL & ~__GFP_ZERO);
 
-	/* Ensure the zeroed page is visible to the page table walker */
-	dsb(ishst);
+	BUG_ON(!ptr);
 	return __pa(ptr);
 }
 
-- 
2.25.1