When splitting kernel leaf mappings, either via
split_kernel_leaf_mapping_locked() or linear_map_split_to_ptes(),
previously a leaf mapping was always split to the next size down, e.g.
pud -> contpmd -> pmd -> contpte -> pte. But for
linear_map_split_to_ptes() we can avoid the contpmd and contpte states
because we know we want to split all the way down to ptes.
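
For illustration (the calls are lifted from the hunks below), the new
to_cont flag encodes the two policies: the on-demand splitter moves a
mapping only one level down per call, so it asks for contiguous
children, while the boot-time walk that splits everything asks for
plain children straight away:

    /* On-demand split around one address: pud -> contpmd, pmd -> contpte. */
    ret = split_pud(pudp, pud, GFP_PGTABLE_KERNEL, true);

    /* Whole linear map split straight to ptes: no contpmd/contpte step. */
    ret = split_pud(pudp, pud, GFP_ATOMIC, false);
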
This avoids visiting all the ptes in a table if it was created by
splitting a pmd, which is noticeable on systems with a lot of memory.
Signed-off-by: Ryan Roberts <ryan.roberts@arm.com>
---
arch/arm64/mm/mmu.c | 26 ++++++++++++++++++--------
1 file changed, 18 insertions(+), 8 deletions(-)
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 6bd0b065bd97..8e45cd08bf3a 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -550,7 +550,7 @@ static void split_contpte(pte_t *ptep)
__set_pte(ptep, pte_mknoncont(__ptep_get(ptep)));
}
-static int split_pmd(pmd_t *pmdp, pmd_t pmd, gfp_t gfp)
+static int split_pmd(pmd_t *pmdp, pmd_t pmd, gfp_t gfp, bool to_cont)
{
pmdval_t tableprot = PMD_TYPE_TABLE | PMD_TABLE_UXN | PMD_TABLE_AF;
unsigned long pfn = pmd_pfn(pmd);
@@ -568,7 +568,9 @@ static int split_pmd(pmd_t *pmdp, pmd_t pmd, gfp_t gfp)
tableprot |= PMD_TABLE_PXN;
prot = __pgprot((pgprot_val(prot) & ~PTE_TYPE_MASK) | PTE_TYPE_PAGE);
- prot = __pgprot(pgprot_val(prot) | PTE_CONT);
+ prot = __pgprot(pgprot_val(prot) & ~PTE_CONT);
+ if (to_cont)
+ prot = __pgprot(pgprot_val(prot) | PTE_CONT);
for (i = 0; i < PTRS_PER_PTE; i++, ptep++, pfn++)
__set_pte(ptep, pfn_pte(pfn, prot));
@@ -592,7 +594,7 @@ static void split_contpmd(pmd_t *pmdp)
set_pmd(pmdp, pmd_mknoncont(pmdp_get(pmdp)));
}
-static int split_pud(pud_t *pudp, pud_t pud, gfp_t gfp)
+static int split_pud(pud_t *pudp, pud_t pud, gfp_t gfp, bool to_cont)
{
pudval_t tableprot = PUD_TYPE_TABLE | PUD_TABLE_UXN | PUD_TABLE_AF;
unsigned int step = PMD_SIZE >> PAGE_SHIFT;
@@ -611,7 +613,9 @@ static int split_pud(pud_t *pudp, pud_t pud, gfp_t gfp)
tableprot |= PUD_TABLE_PXN;
prot = __pgprot((pgprot_val(prot) & ~PMD_TYPE_MASK) | PMD_TYPE_SECT);
- prot = __pgprot(pgprot_val(prot) | PTE_CONT);
+ prot = __pgprot(pgprot_val(prot) & ~PTE_CONT);
+ if (to_cont)
+ prot = __pgprot(pgprot_val(prot) | PTE_CONT);
for (i = 0; i < PTRS_PER_PMD; i++, pmdp++, pfn += step)
set_pmd(pmdp, pfn_pmd(pfn, prot));
@@ -669,7 +673,7 @@ static int split_kernel_leaf_mapping_locked(unsigned long addr)
if (!pud_present(pud))
goto out;
if (pud_leaf(pud)) {
- ret = split_pud(pudp, pud, GFP_PGTABLE_KERNEL);
+ ret = split_pud(pudp, pud, GFP_PGTABLE_KERNEL, true);
if (ret)
goto out;
}
@@ -694,7 +698,7 @@ static int split_kernel_leaf_mapping_locked(unsigned long addr)
*/
if (ALIGN_DOWN(addr, PMD_SIZE) == addr)
goto out;
- ret = split_pmd(pmdp, pmd, GFP_PGTABLE_KERNEL);
+ ret = split_pmd(pmdp, pmd, GFP_PGTABLE_KERNEL, true);
if (ret)
goto out;
}
@@ -771,7 +775,7 @@ static int __init split_to_ptes_pud_entry(pud_t *pudp, unsigned long addr,
int ret = 0;
if (pud_leaf(pud))
- ret = split_pud(pudp, pud, GFP_ATOMIC);
+ ret = split_pud(pudp, pud, GFP_ATOMIC, false);
return ret;
}
@@ -786,7 +790,13 @@ static int __init split_to_ptes_pmd_entry(pmd_t *pmdp, unsigned long addr,
if (pmd_leaf(pmd)) {
if (pmd_cont(pmd))
split_contpmd(pmdp);
- ret = split_pmd(pmdp, pmd, GFP_ATOMIC);
+ ret = split_pmd(pmdp, pmd, GFP_ATOMIC, false);
+
+ /*
+ * We have split the pmd directly to ptes so there is no need to
+ * visit each pte to check if they are contpte.
+ */
+ walk->action = ACTION_CONTINUE;
}
return ret;
--
2.43.0
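
For context on the walk->action change in the last hunk, here is a
minimal, self-contained sketch (the callback name is illustrative; the
real one in this patch is split_to_ptes_pmd_entry()) of how a pagewalk
pmd_entry callback uses ACTION_CONTINUE to stop the pagewalk core from
descending into the table it has just created:

    /*
     * Sketch only: handle a leaf pmd in pmd_entry, then tell the
     * walker not to visit the ptes underneath it afterwards.
     */
    static int example_pmd_entry(pmd_t *pmdp, unsigned long addr,
                                 unsigned long next, struct mm_walk *walk)
    {
            pmd_t pmd = pmdp_get(pmdp);

            if (pmd_leaf(pmd)) {
                    /* ... split the leaf pmd directly to plain ptes ... */
                    walk->action = ACTION_CONTINUE; /* skip the pte level */
            }

            return 0;
    }

Without ACTION_CONTINUE, the walker would go on to invoke the pte_entry
callback for each of the PTRS_PER_PTE entries in the freshly created
table, which is exactly the overhead the commit message describes.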
On Fri, Aug 29, 2025 at 12:52:47PM +0100, Ryan Roberts wrote:
> When splitting kernel leaf mappings, either via
> split_kernel_leaf_mapping_locked() or linear_map_split_to_ptes(),
> previously a leaf mapping was always split to the next size down. e.g.
> pud -> contpmd -> pmd -> contpte -> pte. But for
> linear_map_split_to_ptes() we can avoid the contpmd and contpte states
> because we know we want to split all the way down to ptes.
>
> This avoids visiting all the ptes in a table if it was created by
> splitting a pmd, which is noticible on systems with a lot of memory.
>
> Signed-off-by: Ryan Roberts <ryan.roberts@arm.com>

Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
On 8/29/25 4:52 AM, Ryan Roberts wrote:
> When splitting kernel leaf mappings, either via
> split_kernel_leaf_mapping_locked() or linear_map_split_to_ptes(),
> previously a leaf mapping was always split to the next size down. e.g.
> pud -> contpmd -> pmd -> contpte -> pte. But for
> linear_map_split_to_ptes() we can avoid the contpmd and contpte states
> because we know we want to split all the way down to ptes.
>
> This avoids visiting all the ptes in a table if it was created by
> splitting a pmd, which is noticible on systems with a lot of memory.

Similar to patch #4, this patch should be squashed into patch #5 IMHO.

Thanks,
Yang

> [...]
On 29/08/2025 23:27, Yang Shi wrote:
> On 8/29/25 4:52 AM, Ryan Roberts wrote:
>> [...]
>
> Similar to patch #4, this patch should be squashed into patch #5 IMHO.

That's fine by me. I was just trying to make the review easier by splitting
non-essential stuff out. Let's squash for the next version.

> [...]
On 9/4/25 4:10 AM, Ryan Roberts wrote:
> On 29/08/2025 23:27, Yang Shi wrote:
>> Similar to patch #4, this patch should be squashed into patch #5 IMHO.
>
> That's fine by me. I was just trying to make the review easier by splitting
> non-essential stuff out. Let's squash for the next version.

Understood.

Thanks,
Yang