The sparc implementation of hugetlb_free_pgd_range() is identical
to free_pgd_range() with the exception of checking for and skipping
possible leaf entries at the PUD and PMD levels. These checks are
unnecessary because any huge pages have been freed and their PTEs
cleared by the time page tables needed to map them are freed. While
some huge page sizes do populate the page table with multiple PTEs,
they are correctly cleared by huge_ptep_get_and_clear(). To verify
this, libhugetlbfs tests were run for 64K, 8M, and 256M page sizes
with an instrumented kernel on a qemu guest modified to support the
256M page size. The same tests were used to verify no regressions
after applying this patch and were also run on x86 for both 2M and
1G page sizes.
Signed-off-by: Anthony Yznaga <anthony.yznaga@oracle.com>
---
arch/sparc/include/asm/hugetlb.h | 5 --
arch/sparc/mm/hugetlbpage.c | 119 -------------------------------
2 files changed, 124 deletions(-)
diff --git a/arch/sparc/include/asm/hugetlb.h b/arch/sparc/include/asm/hugetlb.h
index e7a9cdd498dc..d3bc16fbcbbd 100644
--- a/arch/sparc/include/asm/hugetlb.h
+++ b/arch/sparc/include/asm/hugetlb.h
@@ -50,11 +50,6 @@ static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma,
return changed;
}
-#define __HAVE_ARCH_HUGETLB_FREE_PGD_RANGE
-void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr,
- unsigned long end, unsigned long floor,
- unsigned long ceiling);
-
#include <asm-generic/hugetlb.h>
#endif /* _ASM_SPARC64_HUGETLB_H */
diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c
index 2048b5c42ca8..4652e868663b 100644
--- a/arch/sparc/mm/hugetlbpage.c
+++ b/arch/sparc/mm/hugetlbpage.c
@@ -315,122 +315,3 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
return entry;
}
-
-static void hugetlb_free_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
- unsigned long addr)
-{
- pgtable_t token = pmd_pgtable(*pmd);
-
- pmd_clear(pmd);
- pte_free_tlb(tlb, token, addr);
- mm_dec_nr_ptes(tlb->mm);
-}
-
-static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
- unsigned long addr, unsigned long end,
- unsigned long floor, unsigned long ceiling)
-{
- pmd_t *pmd;
- unsigned long next;
- unsigned long start;
-
- start = addr;
- pmd = pmd_offset(pud, addr);
- do {
- next = pmd_addr_end(addr, end);
- if (pmd_none(*pmd))
- continue;
- if (is_hugetlb_pmd(*pmd))
- pmd_clear(pmd);
- else
- hugetlb_free_pte_range(tlb, pmd, addr);
- } while (pmd++, addr = next, addr != end);
-
- start &= PUD_MASK;
- if (start < floor)
- return;
- if (ceiling) {
- ceiling &= PUD_MASK;
- if (!ceiling)
- return;
- }
- if (end - 1 > ceiling - 1)
- return;
-
- pmd = pmd_offset(pud, start);
- pud_clear(pud);
- pmd_free_tlb(tlb, pmd, start);
- mm_dec_nr_pmds(tlb->mm);
-}
-
-static void hugetlb_free_pud_range(struct mmu_gather *tlb, p4d_t *p4d,
- unsigned long addr, unsigned long end,
- unsigned long floor, unsigned long ceiling)
-{
- pud_t *pud;
- unsigned long next;
- unsigned long start;
-
- start = addr;
- pud = pud_offset(p4d, addr);
- do {
- next = pud_addr_end(addr, end);
- if (pud_none_or_clear_bad(pud))
- continue;
- if (is_hugetlb_pud(*pud))
- pud_clear(pud);
- else
- hugetlb_free_pmd_range(tlb, pud, addr, next, floor,
- ceiling);
- } while (pud++, addr = next, addr != end);
-
- start &= PGDIR_MASK;
- if (start < floor)
- return;
- if (ceiling) {
- ceiling &= PGDIR_MASK;
- if (!ceiling)
- return;
- }
- if (end - 1 > ceiling - 1)
- return;
-
- pud = pud_offset(p4d, start);
- p4d_clear(p4d);
- pud_free_tlb(tlb, pud, start);
- mm_dec_nr_puds(tlb->mm);
-}
-
-void hugetlb_free_pgd_range(struct mmu_gather *tlb,
- unsigned long addr, unsigned long end,
- unsigned long floor, unsigned long ceiling)
-{
- pgd_t *pgd;
- p4d_t *p4d;
- unsigned long next;
-
- addr &= PMD_MASK;
- if (addr < floor) {
- addr += PMD_SIZE;
- if (!addr)
- return;
- }
- if (ceiling) {
- ceiling &= PMD_MASK;
- if (!ceiling)
- return;
- }
- if (end - 1 > ceiling - 1)
- end -= PMD_SIZE;
- if (addr > end - 1)
- return;
-
- pgd = pgd_offset(tlb->mm, addr);
- p4d = p4d_offset(pgd, addr);
- do {
- next = p4d_addr_end(addr, end);
- if (p4d_none_or_clear_bad(p4d))
- continue;
- hugetlb_free_pud_range(tlb, p4d, addr, next, floor, ceiling);
- } while (p4d++, addr = next, addr != end);
-}
--
2.47.1
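
The commit message's key claim — that huge page sizes which populate multiple base PTEs are still fully cleared by huge_ptep_get_and_clear() before the page tables themselves are freed — can be pictured with a toy, userspace-runnable sketch. The constants, names, and the 64K size below are illustrative assumptions only; this is not the sparc64 implementation, just the "clear every backing PTE" pattern it relies on:

```c
/*
 * Toy sketch of clearing all base PTEs that back one huge page.
 * Illustrative only -- not the upstream sparc64 code.
 */
#include <stdio.h>
#include <stdint.h>

#define BASE_PAGE_SIZE	(8UL * 1024)	/* sparc64 base page: 8K */
#define HUGE_PAGE_SIZE	(64UL * 1024)	/* example huge page: 64K */
#define NPTES_PER_HUGE	(HUGE_PAGE_SIZE / BASE_PAGE_SIZE)

/* A toy PTE table region covering one 64K huge page with eight 8K entries. */
static uint64_t pte_level[NPTES_PER_HUGE];

/* Return the first entry and clear every base PTE backing the huge page. */
static uint64_t toy_huge_ptep_get_and_clear(uint64_t *ptep)
{
	uint64_t entry = ptep[0];
	unsigned long i;

	for (i = 0; i < NPTES_PER_HUGE; i++)
		ptep[i] = 0;
	return entry;
}

int main(void)
{
	unsigned long i;

	/* Pretend a 64K huge page is mapped: all eight base PTEs are valid. */
	for (i = 0; i < NPTES_PER_HUGE; i++)
		pte_level[i] = 0x8000000000000000UL | (i * BASE_PAGE_SIZE);

	toy_huge_ptep_get_and_clear(pte_level);

	/*
	 * Every entry is now zero, so a later free_pgd_range() walk over
	 * this region sees only empty PTEs and needs no huge-page checks.
	 */
	for (i = 0; i < NPTES_PER_HUGE; i++)
		printf("pte[%lu] = %#llx\n", i,
		       (unsigned long long)pte_level[i]);
	return 0;
}
```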
On 16.07.25 03:26, Anthony Yznaga wrote:
> The sparc implementation of hugetlb_free_pgd_range() is identical
> to free_pgd_range() with the exception of checking for and skipping
> possible leaf entries at the PUD and PMD levels.

And the pgd loop was optimized out, because probably not applicable.

> These checks are
> unnecessary because any huge pages have been freed and their PTEs
> cleared by the time page tables needed to map them are freed.

Do we know why that handling was added in the first place, and why it
no longer applies?

These is_hugetlb_pmd/is_hugetlb_pud are rather weird on the code path.

Looks like a very nice cleanup.

--
Cheers,

David / dhildenb
On 7/16/25 1:20 AM, David Hildenbrand wrote:
> On 16.07.25 03:26, Anthony Yznaga wrote:
>> The sparc implementation of hugetlb_free_pgd_range() is identical
>> to free_pgd_range() with the exception of checking for and skipping
>> possible leaf entries at the PUD and PMD levels.
>
> And the pgd loop was optimized out, because probably not applicable.
>
>> These checks are
>> unnecessary because any huge pages have been freed and their PTEs
>> cleared by the time page tables needed to map them are freed.
>
> Do we know why that handling was added in the first place, and why it no
> longer applies?

The PMD handling was added by 7bc3777ca19c ("sparc64: Trim page tables for
8M hugepages"). The only clue is that its commit message has the sentence,
"Also, when freeing page table for 8M hugepage backed region, make sure we
don't try to access non-existent PTE level." I'd guess that it was
something left over from development that snuck in. That patch changed the
8M hugetlb case from storing 1024 PTEs at the PTE level to storing a
single PMD entry.

>
> These is_hugetlb_pmd/is_hugetlb_pud are rather weird on the code path.
>
> Looks like a very nice cleanup.
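
For reference, the arithmetic behind that layout change, assuming sparc64's 8K base pages and 8-byte page-table entries, looks like this — a back-of-the-envelope sketch, not kernel code:

```c
/*
 * Why an 8M hugetlb page went from 1024 PTEs to a single PMD entry.
 * Assumes 8K base pages and 8-byte table entries (sparc64 defaults).
 */
#include <stdio.h>

int main(void)
{
	unsigned long page_shift = 13;				/* 8K base page */
	unsigned long ptrs_per_table = (1UL << page_shift) / 8;	/* 1024 entries */
	unsigned long pmd_size = ptrs_per_table << page_shift;	/* 8M */
	unsigned long huge_8m = 8UL << 20;

	/* Before 7bc3777ca19c an 8M hugetlb page filled an entire PTE table. */
	printf("base PTEs per 8M page: %lu\n", huge_8m >> page_shift);

	/*
	 * Afterwards, 8M == PMD_SIZE, so one PMD-level entry maps the whole
	 * page and no PTE table exists underneath to skip or free.
	 */
	printf("PMD_SIZE: %lu MiB\n", pmd_size >> 20);
	return 0;
}
```

Since an 8M huge page now occupies exactly one PMD-level entry, there is no PTE table beneath it for a free_pgd_range() walk to reach, which fits the commit's point that the removed leaf checks no longer guard anything.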