Add support for establishing large folio mappings in finish_fault(), in
preparation for supporting multi-size THP allocation of anonymous shmem
pages in the following patches.

Keep the same behavior (per-page fault) for non-anon shmem to avoid
inflating the RSS unintentionally; what size of mapping to build when
extending mTHP to control non-anon shmem can be discussed in the future.

Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com>
---
mm/memory.c | 57 +++++++++++++++++++++++++++++++++++++++++++----------
1 file changed, 47 insertions(+), 10 deletions(-)
diff --git a/mm/memory.c b/mm/memory.c
index eef4e482c0c2..1f7be4c6aac4 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4831,9 +4831,12 @@ vm_fault_t finish_fault(struct vm_fault *vmf)
 {
 	struct vm_area_struct *vma = vmf->vma;
 	struct page *page;
+	struct folio *folio;
 	vm_fault_t ret;
 	bool is_cow = (vmf->flags & FAULT_FLAG_WRITE) &&
 		      !(vma->vm_flags & VM_SHARED);
+	int type, nr_pages, i;
+	unsigned long addr = vmf->address;
 
 	/* Did we COW the page? */
 	if (is_cow)
@@ -4864,24 +4867,58 @@ vm_fault_t finish_fault(struct vm_fault *vmf)
 			return VM_FAULT_OOM;
 	}
 
+	folio = page_folio(page);
+	nr_pages = folio_nr_pages(folio);
+
+	/*
+	 * Use per-page faults to maintain the uffd semantics, and the same
+	 * approach also applies to non-anonymous-shmem faults to avoid
+	 * inflating the RSS of the process.
+	 */
+	if (!vma_is_anon_shmem(vma) || unlikely(userfaultfd_armed(vma))) {
+		nr_pages = 1;
+	} else if (nr_pages > 1) {
+		pgoff_t idx = folio_page_idx(folio, page);
+		/* The page offset of vmf->address within the VMA. */
+		pgoff_t vma_off = vmf->pgoff - vmf->vma->vm_pgoff;
+
+		/*
+		 * Fall back to per-page faults in case the folio size in the
+		 * page cache is beyond the VMA limits.
+		 */
+		if (unlikely(vma_off < idx ||
+			     vma_off + (nr_pages - idx) > vma_pages(vma))) {
+			nr_pages = 1;
+		} else {
+			/* Now we can set mappings for the whole large folio. */
+			addr = vmf->address - idx * PAGE_SIZE;
+			page = &folio->page;
+		}
+	}
+
 	vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
-				       vmf->address, &vmf->ptl);
+				       addr, &vmf->ptl);
 	if (!vmf->pte)
 		return VM_FAULT_NOPAGE;
 
 	/* Re-check under ptl */
-	if (likely(!vmf_pte_changed(vmf))) {
-		struct folio *folio = page_folio(page);
-		int type = is_cow ? MM_ANONPAGES : mm_counter_file(folio);
-
-		set_pte_range(vmf, folio, page, 1, vmf->address);
-		add_mm_counter(vma->vm_mm, type, 1);
-		ret = 0;
-	} else {
-		update_mmu_tlb(vma, vmf->address, vmf->pte);
+	if (nr_pages == 1 && unlikely(vmf_pte_changed(vmf))) {
+		update_mmu_tlb(vma, addr, vmf->pte);
+		ret = VM_FAULT_NOPAGE;
+		goto unlock;
+	} else if (nr_pages > 1 && !pte_range_none(vmf->pte, nr_pages)) {
+		update_mmu_tlb_range(vma, addr, vmf->pte, nr_pages);
 		ret = VM_FAULT_NOPAGE;
+		goto unlock;
 	}
 
+	folio_ref_add(folio, nr_pages - 1);
+	set_pte_range(vmf, folio, page, nr_pages, addr);
+	type = is_cow ? MM_ANONPAGES : mm_counter_file(folio);
+	add_mm_counter(vma->vm_mm, type, nr_pages);
+	ret = 0;
+
+unlock:
 	pte_unmap_unlock(vmf->pte, vmf->ptl);
 	return ret;
 }
--
2.39.3
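
To make the VMA bounds check in the patch concrete, here is a minimal
userspace sketch of the same predicate (illustrative only: folio_fits_vma()
and the example numbers are made up for this note, not kernel code). A
16-page folio whose faulting page sits near the start of an 8-page VMA
cannot be mapped wholesale and must fall back to a per-page fault:

#include <stdbool.h>
#include <stdio.h>

/*
 * Same predicate as the patch: the folio is only mapped in one go when
 * every page of it falls inside the VMA, i.e. the fault offset within
 * the VMA is at least the page's index within the folio, and the
 * remaining folio pages do not run past the end of the VMA.
 */
static bool folio_fits_vma(unsigned long vma_off, unsigned long idx,
			   unsigned long nr_pages, unsigned long vma_npages)
{
	return !(vma_off < idx || vma_off + (nr_pages - idx) > vma_npages);
}

int main(void)
{
	/* 16-page (64K) folio, fault at folio page 2, VMA spans 8 pages. */
	if (folio_fits_vma(2, 2, 16, 8))
		printf("map the whole folio at once\n");
	else
		printf("fall back to per-page fault\n");	/* taken here */
	return 0;
}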
Hi Baolin,
kernel test robot noticed the following build warnings:
[auto build test WARNING on akpm-mm/mm-everything]
[also build test WARNING on linus/master v6.10-rc2 next-20240604]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Baolin-Wang/mm-memory-extend-finish_fault-to-support-large-folio/20240604-182028
base: https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything
patch link: https://lore.kernel.org/r/bee11bfd9157e60aaea6db033a4af7c13c982c82.1717495894.git.baolin.wang%40linux.alibaba.com
patch subject: [PATCH v4 1/6] mm: memory: extend finish_fault() to support large folio
config: openrisc-allnoconfig (https://download.01.org/0day-ci/archive/20240604/202406042210.20LfqwNd-lkp@intel.com/config)
compiler: or1k-linux-gcc (GCC) 13.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240604/202406042210.20LfqwNd-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202406042210.20LfqwNd-lkp@intel.com/
All warnings (new ones prefixed by >>):
mm/memory.c: In function 'finish_fault':
>> mm/memory.c:4838:29: warning: unused variable 'i' [-Wunused-variable]
    4838 |         int type, nr_pages, i;
         |                             ^
vim +/i +4838 mm/memory.c
  4814	
  4815	/**
  4816	 * finish_fault - finish page fault once we have prepared the page to fault
  4817	 *
  4818	 * @vmf: structure describing the fault
  4819	 *
  4820	 * This function handles all that is needed to finish a page fault once the
  4821	 * page to fault in is prepared. It handles locking of PTEs, inserts PTE for
  4822	 * given page, adds reverse page mapping, handles memcg charges and LRU
  4823	 * addition.
  4824	 *
  4825	 * The function expects the page to be locked and on success it consumes a
  4826	 * reference of a page being mapped (for the PTE which maps it).
  4827	 *
  4828	 * Return: %0 on success, %VM_FAULT_ code in case of error.
  4829	 */
  4830	vm_fault_t finish_fault(struct vm_fault *vmf)
  4831	{
  4832		struct vm_area_struct *vma = vmf->vma;
  4833		struct page *page;
  4834		struct folio *folio;
  4835		vm_fault_t ret;
  4836		bool is_cow = (vmf->flags & FAULT_FLAG_WRITE) &&
  4837			      !(vma->vm_flags & VM_SHARED);
> 4838		int type, nr_pages, i;
  4839		unsigned long addr = vmf->address;
  4840	
  4841		/* Did we COW the page? */
  4842		if (is_cow)
  4843			page = vmf->cow_page;
  4844		else
  4845			page = vmf->page;
  4846	
  4847		/*
  4848		 * check even for read faults because we might have lost our CoWed
  4849		 * page
  4850		 */
  4851		if (!(vma->vm_flags & VM_SHARED)) {
  4852			ret = check_stable_address_space(vma->vm_mm);
  4853			if (ret)
  4854				return ret;
  4855		}
  4856	
  4857		if (pmd_none(*vmf->pmd)) {
  4858			if (PageTransCompound(page)) {
  4859				ret = do_set_pmd(vmf, page);
  4860				if (ret != VM_FAULT_FALLBACK)
  4861					return ret;
  4862			}
  4863	
  4864			if (vmf->prealloc_pte)
  4865				pmd_install(vma->vm_mm, vmf->pmd, &vmf->prealloc_pte);
  4866			else if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd)))
  4867				return VM_FAULT_OOM;
  4868		}
  4869	
  4870		folio = page_folio(page);
  4871		nr_pages = folio_nr_pages(folio);
  4872	
  4873		/*
  4874		 * Use per-page faults to maintain the uffd semantics, and the same
  4875		 * approach also applies to non-anonymous-shmem faults to avoid
  4876		 * inflating the RSS of the process.
  4877		 */
  4878		if (!vma_is_anon_shmem(vma) || unlikely(userfaultfd_armed(vma))) {
  4879			nr_pages = 1;
  4880		} else if (nr_pages > 1) {
  4881			pgoff_t idx = folio_page_idx(folio, page);
  4882			/* The page offset of vmf->address within the VMA. */
  4883			pgoff_t vma_off = vmf->pgoff - vmf->vma->vm_pgoff;
  4884	
  4885			/*
  4886			 * Fall back to per-page faults in case the folio size in the
  4887			 * page cache is beyond the VMA limits.
  4888			 */
  4889			if (unlikely(vma_off < idx ||
  4890				     vma_off + (nr_pages - idx) > vma_pages(vma))) {
  4891				nr_pages = 1;
  4892			} else {
  4893				/* Now we can set mappings for the whole large folio. */
  4894				addr = vmf->address - idx * PAGE_SIZE;
  4895				page = &folio->page;
  4896			}
  4897		}
  4898	
  4899		vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
  4900					       addr, &vmf->ptl);
  4901		if (!vmf->pte)
  4902			return VM_FAULT_NOPAGE;
  4903	
  4904		/* Re-check under ptl */
  4905		if (nr_pages == 1 && unlikely(vmf_pte_changed(vmf))) {
  4906			update_mmu_tlb(vma, addr, vmf->pte);
  4907			ret = VM_FAULT_NOPAGE;
  4908			goto unlock;
  4909		} else if (nr_pages > 1 && !pte_range_none(vmf->pte, nr_pages)) {
  4910			update_mmu_tlb_range(vma, addr, vmf->pte, nr_pages);
  4911			ret = VM_FAULT_NOPAGE;
  4912			goto unlock;
  4913		}
  4914	
  4915		folio_ref_add(folio, nr_pages - 1);
  4916		set_pte_range(vmf, folio, page, nr_pages, addr);
  4917		type = is_cow ? MM_ANONPAGES : mm_counter_file(folio);
  4918		add_mm_counter(vma->vm_mm, type, nr_pages);
  4919		ret = 0;
  4920	
  4921	unlock:
  4922		pte_unmap_unlock(vmf->pte, vmf->ptl);
  4923		return ret;
  4924	}
  4925	
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
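
The flagged diagnostic is the generic -Wunused-variable warning, which any
gcc -Wall build emits; a minimal reproduction outside the kernel (demo.c and
the function name are made up for illustration, compiled with
`gcc -Wall -c demo.c`):

/* demo.c */
int finish_fault_demo(void)
{
	int type = 0, nr_pages = 1, i;	/* 'i' is declared but never used */

	return type + nr_pages;		/* gcc warns: unused variable 'i' */
}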
On 2024/6/4 22:58, kernel test robot wrote:
> kernel test robot noticed the following build warnings:
>
> All warnings (new ones prefixed by >>):
>
>    mm/memory.c: In function 'finish_fault':
> >> mm/memory.c:4838:29: warning: unused variable 'i' [-Wunused-variable]
>     4838 |         int type, nr_pages, i;
>          |                             ^

Oops, thanks for reporting. I forgot to remove the variable 'i' when
changing to use update_mmu_tlb_range(). I see Andrew has already helped
to remove this unused variable 'i'. Thanks Andrew.
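
For reference, the fixup Andrew folded in presumably amounts to dropping the
unused declaration; a sketch of the likely hunk (the context is inferred
from the warning above, not taken from the mm tree):

--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4835,7 +4835,7 @@ vm_fault_t finish_fault(struct vm_fault *vmf)
 	vm_fault_t ret;
 	bool is_cow = (vmf->flags & FAULT_FLAG_WRITE) &&
 		      !(vma->vm_flags & VM_SHARED);
-	int type, nr_pages, i;
+	int type, nr_pages;
 	unsigned long addr = vmf->address;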