During user-space hot patching, the affected executable file segments,
which are privately mapped, are modified in place. If the modification
hits a THP mapping, the PMD entry is cleared first and the write is then
handled as a base-page COW fault.
Currently, khugepaged may attempt to collapse scattered file pages back
into a THP. However, because the COW produced individual base pages, the
modified executable segment of a hot-patched process can never be mapped
as a THP again, so it cannot benefit from khugepaged's work. An
executable segment mapped at base-page granularity may reduce the iTLB
hit rate compared with the original THP mapping.
For user-space hot patching, we introduce THP COW support for executable
mappings: if a COW fault on an executable mapping hits a THP, allocate an
anonymous THP and map it so that the PMD-level mapping is preserved.
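As a rough illustration of the access pattern that reaches this path,
here is a hypothetical user-space sketch (not part of the patch): the
binary path and patch offset are made up, it assumes the system allows a
PROT_WRITE|PROT_EXEC mprotect, and whether the text is actually
PMD-mapped beforehand depends on khugepaged and
CONFIG_READ_ONLY_THP_FOR_FS. The write below is what raises the
wp_huge_pmd() COW fault; before this series it split the PMD and COW'ed
base pages, with it the COW can stay PMD-mapped via an anonymous THP.
/* Hypothetical "hot patcher" sketch: patch a privately mapped text segment. */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>
int main(int argc, char **argv)
{
	const char *path = argc > 1 ? argv[1] : "/usr/bin/true";
	long page = sysconf(_SC_PAGESIZE);
	struct stat st;
	uint8_t *text;
	int fd;
	fd = open(path, O_RDONLY);
	if (fd < 0 || fstat(fd, &st) < 0) {
		perror("open/fstat");
		return 1;
	}
	if (st.st_size < page)
		return 1;	/* too small to bother patching */
	/* Private executable file mapping, like a loaded .text segment. */
	text = mmap(NULL, st.st_size, PROT_READ | PROT_EXEC,
		    MAP_PRIVATE, fd, 0);
	if (text == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	/* Make one page writable, "patch" a byte, restore permissions. */
	if (mprotect(text, page, PROT_READ | PROT_WRITE | PROT_EXEC)) {
		perror("mprotect");
		return 1;
	}
	text[0x40] ^= 0x1;	/* this write triggers the COW fault */
	mprotect(text, page, PROT_READ | PROT_EXEC);
	/* AnonHugePages/FilePmdMapped in /proc/self/smaps show the result. */
	printf("patched %s mapped at %p\n", path, (void *)text);
	munmap(text, st.st_size);
	close(fd);
	return 0;
}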
Signed-off-by: Zhang Qilong <zhangqilong3@huawei.com>
---
include/linux/huge_mm.h |  1 +
mm/huge_memory.c        | 87 +++++++++++++++++++++++++++++++++++++++++
mm/memory.c             | 12 ++++++
3 files changed, 100 insertions(+)
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index f381339842fa..bae856a53e1f 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -23,10 +23,11 @@ static inline void huge_pud_set_accessed(struct vm_fault *vmf, pud_t orig_pud)
{
}
#endif
vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf);
+vm_fault_t do_huge_pmd_exec_cow(struct vm_fault *vmf);
bool madvise_free_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
pmd_t *pmd, unsigned long addr, unsigned long next);
int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, pmd_t *pmd,
unsigned long addr);
int zap_huge_pud(struct mmu_gather *tlb, struct vm_area_struct *vma, pud_t *pud,
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index aae283b00857..35ecd62f64c4 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2137,10 +2137,97 @@ vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf)
fallback:
__split_huge_pmd(vma, vmf->pmd, vmf->address, false);
return VM_FAULT_FALLBACK;
}
+vm_fault_t do_huge_pmd_exec_cow(struct vm_fault *vmf)
+{
+ vm_fault_t ret;
+ struct vm_area_struct *vma = vmf->vma;
+ struct folio *folio, *src_folio;
+ pmd_t orig_pmd = vmf->orig_pmd;
+ unsigned long haddr = vmf->address & PMD_MASK;
+ struct mmu_notifier_range range;
+ pgtable_t pgtable = NULL;
+
+ ret = vmf_anon_prepare(vmf);
+ if (ret)
+ return ret;
+
+ folio = vma_alloc_anon_folio_pmd(vma, haddr);
+ if (!folio)
+ return VM_FAULT_FALLBACK;
+
+ if (!arch_needs_pgtable_deposit()) {
+ pgtable = pte_alloc_one(vma->vm_mm);
+ if (!pgtable) {
+ ret = VM_FAULT_OOM;
+ goto release;
+ }
+ }
+
+ mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma->vm_mm,
+ haddr, haddr + HPAGE_PMD_SIZE);
+ mmu_notifier_invalidate_range_start(&range);
+ vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
+ if (unlikely(!pmd_same(pmdp_get(vmf->pmd), orig_pmd)))
+ goto unlock_ptl;
+
+ ret = check_stable_address_space(vma->vm_mm);
+ if (ret)
+ goto unlock_ptl;
+
+ src_folio = pmd_folio(orig_pmd);
+ if (!folio_trylock(src_folio)) {
+ ret = VM_FAULT_FALLBACK;
+ goto unlock_ptl;
+ }
+
+ /*
+ * If the uptodate bit is not set, the source folio is stale or
+ * invalid and its contents cannot be trusted, so skip the copy
+ * and fall back.
+ */
+ if (!folio_test_uptodate(src_folio)) {
+ ret = VM_FAULT_FALLBACK;
+ goto unlock_folio;
+ }
+
+ if (copy_user_large_folio(folio, src_folio, haddr, vma)) {
+ ret = VM_FAULT_HWPOISON;
+ goto unlock_folio;
+ }
+ folio_mark_uptodate(folio);
+
+ folio_unlock(src_folio);
+ pmdp_huge_clear_flush(vma, haddr, vmf->pmd);
+ folio_remove_rmap_pmd(src_folio, folio_page(src_folio, 0), vma);
+ add_mm_counter(vma->vm_mm, mm_counter_file(src_folio), -HPAGE_PMD_NR);
+ folio_put(src_folio);
+
+ map_anon_folio_pmd_pf(folio, vmf->pmd, vma, haddr);
+ if (pgtable)
+ pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, pgtable);
+ mm_inc_nr_ptes(vma->vm_mm);
+ spin_unlock(vmf->ptl);
+ mmu_notifier_invalidate_range_end(&range);
+
+ return ret;
+
+unlock_folio:
+ folio_unlock(src_folio);
+unlock_ptl:
+ spin_unlock(vmf->ptl);
+ mmu_notifier_invalidate_range_end(&range);
+release:
+ if (pgtable)
+ pte_free(vma->vm_mm, pgtable);
+ folio_put(folio);
+
+ return ret;
+}
+
static inline bool can_change_pmd_writable(struct vm_area_struct *vma,
unsigned long addr, pmd_t pmd)
{
struct page *page;
diff --git a/mm/memory.c b/mm/memory.c
index 1c66ee83a7ab..e282adec9165 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -6133,10 +6133,22 @@ static inline vm_fault_t wp_huge_pmd(struct vm_fault *vmf)
if (!(ret & VM_FAULT_FALLBACK))
return ret;
}
}
+
+ if (is_exec_mapping(vma->vm_flags) &&
+ is_cow_mapping(vma->vm_flags)) {
+ /* Skip special and shmem */
+ if (vma_is_special_huge(vma) || vma_is_shmem(vma))
+ goto split;
+
+ ret = do_huge_pmd_exec_cow(vmf);
+ if (!(ret & VM_FAULT_FALLBACK))
+ return ret;
+ }
+
split:
/* COW or write-notify handled on pte level: split pmd. */
__split_huge_pmd(vma, vmf->pmd, vmf->address, false);
return VM_FAULT_FALLBACK;
--
2.43.0
Hi Zhang,
kernel test robot noticed the following build errors:
[auto build test ERROR on next-20251215]
url: https://github.com/intel-lab-lkp/linux/commits/Zhang-Qilong/mm-huge_memory-Implementation-of-THP-COW-for-executable-file-mmap/20251215-204035
base: next-20251215
patch link: https://lore.kernel.org/r/20251215123407.380813-2-zhangqilong3%40huawei.com
patch subject: [PATCH next 1/2] mm/huge_memory: Implementation of THP COW for executable file mmap
config: x86_64-kexec (https://download.01.org/0day-ci/archive/20251216/202512161154.iIbhbvhS-lkp@intel.com/config)
compiler: clang version 20.1.8 (https://github.com/llvm/llvm-project 87f0227cb60147a26a1eeb4fb06e3b505e9c7261)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20251216/202512161154.iIbhbvhS-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202512161154.iIbhbvhS-lkp@intel.com/
All errors (new ones prefixed by >>):
>> mm/memory.c:6134:7: error: call to undeclared function 'vma_is_special_huge'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration]
6134 | if (vma_is_special_huge(vma) || vma_is_shmem(vma))
| ^
1 error generated.
vim +/vma_is_special_huge +6134 mm/memory.c
6104
6105 /* `inline' is required to avoid gcc 4.1.2 build error */
6106 static inline vm_fault_t wp_huge_pmd(struct vm_fault *vmf)
6107 {
6108 struct vm_area_struct *vma = vmf->vma;
6109 const bool unshare = vmf->flags & FAULT_FLAG_UNSHARE;
6110 vm_fault_t ret;
6111
6112 if (vma_is_anonymous(vma)) {
6113 if (likely(!unshare) &&
6114 userfaultfd_huge_pmd_wp(vma, vmf->orig_pmd)) {
6115 if (userfaultfd_wp_async(vmf->vma))
6116 goto split;
6117 return handle_userfault(vmf, VM_UFFD_WP);
6118 }
6119 return do_huge_pmd_wp_page(vmf);
6120 }
6121
6122 if (vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) {
6123 if (vma->vm_ops->huge_fault) {
6124 ret = vma->vm_ops->huge_fault(vmf, PMD_ORDER);
6125 if (!(ret & VM_FAULT_FALLBACK))
6126 return ret;
6127 }
6128 }
6129
6130
6131 if (is_exec_mapping(vma->vm_flags) &&
6132 is_cow_mapping(vma->vm_flags)) {
6133 /* Skip special and shmem */
> 6134 if (vma_is_special_huge(vma) || vma_is_shmem(vma))
6135 goto split;
6136
6137 ret = do_huge_pmd_exec_cow(vmf);
6138 if (!(ret & VM_FAULT_FALLBACK))
6139 return ret;
6140 }
6141
6142 split:
6143 /* COW or write-notify handled on pte level: split pmd. */
6144 __split_huge_pmd(vma, vmf->pmd, vmf->address, false);
6145
6146 return VM_FAULT_FALLBACK;
6147 }
6148
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
Hi Zhang,
kernel test robot noticed the following build errors:
[auto build test ERROR on next-20251215]
url: https://github.com/intel-lab-lkp/linux/commits/Zhang-Qilong/mm-huge_memory-Implementation-of-THP-COW-for-executable-file-mmap/20251215-204035
base: next-20251215
patch link: https://lore.kernel.org/r/20251215123407.380813-2-zhangqilong3%40huawei.com
patch subject: [PATCH next 1/2] mm/huge_memory: Implementation of THP COW for executable file mmap
config: x86_64-allnoconfig (https://download.01.org/0day-ci/archive/20251216/202512161240.Ej5BIvIk-lkp@intel.com/config)
compiler: clang version 20.1.8 (https://github.com/llvm/llvm-project 87f0227cb60147a26a1eeb4fb06e3b505e9c7261)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20251216/202512161240.Ej5BIvIk-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202512161240.Ej5BIvIk-lkp@intel.com/
All errors (new ones prefixed by >>):
>> mm/memory.c:6134:7: error: call to undeclared function 'vma_is_special_huge'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration]
6134 | if (vma_is_special_huge(vma) || vma_is_shmem(vma))
| ^
1 error generated.
vim +/vma_is_special_huge +6134 mm/memory.c
6104
6105 /* `inline' is required to avoid gcc 4.1.2 build error */
6106 static inline vm_fault_t wp_huge_pmd(struct vm_fault *vmf)
6107 {
6108 struct vm_area_struct *vma = vmf->vma;
6109 const bool unshare = vmf->flags & FAULT_FLAG_UNSHARE;
6110 vm_fault_t ret;
6111
6112 if (vma_is_anonymous(vma)) {
6113 if (likely(!unshare) &&
6114 userfaultfd_huge_pmd_wp(vma, vmf->orig_pmd)) {
6115 if (userfaultfd_wp_async(vmf->vma))
6116 goto split;
6117 return handle_userfault(vmf, VM_UFFD_WP);
6118 }
6119 return do_huge_pmd_wp_page(vmf);
6120 }
6121
6122 if (vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) {
6123 if (vma->vm_ops->huge_fault) {
6124 ret = vma->vm_ops->huge_fault(vmf, PMD_ORDER);
6125 if (!(ret & VM_FAULT_FALLBACK))
6126 return ret;
6127 }
6128 }
6129
6130
6131 if (is_exec_mapping(vma->vm_flags) &&
6132 is_cow_mapping(vma->vm_flags)) {
6133 /* Skip special and shmem */
> 6134 if (vma_is_special_huge(vma) || vma_is_shmem(vma))
6135 goto split;
6136
6137 ret = do_huge_pmd_exec_cow(vmf);
6138 if (!(ret & VM_FAULT_FALLBACK))
6139 return ret;
6140 }
6141
6142 split:
6143 /* COW or write-notify handled on pte level: split pmd. */
6144 __split_huge_pmd(vma, vmf->pmd, vmf->address, false);
6145
6146 return VM_FAULT_FALLBACK;
6147 }
6148
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
Hi Zhang,
kernel test robot noticed the following build errors:
[auto build test ERROR on next-20251215]
url: https://github.com/intel-lab-lkp/linux/commits/Zhang-Qilong/mm-huge_memory-Implementation-of-THP-COW-for-executable-file-mmap/20251215-204035
base: next-20251215
patch link: https://lore.kernel.org/r/20251215123407.380813-2-zhangqilong3%40huawei.com
patch subject: [PATCH next 1/2] mm/huge_memory: Implementation of THP COW for executable file mmap
config: nios2-allnoconfig (https://download.01.org/0day-ci/archive/20251216/202512161243.Dm4FDP0T-lkp@intel.com/config)
compiler: nios2-linux-gcc (GCC) 11.5.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20251216/202512161243.Dm4FDP0T-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202512161243.Dm4FDP0T-lkp@intel.com/
All errors (new ones prefixed by >>):
mm/memory.c: In function 'wp_huge_pmd':
>> mm/memory.c:6134:21: error: implicit declaration of function 'vma_is_special_huge'; did you mean 'vma_is_special_mapping'? [-Werror=implicit-function-declaration]
6134 | if (vma_is_special_huge(vma) || vma_is_shmem(vma))
| ^~~~~~~~~~~~~~~~~~~
| vma_is_special_mapping
cc1: some warnings being treated as errors
vim +6134 mm/memory.c
6104
6105 /* `inline' is required to avoid gcc 4.1.2 build error */
6106 static inline vm_fault_t wp_huge_pmd(struct vm_fault *vmf)
6107 {
6108 struct vm_area_struct *vma = vmf->vma;
6109 const bool unshare = vmf->flags & FAULT_FLAG_UNSHARE;
6110 vm_fault_t ret;
6111
6112 if (vma_is_anonymous(vma)) {
6113 if (likely(!unshare) &&
6114 userfaultfd_huge_pmd_wp(vma, vmf->orig_pmd)) {
6115 if (userfaultfd_wp_async(vmf->vma))
6116 goto split;
6117 return handle_userfault(vmf, VM_UFFD_WP);
6118 }
6119 return do_huge_pmd_wp_page(vmf);
6120 }
6121
6122 if (vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) {
6123 if (vma->vm_ops->huge_fault) {
6124 ret = vma->vm_ops->huge_fault(vmf, PMD_ORDER);
6125 if (!(ret & VM_FAULT_FALLBACK))
6126 return ret;
6127 }
6128 }
6129
6130
6131 if (is_exec_mapping(vma->vm_flags) &&
6132 is_cow_mapping(vma->vm_flags)) {
6133 /* Skip special and shmem */
> 6134 if (vma_is_special_huge(vma) || vma_is_shmem(vma))
6135 goto split;
6136
6137 ret = do_huge_pmd_exec_cow(vmf);
6138 if (!(ret & VM_FAULT_FALLBACK))
6139 return ret;
6140 }
6141
6142 split:
6143 /* COW or write-notify handled on pte level: split pmd. */
6144 __split_huge_pmd(vma, vmf->pmd, vmf->address, false);
6145
6146 return VM_FAULT_FALLBACK;
6147 }
6148
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki