From: Dev Jain
To: akpm@linux-foundation.org
Cc: ryan.roberts@arm.com, david@redhat.com, willy@infradead.org,
    linux-mm@kvack.org, linux-kernel@vger.kernel.org,
    catalin.marinas@arm.com, will@kernel.org, Liam.Howlett@oracle.com,
    lorenzo.stoakes@oracle.com, vbabka@suse.cz, jannh@google.com,
    anshuman.khandual@arm.com, peterx@redhat.com, joey.gouly@arm.com,
    ioworker0@gmail.com, baohua@kernel.org, kevin.brodsky@arm.com,
    quic_zhenhuah@quicinc.com, christophe.leroy@csgroup.eu,
    yangyicong@hisilicon.com, linux-arm-kernel@lists.infradead.org,
    namit@vmware.com, hughd@google.com, yang@os.amperecomputing.com,
    ziy@nvidia.com, Dev Jain
Subject: [PATCH v2 1/7] mm: Refactor code in mprotect
Date: Tue, 29 Apr 2025 10:53:30 +0530
Message-Id: <20250429052336.18912-2-dev.jain@arm.com>
In-Reply-To: <20250429052336.18912-1-dev.jain@arm.com>
References: <20250429052336.18912-1-dev.jain@arm.com>

Reduce indentation in change_pte_range() by refactoring some of the
code into a new function. No functional change.
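To illustrate the shape of the refactor, here is a minimal userspace
sketch of the same pattern (toy types and invented names, not kernel
code): the skip conditions move out of the loop body into a boolean
predicate, so the loop itself keeps a single indentation level.

#include <stdbool.h>
#include <stdio.h>

struct item { int nid; bool dirty; };

/* Stand-in for prot_numa_skip(): all skip conditions in one predicate. */
static bool should_skip(const struct item *it, int target_node)
{
        if (it->dirty)                  /* e.g. the dirty file page case */
                return true;
        if (it->nid == target_node)     /* already on the target node */
                return true;
        return false;
}

int main(void)
{
        struct item items[3] = { { 0, false }, { 1, true }, { 2, false } };
        int target_node = 0, changed = 0;

        for (int i = 0; i < 3; i++) {
                if (should_skip(&items[i], target_node))
                        continue;
                changed++;      /* the actual protection-change work */
        }
        printf("changed %d of 3\n", changed);
        return 0;
}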
Signed-off-by: Dev Jain
---
 mm/mprotect.c | 116 +++++++++++++++++++++++++++++---------------------
 1 file changed, 68 insertions(+), 48 deletions(-)

diff --git a/mm/mprotect.c b/mm/mprotect.c
index 88608d0dc2c2..70f59aa8c2a8 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -83,6 +83,71 @@ bool can_change_pte_writable(struct vm_area_struct *vma, unsigned long addr,
 	return pte_dirty(pte);
 }
 
+
+
+static bool prot_numa_skip(struct vm_area_struct *vma, struct folio *folio,
+			   int target_node)
+{
+	bool toptier;
+	int nid;
+
+	/* Also skip shared copy-on-write pages */
+	if (is_cow_mapping(vma->vm_flags) &&
+	    (folio_maybe_dma_pinned(folio) ||
+	     folio_maybe_mapped_shared(folio)))
+		return true;
+
+	/*
+	 * While migration can move some dirty pages,
+	 * it cannot move them all from MIGRATE_ASYNC
+	 * context.
+	 */
+	if (folio_is_file_lru(folio) &&
+	    folio_test_dirty(folio))
+		return true;
+
+	/*
+	 * Don't mess with PTEs if page is already on the node
+	 * a single-threaded process is running on.
+	 */
+	nid = folio_nid(folio);
+	if (target_node == nid)
+		return true;
+	toptier = node_is_toptier(nid);
+
+	/*
+	 * Skip scanning top tier node if normal numa
+	 * balancing is disabled
+	 */
+	if (!(sysctl_numa_balancing_mode & NUMA_BALANCING_NORMAL) &&
+	    toptier)
+		return true;
+	return false;
+}
+
+static bool prot_numa_avoid_fault(struct vm_area_struct *vma,
+		unsigned long addr, pte_t oldpte, int target_node)
+{
+	struct folio *folio;
+	int ret;
+
+	/* Avoid TLB flush if possible */
+	if (pte_protnone(oldpte))
+		return true;
+
+	folio = vm_normal_folio(vma, addr, oldpte);
+	if (!folio || folio_is_zone_device(folio) ||
+	    folio_test_ksm(folio))
+		return true;
+	ret = prot_numa_skip(vma, folio, target_node);
+	if (ret)
+		return ret;
+	if (folio_use_access_time(folio))
+		folio_xchg_access_time(folio,
+			jiffies_to_msecs(jiffies));
+	return false;
+}
+
 static long change_pte_range(struct mmu_gather *tlb,
 		struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr,
 		unsigned long end, pgprot_t newprot, unsigned long cp_flags)
@@ -116,56 +181,11 @@ static long change_pte_range(struct mmu_gather *tlb,
 			 * Avoid trapping faults against the zero or KSM
 			 * pages. See similar comment in change_huge_pmd.
 			 */
-			if (prot_numa) {
-				struct folio *folio;
-				int nid;
-				bool toptier;
-
-				/* Avoid TLB flush if possible */
-				if (pte_protnone(oldpte))
-					continue;
-
-				folio = vm_normal_folio(vma, addr, oldpte);
-				if (!folio || folio_is_zone_device(folio) ||
-				    folio_test_ksm(folio))
-					continue;
-
-				/* Also skip shared copy-on-write pages */
-				if (is_cow_mapping(vma->vm_flags) &&
-				    (folio_maybe_dma_pinned(folio) ||
-				     folio_maybe_mapped_shared(folio)))
-					continue;
-
-				/*
-				 * While migration can move some dirty pages,
-				 * it cannot move them all from MIGRATE_ASYNC
-				 * context.
-				 */
-				if (folio_is_file_lru(folio) &&
-				    folio_test_dirty(folio))
+			if (prot_numa &&
+			    prot_numa_avoid_fault(vma, addr,
						  oldpte, target_node))
 				continue;
 
-			/*
-			 * Don't mess with PTEs if page is already on the node
-			 * a single-threaded process is running on.
-			 */
-			nid = folio_nid(folio);
-			if (target_node == nid)
-				continue;
-			toptier = node_is_toptier(nid);
-
-			/*
-			 * Skip scanning top tier node if normal numa
-			 * balancing is disabled
-			 */
-			if (!(sysctl_numa_balancing_mode & NUMA_BALANCING_NORMAL) &&
-			    toptier)
-				continue;
-			if (folio_use_access_time(folio))
-				folio_xchg_access_time(folio,
-						       jiffies_to_msecs(jiffies));
-			}
-
 			oldpte = ptep_modify_prot_start(vma, addr, pte);
 			ptent = pte_modify(oldpte, newprot);
 
-- 
2.30.2

From: Dev Jain
To: akpm@linux-foundation.org
Subject: [PATCH v2 2/7] mm: Optimize mprotect() by batch-skipping PTEs
Date: Tue, 29 Apr 2025 10:53:31 +0530
Message-Id: <20250429052336.18912-3-dev.jain@arm.com>
In-Reply-To: <20250429052336.18912-1-dev.jain@arm.com>
References: <20250429052336.18912-1-dev.jain@arm.com>

In the prot_numa case, there are several conditions under which we can
skip to the next iteration. Since the skip decision is based on the
folio and not on the individual PTEs, we can skip an entire PTE batch.
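As a rough userspace model of the idea (a simplified stand-in for
folio_pte_batch(); folio ids as plain ints are an invention for
illustration): count how many consecutive entries belong to the same
folio, then let one folio-level decision cover the whole run.

#include <stdio.h>

/* Counts consecutive entries mapping the same "folio". */
static int pte_batch(const int *folio_of, int i, int n)
{
        int nr = 1;

        while (i + nr < n && folio_of[i + nr] == folio_of[i])
                nr++;
        return nr;
}

int main(void)
{
        /* eight "ptes": a 4-page folio, two singles, a 2-page folio */
        int folio_of[8] = { 1, 1, 1, 1, 2, 3, 4, 4 };
        int nr;

        for (int i = 0; i < 8; i += nr) {
                nr = pte_batch(folio_of, i, 8);
                /* one skip decision now advances the loop by nr */
                printf("skip %d pte(s) at index %d\n", nr, i);
        }
        return 0;
}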
Signed-off-by: Dev Jain
---
 mm/mprotect.c | 27 ++++++++++++++++++++-------
 1 file changed, 20 insertions(+), 7 deletions(-)

diff --git a/mm/mprotect.c b/mm/mprotect.c
index 70f59aa8c2a8..ec5d17af7650 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -91,6 +91,9 @@ static bool prot_numa_skip(struct vm_area_struct *vma, struct folio *folio,
 	bool toptier;
 	int nid;
 
+	if (folio_is_zone_device(folio) || folio_test_ksm(folio))
+		return true;
+
 	/* Also skip shared copy-on-write pages */
 	if (is_cow_mapping(vma->vm_flags) &&
 	    (folio_maybe_dma_pinned(folio) ||
@@ -126,8 +129,10 @@ static bool prot_numa_skip(struct vm_area_struct *vma, struct folio *folio,
 }
 
 static bool prot_numa_avoid_fault(struct vm_area_struct *vma,
-		unsigned long addr, pte_t oldpte, int target_node)
+		unsigned long addr, pte_t *pte, pte_t oldpte, int target_node,
+		int max_nr, int *nr)
 {
+	const fpb_t flags = FPB_IGNORE_DIRTY | FPB_IGNORE_SOFT_DIRTY;
 	struct folio *folio;
 	int ret;
 
@@ -136,12 +141,16 @@ static bool prot_numa_avoid_fault(struct vm_area_struct *vma,
 		return true;
 
 	folio = vm_normal_folio(vma, addr, oldpte);
-	if (!folio || folio_is_zone_device(folio) ||
-	    folio_test_ksm(folio))
+	if (!folio)
 		return true;
+
 	ret = prot_numa_skip(vma, folio, target_node);
-	if (ret)
+	if (ret) {
+		if (folio_test_large(folio) && max_nr != 1)
+			*nr = folio_pte_batch(folio, addr, pte, oldpte,
+					      max_nr, flags, NULL, NULL, NULL);
 		return ret;
+	}
 	if (folio_use_access_time(folio))
 		folio_xchg_access_time(folio,
 			jiffies_to_msecs(jiffies));
@@ -159,6 +168,7 @@ static long change_pte_range(struct mmu_gather *tlb,
 	bool prot_numa = cp_flags & MM_CP_PROT_NUMA;
 	bool uffd_wp = cp_flags & MM_CP_UFFD_WP;
 	bool uffd_wp_resolve = cp_flags & MM_CP_UFFD_WP_RESOLVE;
+	int nr;
 
 	tlb_change_page_size(tlb, PAGE_SIZE);
 	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
@@ -173,8 +183,10 @@ static long change_pte_range(struct mmu_gather *tlb,
 	flush_tlb_batched_pending(vma->vm_mm);
 	arch_enter_lazy_mmu_mode();
 	do {
+		nr = 1;
 		oldpte = ptep_get(pte);
 		if (pte_present(oldpte)) {
+			int max_nr = (end - addr) >> PAGE_SHIFT;
 			pte_t ptent;
 
 			/*
@@ -182,8 +194,9 @@ static long change_pte_range(struct mmu_gather *tlb,
 			 * pages. See similar comment in change_huge_pmd.
 			 */
 			if (prot_numa &&
-			    prot_numa_avoid_fault(vma, addr,
-						  oldpte, target_node))
+			    prot_numa_avoid_fault(vma, addr, pte,
+						  oldpte, target_node,
+						  max_nr, &nr))
 				continue;
 
@@ -300,7 +313,7 @@ static long change_pte_range(struct mmu_gather *tlb,
 				pages++;
 			}
 		}
-	} while (pte++, addr += PAGE_SIZE, addr != end);
+	} while (pte += nr, addr += nr * PAGE_SIZE, addr != end);
 	arch_leave_lazy_mmu_mode();
 	pte_unmap_unlock(pte - 1, ptl);
 
-- 
2.30.2

From: Dev Jain
To: akpm@linux-foundation.org
Subject: [PATCH v2 3/7] mm: Add batched versions of ptep_modify_prot_start/commit
Date: Tue, 29 Apr 2025 10:53:32 +0530
Message-Id: <20250429052336.18912-4-dev.jain@arm.com>
In-Reply-To: <20250429052336.18912-1-dev.jain@arm.com>
References: <20250429052336.18912-1-dev.jain@arm.com>

Batch ptep_modify_prot_start/commit in preparation for optimizing
mprotect. Architectures can override these helpers.
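The generic fallback's bit-folding can be modelled in userspace (a toy
pte type with only the two bits the helper accumulates; this is a
sketch of the semantics, not the kernel implementation):

#include <stdbool.h>
#include <stdio.h>

struct toy_pte { bool dirty, young; };

/* Mirrors the fallback loop: the first entry is taken as the result,
 * and dirty/young from the rest of the batch are folded into it. */
static struct toy_pte start_ptes(struct toy_pte *ptep, unsigned int nr)
{
        struct toy_pte pte = *ptep;

        while (--nr) {
                ptep++;
                if (ptep->dirty)
                        pte.dirty = true;
                if (ptep->young)
                        pte.young = true;
        }
        return pte;
}

int main(void)
{
        struct toy_pte batch[4] = { { false, false } };

        batch[2].dirty = true;  /* only one member of the batch is dirty */
        struct toy_pte folded = start_ptes(batch, 4);
        printf("dirty=%d young=%d\n", folded.dirty, folded.young);
        return 0;
}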
Signed-off-by: Dev Jain
---
 include/linux/pgtable.h | 38 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)

diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index b50447ef1c92..ed287289335f 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -891,6 +891,44 @@ static inline void wrprotect_ptes(struct mm_struct *mm, unsigned long addr,
 }
 #endif
 
+/* See the comment for ptep_modify_prot_start */
+#ifndef modify_prot_start_ptes
+static inline pte_t modify_prot_start_ptes(struct vm_area_struct *vma,
+		unsigned long addr, pte_t *ptep, unsigned int nr)
+{
+	pte_t pte, tmp_pte;
+
+	pte = ptep_modify_prot_start(vma, addr, ptep);
+	while (--nr) {
+		ptep++;
+		addr += PAGE_SIZE;
+		tmp_pte = ptep_modify_prot_start(vma, addr, ptep);
+		if (pte_dirty(tmp_pte))
+			pte = pte_mkdirty(pte);
+		if (pte_young(tmp_pte))
+			pte = pte_mkyoung(pte);
+	}
+	return pte;
+}
+#endif
+
+/* See the comment for ptep_modify_prot_commit */
+#ifndef modify_prot_commit_ptes
+static inline void modify_prot_commit_ptes(struct vm_area_struct *vma, unsigned long addr,
+		pte_t *ptep, pte_t old_pte, pte_t pte, unsigned int nr)
+{
+	for (;;) {
+		ptep_modify_prot_commit(vma, addr, ptep, old_pte, pte);
+		if (--nr == 0)
+			break;
+		ptep++;
+		addr += PAGE_SIZE;
+		old_pte = pte_next_pfn(old_pte);
+		pte = pte_next_pfn(pte);
+	}
+}
+#endif
+
 /*
  * On some architectures hardware does not set page access bit when accessing
  * memory page, it is responsibility of software setting this bit. It brings
-- 
2.30.2
From: Dev Jain
To: akpm@linux-foundation.org
Subject: [PATCH v2 4/7] arm64: Add batched version of ptep_modify_prot_start
Date: Tue, 29 Apr 2025 10:53:33 +0530
Message-Id: <20250429052336.18912-5-dev.jain@arm.com>
In-Reply-To: <20250429052336.18912-1-dev.jain@arm.com>
References: <20250429052336.18912-1-dev.jain@arm.com>

Override the generic definition to use get_and_clear_full_ptes(), so
that we do a TLBI possibly only on the "contpte-edges" of the large
PTE block, instead of doing it for every contpte block, which happens
for ptep_get_and_clear().
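The helper's shape can be sketched in userspace (ints stand in for
PTEs and a printf for the TLB flush; purely illustrative): clear a run
of nr entries, then issue a single ranged flush instead of one flush
per entry.

#include <stdio.h>

/* Clear nr entries, return the first old value, "flush" once. */
static int clear_flush_entries(int *tab, unsigned long idx, unsigned int nr)
{
        int old = tab[idx];

        for (unsigned int i = 0; i < nr; i++)
                tab[idx + i] = 0;
        printf("flush range [%lu, %lu)\n", idx, idx + nr); /* one flush */
        return old;
}

int main(void)
{
        int tab[8] = { 11, 12, 13, 14, 15, 16, 17, 18 };

        clear_flush_entries(tab, 2, 4);
        for (int i = 0; i < 8; i++)
                printf("%d ", tab[i]);
        printf("\n");
        return 0;
}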
Signed-off-by: Dev Jain
---
 arch/arm64/include/asm/pgtable.h |  5 +++++
 arch/arm64/mm/mmu.c              | 12 +++++++++---
 include/linux/pgtable.h          |  4 ++++
 mm/pgtable-generic.c             | 16 +++++++++++-----
 4 files changed, 29 insertions(+), 8 deletions(-)

diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 2a77f11b78d5..8872ea5f0642 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -1553,6 +1553,11 @@ extern void ptep_modify_prot_commit(struct vm_area_struct *vma,
 				    unsigned long addr, pte_t *ptep,
 				    pte_t old_pte, pte_t new_pte);
 
+#define modify_prot_start_ptes modify_prot_start_ptes
+extern pte_t modify_prot_start_ptes(struct vm_area_struct *vma,
+				    unsigned long addr, pte_t *ptep,
+				    unsigned int nr);
+
 #ifdef CONFIG_ARM64_CONTPTE
 
 /*
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 8fcf59ba39db..fe60be8774f4 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -1523,7 +1523,8 @@ static int __init prevent_bootmem_remove_init(void)
 early_initcall(prevent_bootmem_remove_init);
 #endif
 
-pte_t ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
+pte_t modify_prot_start_ptes(struct vm_area_struct *vma, unsigned long addr,
+			     pte_t *ptep, unsigned int nr)
 {
 	if (alternative_has_cap_unlikely(ARM64_WORKAROUND_2645198)) {
 		/*
@@ -1532,9 +1533,14 @@ pte_t ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr, pte
 		 * in cases where cpu is affected with errata #2645198.
 		 */
 		if (pte_user_exec(ptep_get(ptep)))
-			return ptep_clear_flush(vma, addr, ptep);
+			return clear_flush_ptes(vma, addr, ptep, nr);
 	}
-	return ptep_get_and_clear(vma->vm_mm, addr, ptep);
+	return get_and_clear_full_ptes(vma->vm_mm, addr, ptep, nr, 0);
+}
+
+pte_t ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
+{
+	return modify_prot_start_ptes(vma, addr, ptep, 1);
 }
 
 void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep,
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index ed287289335f..10cdb87ccecf 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -828,6 +828,10 @@ extern pte_t ptep_clear_flush(struct vm_area_struct *vma,
 			      pte_t *ptep);
 #endif
 
+extern pte_t clear_flush_ptes(struct vm_area_struct *vma,
+			      unsigned long address,
+			      pte_t *ptep, unsigned int nr);
+
 #ifndef __HAVE_ARCH_PMDP_HUGE_CLEAR_FLUSH
 extern pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma,
 				   unsigned long address,
diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c
index 5a882f2b10f9..e238f88c3cac 100644
--- a/mm/pgtable-generic.c
+++ b/mm/pgtable-generic.c
@@ -90,17 +90,23 @@ int ptep_clear_flush_young(struct vm_area_struct *vma,
 }
 #endif
 
-#ifndef __HAVE_ARCH_PTEP_CLEAR_FLUSH
-pte_t ptep_clear_flush(struct vm_area_struct *vma, unsigned long address,
-		       pte_t *ptep)
+pte_t clear_flush_ptes(struct vm_area_struct *vma, unsigned long address,
+		       pte_t *ptep, unsigned int nr)
 {
 	struct mm_struct *mm = (vma)->vm_mm;
 	pte_t pte;
-	pte = ptep_get_and_clear(mm, address, ptep);
+	pte = get_and_clear_full_ptes(mm, address, ptep, nr, 0);
 	if (pte_accessible(mm, pte))
-		flush_tlb_page(vma, address);
+		flush_tlb_range(vma, address, address + nr * PAGE_SIZE);
 	return pte;
 }
+
+#ifndef __HAVE_ARCH_PTEP_CLEAR_FLUSH
+pte_t ptep_clear_flush(struct vm_area_struct *vma, unsigned long address,
+		       pte_t *ptep)
+{
+	return clear_flush_ptes(vma, address, ptep, 1);
+}
 #endif
 
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-- 
2.30.2
From: Dev Jain
To: akpm@linux-foundation.org
Subject: [PATCH v2 5/7] arm64: Add batched version of ptep_modify_prot_commit
Date: Tue, 29 Apr 2025 10:53:34 +0530
Message-Id: <20250429052336.18912-6-dev.jain@arm.com>
In-Reply-To: <20250429052336.18912-1-dev.jain@arm.com>
References: <20250429052336.18912-1-dev.jain@arm.com>

Override the generic definition to simply use set_ptes() to map the
new ptes into the pagetable.
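The set_ptes() contract this relies on — writing nr entries starting
from the first pte, advancing the pfn by one per entry — can be
modelled with plain integers (an assumed simplification, not the
arm64 implementation):

#include <stdio.h>

/* Write nr entries with consecutive "pfns", like set_ptes() does. */
static void set_entries(unsigned long *tab, unsigned long idx,
                        unsigned long pfn, unsigned int nr)
{
        for (unsigned int i = 0; i < nr; i++)
                tab[idx + i] = pfn + i;
}

int main(void)
{
        unsigned long tab[4] = { 0 };

        set_entries(tab, 0, 100, 4);
        for (int i = 0; i < 4; i++)
                printf("entry %d -> pfn %lu\n", i, tab[i]);
        return 0;
}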
Signed-off-by: Dev Jain
---
 arch/arm64/include/asm/pgtable.h | 5 +++++
 arch/arm64/mm/mmu.c              | 9 ++++++++-
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 8872ea5f0642..0b13ca38f80c 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -1558,6 +1558,11 @@ extern pte_t modify_prot_start_ptes(struct vm_area_struct *vma,
 				    unsigned long addr, pte_t *ptep,
 				    unsigned int nr);
 
+#define modify_prot_commit_ptes modify_prot_commit_ptes
+extern void modify_prot_commit_ptes(struct vm_area_struct *vma, unsigned long addr,
+				    pte_t *ptep, pte_t old_pte, pte_t pte,
+				    unsigned int nr);
+
 #ifdef CONFIG_ARM64_CONTPTE
 
 /*
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index fe60be8774f4..5f04bcdcd946 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -1543,10 +1543,17 @@ pte_t ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr, pte
 	return modify_prot_start_ptes(vma, addr, ptep, 1);
 }
 
+void modify_prot_commit_ptes(struct vm_area_struct *vma, unsigned long addr,
+			     pte_t *ptep, pte_t old_pte, pte_t pte,
+			     unsigned int nr)
+{
+	set_ptes(vma->vm_mm, addr, ptep, pte, nr);
+}
+
 void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep,
 			     pte_t old_pte, pte_t pte)
 {
-	set_pte_at(vma->vm_mm, addr, ptep, pte);
+	modify_prot_commit_ptes(vma, addr, ptep, old_pte, pte, 1);
 }
 
 /*
-- 
2.30.2

From: Dev Jain
To: akpm@linux-foundation.org
Subject: [PATCH v2 6/7] mm: Batch around can_change_pte_writable()
Date: Tue, 29 Apr 2025 10:53:35 +0530
Message-Id: <20250429052336.18912-7-dev.jain@arm.com>
In-Reply-To: <20250429052336.18912-1-dev.jain@arm.com>
References: <20250429052336.18912-1-dev.jain@arm.com>

In preparation for patch 7, we need to properly batch around
can_change_pte_writable(). We batch the pte_needs_soft_dirty_wp()
check via the corresponding fpb flag, and the page-AnonExclusive
check via folio_maybe_mapped_shared(). modify_prot_start_ptes()
collects the dirty and access bits across the batch, so batching
across pte_dirty() is correct: the dirty bit on the PTE really is
just an indication that the folio got written to, so even if the PTE
is not actually dirty (but one of the PTEs in the batch is), the
wp-fault optimization can be made.
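A rough userspace analogue of why one folio-level check can answer
for the whole batch (the mapcount-versus-pages comparison below is a
deliberate simplification of what folio_maybe_mapped_shared()
approximates, not its real implementation):

#include <stdbool.h>
#include <stdio.h>

struct toy_folio { bool anon; int mapcount; int pages; };

/* Exclusively mapped anon folio: every page mapped exactly once. */
static bool can_make_writable(const struct toy_folio *f)
{
        return f->anon && f->mapcount == f->pages;
}

int main(void)
{
        struct toy_folio exclusive = { true, 16, 16 };
        struct toy_folio shared    = { true, 20, 16 };

        printf("exclusive: %d\n", can_make_writable(&exclusive));
        printf("shared:    %d\n", can_make_writable(&shared));
        return 0;
}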
Signed-off-by: Dev Jain
---
 include/linux/mm.h |  4 ++--
 mm/gup.c           |  2 +-
 mm/huge_memory.c   |  4 ++--
 mm/memory.c        |  6 +++---
 mm/mprotect.c      | 11 ++++++-----
 5 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 21dd110b6655..2f639f6d93f9 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2487,8 +2487,8 @@ int get_cmdline(struct task_struct *task, char *buffer, int buflen);
 #define  MM_CP_UFFD_WP_ALL			(MM_CP_UFFD_WP | \
 					 MM_CP_UFFD_WP_RESOLVE)
 
-bool can_change_pte_writable(struct vm_area_struct *vma, unsigned long addr,
-			     pte_t pte);
+bool can_change_ptes_writable(struct vm_area_struct *vma, unsigned long addr,
+			      pte_t pte, struct folio *folio, unsigned int nr);
 extern long change_protection(struct mmu_gather *tlb,
 			      struct vm_area_struct *vma, unsigned long start,
 			      unsigned long end, unsigned long cp_flags);
diff --git a/mm/gup.c b/mm/gup.c
index f32168339390..c39f587842a0 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -614,7 +614,7 @@ static inline bool can_follow_write_common(struct page *page,
 		return false;
 
 	/*
-	 * See can_change_pte_writable(): we broke COW and could map the page
+	 * See can_change_ptes_writable(): we broke COW and could map the page
 	 * writable if we have an exclusive anonymous page ...
 	 */
 	return page && PageAnon(page) && PageAnonExclusive(page);
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 2780a12b25f0..a58445fcedfc 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2032,12 +2032,12 @@ static inline bool can_change_pmd_writable(struct vm_area_struct *vma,
 		return false;
 
 	if (!(vma->vm_flags & VM_SHARED)) {
-		/* See can_change_pte_writable(). */
+		/* See can_change_ptes_writable(). */
 		page = vm_normal_page_pmd(vma, addr, pmd);
 		return page && PageAnon(page) && PageAnonExclusive(page);
 	}
 
-	/* See can_change_pte_writable(). */
+	/* See can_change_ptes_writable(). */
 	return pmd_dirty(pmd);
 }
 
diff --git a/mm/memory.c b/mm/memory.c
index 68c1d962d0ad..e7ebc6b70421 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -750,7 +750,7 @@ static void restore_exclusive_pte(struct vm_area_struct *vma,
 		pte = pte_mkuffd_wp(pte);
 
 	if ((vma->vm_flags & VM_WRITE) &&
-	    can_change_pte_writable(vma, address, pte)) {
+	    can_change_ptes_writable(vma, address, pte, NULL, 1)) {
 		if (folio_test_dirty(folio))
 			pte = pte_mkdirty(pte);
 		pte = pte_mkwrite(pte, vma);
@@ -5796,7 +5796,7 @@ static void numa_rebuild_large_mapping(struct vm_fault *vmf, struct vm_area_stru
 		ptent = pte_modify(ptent, vma->vm_page_prot);
 		writable = pte_write(ptent);
 		if (!writable && pte_write_upgrade &&
-		    can_change_pte_writable(vma, addr, ptent))
+		    can_change_ptes_writable(vma, addr, ptent, NULL, 1))
 			writable = true;
 	}
 
@@ -5837,7 +5837,7 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
 	 */
 	writable = pte_write(pte);
 	if (!writable && pte_write_upgrade &&
-	    can_change_pte_writable(vma, vmf->address, pte))
+	    can_change_ptes_writable(vma, vmf->address, pte, NULL, 1))
 		writable = true;
 
 	folio = vm_normal_folio(vma, vmf->address, pte);
diff --git a/mm/mprotect.c b/mm/mprotect.c
index ec5d17af7650..baff009fc981 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -40,8 +40,8 @@
 
 #include "internal.h"
 
-bool can_change_pte_writable(struct vm_area_struct *vma, unsigned long addr,
-			     pte_t pte)
+bool can_change_ptes_writable(struct vm_area_struct *vma, unsigned long addr,
+			      pte_t pte, struct folio *folio, unsigned int nr)
 {
 	struct page *page;
 
@@ -67,8 +67,9 @@ bool can_change_pte_writable(struct vm_area_struct *vma, unsigned long addr,
 	 * write-fault handler similarly would map them writable without
 	 * any additional checks while holding the PT lock.
 	 */
-	page = vm_normal_page(vma, addr, pte);
-	return page && PageAnon(page) && PageAnonExclusive(page);
+	if (!folio)
+		folio = vm_normal_folio(vma, addr, pte);
+	return folio && folio_test_anon(folio) && !folio_maybe_mapped_shared(folio);
 	}
 
 	VM_WARN_ON_ONCE(is_zero_pfn(pte_pfn(pte)) && pte_dirty(pte));
@@ -222,7 +223,7 @@ static long change_pte_range(struct mmu_gather *tlb,
 			 */
 			if ((cp_flags & MM_CP_TRY_CHANGE_WRITABLE) &&
 			    !pte_write(ptent) &&
-			    can_change_pte_writable(vma, addr, ptent))
+			    can_change_ptes_writable(vma, addr, ptent, folio, 1))
 				ptent = pte_mkwrite(ptent, vma);
 
 			ptep_modify_prot_commit(vma, addr, pte, oldpte, ptent);
-- 
2.30.2

From: Dev Jain
To: akpm@linux-foundation.org
Subject: [PATCH v2 7/7] mm: Optimize mprotect() through PTE-batching
Date: Tue, 29 Apr 2025 10:53:36 +0530
Message-Id: <20250429052336.18912-8-dev.jain@arm.com>
In-Reply-To: <20250429052336.18912-1-dev.jain@arm.com>
References: <20250429052336.18912-1-dev.jain@arm.com>

The common pte_present case does not require the folio. Elide the
overhead of vm_normal_folio() for the small-folio case by making an
approximation: on arm64, pte_batch_hint() is conclusive. For other
arches, if the pfns pointed to by the current and the next PTE are
contiguous, check whether a large folio is actually mapped, and only
then apply the batch optimization. Reuse the folio from the prot_numa
case if possible. Since modify_prot_start_ptes() gathers the
access/dirty bits, it lets us batch around pte_needs_flush() (on
parisc, the definition includes the access bit).
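The workload this series targets can be exercised from userspace; a
small timing harness follows (the 64 MiB size and the MADV_HUGEPAGE
hint are arbitrary choices for illustration, not part of the patch):

#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <time.h>

int main(void)
{
        size_t len = 64UL << 20;        /* 64 MiB */
        char *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
                       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        if (p == MAP_FAILED) { perror("mmap"); return 1; }

        madvise(p, len, MADV_HUGEPAGE); /* ask for large folios */
        memset(p, 1, len);              /* populate the range */

        struct timespec t0, t1;
        clock_gettime(CLOCK_MONOTONIC, &t0);
        if (mprotect(p, len, PROT_READ)) { perror("mprotect"); return 1; }
        clock_gettime(CLOCK_MONOTONIC, &t1);

        printf("mprotect() took %ld us\n",
               (t1.tv_sec - t0.tv_sec) * 1000000 +
               (t1.tv_nsec - t0.tv_nsec) / 1000);
        munmap(p, len);
        return 0;
}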
Signed-off-by: Dev Jain
---
 mm/mprotect.c | 49 +++++++++++++++++++++++++++++++++++--------------
 1 file changed, 35 insertions(+), 14 deletions(-)

diff --git a/mm/mprotect.c b/mm/mprotect.c
index baff009fc981..f8382806611f 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -129,7 +129,7 @@ static bool prot_numa_skip(struct vm_area_struct *vma, struct folio *folio,
 	return false;
 }
 
-static bool prot_numa_avoid_fault(struct vm_area_struct *vma,
+static struct folio *prot_numa_avoid_fault(struct vm_area_struct *vma,
 		unsigned long addr, pte_t *pte, pte_t oldpte, int target_node,
 		int max_nr, int *nr)
 {
@@ -139,25 +139,37 @@ static bool prot_numa_avoid_fault(struct vm_area_struct *vma,
 
 	/* Avoid TLB flush if possible */
 	if (pte_protnone(oldpte))
-		return true;
+		return NULL;
 
 	folio = vm_normal_folio(vma, addr, oldpte);
 	if (!folio)
-		return true;
+		return NULL;
 
 	ret = prot_numa_skip(vma, folio, target_node);
 	if (ret) {
 		if (folio_test_large(folio) && max_nr != 1)
 			*nr = folio_pte_batch(folio, addr, pte, oldpte,
 					      max_nr, flags, NULL, NULL, NULL);
-		return ret;
+		return NULL;
 	}
 	if (folio_use_access_time(folio))
 		folio_xchg_access_time(folio,
 			jiffies_to_msecs(jiffies));
-	return false;
+	return folio;
 }
 
+static bool maybe_contiguous_pte_pfns(pte_t *ptep, pte_t pte)
+{
+	pte_t *next_ptep, next_pte;
+
+	if (pte_batch_hint(ptep, pte) != 1)
+		return true;
+
+	next_ptep = ptep + 1;
+	next_pte = ptep_get(next_ptep);
+
+	return unlikely(pte_pfn(next_pte) - pte_pfn(pte) == 1);
+}
 static long change_pte_range(struct mmu_gather *tlb,
 		struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr,
 		unsigned long end, pgprot_t newprot, unsigned long cp_flags)
@@ -188,19 +200,28 @@ static long change_pte_range(struct mmu_gather *tlb,
 		oldpte = ptep_get(pte);
 		if (pte_present(oldpte)) {
 			int max_nr = (end - addr) >> PAGE_SHIFT;
+			const fpb_t flags = FPB_IGNORE_DIRTY;
+			struct folio *folio = NULL;
 			pte_t ptent;
 
 			/*
 			 * Avoid trapping faults against the zero or KSM
 			 * pages. See similar comment in change_huge_pmd.
 			 */
-			if (prot_numa &&
-			    prot_numa_avoid_fault(vma, addr, pte,
-						  oldpte, target_node,
-						  max_nr, &nr))
+			if (prot_numa) {
+				folio = prot_numa_avoid_fault(vma, addr, pte,
+						oldpte, target_node, max_nr, &nr);
+				if (!folio)
 					continue;
+			}
 
-			oldpte = ptep_modify_prot_start(vma, addr, pte);
+			if (!folio && (max_nr != 1) && maybe_contiguous_pte_pfns(pte, oldpte)) {
+				folio = vm_normal_folio(vma, addr, oldpte);
+				if (folio && folio_test_large(folio))
+					nr = folio_pte_batch(folio, addr, pte,
+						oldpte, max_nr, flags, NULL, NULL, NULL);
+			}
+			oldpte = modify_prot_start_ptes(vma, addr, pte, nr);
 			ptent = pte_modify(oldpte, newprot);
 
 			if (uffd_wp)
@@ -223,13 +244,13 @@ static long change_pte_range(struct mmu_gather *tlb,
 			 */
 			if ((cp_flags & MM_CP_TRY_CHANGE_WRITABLE) &&
 			    !pte_write(ptent) &&
-			    can_change_ptes_writable(vma, addr, ptent, folio, 1))
+			    can_change_ptes_writable(vma, addr, ptent, folio, nr))
 				ptent = pte_mkwrite(ptent, vma);
 
-			ptep_modify_prot_commit(vma, addr, pte, oldpte, ptent);
+			modify_prot_commit_ptes(vma, addr, pte, oldpte, ptent, nr);
 			if (pte_needs_flush(oldpte, ptent))
-				tlb_flush_pte_range(tlb, addr, PAGE_SIZE);
-			pages++;
+				tlb_flush_pte_range(tlb, addr, nr * PAGE_SIZE);
+			pages += nr;
 		} else if (is_swap_pte(oldpte)) {
 			swp_entry_t entry = pte_to_swp_entry(oldpte);
 			pte_t newpte;
-- 
2.30.2