From: Dev Jain
To: akpm@linux-foundation.org
Cc: ryan.roberts@arm.com, david@redhat.com, willy@infradead.org,
    linux-mm@kvack.org, linux-kernel@vger.kernel.org, catalin.marinas@arm.com,
    will@kernel.org, Liam.Howlett@oracle.com, lorenzo.stoakes@oracle.com,
    vbabka@suse.cz, jannh@google.com, anshuman.khandual@arm.com,
    peterx@redhat.com, joey.gouly@arm.com, ioworker0@gmail.com,
    baohua@kernel.org, kevin.brodsky@arm.com, quic_zhenhuah@quicinc.com,
    christophe.leroy@csgroup.eu, yangyicong@hisilicon.com,
    linux-arm-kernel@lists.infradead.org, namit@vmware.com, hughd@google.com,
    yang@os.amperecomputing.com, ziy@nvidia.com, Dev Jain
Subject: [PATCH 1/7] mm: Refactor code in mprotect
Date: Mon, 28 Apr 2025 17:34:08 +0530
Message-Id: <20250428120414.12101-2-dev.jain@arm.com>
In-Reply-To: <20250428120414.12101-1-dev.jain@arm.com>
References: <20250428120414.12101-1-dev.jain@arm.com>

Reduce indentation in change_pte_range() by refactoring some of the code
into a new function. No functional change.
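The idea is to pull the chain of "should we skip this PTE?" checks into a
predicate, so the main loop body stays at one indentation level and simply
does `continue`. A rough standalone sketch of that shape (plain userspace C,
not the kernel code; names such as `entry` and `skip_entry` are invented for
this illustration):

#include <stdbool.h>
#include <stdio.h>

struct entry {
	bool already_protnone;
	bool shared_cow;
	bool dirty_file_backed;
	int nid;
};

/* Plays the role of prot_numa_skip()/prot_numa_avoid_fault(): return true
 * when the entry should be left alone. */
static bool skip_entry(const struct entry *e, int target_node)
{
	if (e->already_protnone)	/* avoid a useless TLB flush */
		return true;
	if (e->shared_cow)		/* don't touch shared COW pages */
		return true;
	if (e->dirty_file_backed)	/* async migration can't move these */
		return true;
	if (e->nid == target_node)	/* already on the right node */
		return true;
	return false;
}

int main(void)
{
	struct entry entries[] = {
		{ .already_protnone = true },
		{ .nid = 1 },
		{ .nid = 0 },
	};
	int changed = 0;

	for (unsigned int i = 0; i < sizeof(entries) / sizeof(entries[0]); i++) {
		if (skip_entry(&entries[i], 0))
			continue;	/* one flat level, as in the refactor */
		changed++;
	}
	printf("would change %d of 3 entries\n", changed);
	return 0;
}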
Signed-off-by: Dev Jain --- mm/mprotect.c | 116 +++++++++++++++++++++++++++++--------------------- 1 file changed, 68 insertions(+), 48 deletions(-) diff --git a/mm/mprotect.c b/mm/mprotect.c index 62c1f7945741..8d635c7fc81f 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -83,6 +83,71 @@ bool can_change_pte_writable(struct vm_area_struct *vma,= unsigned long addr, return pte_dirty(pte); } =20 + + +static bool prot_numa_skip(struct vm_area_struct *vma, struct folio *folio, + int target_node) +{ + bool toptier; + int nid; + + /* Also skip shared copy-on-write pages */ + if (is_cow_mapping(vma->vm_flags) && + (folio_maybe_dma_pinned(folio) || + folio_maybe_mapped_shared(folio))) + return true; + + /* + * While migration can move some dirty pages, + * it cannot move them all from MIGRATE_ASYNC + * context. + */ + if (folio_is_file_lru(folio) && + folio_test_dirty(folio)) + return true; + + /* + * Don't mess with PTEs if page is already on the node + * a single-threaded process is running on. + */ + nid =3D folio_nid(folio); + if (target_node =3D=3D nid) + return true; + toptier =3D node_is_toptier(nid); + + /* + * Skip scanning top tier node if normal numa + * balancing is disabled + */ + if (!(sysctl_numa_balancing_mode & NUMA_BALANCING_NORMAL) && + toptier) + return true; + return false; +} + +static bool prot_numa_avoid_fault(struct vm_area_struct *vma, + unsigned long addr, pte_t oldpte, int target_node) +{ + struct folio *folio; + int ret; + + /* Avoid TLB flush if possible */ + if (pte_protnone(oldpte)) + return true; + + folio =3D vm_normal_folio(vma, addr, oldpte); + if (!folio || folio_is_zone_device(folio) || + folio_test_ksm(folio)) + return true; + ret =3D prot_numa_skip(vma, folio, target_node); + if (ret) + return ret; + if (folio_use_access_time(folio)) + folio_xchg_access_time(folio, + jiffies_to_msecs(jiffies)); + return false; +} + static long change_pte_range(struct mmu_gather *tlb, struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr, unsigned long end, pgprot_t newprot, unsigned long cp_flags) @@ -116,56 +181,11 @@ static long change_pte_range(struct mmu_gather *tlb, * Avoid trapping faults against the zero or KSM * pages. See similar comment in change_huge_pmd. */ - if (prot_numa) { - struct folio *folio; - int nid; - bool toptier; - - /* Avoid TLB flush if possible */ - if (pte_protnone(oldpte)) - continue; - - folio =3D vm_normal_folio(vma, addr, oldpte); - if (!folio || folio_is_zone_device(folio) || - folio_test_ksm(folio)) - continue; - - /* Also skip shared copy-on-write pages */ - if (is_cow_mapping(vma->vm_flags) && - (folio_maybe_dma_pinned(folio) || - folio_maybe_mapped_shared(folio))) - continue; - - /* - * While migration can move some dirty pages, - * it cannot move them all from MIGRATE_ASYNC - * context. - */ - if (folio_is_file_lru(folio) && - folio_test_dirty(folio)) + if (prot_numa && + prot_numa_avoid_fault(vma, addr, + oldpte, target_node)) continue; =20 - /* - * Don't mess with PTEs if page is already on the node - * a single-threaded process is running on. 
-				 */
-				nid = folio_nid(folio);
-				if (target_node == nid)
-					continue;
-				toptier = node_is_toptier(nid);
-
-				/*
-				 * Skip scanning top tier node if normal numa
-				 * balancing is disabled
-				 */
-				if (!(sysctl_numa_balancing_mode & NUMA_BALANCING_NORMAL) &&
-				    toptier)
-					continue;
-				if (folio_use_access_time(folio))
-					folio_xchg_access_time(folio,
-						jiffies_to_msecs(jiffies));
-			}
-
 			oldpte = ptep_modify_prot_start(vma, addr, pte);
 			ptent = pte_modify(oldpte, newprot);
 
-- 
2.30.2

From: Dev Jain
Subject: [PATCH 2/7] mm: Optimize mprotect() by batch-skipping PTEs
Date: Mon, 28 Apr 2025 17:34:09 +0530
Message-Id: <20250428120414.12101-3-dev.jain@arm.com>
In-Reply-To: <20250428120414.12101-1-dev.jain@arm.com>

In the prot_numa case, there are several conditions under which we can skip
to the next iteration. Since the skip condition is based on the folio and
not on the individual PTEs, we can skip an entire PTE batch.
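Because the skip decision is a property of the folio, one decision can cover
every PTE that maps it. A rough standalone sketch of that control flow
(plain userspace C, not the kernel code; `pte_batch` stands in for the
kernel's folio_pte_batch() and all other names are invented):

#include <stdio.h>
#include <stdbool.h>

struct model_pte {
	int folio;		/* which folio this entry maps */
	unsigned long pfn;	/* physical frame number */
};

static bool skip_folio(int folio)
{
	return folio == 7;	/* pretend folio 7 hits one of the skip cases */
}

/* Count consecutive entries mapping the same folio with consecutive pfns. */
static int pte_batch(const struct model_pte *pte, int max_nr)
{
	int nr = 1;

	while (nr < max_nr && pte[nr].folio == pte[0].folio &&
	       pte[nr].pfn == pte[0].pfn + nr)
		nr++;
	return nr;
}

int main(void)
{
	struct model_pte ptes[] = {
		{ 7, 100 }, { 7, 101 }, { 7, 102 }, { 7, 103 },	/* one large folio */
		{ 9, 500 },					/* small folio */
	};
	const int total = sizeof(ptes) / sizeof(ptes[0]);
	int iterations = 0;

	for (int i = 0; i < total; ) {
		int nr = 1;

		iterations++;
		if (skip_folio(ptes[i].folio))
			/* Skip the whole batch in one loop iteration. */
			nr = pte_batch(&ptes[i], total - i);
		/* else: would change protection here */
		i += nr;
	}
	printf("%d entries handled in %d loop iterations\n", total, iterations);
	return 0;
}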
Signed-off-by: Dev Jain --- mm/mprotect.c | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/mm/mprotect.c b/mm/mprotect.c index 8d635c7fc81f..33eabc995584 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -91,6 +91,9 @@ static bool prot_numa_skip(struct vm_area_struct *vma, st= ruct folio *folio, bool toptier; int nid; =20 + if (folio_is_zone_device(folio) || folio_test_ksm(folio)) + return true; + /* Also skip shared copy-on-write pages */ if (is_cow_mapping(vma->vm_flags) && (folio_maybe_dma_pinned(folio) || @@ -126,8 +129,10 @@ static bool prot_numa_skip(struct vm_area_struct *vma,= struct folio *folio, } =20 static bool prot_numa_avoid_fault(struct vm_area_struct *vma, - unsigned long addr, pte_t oldpte, int target_node) + unsigned long addr, pte_t *pte, pte_t oldpte, int target_node, + int max_nr, int *nr) { + const fpb_t flags =3D FPB_IGNORE_DIRTY | FPB_IGNORE_SOFT_DIRTY; struct folio *folio; int ret; =20 @@ -136,12 +141,16 @@ static bool prot_numa_avoid_fault(struct vm_area_stru= ct *vma, return true; =20 folio =3D vm_normal_folio(vma, addr, oldpte); - if (!folio || folio_is_zone_device(folio) || - folio_test_ksm(folio)) + if (!folio) return true; + ret =3D prot_numa_skip(vma, folio, target_node); - if (ret) + if (ret) { + if (folio_test_large(folio) && max_nr !=3D 1) + *nr =3D folio_pte_batch(folio, addr, pte, oldpte, + max_nr, flags, NULL, NULL, NULL); return ret; + } if (folio_use_access_time(folio)) folio_xchg_access_time(folio, jiffies_to_msecs(jiffies)); @@ -159,6 +168,7 @@ static long change_pte_range(struct mmu_gather *tlb, bool prot_numa =3D cp_flags & MM_CP_PROT_NUMA; bool uffd_wp =3D cp_flags & MM_CP_UFFD_WP; bool uffd_wp_resolve =3D cp_flags & MM_CP_UFFD_WP_RESOLVE; + int nr; =20 tlb_change_page_size(tlb, PAGE_SIZE); pte =3D pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); @@ -173,8 +183,10 @@ static long change_pte_range(struct mmu_gather *tlb, flush_tlb_batched_pending(vma->vm_mm); arch_enter_lazy_mmu_mode(); do { + nr =3D 1; oldpte =3D ptep_get(pte); if (pte_present(oldpte)) { + int max_nr =3D (end - addr) >> PAGE_SHIFT; pte_t ptent; =20 /* @@ -182,8 +194,9 @@ static long change_pte_range(struct mmu_gather *tlb, * pages. See similar comment in change_huge_pmd. 
 			 */
 			if (prot_numa &&
-			    prot_numa_avoid_fault(vma, addr,
-						  oldpte, target_node))
+			    prot_numa_avoid_fault(vma, addr, pte,
+						  oldpte, target_node,
+						  max_nr, &nr))
 				continue;
 
 			oldpte = ptep_modify_prot_start(vma, addr, pte);
@@ -300,7 +313,7 @@ static long change_pte_range(struct mmu_gather *tlb,
 				pages++;
 			}
 		}
-	} while (pte++, addr += PAGE_SIZE, addr != end);
+	} while (pte += nr, addr += nr * PAGE_SIZE, addr != end);
 	arch_leave_lazy_mmu_mode();
 	pte_unmap_unlock(pte - 1, ptl);
 
-- 
2.30.2

From: Dev Jain
Subject: [PATCH 3/7] mm: Add batched versions of ptep_modify_prot_start/commit
Date: Mon, 28 Apr 2025 17:34:10 +0530
Message-Id: <20250428120414.12101-4-dev.jain@arm.com>
In-Reply-To: <20250428120414.12101-1-dev.jain@arm.com>

Batch ptep_modify_prot_start/commit in preparation for optimizing mprotect.
Architectures can override these helpers.
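The generic fallback added below walks the batch one PTE at a time but folds
each entry's dirty and young bits into the value returned for the whole
batch. A standalone sketch of that folding, with two flag bits standing in
for the hardware dirty/access bits (plain userspace C, invented names):

#include <stdio.h>

#define DIRTY 0x1
#define YOUNG 0x2

struct model_pte { unsigned int flags; };

/* Clear a run of entries and return the first entry's value with every
 * member's dirty/young bits folded in. */
static unsigned int start_batch(struct model_pte *pte, unsigned int nr)
{
	unsigned int folded = pte[0].flags;

	pte[0].flags = 0;			/* "cleared" */
	for (unsigned int i = 1; i < nr; i++) {
		folded |= pte[i].flags & (DIRTY | YOUNG);
		pte[i].flags = 0;
	}
	return folded;
}

int main(void)
{
	struct model_pte ptes[4] = { { 0 }, { DIRTY }, { 0 }, { YOUNG } };
	unsigned int folded = start_batch(ptes, 4);

	printf("batch is %sdirty, %syoung\n",
	       (folded & DIRTY) ? "" : "not ", (folded & YOUNG) ? "" : "not ");
	return 0;
}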
Signed-off-by: Dev Jain --- include/linux/pgtable.h | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index b50447ef1c92..ed287289335f 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -891,6 +891,44 @@ static inline void wrprotect_ptes(struct mm_struct *mm= , unsigned long addr, } #endif =20 +/* See the comment for ptep_modify_prot_start */ +#ifndef modify_prot_start_ptes +static inline pte_t modify_prot_start_ptes(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep, unsigned int nr) +{ + pte_t pte, tmp_pte; + + pte =3D ptep_modify_prot_start(vma, addr, ptep); + while (--nr) { + ptep++; + addr +=3D PAGE_SIZE; + tmp_pte =3D ptep_modify_prot_start(vma, addr, ptep); + if (pte_dirty(tmp_pte)) + pte =3D pte_mkdirty(pte); + if (pte_young(tmp_pte)) + pte =3D pte_mkyoung(pte); + } + return pte; +} +#endif + +/* See the comment for ptep_modify_prot_commit */ +#ifndef modify_prot_commit_ptes +static inline void modify_prot_commit_ptes(struct vm_area_struct *vma, uns= igned long addr, + pte_t *ptep, pte_t old_pte, pte_t pte, unsigned int nr) +{ + for (;;) { + ptep_modify_prot_commit(vma, addr, ptep, old_pte, pte); + if (--nr =3D=3D 0) + break; + ptep++; + addr +=3D PAGE_SIZE; + old_pte =3D pte_next_pfn(old_pte); + pte =3D pte_next_pfn(pte); + } +} +#endif + /* * On some architectures hardware does not set page access bit when access= ing * memory page, it is responsibility of software setting this bit. It brin= gs --=20 2.30.2 From nobody Mon Feb 9 05:53:19 2026 Received: from foss.arm.com (foss.arm.com [217.140.110.172]) by smtp.subspace.kernel.org (Postfix) with ESMTP id D7DC1264A95 for ; Mon, 28 Apr 2025 12:05:20 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=217.140.110.172 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1745841922; cv=none; b=N8pPuSZPEndrR7z8drScqyvANG++HtS+w62G21mzXT2okYvxeHQMQ14voQuPQ0bnTT6/YvkMDBVm9IvejH/wF3fJn0J0hbxv2dTdYb7rEx4aQAUvbXjyZ+E1uCogurg2LsogAHE04q4smzc2/+RgwtrKEYSm+OUSGlaseDgx1Po= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1745841922; c=relaxed/simple; bh=KDuhHkG7OgF2QPC8tt/KvdEQGK1r1t8g8TWhJTJjJAE=; h=From:To:Cc:Subject:Date:Message-Id:In-Reply-To:References: MIME-Version; b=BI4lLJzKWjiN5OUhOtIWzcgngMo5nZjdeS7NYZkDiuSkqAHz8jHqFk3WlfwUZagv2xSRjq8TpF6Y/VqJ4wJiqJxLSQHQ67eCi0DGmLddeKd1MV2OUUXF7CBrIOPYfIJfl0r3ZryplnMCGgQvW7GdxQZH9Dy0byQ8nV9t8Oq7XP8= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=arm.com; spf=pass smtp.mailfrom=arm.com; arc=none smtp.client-ip=217.140.110.172 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=arm.com Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=arm.com Received: from usa-sjc-imap-foss1.foss.arm.com (unknown [10.121.207.14]) by usa-sjc-mx-foss1.foss.arm.com (Postfix) with ESMTP id 5C4D122C7; Mon, 28 Apr 2025 05:05:13 -0700 (PDT) Received: from K4MQJ0H1H2.arm.com (unknown [10.163.78.210]) by usa-sjc-imap-foss1.foss.arm.com (Postfix) with ESMTPA id 73D993F66E; Mon, 28 Apr 2025 05:05:09 -0700 (PDT) From: Dev Jain To: akpm@linux-foundation.org Cc: ryan.roberts@arm.com, david@redhat.com, willy@infradead.org, linux-mm@kvack.org, linux-kernel@vger.kernel.org, catalin.marinas@arm.com, will@kernel.org, Liam.Howlett@oracle.com, lorenzo.stoakes@oracle.com, vbabka@suse.cz, jannh@google.com, 
anshuman.khandual@arm.com, peterx@redhat.com, joey.gouly@arm.com, ioworker0@gmail.com, baohua@kernel.org, kevin.brodsky@arm.com, quic_zhenhuah@quicinc.com, christophe.leroy@csgroup.eu, yangyicong@hisilicon.com, linux-arm-kernel@lists.infradead.org, namit@vmware.com, hughd@google.com, yang@os.amperecomputing.com, ziy@nvidia.com, Dev Jain Subject: [PATCH 4/7] arm64: Add batched version of ptep_modify_prot_start Date: Mon, 28 Apr 2025 17:34:11 +0530 Message-Id: <20250428120414.12101-5-dev.jain@arm.com> X-Mailer: git-send-email 2.39.3 (Apple Git-146) In-Reply-To: <20250428120414.12101-1-dev.jain@arm.com> References: <20250428120414.12101-1-dev.jain@arm.com> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Override the generic definition to use get_and_clear_full_ptes(), so that we do a TLBI possibly only on the "contpte-edges" of the large PTE block, instead of doing it for every contpte block, which happens for ptep_get_and= _clear(). Signed-off-by: Dev Jain --- arch/arm64/include/asm/pgtable.h | 5 +++++ arch/arm64/mm/mmu.c | 12 +++++++++--- include/linux/pgtable.h | 4 ++++ mm/pgtable-generic.c | 16 +++++++++++----- 4 files changed, 29 insertions(+), 8 deletions(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgta= ble.h index 2a77f11b78d5..8872ea5f0642 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -1553,6 +1553,11 @@ extern void ptep_modify_prot_commit(struct vm_area_s= truct *vma, unsigned long addr, pte_t *ptep, pte_t old_pte, pte_t new_pte); =20 +#define modify_prot_start_ptes modify_prot_start_ptes +extern pte_t modify_prot_start_ptes(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep, + unsigned int nr); + #ifdef CONFIG_ARM64_CONTPTE =20 /* diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 8fcf59ba39db..fe60be8774f4 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -1523,7 +1523,8 @@ static int __init prevent_bootmem_remove_init(void) early_initcall(prevent_bootmem_remove_init); #endif =20 -pte_t ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long add= r, pte_t *ptep) +pte_t modify_prot_start_ptes(struct vm_area_struct *vma, unsigned long add= r, + pte_t *ptep, unsigned int nr) { if (alternative_has_cap_unlikely(ARM64_WORKAROUND_2645198)) { /* @@ -1532,9 +1533,14 @@ pte_t ptep_modify_prot_start(struct vm_area_struct *= vma, unsigned long addr, pte * in cases where cpu is affected with errata #2645198. 
*/ if (pte_user_exec(ptep_get(ptep))) - return ptep_clear_flush(vma, addr, ptep); + return clear_flush_ptes(vma, addr, ptep, nr); } - return ptep_get_and_clear(vma->vm_mm, addr, ptep); + return get_and_clear_full_ptes(vma->vm_mm, addr, ptep, nr, 0); +} + +pte_t ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long add= r, pte_t *ptep) +{ + return modify_prot_start_ptes(vma, addr, ptep, 1); } =20 void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long add= r, pte_t *ptep, diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index ed287289335f..10cdb87ccecf 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -828,6 +828,10 @@ extern pte_t ptep_clear_flush(struct vm_area_struct *v= ma, pte_t *ptep); #endif =20 +extern pte_t clear_flush_ptes(struct vm_area_struct *vma, + unsigned long address, + pte_t *ptep, unsigned int nr); + #ifndef __HAVE_ARCH_PMDP_HUGE_CLEAR_FLUSH extern pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma, unsigned long address, diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c index 5a882f2b10f9..e238f88c3cac 100644 --- a/mm/pgtable-generic.c +++ b/mm/pgtable-generic.c @@ -90,17 +90,23 @@ int ptep_clear_flush_young(struct vm_area_struct *vma, } #endif =20 -#ifndef __HAVE_ARCH_PTEP_CLEAR_FLUSH -pte_t ptep_clear_flush(struct vm_area_struct *vma, unsigned long address, - pte_t *ptep) +pte_t clear_flush_ptes(struct vm_area_struct *vma, unsigned long address, + pte_t *ptep, unsigned int nr) { struct mm_struct *mm =3D (vma)->vm_mm; pte_t pte; - pte =3D ptep_get_and_clear(mm, address, ptep); + pte =3D get_and_clear_full_ptes(mm, address, ptep, nr, 0); if (pte_accessible(mm, pte)) - flush_tlb_page(vma, address); + flush_tlb_range(vma, address, address + nr * PAGE_SIZE); return pte; } + +#ifndef __HAVE_ARCH_PTEP_CLEAR_FLUSH +pte_t ptep_clear_flush(struct vm_area_struct *vma, unsigned long address, + pte_t *ptep) +{ + return clear_flush_ptes(vma, address, ptep, 1); +} #endif =20 #ifdef CONFIG_TRANSPARENT_HUGEPAGE --=20 2.30.2 From nobody Mon Feb 9 05:53:19 2026 Received: from foss.arm.com (foss.arm.com [217.140.110.172]) by smtp.subspace.kernel.org (Postfix) with ESMTP id 892972676EA for ; Mon, 28 Apr 2025 12:05:29 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=217.140.110.172 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1745841931; cv=none; b=LkVlE/HMKHQce1xvQvAAY/zvYc7e33zGc0imM5ckaqbOdDi5V+zQnpwYzWJQH8bhcGZCdAP1VdxT6qestiR4F6jvdZ40q0QW4soQtPCAxE8c/PtaBIkPkpYnDqxyQXlcNkD3A3ImapS/WAQqv6JhJt9rbvghM3LMhA+G7i1ybg8= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1745841931; c=relaxed/simple; bh=FPC3c1UtZeXKvhNu9xElMb/9OS9+eIdnWELS1tZE6Ww=; h=From:To:Cc:Subject:Date:Message-Id:In-Reply-To:References: MIME-Version; b=Ze78FlcFxG7jLdo0yK9e1JAYUTyZxiBMtfSmUtGttLcMaDK8iEUqbDAZNscoG67m/oaUAavz56qlQ7B53Wa739ugM8rDZjuLMuO7veGhH12DiJSIkcCxnTn9kjnH0hbDYsvZxXies/fivjGVcv6a35Eyb5tnPlL3Sy3HDog+pi4= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=arm.com; spf=pass smtp.mailfrom=arm.com; arc=none smtp.client-ip=217.140.110.172 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=arm.com Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=arm.com Received: from usa-sjc-imap-foss1.foss.arm.com (unknown [10.121.207.14]) by usa-sjc-mx-foss1.foss.arm.com (Postfix) with ESMTP id 67D3C22C8; Mon, 28 Apr 2025 05:05:22 -0700 (PDT) 
Received: from K4MQJ0H1H2.arm.com (unknown [10.163.78.210]) by usa-sjc-imap-foss1.foss.arm.com (Postfix) with ESMTPA id 7742D3F66E; Mon, 28 Apr 2025 05:05:20 -0700 (PDT) From: Dev Jain To: akpm@linux-foundation.org Cc: ryan.roberts@arm.com, david@redhat.com, willy@infradead.org, linux-mm@kvack.org, linux-kernel@vger.kernel.org, catalin.marinas@arm.com, will@kernel.org, Liam.Howlett@oracle.com, lorenzo.stoakes@oracle.com, vbabka@suse.cz, jannh@google.com, anshuman.khandual@arm.com, peterx@redhat.com, joey.gouly@arm.com, ioworker0@gmail.com, baohua@kernel.org, kevin.brodsky@arm.com, quic_zhenhuah@quicinc.com, christophe.leroy@csgroup.eu, yangyicong@hisilicon.com, linux-arm-kernel@lists.infradead.org, namit@vmware.com, hughd@google.com, yang@os.amperecomputing.com, ziy@nvidia.com, Dev Jain Subject: [PATCH 5/7] arm64: Add batched version of ptep_modify_prot_commit Date: Mon, 28 Apr 2025 17:34:12 +0530 Message-Id: <20250428120414.12101-6-dev.jain@arm.com> X-Mailer: git-send-email 2.39.3 (Apple Git-146) In-Reply-To: <20250428120414.12101-1-dev.jain@arm.com> References: <20250428120414.12101-1-dev.jain@arm.com> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Override the generic definition to simply use set_ptes() to map the new ptes into the pagetable. Signed-off-by: Dev Jain --- arch/arm64/include/asm/pgtable.h | 5 +++++ arch/arm64/mm/mmu.c | 9 ++++++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgta= ble.h index 8872ea5f0642..0b13ca38f80c 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -1558,6 +1558,11 @@ extern pte_t modify_prot_start_ptes(struct vm_area_s= truct *vma, unsigned long addr, pte_t *ptep, unsigned int nr); =20 +#define modify_prot_commit_ptes modify_prot_commit_ptes +extern void modify_prot_commit_ptes(struct vm_area_struct *vma, unsigned l= ong addr, + pte_t *ptep, pte_t old_pte, pte_t pte, + unsigned int nr); + #ifdef CONFIG_ARM64_CONTPTE =20 /* diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index fe60be8774f4..5f04bcdcd946 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -1543,10 +1543,17 @@ pte_t ptep_modify_prot_start(struct vm_area_struct = *vma, unsigned long addr, pte return modify_prot_start_ptes(vma, addr, ptep, 1); } =20 +void modify_prot_commit_ptes(struct vm_area_struct *vma, unsigned long add= r, + pte_t *ptep, pte_t old_pte, pte_t pte, + unsigned int nr) +{ + set_ptes(vma->vm_mm, addr, ptep, pte, nr); +} + void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long add= r, pte_t *ptep, pte_t old_pte, pte_t pte) { - set_pte_at(vma->vm_mm, addr, ptep, pte); + modify_prot_commit_ptes(vma, addr, ptep, old_pte, pte, 1); } =20 /* --=20 2.30.2 From nobody Mon Feb 9 05:53:19 2026 Received: from foss.arm.com (foss.arm.com [217.140.110.172]) by smtp.subspace.kernel.org (Postfix) with ESMTP id 72406267B76 for ; Mon, 28 Apr 2025 12:05:38 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=217.140.110.172 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1745841940; cv=none; b=FGfw+N41SYB54H1DmgwuYJiHu1ksJ4FFhja0fmS88T41lUYtc+8Em5Nqz95armr/VdUsaTfSCZNUPE/xiUC8O4CgmRSBPCXAWCCeURzzrQErJ0xCsx7ntzvsqAhWbbr1TYvkQaeeBgtIEjT20EQP1YZoqbvwt/Y2BPAE8ziG8F4= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; 
From: Dev Jain
Subject: [PATCH 6/7] mm: Batch around can_change_pte_writable()
Date: Mon, 28 Apr 2025 17:34:13 +0530
Message-Id: <20250428120414.12101-7-dev.jain@arm.com>
In-Reply-To: <20250428120414.12101-1-dev.jain@arm.com>

In preparation for patch 7, properly batch around can_change_pte_writable().
We batch around pte_needs_soft_dirty_wp() via the corresponding fpb flag,
and we batch the page-anon-exclusive check using folio_maybe_mapped_shared().
modify_prot_start_ptes() collects the dirty and access bits across the
batch, which lets us batch around pte_dirty(): this is correct because the
dirty bit on the PTE is really just an indication that the folio got written
to, so even if a given PTE is not actually dirty (but another PTE in the
batch is), the wp-fault optimization can still be made.
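For a batch, the per-page anon-exclusive test is replaced by a single
folio-level question: could this folio be mapped by someone else? A
standalone sketch of that decision (plain userspace C, not the kernel code;
all names invented for illustration):

#include <stdio.h>
#include <stdbool.h>

struct model_page { bool anon_exclusive; };

struct model_folio {
	bool maybe_mapped_shared;
	struct model_page pages[4];
};

static bool can_make_writable(const struct model_folio *folio,
			      const struct model_page *page, unsigned int nr)
{
	if (nr != 1)
		/* Batch: one conservative folio-level check. */
		return !folio->maybe_mapped_shared;
	/* Single entry: the precise per-page check. */
	return page->anon_exclusive;
}

int main(void)
{
	struct model_folio folio = {
		.maybe_mapped_shared = false,
		.pages = { { true }, { true }, { true }, { true } },
	};

	printf("single page: %d, batch of 4: %d\n",
	       can_make_writable(&folio, &folio.pages[0], 1),
	       can_make_writable(&folio, &folio.pages[0], 4));
	return 0;
}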
Signed-off-by: Dev Jain --- include/linux/mm.h | 4 ++-- mm/gup.c | 2 +- mm/huge_memory.c | 4 ++-- mm/memory.c | 6 +++--- mm/mprotect.c | 9 ++++++--- 5 files changed, 14 insertions(+), 11 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 5eb0d77c4438..ffa02e15863f 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2710,8 +2710,8 @@ int get_cmdline(struct task_struct *task, char *buffe= r, int buflen); #define MM_CP_UFFD_WP_ALL (MM_CP_UFFD_WP | \ MM_CP_UFFD_WP_RESOLVE) =20 -bool can_change_pte_writable(struct vm_area_struct *vma, unsigned long add= r, - pte_t pte); +bool can_change_ptes_writable(struct vm_area_struct *vma, unsigned long ad= dr, + pte_t pte, struct folio *folio, unsigned int nr); extern long change_protection(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long start, unsigned long end, unsigned long cp_flags); diff --git a/mm/gup.c b/mm/gup.c index 84461d384ae2..6a605fc5f2cb 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -614,7 +614,7 @@ static inline bool can_follow_write_common(struct page = *page, return false; =20 /* - * See can_change_pte_writable(): we broke COW and could map the page + * See can_change_ptes_writable(): we broke COW and could map the page * writable if we have an exclusive anonymous page ... */ return page && PageAnon(page) && PageAnonExclusive(page); diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 28c87e0e036f..e5496c0d9e7e 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2032,12 +2032,12 @@ static inline bool can_change_pmd_writable(struct v= m_area_struct *vma, return false; =20 if (!(vma->vm_flags & VM_SHARED)) { - /* See can_change_pte_writable(). */ + /* See can_change_ptes_writable(). */ page =3D vm_normal_page_pmd(vma, addr, pmd); return page && PageAnon(page) && PageAnonExclusive(page); } =20 - /* See can_change_pte_writable(). */ + /* See can_change_ptes_writable(). 
*/ return pmd_dirty(pmd); } =20 diff --git a/mm/memory.c b/mm/memory.c index b9e8443aaa86..b1fda3de8d27 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -750,7 +750,7 @@ static void restore_exclusive_pte(struct vm_area_struct= *vma, pte =3D pte_mkuffd_wp(pte); =20 if ((vma->vm_flags & VM_WRITE) && - can_change_pte_writable(vma, address, pte)) { + can_change_ptes_writable(vma, address, pte, NULL, 1)) { if (folio_test_dirty(folio)) pte =3D pte_mkdirty(pte); pte =3D pte_mkwrite(pte, vma); @@ -5767,7 +5767,7 @@ static void numa_rebuild_large_mapping(struct vm_faul= t *vmf, struct vm_area_stru ptent =3D pte_modify(ptent, vma->vm_page_prot); writable =3D pte_write(ptent); if (!writable && pte_write_upgrade && - can_change_pte_writable(vma, addr, ptent)) + can_change_ptes_writable(vma, addr, ptent, NULL, 1)) writable =3D true; } =20 @@ -5808,7 +5808,7 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf) */ writable =3D pte_write(pte); if (!writable && pte_write_upgrade && - can_change_pte_writable(vma, vmf->address, pte)) + can_change_ptes_writable(vma, vmf->address, pte, NULL, 1)) writable =3D true; =20 folio =3D vm_normal_folio(vma, vmf->address, pte); diff --git a/mm/mprotect.c b/mm/mprotect.c index 33eabc995584..362fd7e5457d 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -40,8 +40,8 @@ =20 #include "internal.h" =20 -bool can_change_pte_writable(struct vm_area_struct *vma, unsigned long add= r, - pte_t pte) +bool can_change_ptes_writable(struct vm_area_struct *vma, unsigned long ad= dr, + pte_t pte, struct folio *folio, unsigned int nr) { struct page *page; =20 @@ -67,6 +67,9 @@ bool can_change_pte_writable(struct vm_area_struct *vma, = unsigned long addr, * write-fault handler similarly would map them writable without * any additional checks while holding the PT lock. 
*/ + if (unlikely(nr !=3D 1)) + return !folio_maybe_mapped_shared(folio); + page =3D vm_normal_page(vma, addr, pte); return page && PageAnon(page) && PageAnonExclusive(page); } @@ -222,7 +225,7 @@ static long change_pte_range(struct mmu_gather *tlb, */ if ((cp_flags & MM_CP_TRY_CHANGE_WRITABLE) && !pte_write(ptent) && - can_change_pte_writable(vma, addr, ptent)) + can_change_ptes_writable(vma, addr, ptent, folio, 1)) ptent =3D pte_mkwrite(ptent, vma); =20 ptep_modify_prot_commit(vma, addr, pte, oldpte, ptent); --=20 2.30.2 From nobody Mon Feb 9 05:53:19 2026 Received: from foss.arm.com (foss.arm.com [217.140.110.172]) by smtp.subspace.kernel.org (Postfix) with ESMTP id 6EDE52673B7 for ; Mon, 28 Apr 2025 12:05:48 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=217.140.110.172 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1745841950; cv=none; b=FLy59Rhr9e6/giX470es5s3uigK+yep9V1UzCgQYP8Vb8Dv0PhHK44iCGXKjCa2ay45gBVoqd5yi5O6PL/RYPB6MXE/BXVFFScSXRHU3rUHyXZyO/rIQ+aBA24/Mb8P8lnpdTsdrQcV5AKv9zjrgKt/iBCLpQue6aLBqmQ7s5fY= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1745841950; c=relaxed/simple; bh=uhhb/WbjTvfi31f7EFPTIbVN0sxli/jwpt1FYquhYkE=; h=From:To:Cc:Subject:Date:Message-Id:In-Reply-To:References: MIME-Version; b=N4CQbOHuXT6vR+EqDsKox0Gt3WIYbduRz5V2F7sahgZ6mXRiPSf6OcZeCk8OP3UAWIw510Hrzlhka+r+MVnai+yyqGXXbq6xSKU1DJ+u2s4FFoxTVvkCrt5IS3pMQN588Q1XtMYxqKFDizfdby6S1Kx78vEL+vgkjmuILyTxcCA= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=arm.com; spf=pass smtp.mailfrom=arm.com; arc=none smtp.client-ip=217.140.110.172 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=arm.com Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=arm.com Received: from usa-sjc-imap-foss1.foss.arm.com (unknown [10.121.207.14]) by usa-sjc-mx-foss1.foss.arm.com (Postfix) with ESMTP id 4C17122D7; Mon, 28 Apr 2025 05:05:41 -0700 (PDT) Received: from K4MQJ0H1H2.arm.com (unknown [10.163.78.210]) by usa-sjc-imap-foss1.foss.arm.com (Postfix) with ESMTPA id 57D683F66E; Mon, 28 Apr 2025 05:05:38 -0700 (PDT) From: Dev Jain To: akpm@linux-foundation.org Cc: ryan.roberts@arm.com, david@redhat.com, willy@infradead.org, linux-mm@kvack.org, linux-kernel@vger.kernel.org, catalin.marinas@arm.com, will@kernel.org, Liam.Howlett@oracle.com, lorenzo.stoakes@oracle.com, vbabka@suse.cz, jannh@google.com, anshuman.khandual@arm.com, peterx@redhat.com, joey.gouly@arm.com, ioworker0@gmail.com, baohua@kernel.org, kevin.brodsky@arm.com, quic_zhenhuah@quicinc.com, christophe.leroy@csgroup.eu, yangyicong@hisilicon.com, linux-arm-kernel@lists.infradead.org, namit@vmware.com, hughd@google.com, yang@os.amperecomputing.com, ziy@nvidia.com, Dev Jain Subject: [PATCH 7/7] mm: Optimize mprotect() through PTE-batching Date: Mon, 28 Apr 2025 17:34:14 +0530 Message-Id: <20250428120414.12101-8-dev.jain@arm.com> X-Mailer: git-send-email 2.39.3 (Apple Git-146) In-Reply-To: <20250428120414.12101-1-dev.jain@arm.com> References: <20250428120414.12101-1-dev.jain@arm.com> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" The common pte_present case does not require the folio. 
Elide the overhead = of vm_normal_folio() for the small folio case, by making an approximation: for arm64, pte_batch_hint() is conclusive. For other arches, if the pfns pointed to by the current and the next PTE are contiguous, check whether a large folio is actually mapped, and only then make the batch optimization. Reuse the folio from prot_numa case if possible. Since modify_prot_start_pt= es() gathers access/dirty bits, it lets us batch around pte_needs_flush() (for parisc, the definition includes the access bit). Signed-off-by: Dev Jain --- mm/mprotect.c | 49 +++++++++++++++++++++++++++++++++++-------------- 1 file changed, 35 insertions(+), 14 deletions(-) diff --git a/mm/mprotect.c b/mm/mprotect.c index 362fd7e5457d..d382d57bc796 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -131,7 +131,7 @@ static bool prot_numa_skip(struct vm_area_struct *vma, = struct folio *folio, return false; } =20 -static bool prot_numa_avoid_fault(struct vm_area_struct *vma, +static struct folio *prot_numa_avoid_fault(struct vm_area_struct *vma, unsigned long addr, pte_t *pte, pte_t oldpte, int target_node, int max_nr, int *nr) { @@ -141,25 +141,37 @@ static bool prot_numa_avoid_fault(struct vm_area_stru= ct *vma, =20 /* Avoid TLB flush if possible */ if (pte_protnone(oldpte)) - return true; + return NULL; =20 folio =3D vm_normal_folio(vma, addr, oldpte); if (!folio) - return true; + return NULL; =20 ret =3D prot_numa_skip(vma, folio, target_node); if (ret) { if (folio_test_large(folio) && max_nr !=3D 1) *nr =3D folio_pte_batch(folio, addr, pte, oldpte, max_nr, flags, NULL, NULL, NULL); - return ret; + return NULL; } if (folio_use_access_time(folio)) folio_xchg_access_time(folio, jiffies_to_msecs(jiffies)); - return false; + return folio; } =20 +static bool maybe_contiguous_pte_pfns(pte_t *ptep, pte_t pte) +{ + pte_t *next_ptep, next_pte; + + if (pte_batch_hint(ptep, pte) !=3D 1) + return true; + + next_ptep =3D ptep + 1; + next_pte =3D ptep_get(next_ptep); + + return unlikely(pte_pfn(next_pte) - pte_pfn(pte) =3D=3D PAGE_SIZE); +} static long change_pte_range(struct mmu_gather *tlb, struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr, unsigned long end, pgprot_t newprot, unsigned long cp_flags) @@ -190,19 +202,28 @@ static long change_pte_range(struct mmu_gather *tlb, oldpte =3D ptep_get(pte); if (pte_present(oldpte)) { int max_nr =3D (end - addr) >> PAGE_SHIFT; + const fpb_t flags =3D FPB_IGNORE_DIRTY; + struct folio *folio =3D NULL; pte_t ptent; =20 /* * Avoid trapping faults against the zero or KSM * pages. See similar comment in change_huge_pmd. 
*/ - if (prot_numa && - prot_numa_avoid_fault(vma, addr, pte, - oldpte, target_node, - max_nr, &nr)) + if (prot_numa) { + folio =3D prot_numa_avoid_fault(vma, addr, pte, + oldpte, target_node, max_nr, &nr); + if (!folio) continue; + } =20 - oldpte =3D ptep_modify_prot_start(vma, addr, pte); + if (!folio && (max_nr !=3D 1) && maybe_contiguous_pte_pfns(pte, oldpte)= ) { + folio =3D vm_normal_folio(vma, addr, oldpte); + if (folio_test_large(folio)) + nr =3D folio_pte_batch(folio, addr, pte, + oldpte, max_nr, flags, NULL, NULL, NULL); + } + oldpte =3D modify_prot_start_ptes(vma, addr, pte, nr); ptent =3D pte_modify(oldpte, newprot); =20 if (uffd_wp) @@ -225,13 +246,13 @@ static long change_pte_range(struct mmu_gather *tlb, */ if ((cp_flags & MM_CP_TRY_CHANGE_WRITABLE) && !pte_write(ptent) && - can_change_ptes_writable(vma, addr, ptent, folio, 1)) + can_change_ptes_writable(vma, addr, ptent, folio, nr)) ptent =3D pte_mkwrite(ptent, vma); =20 - ptep_modify_prot_commit(vma, addr, pte, oldpte, ptent); + modify_prot_commit_ptes(vma, addr, pte, oldpte, ptent, nr); if (pte_needs_flush(oldpte, ptent)) - tlb_flush_pte_range(tlb, addr, PAGE_SIZE); - pages++; + tlb_flush_pte_range(tlb, addr, nr * PAGE_SIZE); + pages +=3D nr; } else if (is_swap_pte(oldpte)) { swp_entry_t entry =3D pte_to_swp_entry(oldpte); pte_t newpte; --=20 2.30.2
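The probe in maybe_contiguous_pte_pfns() is meant to be cheap: peek at the
next PTE and see whether it points at the physically following page, and
only then pay for vm_normal_folio() and folio_pte_batch(). Physically
contiguous pages have page frame numbers that differ by exactly one, which
is what the standalone sketch below tests (plain userspace C, not the kernel
code; names invented for illustration):

#include <stdio.h>
#include <stdbool.h>

struct model_pte { unsigned long pfn; };

/* Cheap probe: is the next entry's frame the one right after this entry's? */
static bool maybe_contiguous(const struct model_pte *pte, int max_nr)
{
	if (max_nr == 1)
		return false;			/* nothing left to batch with */
	return pte[1].pfn == pte[0].pfn + 1;
}

int main(void)
{
	struct model_pte run[] = { { 100 }, { 101 }, { 102 }, { 400 } };

	for (int i = 0; i < 4; i++)
		printf("entry %d: probe %s\n", i,
		       maybe_contiguous(&run[i], 4 - i) ? "hits" : "misses");
	return 0;
}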