From nobody Fri Oct 3 02:14:57 2025 Received: from foss.arm.com (foss.arm.com [217.140.110.172]) by smtp.subspace.kernel.org (Postfix) with ESMTP id 511F52E8B8F for ; Mon, 8 Sep 2025 07:51:16 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=217.140.110.172 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1757317879; cv=none; b=GCtAW8iZCzFqYbVOZWWVwULnEgIYjpM8cdNXXop38qf0WS4p7bPz682jIChxPBkVmEb+x2Onn6fXNJkoQ/nQKCv5BHYDjqQ9ht3t3YKIbPhJHyVNp/1dZtWTtGJbbE4AZRXNSoAHT8mvanDdNVAQ2U3Ybzs/O7lUOAz+r2PTuMk= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1757317879; c=relaxed/simple; bh=C308Md0Nbn3hdTAR4O3V1gw5jJJbnHnhb8i2lR8f7ig=; h=From:To:Cc:Subject:Date:Message-Id:In-Reply-To:References: MIME-Version; b=R2BOe8WhlkBPfE0xoxsgrRgL+cj+c6Em8KzqaytfC8O9TXrsi+RsGhaj1BByIFs/9W0MtkkOZA1VPxXkQdsA6uqEX6SUnUJSDBhC2xSMHMPTdp6ZPwrBV/49WGDaFaIVH3HCeoWDIPsAh66wSR79me1mdKS110BTieXRZ8DL5NY= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=arm.com; spf=pass smtp.mailfrom=arm.com; arc=none smtp.client-ip=217.140.110.172 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=arm.com Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=arm.com Received: from usa-sjc-imap-foss1.foss.arm.com (unknown [10.121.207.14]) by usa-sjc-mx-foss1.foss.arm.com (Postfix) with ESMTP id 5DC7E169C; Mon, 8 Sep 2025 00:51:07 -0700 (PDT) Received: from MacBook-Pro.blr.arm.com (MacBook-Pro.blr.arm.com [10.164.18.52]) by usa-sjc-imap-foss1.foss.arm.com (Postfix) with ESMTPA id 759E33F63F; Mon, 8 Sep 2025 00:51:11 -0700 (PDT) From: Dev Jain To: akpm@linux-foundation.org, david@redhat.com, kas@kernel.org, willy@infradead.org, hughd@google.com Cc: ziy@nvidia.com, baolin.wang@linux.alibaba.com, lorenzo.stoakes@oracle.com, Liam.Howlett@oracle.com, npache@redhat.com, ryan.roberts@arm.com, baohua@kernel.org, 
richard.weiyang@gmail.com, linux-mm@kvack.org, linux-kernel@vger.kernel.org, Dev Jain Subject: [PATCH v2 1/2] mm: Enable khugepaged anonymous collapse on non-writable regions Date: Mon, 8 Sep 2025 13:20:27 +0530 Message-Id: <20250908075028.38431-2-dev.jain@arm.com> X-Mailer: git-send-email 2.39.5 (Apple Git-154) In-Reply-To: <20250908075028.38431-1-dev.jain@arm.com> References: <20250908075028.38431-1-dev.jain@arm.com> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Currently khugepaged does not collapse an anonymous region which does not have a single writable pte. This restriction is wasteful, since a region mapped only with non-writable ptes (for example, non-writable VMAs mapped by the application) won't benefit from THP collapse. An additional consequence of this constraint is that MADV_COLLAPSE does not perform a collapse on a non-writable VMA, and this restriction is nowhere to be found on the manpage - the restriction itself sounds wrong to me since the user knows the protection of the memory it has mapped, so collapsing read-only memory via madvise() should be a choice of the user which shouldn't be overridden by the kernel. Therefore, remove this restriction by not honouring SCAN_PAGE_RO. 
Acked-by: David Hildenbrand Acked-by: Zi Yan =20 Reviewed-by: Wei Yang Reviewed-by: Kiryl Shutsemau Reviewed-by: Lorenzo Stoakes Reviewed-by: Baolin Wang Signed-off-by: Dev Jain Reviewed-by: Anshuman Khandual Reviewed-by: Zach O'Keefe --- mm/khugepaged.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 4ec324a4c1fe..a0f1df2a7ae6 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -676,9 +676,7 @@ static int __collapse_huge_page_isolate(struct vm_area_= struct *vma, writable =3D true; } =20 - if (unlikely(!writable)) { - result =3D SCAN_PAGE_RO; - } else if (unlikely(cc->is_khugepaged && !referenced)) { + if (unlikely(cc->is_khugepaged && !referenced)) { result =3D SCAN_LACK_REFERENCED_PAGE; } else { result =3D SCAN_SUCCEED; @@ -1421,9 +1419,7 @@ static int hpage_collapse_scan_pmd(struct mm_struct *= mm, mmu_notifier_test_young(vma->vm_mm, _address))) referenced++; } - if (!writable) { - result =3D SCAN_PAGE_RO; - } else if (cc->is_khugepaged && + if (cc->is_khugepaged && (!referenced || (unmapped && referenced < HPAGE_PMD_NR / 2))) { result =3D SCAN_LACK_REFERENCED_PAGE; @@ -2830,7 +2826,6 @@ int madvise_collapse(struct vm_area_struct *vma, unsi= gned long start, case SCAN_PMD_NULL: case SCAN_PTE_NON_PRESENT: case SCAN_PTE_UFFD_WP: - case SCAN_PAGE_RO: case SCAN_LACK_REFERENCED_PAGE: case SCAN_PAGE_NULL: case SCAN_PAGE_COUNT: --=20 2.30.2 From nobody Fri Oct 3 02:14:57 2025 Received: from foss.arm.com (foss.arm.com [217.140.110.172]) by smtp.subspace.kernel.org (Postfix) with ESMTP id 107E52E9757 for ; Mon, 8 Sep 2025 07:51:20 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=217.140.110.172 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1757317882; cv=none; b=S7fi1WFVBjYIWsDIXagyZGTeeBc6Pbjue8P3bdtd+RB7wLKhRV+Tr538GO1V5NHBJhBJfiMJZlO8Jq0ToKIhgS/THZxYTro/kEIepsTnXSe1msBUot3cFp/jqqTEQdLZfWU1pzR4no3iDdhL3OZfJJdpZD4zgNAcJRvyiW5+H5Y= 
ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1757317882; c=relaxed/simple; bh=QadiF0EnF7f8a8lHvdfm9L18L1obbAQM6Edgr7W8OvU=; h=From:To:Cc:Subject:Date:Message-Id:In-Reply-To:References: MIME-Version; b=rqsbc0+K4brxPaxQumKdxZKTMPxkSVeKDitAc7euafVkI9fG1NKBr/oe27d6ku7RsI56U6OzsuK9QOpBt7dTeNZ9aJ0XlIYfLE2zkGCA4wLKMQscXuhLpxbFthNSZ7ZxFLbhjwHAI9fSA0vKPS9qrNlaM2ghFqdXPwVGaOLVp80= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=arm.com; spf=pass smtp.mailfrom=arm.com; arc=none smtp.client-ip=217.140.110.172 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=arm.com Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=arm.com Received: from usa-sjc-imap-foss1.foss.arm.com (unknown [10.121.207.14]) by usa-sjc-mx-foss1.foss.arm.com (Postfix) with ESMTP id 2880F169C; Mon, 8 Sep 2025 00:51:12 -0700 (PDT) Received: from MacBook-Pro.blr.arm.com (MacBook-Pro.blr.arm.com [10.164.18.52]) by usa-sjc-imap-foss1.foss.arm.com (Postfix) with ESMTPA id 36CB43F63F; Mon, 8 Sep 2025 00:51:15 -0700 (PDT) From: Dev Jain To: akpm@linux-foundation.org, david@redhat.com, kas@kernel.org, willy@infradead.org, hughd@google.com Cc: ziy@nvidia.com, baolin.wang@linux.alibaba.com, lorenzo.stoakes@oracle.com, Liam.Howlett@oracle.com, npache@redhat.com, ryan.roberts@arm.com, baohua@kernel.org, richard.weiyang@gmail.com, linux-mm@kvack.org, linux-kernel@vger.kernel.org, Dev Jain Subject: [PATCH v2 2/2] mm: Drop all references of writable and SCAN_PAGE_RO Date: Mon, 8 Sep 2025 13:20:28 +0530 Message-Id: <20250908075028.38431-3-dev.jain@arm.com> X-Mailer: git-send-email 2.39.5 (Apple Git-154) In-Reply-To: <20250908075028.38431-1-dev.jain@arm.com> References: <20250908075028.38431-1-dev.jain@arm.com> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable 
Content-Type: text/plain; charset="utf-8" Now that all actionable outcomes from checking pte_write() are gone, drop the related references. Acked-by: David Hildenbrand Acked-by: Zi Yan Reviewed-by: Kiryl Shutsemau Reviewed-by: Lorenzo Stoakes Reviewed-by: Baolin Wang Signed-off-by: Dev Jain Reviewed-by: Anshuman Khandual Reviewed-by: Zach O'Keefe --- include/trace/events/huge_memory.h | 19 ++++++------------- mm/khugepaged.c | 14 +++----------- 2 files changed, 9 insertions(+), 24 deletions(-) diff --git a/include/trace/events/huge_memory.h b/include/trace/events/huge= _memory.h index 2305df6cb485..dd94d14a2427 100644 --- a/include/trace/events/huge_memory.h +++ b/include/trace/events/huge_memory.h @@ -19,7 +19,6 @@ EM( SCAN_PTE_NON_PRESENT, "pte_non_present") \ EM( SCAN_PTE_UFFD_WP, "pte_uffd_wp") \ EM( SCAN_PTE_MAPPED_HUGEPAGE, "pte_mapped_hugepage") \ - EM( SCAN_PAGE_RO, "no_writable_page") \ EM( SCAN_LACK_REFERENCED_PAGE, "lack_referenced_page") \ EM( SCAN_PAGE_NULL, "page_null") \ EM( SCAN_SCAN_ABORT, "scan_aborted") \ @@ -55,15 +54,14 @@ SCAN_STATUS =20 TRACE_EVENT(mm_khugepaged_scan_pmd, =20 - TP_PROTO(struct mm_struct *mm, struct folio *folio, bool writable, + TP_PROTO(struct mm_struct *mm, struct folio *folio, int referenced, int none_or_zero, int status, int unmapped), =20 - TP_ARGS(mm, folio, writable, referenced, none_or_zero, status, unmapped), + TP_ARGS(mm, folio, referenced, none_or_zero, status, unmapped), =20 TP_STRUCT__entry( __field(struct mm_struct *, mm) __field(unsigned long, pfn) - __field(bool, writable) __field(int, referenced) __field(int, none_or_zero) __field(int, status) @@ -73,17 +71,15 @@ TRACE_EVENT(mm_khugepaged_scan_pmd, TP_fast_assign( __entry->mm =3D mm; __entry->pfn =3D folio ? 
folio_pfn(folio) : -1; - __entry->writable =3D writable; __entry->referenced =3D referenced; __entry->none_or_zero =3D none_or_zero; __entry->status =3D status; __entry->unmapped =3D unmapped; ), =20 - TP_printk("mm=3D%p, scan_pfn=3D0x%lx, writable=3D%d, referenced=3D%d, non= e_or_zero=3D%d, status=3D%s, unmapped=3D%d", + TP_printk("mm=3D%p, scan_pfn=3D0x%lx, referenced=3D%d, none_or_zero=3D%d,= status=3D%s, unmapped=3D%d", __entry->mm, __entry->pfn, - __entry->writable, __entry->referenced, __entry->none_or_zero, __print_symbolic(__entry->status, SCAN_STATUS), @@ -117,15 +113,14 @@ TRACE_EVENT(mm_collapse_huge_page, TRACE_EVENT(mm_collapse_huge_page_isolate, =20 TP_PROTO(struct folio *folio, int none_or_zero, - int referenced, bool writable, int status), + int referenced, int status), =20 - TP_ARGS(folio, none_or_zero, referenced, writable, status), + TP_ARGS(folio, none_or_zero, referenced, status), =20 TP_STRUCT__entry( __field(unsigned long, pfn) __field(int, none_or_zero) __field(int, referenced) - __field(bool, writable) __field(int, status) ), =20 @@ -133,15 +128,13 @@ TRACE_EVENT(mm_collapse_huge_page_isolate, __entry->pfn =3D folio ? 
folio_pfn(folio) : -1; __entry->none_or_zero =3D none_or_zero; __entry->referenced =3D referenced; - __entry->writable =3D writable; __entry->status =3D status; ), =20 - TP_printk("scan_pfn=3D0x%lx, none_or_zero=3D%d, referenced=3D%d, writable= =3D%d, status=3D%s", + TP_printk("scan_pfn=3D0x%lx, none_or_zero=3D%d, referenced=3D%d, status= =3D%s", __entry->pfn, __entry->none_or_zero, __entry->referenced, - __entry->writable, __print_symbolic(__entry->status, SCAN_STATUS)) ); =20 diff --git a/mm/khugepaged.c b/mm/khugepaged.c index a0f1df2a7ae6..af5f5c80fe4e 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -39,7 +39,6 @@ enum scan_result { SCAN_PTE_NON_PRESENT, SCAN_PTE_UFFD_WP, SCAN_PTE_MAPPED_HUGEPAGE, - SCAN_PAGE_RO, SCAN_LACK_REFERENCED_PAGE, SCAN_PAGE_NULL, SCAN_SCAN_ABORT, @@ -557,7 +556,6 @@ static int __collapse_huge_page_isolate(struct vm_area_= struct *vma, struct folio *folio =3D NULL; pte_t *_pte; int none_or_zero =3D 0, shared =3D 0, result =3D SCAN_FAIL, referenced = =3D 0; - bool writable =3D false; =20 for (_pte =3D pte; _pte < pte + HPAGE_PMD_NR; _pte++, address +=3D PAGE_SIZE) { @@ -671,9 +669,6 @@ static int __collapse_huge_page_isolate(struct vm_area_= struct *vma, folio_test_referenced(folio) || mmu_notifier_test_young(vma->vm_mm, address))) referenced++; - - if (pte_write(pteval)) - writable =3D true; } =20 if (unlikely(cc->is_khugepaged && !referenced)) { @@ -681,13 +676,13 @@ static int __collapse_huge_page_isolate(struct vm_are= a_struct *vma, } else { result =3D SCAN_SUCCEED; trace_mm_collapse_huge_page_isolate(folio, none_or_zero, - referenced, writable, result); + referenced, result); return result; } out: release_pte_pages(pte, _pte, compound_pagelist); trace_mm_collapse_huge_page_isolate(folio, none_or_zero, - referenced, writable, result); + referenced, result); return result; } =20 @@ -1280,7 +1275,6 @@ static int hpage_collapse_scan_pmd(struct mm_struct *= mm, unsigned long _address; spinlock_t *ptl; int node =3D NUMA_NO_NODE, 
unmapped =3D 0; - bool writable =3D false; =20 VM_BUG_ON(address & ~HPAGE_PMD_MASK); =20 @@ -1344,8 +1338,6 @@ static int hpage_collapse_scan_pmd(struct mm_struct *= mm, result =3D SCAN_PTE_UFFD_WP; goto out_unmap; } - if (pte_write(pteval)) - writable =3D true; =20 page =3D vm_normal_page(vma, _address, pteval); if (unlikely(!page) || unlikely(is_zone_device_page(page))) { @@ -1435,7 +1427,7 @@ static int hpage_collapse_scan_pmd(struct mm_struct *= mm, *mmap_locked =3D false; } out: - trace_mm_khugepaged_scan_pmd(mm, folio, writable, referenced, + trace_mm_khugepaged_scan_pmd(mm, folio, referenced, none_or_zero, result, unmapped); return result; } --=20 2.30.2