From nobody Wed Dec 24 01:33:17 2025
From: David Hildenbrand
To: linux-kernel@vger.kernel.org
Cc: linux-mm@kvack.org, David Hildenbrand, Andrew Morton, Matthew Wilcox, Ryan Roberts, Catalin Marinas, Will Deacon, "Aneesh Kumar K.V", Nick Piggin, Peter Zijlstra, Michael Ellerman, Christophe Leroy, "Naveen N. Rao", Heiko Carstens, Vasily Gorbik, Alexander Gordeev, Christian Borntraeger, Sven Schnelle, Arnd Bergmann, linux-arch@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, linux-s390@vger.kernel.org
Subject: [PATCH v1 1/9] mm/memory: factor out zapping of present pte into zap_present_pte()
Date: Mon, 29 Jan 2024 15:32:13 +0100
Message-ID: <20240129143221.263763-2-david@redhat.com>
In-Reply-To: <20240129143221.263763-1-david@redhat.com>
References: <20240129143221.263763-1-david@redhat.com>

Let's prepare for further changes by factoring out processing of
present PTEs.

Signed-off-by: David Hildenbrand
---
 mm/memory.c | 92 ++++++++++++++++++++++++++++++-----------------------
 1 file changed, 52 insertions(+), 40 deletions(-)

diff --git a/mm/memory.c b/mm/memory.c
index b05fd28dbce1..50a6c79c78fc 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1532,13 +1532,61 @@ zap_install_uffd_wp_if_needed(struct vm_area_struct *vma,
 	pte_install_uffd_wp_if_needed(vma, addr, pte, pteval);
 }
 
+static inline void zap_present_pte(struct mmu_gather *tlb,
+		struct vm_area_struct *vma, pte_t *pte, pte_t ptent,
+		unsigned long addr, struct zap_details *details,
+		int *rss, bool *force_flush, bool *force_break)
+{
+	struct mm_struct *mm = tlb->mm;
+	bool delay_rmap = false;
+	struct folio *folio;
+	struct page *page;
+
+	page = vm_normal_page(vma, addr, ptent);
+	if (page)
+		folio = page_folio(page);
+
+	if (unlikely(!should_zap_folio(details, folio)))
+		return;
+	ptent = ptep_get_and_clear_full(mm, addr, pte, tlb->fullmm);
+	arch_check_zapped_pte(vma, ptent);
+	tlb_remove_tlb_entry(tlb, pte, addr);
+	zap_install_uffd_wp_if_needed(vma, addr, pte, details, ptent);
+	if (unlikely(!page)) {
+		ksm_might_unmap_zero_page(mm, ptent);
+		return;
+	}
+
+	if (!folio_test_anon(folio)) {
+		if (pte_dirty(ptent)) {
+			folio_mark_dirty(folio);
+			if (tlb_delay_rmap(tlb)) {
+				delay_rmap = true;
+				*force_flush = true;
+			}
+		}
+		if (pte_young(ptent) && likely(vma_has_recency(vma)))
+			folio_mark_accessed(folio);
+	}
+	rss[mm_counter(folio)]--;
+	if (!delay_rmap) {
+		folio_remove_rmap_pte(folio, page, vma);
+		if (unlikely(page_mapcount(page) < 0))
+			print_bad_pte(vma, addr, ptent, page);
+	}
+	if (unlikely(__tlb_remove_page(tlb, page, delay_rmap))) {
+		*force_flush = true;
+		*force_break = true;
+	}
+}
+
 static unsigned long zap_pte_range(struct mmu_gather *tlb,
 				struct vm_area_struct *vma, pmd_t *pmd,
 				unsigned long addr, unsigned long end,
 				struct zap_details *details)
 {
+	bool force_flush = false, force_break = false;
 	struct mm_struct *mm = tlb->mm;
-	int force_flush = 0;
 	int rss[NR_MM_COUNTERS];
 	spinlock_t *ptl;
 	pte_t *start_pte;
@@ -1565,45 +1613,9 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
 			break;
 
 		if (pte_present(ptent)) {
-			unsigned int delay_rmap;
-
-			page = vm_normal_page(vma, addr, ptent);
-			if (page)
-				folio = page_folio(page);
-
-			if (unlikely(!should_zap_folio(details, folio)))
-				continue;
-			ptent = ptep_get_and_clear_full(mm, addr, pte,
-							tlb->fullmm);
-			arch_check_zapped_pte(vma, ptent);
-			tlb_remove_tlb_entry(tlb, pte, addr);
-			zap_install_uffd_wp_if_needed(vma, addr, pte, details,
-						      ptent);
-			if (unlikely(!page)) {
-				ksm_might_unmap_zero_page(mm, ptent);
-				continue;
-			}
-
-			delay_rmap = 0;
-			if (!folio_test_anon(folio)) {
-				if (pte_dirty(ptent)) {
-					folio_mark_dirty(folio);
-					if (tlb_delay_rmap(tlb)) {
-						delay_rmap = 1;
-						force_flush = 1;
-					}
-				}
-				if (pte_young(ptent) && likely(vma_has_recency(vma)))
-					folio_mark_accessed(folio);
-			}
-			rss[mm_counter(folio)]--;
-			if (!delay_rmap) {
-				folio_remove_rmap_pte(folio, page, vma);
-				if (unlikely(page_mapcount(page) < 0))
-					print_bad_pte(vma, addr, ptent, page);
-			}
-			if (unlikely(__tlb_remove_page(tlb, page, delay_rmap))) {
-				force_flush = 1;
+			zap_present_pte(tlb, vma, pte, ptent, addr, details,
+					rss, &force_flush, &force_break);
+			if (unlikely(force_break)) {
 				addr += PAGE_SIZE;
 				break;
 			}
-- 
2.43.0
From nobody Wed Dec 24 01:33:17 2025
From: David Hildenbrand
To: linux-kernel@vger.kernel.org
Cc: linux-mm@kvack.org, David Hildenbrand, Andrew Morton, Matthew Wilcox, Ryan Roberts, Catalin Marinas, Will Deacon, "Aneesh Kumar K.V", Nick Piggin, Peter Zijlstra, Michael Ellerman, Christophe Leroy, "Naveen N. Rao", Heiko Carstens, Vasily Gorbik, Alexander Gordeev, Christian Borntraeger, Sven Schnelle, Arnd Bergmann, linux-arch@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, linux-s390@vger.kernel.org
Subject: [PATCH v1 2/9] mm/memory: handle !page case in zap_present_pte() separately
Date: Mon, 29 Jan 2024 15:32:14 +0100
Message-ID: <20240129143221.263763-3-david@redhat.com>
In-Reply-To: <20240129143221.263763-1-david@redhat.com>
References: <20240129143221.263763-1-david@redhat.com>

We don't need up-to-date accessed/dirty bits, so in theory we could
replace ptep_get_and_clear_full() with an optimized ptep_clear_full()
function. Let's rely on the provided pte.

Further, there is no scenario where we would have to insert uffd-wp
markers when zapping something that is not a normal page (i.e., the
zeropage). Add a sanity check to make sure this remains true.

should_zap_folio() no longer has to handle NULL pointers. This change
replaces two of the three "!page/!folio" checks with a single "!page"
check.

Signed-off-by: David Hildenbrand
Reviewed-by: Ryan Roberts
---
 mm/memory.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/mm/memory.c b/mm/memory.c
index 50a6c79c78fc..69502cdc0a7d 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1497,10 +1497,6 @@ static inline bool should_zap_folio(struct zap_details *details,
 	if (should_zap_cows(details))
 		return true;
 
-	/* E.g. the caller passes NULL for the case of a zero folio */
-	if (!folio)
-		return true;
-
 	/* Otherwise we should only zap non-anon folios */
 	return !folio_test_anon(folio);
 }
@@ -1543,19 +1539,23 @@ static inline void zap_present_pte(struct mmu_gather *tlb,
 	struct page *page;
 
 	page = vm_normal_page(vma, addr, ptent);
-	if (page)
-		folio = page_folio(page);
+	if (!page) {
+		/* We don't need up-to-date accessed/dirty bits. */
+		ptep_get_and_clear_full(mm, addr, pte, tlb->fullmm);
+		arch_check_zapped_pte(vma, ptent);
+		tlb_remove_tlb_entry(tlb, pte, addr);
+		VM_WARN_ON_ONCE(userfaultfd_wp(vma));
+		ksm_might_unmap_zero_page(mm, ptent);
+		return;
+	}
 
+	folio = page_folio(page);
 	if (unlikely(!should_zap_folio(details, folio)))
 		return;
 	ptent = ptep_get_and_clear_full(mm, addr, pte, tlb->fullmm);
 	arch_check_zapped_pte(vma, ptent);
 	tlb_remove_tlb_entry(tlb, pte, addr);
 	zap_install_uffd_wp_if_needed(vma, addr, pte, details, ptent);
-	if (unlikely(!page)) {
-		ksm_might_unmap_zero_page(mm, ptent);
-		return;
-	}
 
 	if (!folio_test_anon(folio)) {
 		if (pte_dirty(ptent)) {
-- 
2.43.0
From nobody Wed Dec 24 01:33:17 2025
From: David Hildenbrand
To: linux-kernel@vger.kernel.org
Cc: linux-mm@kvack.org, David Hildenbrand, Andrew Morton, Matthew Wilcox, Ryan Roberts, Catalin Marinas, Will Deacon, "Aneesh Kumar K.V", Nick Piggin, Peter Zijlstra, Michael Ellerman, Christophe Leroy, "Naveen N. Rao", Heiko Carstens, Vasily Gorbik, Alexander Gordeev, Christian Borntraeger, Sven Schnelle, Arnd Bergmann, linux-arch@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, linux-s390@vger.kernel.org
Subject: [PATCH v1 3/9] mm/memory: further separate anon and pagecache folio handling in zap_present_pte()
Date: Mon, 29 Jan 2024 15:32:15 +0100
Message-ID: <20240129143221.263763-4-david@redhat.com>
In-Reply-To: <20240129143221.263763-1-david@redhat.com>
References: <20240129143221.263763-1-david@redhat.com>

We don't need up-to-date accessed/dirty information for anon folios and
can simply work with the ptent we already have. Also, we know the RSS
counter we want to update.

We can safely move arch_check_zapped_pte() + tlb_remove_tlb_entry() +
zap_install_uffd_wp_if_needed() after updating the folio and RSS.

While at it, only call zap_install_uffd_wp_if_needed() if there is any
chance that pte_install_uffd_wp_if_needed() would do *something*. That
is, just don't bother if uffd-wp does not apply.

Signed-off-by: David Hildenbrand
Reviewed-by: Ryan Roberts
---
 mm/memory.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/mm/memory.c b/mm/memory.c
index 69502cdc0a7d..20bc13ab8db2 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1552,12 +1552,9 @@ static inline void zap_present_pte(struct mmu_gather *tlb,
 	folio = page_folio(page);
 	if (unlikely(!should_zap_folio(details, folio)))
 		return;
-	ptent = ptep_get_and_clear_full(mm, addr, pte, tlb->fullmm);
-	arch_check_zapped_pte(vma, ptent);
-	tlb_remove_tlb_entry(tlb, pte, addr);
-	zap_install_uffd_wp_if_needed(vma, addr, pte, details, ptent);
 
 	if (!folio_test_anon(folio)) {
+		ptent = ptep_get_and_clear_full(mm, addr, pte, tlb->fullmm);
 		if (pte_dirty(ptent)) {
 			folio_mark_dirty(folio);
 			if (tlb_delay_rmap(tlb)) {
@@ -1567,8 +1564,17 @@ static inline void zap_present_pte(struct mmu_gather *tlb,
 		}
 		if (pte_young(ptent) && likely(vma_has_recency(vma)))
 			folio_mark_accessed(folio);
+		rss[mm_counter(folio)]--;
+	} else {
+		/* We don't need up-to-date accessed/dirty bits. */
+		ptep_get_and_clear_full(mm, addr, pte, tlb->fullmm);
+		rss[MM_ANONPAGES]--;
 	}
-	rss[mm_counter(folio)]--;
+	arch_check_zapped_pte(vma, ptent);
+	tlb_remove_tlb_entry(tlb, pte, addr);
+	if (unlikely(userfaultfd_pte_wp(vma, ptent)))
+		zap_install_uffd_wp_if_needed(vma, addr, pte, details, ptent);
+
 	if (!delay_rmap) {
 		folio_remove_rmap_pte(folio, page, vma);
 		if (unlikely(page_mapcount(page) < 0))
-- 
2.43.0
From nobody Wed Dec 24 01:33:17 2025
From: David Hildenbrand
To: linux-kernel@vger.kernel.org
Cc: linux-mm@kvack.org, David Hildenbrand, Andrew Morton, Matthew Wilcox, Ryan Roberts, Catalin Marinas, Will Deacon, "Aneesh Kumar K.V", Nick Piggin, Peter Zijlstra, Michael Ellerman, Christophe Leroy, "Naveen N. Rao", Heiko Carstens, Vasily Gorbik, Alexander Gordeev, Christian Borntraeger, Sven Schnelle, Arnd Bergmann, linux-arch@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, linux-s390@vger.kernel.org
Subject: [PATCH v1 4/9] mm/memory: factor out zapping folio pte into zap_present_folio_pte()
Date: Mon, 29 Jan 2024 15:32:16 +0100
Message-ID: <20240129143221.263763-5-david@redhat.com>
In-Reply-To: <20240129143221.263763-1-david@redhat.com>
References: <20240129143221.263763-1-david@redhat.com>

Let's prepare for further changes by factoring it out into a separate
function.

Signed-off-by: David Hildenbrand
Reviewed-by: Ryan Roberts
---
 mm/memory.c | 53 ++++++++++++++++++++++++++++++++---------------------
 1 file changed, 32 insertions(+), 21 deletions(-)

diff --git a/mm/memory.c b/mm/memory.c
index 20bc13ab8db2..a2190d7cfa74 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1528,30 +1528,14 @@ zap_install_uffd_wp_if_needed(struct vm_area_struct *vma,
 	pte_install_uffd_wp_if_needed(vma, addr, pte, pteval);
 }
 
-static inline void zap_present_pte(struct mmu_gather *tlb,
-		struct vm_area_struct *vma, pte_t *pte, pte_t ptent,
-		unsigned long addr, struct zap_details *details,
-		int *rss, bool *force_flush, bool *force_break)
+static inline void zap_present_folio_pte(struct mmu_gather *tlb,
+		struct vm_area_struct *vma, struct folio *folio,
+		struct page *page, pte_t *pte, pte_t ptent, unsigned long addr,
+		struct zap_details *details, int *rss, bool *force_flush,
+		bool *force_break)
 {
 	struct mm_struct *mm = tlb->mm;
 	bool delay_rmap = false;
-	struct folio *folio;
-	struct page *page;
-
-	page = vm_normal_page(vma, addr, ptent);
-	if (!page) {
-		/* We don't need up-to-date accessed/dirty bits. */
-		ptep_get_and_clear_full(mm, addr, pte, tlb->fullmm);
-		arch_check_zapped_pte(vma, ptent);
-		tlb_remove_tlb_entry(tlb, pte, addr);
-		VM_WARN_ON_ONCE(userfaultfd_wp(vma));
-		ksm_might_unmap_zero_page(mm, ptent);
-		return;
-	}
-
-	folio = page_folio(page);
-	if (unlikely(!should_zap_folio(details, folio)))
-		return;
 
 	if (!folio_test_anon(folio)) {
 		ptent = ptep_get_and_clear_full(mm, addr, pte, tlb->fullmm);
@@ -1586,6 +1570,33 @@ static inline void zap_present_pte(struct mmu_gather *tlb,
 	}
 }
 
+static inline void zap_present_pte(struct mmu_gather *tlb,
+		struct vm_area_struct *vma, pte_t *pte, pte_t ptent,
+		unsigned long addr, struct zap_details *details,
+		int *rss, bool *force_flush, bool *force_break)
+{
+	struct mm_struct *mm = tlb->mm;
+	struct folio *folio;
+	struct page *page;
+
+	page = vm_normal_page(vma, addr, ptent);
+	if (!page) {
+		/* We don't need up-to-date accessed/dirty bits. */
+		ptep_get_and_clear_full(mm, addr, pte, tlb->fullmm);
+		arch_check_zapped_pte(vma, ptent);
+		tlb_remove_tlb_entry(tlb, pte, addr);
+		VM_WARN_ON_ONCE(userfaultfd_wp(vma));
+		ksm_might_unmap_zero_page(mm, ptent);
+		return;
+	}
+
+	folio = page_folio(page);
+	if (unlikely(!should_zap_folio(details, folio)))
+		return;
+	zap_present_folio_pte(tlb, vma, folio, page, pte, ptent, addr, details,
+			      rss, force_flush, force_break);
+}
+
 static unsigned long zap_pte_range(struct mmu_gather *tlb,
 				struct vm_area_struct *vma, pmd_t *pmd,
 				unsigned long addr, unsigned long end,
-- 
2.43.0
From nobody Wed Dec 24 01:33:17 2025
From: David Hildenbrand
To: linux-kernel@vger.kernel.org
Cc: linux-mm@kvack.org, David Hildenbrand, Andrew Morton, Matthew Wilcox, Ryan Roberts, Catalin Marinas, Will Deacon, "Aneesh Kumar K.V", Nick Piggin, Peter Zijlstra, Michael Ellerman, Christophe Leroy, "Naveen N. Rao", Heiko Carstens, Vasily Gorbik, Alexander Gordeev, Christian Borntraeger, Sven Schnelle, Arnd Bergmann, linux-arch@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, linux-s390@vger.kernel.org
Subject: [PATCH v1 5/9] mm/mmu_gather: pass "delay_rmap" instead of encoded page to __tlb_remove_page_size()
Date: Mon, 29 Jan 2024 15:32:17 +0100
Message-ID: <20240129143221.263763-6-david@redhat.com>
In-Reply-To: <20240129143221.263763-1-david@redhat.com>
References: <20240129143221.263763-1-david@redhat.com>

We have two bits available in the encoded page pointer to store
additional information. Currently, we use one bit to request delay of
the rmap removal until after a TLB flush.

We want to make use of the remaining bit internally for batching of
multiple pages of the same folio, specifying that the next encoded page
pointer in an array is actually "nr_pages". So pass the page and a
delay_rmap flag instead of an encoded page, and handle the encoding
internally.

Signed-off-by: David Hildenbrand
Reviewed-by: Ryan Roberts
---
 arch/s390/include/asm/tlb.h | 13 ++++++-------
 include/asm-generic/tlb.h   | 12 ++++++------
 mm/mmu_gather.c             |  7 ++++---
 3 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
index d1455a601adc..48df896d5b79 100644
--- a/arch/s390/include/asm/tlb.h
+++ b/arch/s390/include/asm/tlb.h
@@ -25,8 +25,7 @@ void __tlb_remove_table(void *_table);
 static inline void tlb_flush(struct mmu_gather *tlb);
 static inline bool __tlb_remove_page_size(struct mmu_gather *tlb,
-		struct encoded_page *page,
-		int page_size);
+		struct page *page, bool delay_rmap, int page_size);
 
 #define tlb_flush tlb_flush
 #define pte_free_tlb pte_free_tlb
@@ -42,14 +41,14 @@ static inline bool __tlb_remove_page_size(struct mmu_gather *tlb,
  * tlb_ptep_clear_flush. In both flush modes the tlb for a page cache page
  * has already been freed, so just do free_page_and_swap_cache.
  *
- * s390 doesn't delay rmap removal, so there is nothing encoded in
- * the page pointer.
+ * s390 doesn't delay rmap removal.
  */
 static inline bool __tlb_remove_page_size(struct mmu_gather *tlb,
-		struct encoded_page *page,
-		int page_size)
+		struct page *page, bool delay_rmap, int page_size)
 {
-	free_page_and_swap_cache(encoded_page_ptr(page));
+	VM_WARN_ON_ONCE(delay_rmap);
+
+	free_page_and_swap_cache(page);
 	return false;
 }
 
diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
index 129a3a759976..2eb7b0d4f5d2 100644
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -260,9 +260,8 @@ struct mmu_gather_batch {
  */
 #define MAX_GATHER_BATCH_COUNT	(10000UL/MAX_GATHER_BATCH)
 
-extern bool __tlb_remove_page_size(struct mmu_gather *tlb,
-				   struct encoded_page *page,
-				   int page_size);
+extern bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page,
+				   bool delay_rmap, int page_size);
 
 #ifdef CONFIG_SMP
 /*
@@ -462,13 +461,14 @@ static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
 static inline void tlb_remove_page_size(struct mmu_gather *tlb,
 					struct page *page, int page_size)
 {
-	if (__tlb_remove_page_size(tlb, encode_page(page, 0), page_size))
+	if (__tlb_remove_page_size(tlb, page, false, page_size))
 		tlb_flush_mmu(tlb);
 }
 
-static __always_inline bool __tlb_remove_page(struct mmu_gather *tlb, struct page *page, unsigned int flags)
+static __always_inline bool __tlb_remove_page(struct mmu_gather *tlb,
+		struct page *page, bool delay_rmap)
 {
-	return __tlb_remove_page_size(tlb, encode_page(page, flags), PAGE_SIZE);
+	return __tlb_remove_page_size(tlb, page, delay_rmap, PAGE_SIZE);
 }
 
 /* tlb_remove_page
diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c
index 604ddf08affe..ac733d81b112 100644
--- a/mm/mmu_gather.c
+++ b/mm/mmu_gather.c
@@ -116,7 +116,8 @@ static void tlb_batch_list_free(struct mmu_gather *tlb)
 	tlb->local.next = NULL;
 }
 
-bool __tlb_remove_page_size(struct mmu_gather *tlb, struct encoded_page *page, int page_size)
+bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page,
+		bool delay_rmap, int page_size)
 {
 	struct mmu_gather_batch *batch;
 
@@ -131,13 +132,13 @@ bool __tlb_remove_page_size(struct mmu_gather *tlb, struct encoded_page *page, int page_size)
 	 * Add the page and check if we are full. If so
 	 * force a flush.
 	 */
-	batch->encoded_pages[batch->nr++] = page;
+	batch->encoded_pages[batch->nr++] = encode_page(page, delay_rmap);
 	if (batch->nr == batch->max) {
 		if (!tlb_next_batch(tlb))
 			return true;
 		batch = tlb->active;
 	}
-	VM_BUG_ON_PAGE(batch->nr > batch->max, encoded_page_ptr(page));
+	VM_BUG_ON_PAGE(batch->nr > batch->max, page);
 
 	return false;
 }
-- 
2.43.0
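The "two bits available" claim above follows from pointer alignment: a pointer to an object aligned to at least 4 bytes always has its two least significant bits clear, so they can carry flags. A minimal, kernel-independent sketch of that invariant (the demo_page type and helper are made up for illustration; the kernel's struct page is aligned even more strictly):

#include <stdalign.h>
#include <stdint.h>

struct demo_page { long a; long b; };	/* naturally 8-byte aligned */

_Static_assert(alignof(struct demo_page) >= 4,
	       "need at least 4-byte alignment to reuse 2 low pointer bits");

/* For any valid demo_page pointer, the two low bits are zero and free. */
static inline int demo_low_bits_are_zero(const struct demo_page *p)
{
	return ((uintptr_t)p & 3ul) == 0;
}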
From nobody Wed Dec 24 01:33:17 2025
From: David Hildenbrand
To: linux-kernel@vger.kernel.org
Cc: linux-mm@kvack.org, David Hildenbrand, Andrew Morton, Matthew Wilcox, Ryan Roberts, Catalin Marinas, Will Deacon, "Aneesh Kumar K.V", Nick Piggin, Peter Zijlstra, Michael Ellerman, Christophe Leroy, "Naveen N. Rao", Heiko Carstens, Vasily Gorbik, Alexander Gordeev, Christian Borntraeger, Sven Schnelle, Arnd Bergmann, linux-arch@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, linux-s390@vger.kernel.org
Subject: [PATCH v1 6/9] mm/mmu_gather: define ENCODED_PAGE_FLAG_DELAY_RMAP
Date: Mon, 29 Jan 2024 15:32:18 +0100
Message-ID: <20240129143221.263763-7-david@redhat.com>
In-Reply-To: <20240129143221.263763-1-david@redhat.com>
References: <20240129143221.263763-1-david@redhat.com>

Nowadays, encoded pages are only used in mmu_gather handling. Let's
update the documentation, and define ENCODED_PAGE_BIT_DELAY_RMAP. While
at it, rename ENCODE_PAGE_BITS to ENCODED_PAGE_BITS.

If encoded page pointers were ever used in another context again, we'd
likely want to change the defines to reflect their context (e.g.,
ENCODED_PAGE_FLAG_MMU_GATHER_DELAY_RMAP). For now, let's keep it simple.

This is a preparation for using the remaining spare bit to indicate that
the next item in an array of encoded pages is a "nr_pages" argument and
not an encoded page.

Signed-off-by: David Hildenbrand
Reviewed-by: Ryan Roberts
---
 include/linux/mm_types.h | 17 +++++++++++------
 mm/mmu_gather.c          |  5 +++--
 2 files changed, 14 insertions(+), 8 deletions(-)

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 8b611e13153e..1b89eec0d6df 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -210,8 +210,8 @@ struct page {
  *
  * An 'encoded_page' pointer is a pointer to a regular 'struct page', but
  * with the low bits of the pointer indicating extra context-dependent
- * information. Not super-common, but happens in mmu_gather and mlock
- * handling, and this acts as a type system check on that use.
+ * information. Only used in mmu_gather handling, and this acts as a type
+ * system check on that use.
  *
  * We only really have two guaranteed bits in general, although you could
  * play with 'struct page' alignment (see CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
@@ -220,21 +220,26 @@ struct page {
  * Use the supplied helper functions to endcode/decode the pointer and bits.
  */
 struct encoded_page;
-#define ENCODE_PAGE_BITS 3ul
+
+#define ENCODED_PAGE_BITS		3ul
+
+/* Perform rmap removal after we have flushed the TLB. */
+#define ENCODED_PAGE_BIT_DELAY_RMAP	1ul
+
 static __always_inline struct encoded_page *encode_page(struct page *page, unsigned long flags)
 {
-	BUILD_BUG_ON(flags > ENCODE_PAGE_BITS);
+	BUILD_BUG_ON(flags > ENCODED_PAGE_BITS);
 	return (struct encoded_page *)(flags | (unsigned long)page);
 }
 
 static inline unsigned long encoded_page_flags(struct encoded_page *page)
 {
-	return ENCODE_PAGE_BITS & (unsigned long)page;
+	return ENCODED_PAGE_BITS & (unsigned long)page;
 }
 
 static inline struct page *encoded_page_ptr(struct encoded_page *page)
 {
-	return (struct page *)(~ENCODE_PAGE_BITS & (unsigned long)page);
+	return (struct page *)(~ENCODED_PAGE_BITS & (unsigned long)page);
 }
 
 /*
diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c
index ac733d81b112..6540c99c6758 100644
--- a/mm/mmu_gather.c
+++ b/mm/mmu_gather.c
@@ -53,7 +53,7 @@ static void tlb_flush_rmap_batch(struct mmu_gather_batch *batch, struct vm_area_
 	for (int i = 0; i < batch->nr; i++) {
 		struct encoded_page *enc = batch->encoded_pages[i];
 
-		if (encoded_page_flags(enc)) {
+		if (encoded_page_flags(enc) & ENCODED_PAGE_BIT_DELAY_RMAP) {
 			struct page *page = encoded_page_ptr(enc);
 			folio_remove_rmap_pte(page_folio(page), page, vma);
 		}
@@ -119,6 +119,7 @@ static void tlb_batch_list_free(struct mmu_gather *tlb)
 bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page,
 		bool delay_rmap, int page_size)
 {
+	int flags = delay_rmap ? ENCODED_PAGE_BIT_DELAY_RMAP : 0;
 	struct mmu_gather_batch *batch;
 
 	VM_BUG_ON(!tlb->end);
@@ -132,7 +133,7 @@ bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page,
 	 * Add the page and check if we are full. If so
 	 * force a flush.
 	 */
-	batch->encoded_pages[batch->nr++] = encode_page(page, delay_rmap);
+	batch->encoded_pages[batch->nr++] = encode_page(page, flags);
 	if (batch->nr == batch->max) {
 		if (!tlb_next_batch(tlb))
 			return true;
-- 
2.43.0
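For illustration, here is a minimal, standalone sketch of the low-bit pointer tagging that encode_page()/encoded_page_flags()/encoded_page_ptr() implement. The demo_* names and the demo_page type are made up; this is not the kernel API, just the same encoding idea under stated alignment assumptions:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define DEMO_BITS	3ul	/* low bits usable for flags */
#define DEMO_DELAY_RMAP	1ul	/* mirrors ENCODED_PAGE_BIT_DELAY_RMAP */

struct demo_page { long payload; } __attribute__((aligned(8)));

static uintptr_t demo_encode(struct demo_page *p, unsigned long flags)
{
	assert(flags <= DEMO_BITS);	/* flags must fit in the spare low bits */
	return (uintptr_t)p | flags;
}

static unsigned long demo_flags(uintptr_t enc)
{
	return enc & DEMO_BITS;
}

static struct demo_page *demo_ptr(uintptr_t enc)
{
	return (struct demo_page *)(enc & ~(uintptr_t)DEMO_BITS);
}

int main(void)
{
	struct demo_page page = { .payload = 42 };
	uintptr_t enc = demo_encode(&page, DEMO_DELAY_RMAP);

	/* The pointer and the flag can be recovered independently. */
	printf("payload=%ld delay_rmap=%lu\n",
	       demo_ptr(enc)->payload, demo_flags(enc) & DEMO_DELAY_RMAP);
	return 0;
}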
key) header.d=redhat.com header.i=@redhat.com header.b="ad1FhxUp" DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=redhat.com; s=mimecast20190719; t=1706538792; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=+kOcFg6PGNbb+ifN7/MrtgmE3FIL277TBSGnt13KTXI=; b=ad1FhxUp5TAvb7lcNA/r2BXiM3Ye7HRNvEDJKnsD0uCrTiQZjctcjLV+Eu84n8JhVT/K1n iwvqKUm3AdZmUueoeLJ9OAPziRcd0m+IMw8PrADtzJRCg3Xh0HfqAd8rQgN1Ur7TgnvkD0 Ca0hBcW1DoVqcc/dzC1r+MGMiDJbSg8= Received: from mimecast-mx02.redhat.com (mx-ext.redhat.com [66.187.233.73]) by relay.mimecast.com with ESMTP with STARTTLS (version=TLSv1.3, cipher=TLS_AES_256_GCM_SHA384) id us-mta-266-skzeZ3g1NrqHlItJu1KF5Q-1; Mon, 29 Jan 2024 09:33:03 -0500 X-MC-Unique: skzeZ3g1NrqHlItJu1KF5Q-1 Received: from smtp.corp.redhat.com (int-mx01.intmail.prod.int.rdu2.redhat.com [10.11.54.1]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits) server-digest SHA256) (No client certificate requested) by mimecast-mx02.redhat.com (Postfix) with ESMTPS id 518D43C025AD; Mon, 29 Jan 2024 14:33:02 +0000 (UTC) Received: from t14s.fritz.box (unknown [10.39.194.46]) by smtp.corp.redhat.com (Postfix) with ESMTP id AC506C3F; Mon, 29 Jan 2024 14:32:57 +0000 (UTC) From: David Hildenbrand To: linux-kernel@vger.kernel.org Cc: linux-mm@kvack.org, David Hildenbrand , Andrew Morton , Matthew Wilcox , Ryan Roberts , Catalin Marinas , Will Deacon , "Aneesh Kumar K.V" , Nick Piggin , Peter Zijlstra , Michael Ellerman , Christophe Leroy , "Naveen N. Rao" , Heiko Carstens , Vasily Gorbik , Alexander Gordeev , Christian Borntraeger , Sven Schnelle , Arnd Bergmann , linux-arch@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, linux-s390@vger.kernel.org Subject: [PATCH v1 7/9] mm/mmu_gather: add __tlb_remove_folio_pages() Date: Mon, 29 Jan 2024 15:32:19 +0100 Message-ID: <20240129143221.263763-8-david@redhat.com> In-Reply-To: <20240129143221.263763-1-david@redhat.com> References: <20240129143221.263763-1-david@redhat.com> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable X-Scanned-By: MIMEDefang 3.4.1 on 10.11.54.1 Content-Type: text/plain; charset="utf-8" Add __tlb_remove_folio_pages(), which will remove multiple consecutive pages that belong to the same large folio, instead of only a single page. We'll be using this function when optimizing unmapping/zapping of large folios that are mapped by PTEs. We're using the remaining spare bit in an encoded_page to indicate that the next enoced page in an array contains actually shifted "nr_pages". Teach swap/freeing code about putting multiple folio references, and delayed rmap handling to remove page ranges of a folio. This extension allows for still gathering almost as many small folios as we used to (-1, because we have to prepare for a possibly bigger next entry), but still allows for gathering consecutive pages that belong to the same large folio. Note that we don't pass the folio pointer, because it is not required for now. Further, we don't support page_size !=3D PAGE_SIZE, it won't be required for simple PTE batching. We have to provide a separate s390 implementation, but it's fairly straight forward. 
Another, more invasive and likely more expensive, approach would be to use folio+range or a PFN range instead of page+nr_pages. But, we should do that consistently for the whole mmu_gather. For now, let's keep it simple and add "nr_pages" only. Signed-off-by: David Hildenbrand --- arch/s390/include/asm/tlb.h | 17 +++++++++++ include/asm-generic/tlb.h | 8 +++++ include/linux/mm_types.h | 20 ++++++++++++ mm/mmu_gather.c | 61 +++++++++++++++++++++++++++++++------ mm/swap.c | 12 ++++++-- mm/swap_state.c | 12 ++++++-- 6 files changed, 116 insertions(+), 14 deletions(-) diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h index 48df896d5b79..abfd2bf29e9e 100644 --- a/arch/s390/include/asm/tlb.h +++ b/arch/s390/include/asm/tlb.h @@ -26,6 +26,8 @@ void __tlb_remove_table(void *_table); static inline void tlb_flush(struct mmu_gather *tlb); static inline bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, bool delay_rmap, int page_size); +static inline bool __tlb_remove_folio_pages(struct mmu_gather *tlb, + struct page *page, unsigned int nr_pages, bool delay_rmap); =20 #define tlb_flush tlb_flush #define pte_free_tlb pte_free_tlb @@ -52,6 +54,21 @@ static inline bool __tlb_remove_page_size(struct mmu_gat= her *tlb, return false; } =20 +static inline bool __tlb_remove_folio_pages(struct mmu_gather *tlb, + struct page *page, unsigned int nr_pages, bool delay_rmap) +{ + struct encoded_page *encoded_pages[] =3D { + encode_page(page, ENCODED_PAGE_BIT_NR_PAGES), + encode_nr_pages(nr_pages), + }; + + VM_WARN_ON_ONCE(delay_rmap); + VM_WARN_ON_ONCE(page_folio(page) !=3D page_folio(page + nr_pages - 1)); + + free_pages_and_swap_cache(encoded_pages, ARRAY_SIZE(encoded_pages)); + return false; +} + static inline void tlb_flush(struct mmu_gather *tlb) { __tlb_flush_mm_lazy(tlb->mm); diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index 2eb7b0d4f5d2..428c3f93addc 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -69,6 +69,7 @@ * * - tlb_remove_page() / __tlb_remove_page() * - tlb_remove_page_size() / __tlb_remove_page_size() + * - __tlb_remove_folio_pages() * * __tlb_remove_page_size() is the basic primitive that queues a page f= or * freeing. __tlb_remove_page() assumes PAGE_SIZE. Both will return a @@ -78,6 +79,11 @@ * tlb_remove_page() and tlb_remove_page_size() imply the call to * tlb_flush_mmu() when required and has no return value. * + * __tlb_remove_folio_pages() is similar to __tlb_remove_page(), howeve= r, + * instead of removing a single page, remove the given number of consec= utive + * pages that are all part of the same (large) folio: just like calling + * __tlb_remove_page() on each page individually. + * * - tlb_change_page_size() * * call before __tlb_remove_page*() to set the current page-size; impli= es a @@ -262,6 +268,8 @@ struct mmu_gather_batch { =20 extern bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *pa= ge, bool delay_rmap, int page_size); +bool __tlb_remove_folio_pages(struct mmu_gather *tlb, struct page *page, + unsigned int nr_pages, bool delay_rmap); =20 #ifdef CONFIG_SMP /* diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 1b89eec0d6df..198662b7a39a 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -226,6 +226,15 @@ struct encoded_page; /* Perform rmap removal after we have flushed the TLB. 
*/ #define ENCODED_PAGE_BIT_DELAY_RMAP 1ul =20 +/* + * The next item in an encoded_page array is the "nr_pages" argument, spec= ifying + * the number of consecutive pages starting from this page, that all belon= g to + * the same folio. For example, "nr_pages" corresponds to the number of fo= lio + * references that must be dropped. If this bit is not set, "nr_pages" is + * implicitly 1. + */ +#define ENCODED_PAGE_BIT_NR_PAGES 2ul + static __always_inline struct encoded_page *encode_page(struct page *page,= unsigned long flags) { BUILD_BUG_ON(flags > ENCODED_PAGE_BITS); @@ -242,6 +251,17 @@ static inline struct page *encoded_page_ptr(struct enc= oded_page *page) return (struct page *)(~ENCODED_PAGE_BITS & (unsigned long)page); } =20 +static __always_inline struct encoded_page *encode_nr_pages(unsigned long = nr) +{ + VM_WARN_ON_ONCE((nr << 2) >> 2 !=3D nr); + return (struct encoded_page *)(nr << 2); +} + +static __always_inline unsigned long encoded_nr_pages(struct encoded_page = *page) +{ + return ((unsigned long)page) >> 2; +} + /* * A swap entry has to fit into a "unsigned long", as the entry is hidden * in the "index" field of the swapper address space. diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c index 6540c99c6758..dba1973dfe25 100644 --- a/mm/mmu_gather.c +++ b/mm/mmu_gather.c @@ -50,12 +50,21 @@ static bool tlb_next_batch(struct mmu_gather *tlb) #ifdef CONFIG_SMP static void tlb_flush_rmap_batch(struct mmu_gather_batch *batch, struct vm= _area_struct *vma) { + struct encoded_page **pages =3D batch->encoded_pages; + for (int i =3D 0; i < batch->nr; i++) { - struct encoded_page *enc =3D batch->encoded_pages[i]; + struct encoded_page *enc =3D pages[i]; =20 if (encoded_page_flags(enc) & ENCODED_PAGE_BIT_DELAY_RMAP) { struct page *page =3D encoded_page_ptr(enc); - folio_remove_rmap_pte(page_folio(page), page, vma); + unsigned int nr_pages =3D 1; + + if (unlikely(encoded_page_flags(enc) & + ENCODED_PAGE_BIT_NR_PAGES)) + nr_pages =3D encoded_nr_pages(pages[++i]); + + folio_remove_rmap_ptes(page_folio(page), page, nr_pages, + vma); } } } @@ -89,18 +98,26 @@ static void tlb_batch_pages_flush(struct mmu_gather *tl= b) for (batch =3D &tlb->local; batch && batch->nr; batch =3D batch->next) { struct encoded_page **pages =3D batch->encoded_pages; =20 - do { + while (batch->nr) { /* * limit free batch count when PAGE_SIZE > 4K */ unsigned int nr =3D min(512U, batch->nr); =20 + /* + * Make sure we cover page + nr_pages, and don't leave + * nr_pages behind when capping the number of entries. + */ + if (unlikely(encoded_page_flags(pages[nr - 1]) & + ENCODED_PAGE_BIT_NR_PAGES)) + nr++; + free_pages_and_swap_cache(pages, nr); pages +=3D nr; batch->nr -=3D nr; =20 cond_resched(); - } while (batch->nr); + } } tlb->active =3D &tlb->local; } @@ -116,8 +133,9 @@ static void tlb_batch_list_free(struct mmu_gather *tlb) tlb->local.next =3D NULL; } =20 -bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, - bool delay_rmap, int page_size) +static bool __tlb_remove_folio_pages_size(struct mmu_gather *tlb, + struct page *page, unsigned int nr_pages, bool delay_rmap, + int page_size) { int flags =3D delay_rmap ? 
ENCODED_PAGE_BIT_DELAY_RMAP : 0; struct mmu_gather_batch *batch; @@ -126,6 +144,8 @@ bool __tlb_remove_page_size(struct mmu_gather *tlb, str= uct page *page, =20 #ifdef CONFIG_MMU_GATHER_PAGE_SIZE VM_WARN_ON(tlb->page_size !=3D page_size); + VM_WARN_ON_ONCE(nr_pages !=3D 1 && page_size !=3D PAGE_SIZE); + VM_WARN_ON_ONCE(page_folio(page) !=3D page_folio(page + nr_pages - 1)); #endif =20 batch =3D tlb->active; @@ -133,17 +153,40 @@ bool __tlb_remove_page_size(struct mmu_gather *tlb, s= truct page *page, * Add the page and check if we are full. If so * force a flush. */ - batch->encoded_pages[batch->nr++] =3D encode_page(page, flags); - if (batch->nr =3D=3D batch->max) { + if (likely(nr_pages =3D=3D 1)) { + batch->encoded_pages[batch->nr++] =3D encode_page(page, flags); + } else { + flags |=3D ENCODED_PAGE_BIT_NR_PAGES; + batch->encoded_pages[batch->nr++] =3D encode_page(page, flags); + batch->encoded_pages[batch->nr++] =3D encode_nr_pages(nr_pages); + } + /* + * Make sure that we can always add another "page" + "nr_pages", + * requiring two entries instead of only a single one. + */ + if (batch->nr >=3D batch->max - 1) { if (!tlb_next_batch(tlb)) return true; batch =3D tlb->active; } - VM_BUG_ON_PAGE(batch->nr > batch->max, page); + VM_BUG_ON_PAGE(batch->nr > batch->max - 1, page); =20 return false; } =20 +bool __tlb_remove_folio_pages(struct mmu_gather *tlb, struct page *page, + unsigned int nr_pages, bool delay_rmap) +{ + return __tlb_remove_folio_pages_size(tlb, page, nr_pages, delay_rmap, + PAGE_SIZE); +} + +bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, + bool delay_rmap, int page_size) +{ + return __tlb_remove_folio_pages_size(tlb, page, 1, delay_rmap, page_size); +} + #endif /* MMU_GATHER_NO_GATHER */ =20 #ifdef CONFIG_MMU_GATHER_TABLE_FREE diff --git a/mm/swap.c b/mm/swap.c index cd8f0150ba3a..2a217520b80b 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -967,11 +967,17 @@ void release_pages(release_pages_arg arg, int nr) unsigned int lock_batch; =20 for (i =3D 0; i < nr; i++) { + unsigned int nr_refs =3D 1; struct folio *folio; =20 /* Turn any of the argument types into a folio */ folio =3D page_folio(encoded_page_ptr(encoded[i])); =20 + /* Is our next entry actually "nr_pages" -> "nr_refs" ? */ + if (unlikely(encoded_page_flags(encoded[i]) & + ENCODED_PAGE_BIT_NR_PAGES)) + nr_refs =3D encoded_nr_pages(encoded[++i]); + /* * Make sure the IRQ-safe lock-holding time does not get * excessive with a continuous string of pages from the @@ -990,14 +996,14 @@ void release_pages(release_pages_arg arg, int nr) unlock_page_lruvec_irqrestore(lruvec, flags); lruvec =3D NULL; } - if (put_devmap_managed_page(&folio->page)) + if (put_devmap_managed_page_refs(&folio->page, nr_refs)) continue; - if (folio_put_testzero(folio)) + if (folio_ref_sub_and_test(folio, nr_refs)) free_zone_device_page(&folio->page); continue; } =20 - if (!folio_put_testzero(folio)) + if (!folio_ref_sub_and_test(folio, nr_refs)) continue; =20 if (folio_test_large(folio)) { diff --git a/mm/swap_state.c b/mm/swap_state.c index e671266ad772..ae0c0f1f51bd 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -311,8 +311,16 @@ void free_page_and_swap_cache(struct page *page) void free_pages_and_swap_cache(struct encoded_page **pages, int nr) { lru_add_drain(); - for (int i =3D 0; i < nr; i++) - free_swap_cache(encoded_page_ptr(pages[i])); + for (int i =3D 0; i < nr; i++) { + struct page *page =3D encoded_page_ptr(pages[i]); + + /* Skip over "nr_pages". Only call it once for the folio. 
From nobody Wed Dec 24 01:33:17 2025
From: David Hildenbrand
To: linux-kernel@vger.kernel.org
Cc: linux-mm@kvack.org, David Hildenbrand, Andrew Morton, Matthew Wilcox,
	Ryan Roberts, Catalin Marinas, Will Deacon, "Aneesh Kumar K.V",
	Nick Piggin, Peter Zijlstra, Michael Ellerman, Christophe Leroy,
	"Naveen N. Rao", Heiko Carstens, Vasily Gorbik, Alexander Gordeev,
	Christian Borntraeger, Sven Schnelle, Arnd Bergmann,
	linux-arch@vger.kernel.org, linuxppc-dev@lists.ozlabs.org,
	linux-s390@vger.kernel.org
Subject: [PATCH v1 8/9] mm/mmu_gather: add tlb_remove_tlb_entries()
Date: Mon, 29 Jan 2024 15:32:20 +0100
Message-ID: <20240129143221.263763-9-david@redhat.com>
In-Reply-To: <20240129143221.263763-1-david@redhat.com>
References: <20240129143221.263763-1-david@redhat.com>

Let's add a helper that lets us batch-process multiple consecutive PTEs.

Note that the loop will get optimized out on all architectures except on
powerpc. We have to add an early define of __tlb_remove_tlb_entry() on
ppc to make the compiler happy (and avoid making tlb_remove_tlb_entries()
a macro).

Signed-off-by: David Hildenbrand
Reviewed-by: Ryan Roberts
---
 arch/powerpc/include/asm/tlb.h |  2 ++
 include/asm-generic/tlb.h      | 20 ++++++++++++++++++++
 2 files changed, 22 insertions(+)

diff --git a/arch/powerpc/include/asm/tlb.h b/arch/powerpc/include/asm/tlb.h
index b3de6102a907..1ca7d4c4b90d 100644
--- a/arch/powerpc/include/asm/tlb.h
+++ b/arch/powerpc/include/asm/tlb.h
@@ -19,6 +19,8 @@

 #include

+static inline void __tlb_remove_tlb_entry(struct mmu_gather *tlb, pte_t *ptep,
+					  unsigned long address);
 #define __tlb_remove_tlb_entry __tlb_remove_tlb_entry

 #define tlb_flush tlb_flush
diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
index 428c3f93addc..bd00dd238b79 100644
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -616,6 +616,26 @@ static inline void tlb_flush_p4d_range(struct mmu_gather *tlb,
 		__tlb_remove_tlb_entry(tlb, ptep, address);	\
 	} while (0)

+/**
+ * tlb_remove_tlb_entries - remember unmapping of multiple consecutive ptes for
+ *			    later tlb invalidation.
+ *
+ * Similar to tlb_remove_tlb_entry(), but remember unmapping of multiple
+ * consecutive ptes instead of only a single one.
+ */
+static inline void tlb_remove_tlb_entries(struct mmu_gather *tlb,
+		pte_t *ptep, unsigned int nr, unsigned long address)
+{
+	tlb_flush_pte_range(tlb, address, PAGE_SIZE * nr);
+	for (;;) {
+		__tlb_remove_tlb_entry(tlb, ptep, address);
+		if (--nr == 0)
+			break;
+		ptep++;
+		address += PAGE_SIZE;
+	}
+}
+
 #define tlb_remove_huge_tlb_entry(h, tlb, ptep, address)	\
 	do {							\
 		unsigned long _sz = huge_page_size(h);		\
-- 
2.43.0
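The point of the helper is that the flush range grows once for the whole batch, while the per-PTE loop body is empty (and compiles away) everywhere except powerpc. A stand-alone user-space sketch of that pattern follows; struct mmu_gather, pte_t and the stub functions are simplified stand-ins, not the kernel types.

#include <stdio.h>

#define PAGE_SIZE	4096ul

/* Simplified stand-ins for the kernel types (illustration only). */
typedef unsigned long pte_t;
struct mmu_gather { unsigned long start, end; };

static void tlb_flush_pte_range(struct mmu_gather *tlb, unsigned long addr,
				unsigned long size)
{
	/* Grow the address range that a later TLB flush has to cover. */
	if (addr < tlb->start)
		tlb->start = addr;
	if (addr + size > tlb->end)
		tlb->end = addr + size;
}

static void __tlb_remove_tlb_entry(struct mmu_gather *tlb, pte_t *ptep,
				   unsigned long addr)
{
	/* No-op here; only some architectures (e.g. powerpc) do per-PTE work. */
	(void)tlb; (void)ptep; (void)addr;
}

/* Same shape as the new helper: extend the flush range once, then walk. */
static void tlb_remove_tlb_entries(struct mmu_gather *tlb, pte_t *ptep,
				   unsigned int nr, unsigned long addr)
{
	tlb_flush_pte_range(tlb, addr, PAGE_SIZE * nr);
	for (;;) {
		__tlb_remove_tlb_entry(tlb, ptep, addr);
		if (--nr == 0)
			break;
		ptep++;
		addr += PAGE_SIZE;
	}
}

int main(void)
{
	struct mmu_gather tlb = { .start = ~0ul, .end = 0 };
	pte_t ptes[16] = { 0 };

	/* One call records all 16 PTEs of a PTE-mapped large folio. */
	tlb_remove_tlb_entries(&tlb, ptes, 16, 0x200000ul);
	printf("flush range: 0x%lx - 0x%lx\n", tlb.start, tlb.end);
	return 0;
}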
From nobody Wed Dec 24 01:33:17 2025
From: David Hildenbrand
To: linux-kernel@vger.kernel.org
Cc: linux-mm@kvack.org, David Hildenbrand, Andrew Morton, Matthew Wilcox,
	Ryan Roberts, Catalin Marinas, Will Deacon, "Aneesh Kumar K.V",
	Nick Piggin, Peter Zijlstra, Michael Ellerman, Christophe Leroy,
	"Naveen N. Rao", Heiko Carstens, Vasily Gorbik, Alexander Gordeev,
	Christian Borntraeger, Sven Schnelle, Arnd Bergmann,
	linux-arch@vger.kernel.org, linuxppc-dev@lists.ozlabs.org,
	linux-s390@vger.kernel.org
Subject: [PATCH v1 9/9] mm/memory: optimize unmap/zap with PTE-mapped THP
Date: Mon, 29 Jan 2024 15:32:21 +0100
Message-ID: <20240129143221.263763-10-david@redhat.com>
In-Reply-To: <20240129143221.263763-1-david@redhat.com>
References: <20240129143221.263763-1-david@redhat.com>

Similar to how we optimized fork(), let's implement PTE batching when
consecutive (present) PTEs map consecutive pages of the same large
folio.

Most infrastructure we need for batching (mmu gather, rmap) is already
there. We only have to add get_and_clear_full_ptes() and
clear_full_ptes(). Similarly, extend zap_install_uffd_wp_if_needed() to
process a PTE range.

We won't bother sanity-checking the mapcount of all subpages, but only
check the mapcount of the first subpage we process.

To keep small folios as fast as possible force inlining of a specialized
variant using __always_inline with nr=1.

Signed-off-by: David Hildenbrand
---
 include/linux/pgtable.h | 66 +++++++++++++++++++++++++++++
 mm/memory.c             | 92 +++++++++++++++++++++++++++++------------
 2 files changed, 132 insertions(+), 26 deletions(-)

diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index aab227e12493..f0feae7f89fb 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -580,6 +580,72 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
 }
 #endif

+#ifndef get_and_clear_full_ptes
+/**
+ * get_and_clear_full_ptes - Clear PTEs that map consecutive pages of the same
+ *			     folio, collecting dirty/accessed bits.
+ * @mm: Address space the pages are mapped into.
+ * @addr: Address the first page is mapped at.
+ * @ptep: Page table pointer for the first entry.
+ * @nr: Number of entries to clear.
+ * @full: Whether we are clearing a full mm.
+ *
+ * May be overridden by the architecture; otherwise, implemented as a simple
+ * loop over ptep_get_and_clear_full(), merging dirty/accessed bits into
+ * returned PTE.
+ *
+ * Note that PTE bits in the PTE range besides the PFN can differ. For example,
+ * some PTEs might be write-protected.
+ *
+ * Context: The caller holds the page table lock. The PTEs map consecutive
+ * pages that belong to the same folio. The PTEs are all in the same PMD.
+ */
+static inline pte_t get_and_clear_full_ptes(struct mm_struct *mm,
+		unsigned long addr, pte_t *ptep, unsigned int nr, int full)
+{
+	pte_t pte, tmp_pte;
+
+	pte = ptep_get_and_clear_full(mm, addr, ptep, full);
+	while (--nr) {
+		ptep++;
+		addr += PAGE_SIZE;
+		tmp_pte = ptep_get_and_clear_full(mm, addr, ptep, full);
+		if (pte_dirty(tmp_pte))
+			pte = pte_mkdirty(pte);
+		if (pte_young(tmp_pte))
+			pte = pte_mkyoung(pte);
+	}
+	return pte;
+}
+#endif
+
+#ifndef clear_full_ptes
+/**
+ * clear_full_ptes - Clear PTEs that map consecutive pages of the same folio.
+ * @mm: Address space the pages are mapped into.
+ * @addr: Address the first page is mapped at.
+ * @ptep: Page table pointer for the first entry.
+ * @nr: Number of entries to clear.
+ * @full: Whether we are clearing a full mm.
+ *
+ * Note that PTE bits in the PTE range besides the PFN can differ. For example,
+ * some PTEs might be write-protected.
+ *
+ * Context: The caller holds the page table lock. The PTEs map consecutive
+ * pages that belong to the same folio. The PTEs are all in the same PMD.
+ */
+static inline void clear_full_ptes(struct mm_struct *mm, unsigned long addr,
+		pte_t *ptep, unsigned int nr, int full)
+{
+	for (;;) {
+		ptep_get_and_clear_full(mm, addr, ptep, full);
+		if (--nr == 0)
+			break;
+		ptep++;
+		addr += PAGE_SIZE;
+	}
+}
+#endif

 /*
  * If two threads concurrently fault at the same page, the thread that
diff --git a/mm/memory.c b/mm/memory.c
index a2190d7cfa74..38a010c4d04d 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1515,7 +1515,7 @@ static inline bool zap_drop_file_uffd_wp(struct zap_details *details)
  */
 static inline void
 zap_install_uffd_wp_if_needed(struct vm_area_struct *vma,
-			      unsigned long addr, pte_t *pte,
+			      unsigned long addr, pte_t *pte, int nr,
 			      struct zap_details *details, pte_t pteval)
 {
 	/* Zap on anonymous always means dropping everything */
@@ -1525,20 +1525,27 @@ zap_install_uffd_wp_if_needed(struct vm_area_struct *vma,
 	if (zap_drop_file_uffd_wp(details))
 		return;

-	pte_install_uffd_wp_if_needed(vma, addr, pte, pteval);
+	for (;;) {
+		/* the PFN in the PTE is irrelevant. */
+		pte_install_uffd_wp_if_needed(vma, addr, pte, pteval);
+		if (--nr == 0)
+			break;
+		pte++;
+		addr += PAGE_SIZE;
+	}
 }

-static inline void zap_present_folio_pte(struct mmu_gather *tlb,
+static __always_inline void zap_present_folio_ptes(struct mmu_gather *tlb,
 		struct vm_area_struct *vma, struct folio *folio,
-		struct page *page, pte_t *pte, pte_t ptent, unsigned long addr,
-		struct zap_details *details, int *rss, bool *force_flush,
-		bool *force_break)
+		struct page *page, pte_t *pte, pte_t ptent, unsigned int nr,
+		unsigned long addr, struct zap_details *details, int *rss,
+		bool *force_flush, bool *force_break)
 {
 	struct mm_struct *mm = tlb->mm;
 	bool delay_rmap = false;

 	if (!folio_test_anon(folio)) {
-		ptent = ptep_get_and_clear_full(mm, addr, pte, tlb->fullmm);
+		ptent = get_and_clear_full_ptes(mm, addr, pte, nr, tlb->fullmm);
 		if (pte_dirty(ptent)) {
 			folio_mark_dirty(folio);
 			if (tlb_delay_rmap(tlb)) {
@@ -1548,36 +1555,49 @@ static inline void zap_present_folio_pte(struct mmu_gather *tlb,
 		}
 		if (pte_young(ptent) && likely(vma_has_recency(vma)))
 			folio_mark_accessed(folio);
-		rss[mm_counter(folio)]--;
+		rss[mm_counter(folio)] -= nr;
 	} else {
 		/* We don't need up-to-date accessed/dirty bits. */
-		ptep_get_and_clear_full(mm, addr, pte, tlb->fullmm);
-		rss[MM_ANONPAGES]--;
+		clear_full_ptes(mm, addr, pte, nr, tlb->fullmm);
+		rss[MM_ANONPAGES] -= nr;
 	}
+	/* Checking a single PTE in a batch is sufficient. */
 	arch_check_zapped_pte(vma, ptent);
-	tlb_remove_tlb_entry(tlb, pte, addr);
+	tlb_remove_tlb_entries(tlb, pte, nr, addr);
 	if (unlikely(userfaultfd_pte_wp(vma, ptent)))
-		zap_install_uffd_wp_if_needed(vma, addr, pte, details, ptent);
+		zap_install_uffd_wp_if_needed(vma, addr, pte, nr, details,
+					      ptent);

 	if (!delay_rmap) {
-		folio_remove_rmap_pte(folio, page, vma);
+		folio_remove_rmap_ptes(folio, page, nr, vma);
+
+		/* Only sanity-check the first page in a batch. */
 		if (unlikely(page_mapcount(page) < 0))
 			print_bad_pte(vma, addr, ptent, page);
 	}
-	if (unlikely(__tlb_remove_page(tlb, page, delay_rmap))) {
+	if (unlikely(__tlb_remove_folio_pages(tlb, page, nr, delay_rmap))) {
 		*force_flush = true;
 		*force_break = true;
 	}
 }

-static inline void zap_present_pte(struct mmu_gather *tlb,
+/*
+ * Zap or skip one present PTE, trying to batch-process subsequent PTEs that map
+ * consecutive pages of the same folio.
+ *
+ * Returns the number of processed (skipped or zapped) PTEs (at least 1).
+ */
+static inline int zap_present_ptes(struct mmu_gather *tlb,
 		struct vm_area_struct *vma, pte_t *pte, pte_t ptent,
-		unsigned long addr, struct zap_details *details,
-		int *rss, bool *force_flush, bool *force_break)
+		unsigned int max_nr, unsigned long addr,
+		struct zap_details *details, int *rss, bool *force_flush,
+		bool *force_break)
 {
+	const fpb_t fpb_flags = FPB_IGNORE_DIRTY | FPB_IGNORE_SOFT_DIRTY;
 	struct mm_struct *mm = tlb->mm;
 	struct folio *folio;
 	struct page *page;
+	int nr;

 	page = vm_normal_page(vma, addr, ptent);
 	if (!page) {
@@ -1587,14 +1607,29 @@ static inline void zap_present_pte(struct mmu_gather *tlb,
 		tlb_remove_tlb_entry(tlb, pte, addr);
 		VM_WARN_ON_ONCE(userfaultfd_wp(vma));
 		ksm_might_unmap_zero_page(mm, ptent);
-		return;
+		return 1;
 	}

 	folio = page_folio(page);
 	if (unlikely(!should_zap_folio(details, folio)))
-		return;
-	zap_present_folio_pte(tlb, vma, folio, page, pte, ptent, addr, details,
-			      rss, force_flush, force_break);
+		return 1;
+
+	/*
+	 * Make sure that the common "small folio" case is as fast as possible
+	 * by keeping the batching logic separate.
+	 */
+	if (unlikely(folio_test_large(folio) && max_nr != 1)) {
+		nr = folio_pte_batch(folio, addr, pte, ptent, max_nr, fpb_flags,
+				     NULL);
+
+		zap_present_folio_ptes(tlb, vma, folio, page, pte, ptent, nr,
+				       addr, details, rss, force_flush,
+				       force_break);
+		return nr;
+	}
+	zap_present_folio_ptes(tlb, vma, folio, page, pte, ptent, 1, addr,
+			       details, rss, force_flush, force_break);
+	return 1;
 }

 static unsigned long zap_pte_range(struct mmu_gather *tlb,
@@ -1609,6 +1644,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
 	pte_t *start_pte;
 	pte_t *pte;
 	swp_entry_t entry;
+	int nr;

 	tlb_change_page_size(tlb, PAGE_SIZE);
 	init_rss_vec(rss);
@@ -1622,7 +1658,9 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
 		pte_t ptent = ptep_get(pte);
 		struct folio *folio = NULL;
 		struct page *page;
+		int max_nr;

+		nr = 1;
 		if (pte_none(ptent))
 			continue;

@@ -1630,10 +1668,12 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
 			break;

 		if (pte_present(ptent)) {
-			zap_present_pte(tlb, vma, pte, ptent, addr, details,
-					rss, &force_flush, &force_break);
+			max_nr = (end - addr) / PAGE_SIZE;
+			nr = zap_present_ptes(tlb, vma, pte, ptent, max_nr,
+					      addr, details, rss, &force_flush,
+					      &force_break);
 			if (unlikely(force_break)) {
-				addr += PAGE_SIZE;
+				addr += nr * PAGE_SIZE;
 				break;
 			}
 			continue;
@@ -1687,8 +1727,8 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
 			WARN_ON_ONCE(1);
 		}
 		pte_clear_not_present_full(mm, addr, pte, tlb->fullmm);
-		zap_install_uffd_wp_if_needed(vma, addr, pte, details, ptent);
-	} while (pte++, addr += PAGE_SIZE, addr != end);
+		zap_install_uffd_wp_if_needed(vma, addr, pte, 1, details, ptent);
+	} while (pte += nr, addr += PAGE_SIZE * nr, addr != end);

 	add_mm_rss_vec(mm, rss);
 	arch_leave_lazy_mmu_mode();
-- 
2.43.0
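One subtlety of the batched zap is that dirty/accessed information must not be lost when several PTEs are cleared at once: get_and_clear_full_ptes() folds the bits of every PTE in the batch into the single returned PTE, so the folio is still marked dirty/accessed if any subpage was. A small user-space sketch of that merging behaviour follows; toy_pte and the helper are illustrative stand-ins, not kernel code.

#include <stdbool.h>
#include <stdio.h>

/* Toy PTE carrying only the two bits the batch helper has to merge. */
struct toy_pte { bool dirty, young; };

/*
 * Mirrors the merging loop of get_and_clear_full_ptes(): the returned PTE
 * reports dirty/young if *any* PTE of the batch had the bit set.
 */
static struct toy_pte get_and_clear_ptes(struct toy_pte *ptep, unsigned int nr)
{
	struct toy_pte pte = ptep[0];

	ptep[0] = (struct toy_pte){ 0 };
	for (unsigned int i = 1; i < nr; i++) {
		if (ptep[i].dirty)
			pte.dirty = true;
		if (ptep[i].young)
			pte.young = true;
		ptep[i] = (struct toy_pte){ 0 };
	}
	return pte;
}

int main(void)
{
	/* Only one PTE is dirty and another one young ... */
	struct toy_pte ptes[4] = { {0, 0}, {1, 0}, {0, 1}, {0, 0} };
	/* ... yet the merged result marks the whole batch dirty and referenced. */
	struct toy_pte merged = get_and_clear_ptes(ptes, 4);

	printf("dirty=%d young=%d\n", merged.dirty, merged.young);
	return 0;
}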