From: Gladyshev Ilya
Subject: [RFC PATCH 1/2] mm: make ref_unless functions unless_zero only
Date: Fri, 19 Dec 2025 12:46:38 +0000

There are no users of (folio/page)_ref_add_unless(page, nr, u) with
u != 0 [1], and all current users are "internal" to the page refcounting
API. This allows us to safely drop this parameter and reduce the function
semantics to the "unless zero" case only, which will be optimized in the
following patch.

If needed, these functions for the u != 0 cases can be trivially
reintroduced later using the same atomic_add_unless operations as before.

[1]: The last user was dropped in the v5.18 kernel, commit 27674ef6c73f
("mm: remove the extra ZONE_DEVICE struct page refcount"). There is no
trace of discussion as to why this cleanup wasn't done earlier.
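As a minimal user-space sketch of the semantics being reduced here (C11
atomics; helper names are illustrative, not the kernel implementation):
atomic_add_unless(v, a, u) adds a only when the current value differs
from the sentinel u, and with the last u != 0 caller gone the sentinel
is always zero.

	#include <stdatomic.h>
	#include <stdbool.h>
	#include <stdio.h>

	/* Model of atomic_add_unless(v, a, u): add a unless *v == u. */
	static bool add_unless(atomic_int *v, int a, int u)
	{
		int c = atomic_load(v);

		do {
			if (c == u)
				return false;	/* sentinel hit, refuse the add */
		} while (!atomic_compare_exchange_weak(v, &c, c + a));

		return true;
	}

	/* The only form left after this patch: sentinel fixed at zero. */
	static bool add_unless_zero(atomic_int *v, int a)
	{
		return add_unless(v, a, 0);
	}

	int main(void)
	{
		atomic_int ref = 1;

		printf("%d\n", add_unless_zero(&ref, 1));	/* 1: 1 -> 2 */
		atomic_store(&ref, 0);
		printf("%d\n", add_unless_zero(&ref, 1));	/* 0: stays at 0 */
		return 0;
	}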
Co-developed-by: Gorbunov Ivan
Signed-off-by: Gorbunov Ivan
Signed-off-by: Gladyshev Ilya
Acked-by: David Hildenbrand (Arm)
---
 include/linux/mm.h         |  2 +-
 include/linux/page-flags.h |  6 +++---
 include/linux/page_ref.h   | 14 +++++++-------
 3 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 7c79b3369b82..f652426cc218 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1115,7 +1115,7 @@ static inline int folio_put_testzero(struct folio *folio)
  */
 static inline bool get_page_unless_zero(struct page *page)
 {
-	return page_ref_add_unless(page, 1, 0);
+	return page_ref_add_unless_zero(page, 1);
 }
 
 static inline struct folio *folio_get_nontail_page(struct page *page)
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 0091ad1986bf..7c2195baf4c1 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -231,7 +231,7 @@ static __always_inline const struct page *page_fixed_fake_head(const struct page
 	return page;
 }
 
-static __always_inline bool page_count_writable(const struct page *page, int u)
+static __always_inline bool page_count_writable(const struct page *page)
 {
 	if (!static_branch_unlikely(&hugetlb_optimize_vmemmap_key))
 		return true;
@@ -257,7 +257,7 @@ static __always_inline bool page_count_writable(const struct page *page, int u)
 	 * The refcount check also prevents modification attempts to other (r/o)
 	 * tail pages that are not fake heads.
 	 */
-	if (atomic_read_acquire(&page->_refcount) == u)
+	if (!atomic_read_acquire(&page->_refcount))
 		return false;
 
 	return page_fixed_fake_head(page) == page;
@@ -268,7 +268,7 @@ static inline const struct page *page_fixed_fake_head(const struct page *page)
 	return page;
 }
 
-static inline bool page_count_writable(const struct page *page, int u)
+static inline bool page_count_writable(const struct page *page)
 {
 	return true;
 }
diff --git a/include/linux/page_ref.h b/include/linux/page_ref.h
index 544150d1d5fd..b0e3f4a4b4b8 100644
--- a/include/linux/page_ref.h
+++ b/include/linux/page_ref.h
@@ -228,14 +228,14 @@ static inline int folio_ref_dec_return(struct folio *folio)
 	return page_ref_dec_return(&folio->page);
 }
 
-static inline bool page_ref_add_unless(struct page *page, int nr, int u)
+static inline bool page_ref_add_unless_zero(struct page *page, int nr)
 {
 	bool ret = false;
 
 	rcu_read_lock();
 	/* avoid writing to the vmemmap area being remapped */
-	if (page_count_writable(page, u))
-		ret = atomic_add_unless(&page->_refcount, nr, u);
+	if (page_count_writable(page))
+		ret = atomic_add_unless(&page->_refcount, nr, 0);
 	rcu_read_unlock();
 
 	if (page_ref_tracepoint_active(page_ref_mod_unless))
@@ -243,9 +243,9 @@ static inline bool page_ref_add_unless(struct page *page, int nr, int u)
 	return ret;
 }
 
-static inline bool folio_ref_add_unless(struct folio *folio, int nr, int u)
+static inline bool folio_ref_add_unless_zero(struct folio *folio, int nr)
 {
-	return page_ref_add_unless(&folio->page, nr, u);
+	return page_ref_add_unless_zero(&folio->page, nr);
 }
 
 /**
@@ -261,12 +261,12 @@ static inline bool folio_ref_add_unless(struct folio *folio, int nr, int u)
  */
 static inline bool folio_try_get(struct folio *folio)
 {
-	return folio_ref_add_unless(folio, 1, 0);
+	return folio_ref_add_unless_zero(folio, 1);
 }
 
 static inline bool folio_ref_try_add(struct folio *folio, int count)
 {
-	return folio_ref_add_unless(folio, count, 0);
+	return folio_ref_add_unless_zero(folio, count);
 }
 
 static inline int page_ref_freeze(struct page *page, int count)
-- 
2.43.0

From: Gladyshev Ilya
Subject: [RFC PATCH 2/2] mm: implement page refcount locking via dedicated bit
Date: Fri, 19 Dec 2025 12:46:39 +0000
Message-ID: <81e3c45f49bdac231e831ec7ba09ef42fbb77930.1766145604.git.gladyshev.ilya1@h-partners.com>

The current atomic-based page refcount implementation treats a zero
counter as dead and requires a compare-and-swap loop in folio_try_get()
to prevent incrementing a dead refcount. This CAS loop acts as a
serialization point and can become a significant bottleneck during
high-frequency file read operations.

This patch introduces PAGEREF_LOCKED_BIT to distinguish between a
(temporary) zero refcount and a locked (dead/frozen) state. Because
incrementing the counter no longer affects its locked/unlocked state,
it is possible to use an optimistic atomic_add_return() in
page_ref_add_unless_zero() that operates independently of the locked
bit. The locked state is handled after the increment attempt,
eliminating the need for the CAS loop.
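A minimal user-space sketch of the two schemes (C11 atomics; names and
the exact failure-path handling are illustrative, the authoritative
version is the page_ref_add_unless_zero() change in the diff below):

	#include <limits.h>
	#include <stdatomic.h>
	#include <stdbool.h>

	/* Stands in for PAGEREF_LOCKED_BIT: the sign bit of the counter. */
	#define LOCKED_BIT INT_MIN

	/* Old scheme: loop until we observe 0 (dead) or win the CAS. */
	static bool try_get_cas(atomic_int *ref)
	{
		int c = atomic_load(ref);

		do {
			if (c == 0)
				return false;
		} while (!atomic_compare_exchange_weak(ref, &c, c + 1));

		return true;
	}

	/*
	 * New scheme: add unconditionally; "dead" is encoded by the locked
	 * bit, not by the value zero.  On failure, restore the locked
	 * sentinel so speculative increments by losers do not accumulate.
	 */
	static bool try_get_locked_bit(atomic_int *ref)
	{
		int val = atomic_fetch_add(ref, 1) + 1;

		if (!(val & LOCKED_BIT))
			return true;

		atomic_compare_exchange_strong(ref, &val, LOCKED_BIT);
		return false;
	}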
Co-developed-by: Gorbunov Ivan
Signed-off-by: Gorbunov Ivan
Signed-off-by: Gladyshev Ilya
---
 include/linux/page-flags.h |  5 ++++-
 include/linux/page_ref.h   | 25 +++++++++++++++++++++----
 2 files changed, 25 insertions(+), 5 deletions(-)

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 7c2195baf4c1..f2a9302104eb 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -196,6 +196,9 @@ enum pageflags {
 
 #define PAGEFLAGS_MASK		((1UL << NR_PAGEFLAGS) - 1)
 
+/* Most significant bit in page refcount */
+#define PAGEREF_LOCKED_BIT (1 << 31)
+
 #ifndef __GENERATING_BOUNDS_H
 
 #ifdef CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP
@@ -257,7 +260,7 @@ static __always_inline bool page_count_writable(const struct page *page)
 	 * The refcount check also prevents modification attempts to other (r/o)
 	 * tail pages that are not fake heads.
	 */
-	if (!atomic_read_acquire(&page->_refcount))
+	if (atomic_read_acquire(&page->_refcount) & PAGEREF_LOCKED_BIT)
 		return false;
 
 	return page_fixed_fake_head(page) == page;
diff --git a/include/linux/page_ref.h b/include/linux/page_ref.h
index b0e3f4a4b4b8..98717fd25306 100644
--- a/include/linux/page_ref.h
+++ b/include/linux/page_ref.h
@@ -64,7 +64,12 @@ static inline void __page_ref_unfreeze(struct page *page, int v)
 
 static inline int page_ref_count(const struct page *page)
 {
-	return atomic_read(&page->_refcount);
+	int val = atomic_read(&page->_refcount);
+
+	if (unlikely(val & PAGEREF_LOCKED_BIT))
+		return 0;
+
+	return val;
 }
 
 /**
@@ -176,6 +181,9 @@ static inline int page_ref_sub_and_test(struct page *page, int nr)
 {
 	int ret = atomic_sub_and_test(nr, &page->_refcount);
 
+	if (ret)
+		ret = !atomic_cmpxchg_relaxed(&page->_refcount, 0, PAGEREF_LOCKED_BIT);
+
 	if (page_ref_tracepoint_active(page_ref_mod_and_test))
 		__page_ref_mod_and_test(page, -nr, ret);
 	return ret;
@@ -204,6 +212,9 @@ static inline int page_ref_dec_and_test(struct page *page)
 {
 	int ret = atomic_dec_and_test(&page->_refcount);
 
+	if (ret)
+		ret = !atomic_cmpxchg_relaxed(&page->_refcount, 0, PAGEREF_LOCKED_BIT);
+
 	if (page_ref_tracepoint_active(page_ref_mod_and_test))
 		__page_ref_mod_and_test(page, -1, ret);
 	return ret;
@@ -231,11 +242,17 @@ static inline int folio_ref_dec_return(struct folio *folio)
 static inline bool page_ref_add_unless_zero(struct page *page, int nr)
 {
 	bool ret = false;
+	int val;
 
 	rcu_read_lock();
 	/* avoid writing to the vmemmap area being remapped */
-	if (page_count_writable(page))
-		ret = atomic_add_unless(&page->_refcount, nr, 0);
+	if (page_count_writable(page)) {
+		val = atomic_add_return(nr, &page->_refcount);
+		ret = !(val & PAGEREF_LOCKED_BIT);
+
+		if (unlikely(!ret))
+			atomic_cmpxchg_relaxed(&page->_refcount, val, PAGEREF_LOCKED_BIT);
+	}
 	rcu_read_unlock();
 
 	if (page_ref_tracepoint_active(page_ref_mod_unless))
@@ -271,7 +288,7 @@ static inline bool folio_ref_try_add(struct folio *folio, int count)
 
 static inline int page_ref_freeze(struct page *page, int count)
 {
-	int ret = likely(atomic_cmpxchg(&page->_refcount, count, 0) == count);
+	int ret = likely(atomic_cmpxchg(&page->_refcount, count, PAGEREF_LOCKED_BIT) == count);
 
 	if (page_ref_tracepoint_active(page_ref_freeze))
 		__page_ref_freeze(page, count, ret);
-- 
2.43.0