From nobody Tue Dec 16 19:57:34 2025
Date: Fri, 29 Sep 2023 20:25:38 -0700 (PDT)
From: Hugh Dickins
To: Andrew Morton
cc: Christian Brauner, Carlos Maiolino, Chuck Lever, Jan Kara, Matthew Wilcox,
 Johannes Weiner, Axel Rasmussen, linux-fsdevel@vger.kernel.org,
 linux-kernel@vger.kernel.org, linux-mm@kvack.org
Subject: [PATCH 1/8] shmem: shrink shmem_inode_info: dir_offsets in a union
Message-ID: <86ebb4b-c571-b9e8-27f5-cb82ec50357e@google.com>

Shave 32 bytes off (the 64-bit) shmem_inode_info.  There was a 4-byte
pahole after stop_eviction, better filled by fsflags.  And the 24-byte
dir_offsets can only be used by directories, whereas shrinklist and
swaplist only by shmem_mapping() inodes (regular files or long symlinks):
so put those into a union.  No change in mm/shmem.c is required for this.

Signed-off-by: Hugh Dickins
Reviewed-by: Chuck Lever
Reviewed-by: Jan Kara
---
 include/linux/shmem_fs.h | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index 6b0c626620f5..2caa6b86106a 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -23,18 +23,22 @@ struct shmem_inode_info {
 	unsigned long		flags;
 	unsigned long		alloced;	/* data pages alloced to file */
 	unsigned long		swapped;	/* subtotal assigned to swap */
-	pgoff_t			fallocend;	/* highest fallocate endindex */
-	struct list_head	shrinklist;	/* shrinkable hpage inodes */
-	struct list_head	swaplist;	/* chain of maybes on swap */
+	union {
+	    struct offset_ctx	dir_offsets;	/* stable directory offsets */
+	    struct {
+		struct list_head shrinklist;	/* shrinkable hpage inodes */
+		struct list_head swaplist;	/* chain of maybes on swap */
+	    };
+	};
+	struct timespec64	i_crtime;	/* file creation time */
 	struct shared_policy	policy;		/* NUMA memory alloc policy */
 	struct simple_xattrs	xattrs;		/* list of xattrs */
+	pgoff_t			fallocend;	/* highest fallocate endindex */
+	unsigned int		fsflags;	/* for FS_IOC_[SG]ETFLAGS */
 	atomic_t		stop_eviction;	/* hold when working on inode */
-	struct timespec64	i_crtime;	/* file creation time */
-	unsigned int		fsflags;	/* flags for FS_IOC_[SG]ETFLAGS */
 #ifdef CONFIG_TMPFS_QUOTA
 	struct dquot		*i_dquot[MAXQUOTAS];
 #endif
-	struct offset_ctx	dir_offsets;	/* stable entry offsets */
 	struct inode		vfs_inode;
 };

-- 
2.35.3
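A minimal, standalone C sketch of the same space-saving idea, assuming a
64-bit target: fields that are never used by the same kind of inode can
overlap in an anonymous union, and a small member can be moved into what
would otherwise be padding.  The struct and field names below are
hypothetical stand-ins, not the kernel's definitions.

#include <stdio.h>

struct list_node { struct list_node *next, *prev; };	/* 16 bytes on 64-bit */

struct demo_info {
	unsigned long flags;
	union {
		unsigned long dir_off[3];		/* used by directories only: 24 bytes */
		struct {
			struct list_node shrinklist;	/* used by regular files only */
			struct list_node swaplist;	/* 2 * 16 = 32 bytes */
		};
	};
	unsigned int fsflags;	/* small member placed where padding would otherwise sit */
};

int main(void)
{
	/* The union costs max(24, 32) = 32 bytes instead of 24 + 32 = 56. */
	printf("sizeof(struct demo_info) = %zu bytes\n", sizeof(struct demo_info));
	return 0;
}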
From nobody Tue Dec 16 19:57:35 2025
Date: Fri, 29 Sep 2023 20:26:53 -0700 (PDT)
From: Hugh Dickins
To: Andrew Morton
cc: Christian Brauner, Carlos Maiolino, Chuck Lever, Jan Kara, Matthew Wilcox,
 Johannes Weiner, Axel Rasmussen, linux-fsdevel@vger.kernel.org,
 linux-kernel@vger.kernel.org, linux-mm@kvack.org
Subject: [PATCH 2/8] shmem: remove vma arg from shmem_get_folio_gfp()

The vma is already there in vmf->vma, so no need for a separate arg.

Signed-off-by: Hugh Dickins
Reviewed-by: Jan Kara
---
 mm/shmem.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/mm/shmem.c b/mm/shmem.c
index 69595d341882..824eb55671d2 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1921,14 +1921,13 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
  * vm. If we swap it in we mark it dirty since we also free the swap
  * entry since a page cannot live in both the swap and page cache.
  *
- * vma, vmf, and fault_type are only supplied by shmem_fault:
- * otherwise they are NULL.
+ * vmf and fault_type are only supplied by shmem_fault: otherwise they are NULL.
  */
 static int shmem_get_folio_gfp(struct inode *inode, pgoff_t index,
 		struct folio **foliop, enum sgp_type sgp, gfp_t gfp,
-		struct vm_area_struct *vma, struct vm_fault *vmf,
-		vm_fault_t *fault_type)
+		struct vm_fault *vmf, vm_fault_t *fault_type)
 {
+	struct vm_area_struct *vma = vmf ? vmf->vma : NULL;
 	struct address_space *mapping = inode->i_mapping;
 	struct shmem_inode_info *info = SHMEM_I(inode);
 	struct shmem_sb_info *sbinfo;
@@ -2141,7 +2140,7 @@ int shmem_get_folio(struct inode *inode, pgoff_t index, struct folio **foliop,
 		enum sgp_type sgp)
 {
 	return shmem_get_folio_gfp(inode, index, foliop, sgp,
-			mapping_gfp_mask(inode->i_mapping), NULL, NULL, NULL);
+			mapping_gfp_mask(inode->i_mapping), NULL, NULL);
 }
 
 /*
@@ -2225,7 +2224,7 @@ static vm_fault_t shmem_fault(struct vm_fault *vmf)
 	}
 
 	err = shmem_get_folio_gfp(inode, vmf->pgoff, &folio, SGP_CACHE,
-				  gfp, vma, vmf, &ret);
+				  gfp, vmf, &ret);
 	if (err)
 		return vmf_error(err);
 	if (folio)
@@ -4897,7 +4896,7 @@ struct folio *shmem_read_folio_gfp(struct address_space *mapping,
 
 	BUG_ON(!shmem_mapping(mapping));
 	error = shmem_get_folio_gfp(inode, index, &folio, SGP_CACHE,
-			gfp, NULL, NULL, NULL);
+			gfp, NULL, NULL);
 	if (error)
 		return ERR_PTR(error);
 
-- 
2.35.3

From nobody Tue Dec 16 19:57:35 2025
GkHd1ZvcRtykgGdf6GXmh1lH7s+DeSOFqfDqSpnRoJjBnXsFBIvFQqyqk5IVPQvn67mx j/vA== X-Gm-Message-State: AOJu0YxWnDZlAoAKncv34W6+OYeZTMCH+ws6CN4H2XSHHBzAaQpm5evj R7lG1GkxPXacfc4AowhWPFM83O+Y2Ae6PnpXqmp6Hw== X-Google-Smtp-Source: AGHT+IHp1qF65eHgx27QgmeBCNpipwBMvCNXdEQuNLXjnq7w6FkkrCvB+RR9BXWpFDUscD7OTgD8iw== X-Received: by 2002:a0d:ee46:0:b0:5a1:635e:e68 with SMTP id x67-20020a0dee46000000b005a1635e0e68mr5109602ywe.46.1696044476344; Fri, 29 Sep 2023 20:27:56 -0700 (PDT) Received: from ripple.attlocal.net (172-10-233-147.lightspeed.sntcca.sbcglobal.net. [172.10.233.147]) by smtp.gmail.com with ESMTPSA id l8-20020a0de208000000b00586108dd8f5sm5983418ywe.18.2023.09.29.20.27.54 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Fri, 29 Sep 2023 20:27:55 -0700 (PDT) Date: Fri, 29 Sep 2023 20:27:53 -0700 (PDT) From: Hugh Dickins X-X-Sender: hugh@ripple.attlocal.net To: Andrew Morton cc: Christian Brauner , Carlos Maiolino , Chuck Lever , Jan Kara , Matthew Wilcox , Johannes Weiner , Axel Rasmussen , linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org, linux-mm@kvack.org Subject: [PATCH 3/8] shmem: factor shmem_falloc_wait() out of shmem_fault() In-Reply-To: Message-ID: <6fe379a4-6176-9225-9263-fe60d2633c0@google.com> References: MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" That Trinity livelock shmem_falloc avoidance block is unlikely, and a distraction from the proper business of shmem_fault(): separate it out. (This used to help compilers save stack on the fault path too, but both gcc and clang nowadays seem to make better choices anyway.) Signed-off-by: Hugh Dickins Reviewed-by: Jan Kara --- mm/shmem.c | 126 +++++++++++++++++++++++++++++------------------------ 1 file changed, 69 insertions(+), 57 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index 824eb55671d2..5501a5bc8d8c 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2148,87 +2148,99 @@ int shmem_get_folio(struct inode *inode, pgoff_t in= dex, struct folio **foliop, * entry unconditionally - even if something else had already woken the * target. */ -static int synchronous_wake_function(wait_queue_entry_t *wait, unsigned mo= de, int sync, void *key) +static int synchronous_wake_function(wait_queue_entry_t *wait, + unsigned int mode, int sync, void *key) { int ret =3D default_wake_function(wait, mode, sync, key); list_del_init(&wait->entry); return ret; } =20 +/* + * Trinity finds that probing a hole which tmpfs is punching can + * prevent the hole-punch from ever completing: which in turn + * locks writers out with its hold on i_rwsem. So refrain from + * faulting pages into the hole while it's being punched. Although + * shmem_undo_range() does remove the additions, it may be unable to + * keep up, as each new page needs its own unmap_mapping_range() call, + * and the i_mmap tree grows ever slower to scan if new vmas are added. + * + * It does not matter if we sometimes reach this check just before the + * hole-punch begins, so that one fault then races with the punch: + * we just need to make racing faults a rare case. + * + * The implementation below would be much simpler if we just used a + * standard mutex or completion: but we cannot take i_rwsem in fault, + * and bloating every shmem inode for this unlikely case would be sad. 
+ */ +static vm_fault_t shmem_falloc_wait(struct vm_fault *vmf, struct inode *in= ode) +{ + struct shmem_falloc *shmem_falloc; + struct file *fpin =3D NULL; + vm_fault_t ret =3D 0; + + spin_lock(&inode->i_lock); + shmem_falloc =3D inode->i_private; + if (shmem_falloc && + shmem_falloc->waitq && + vmf->pgoff >=3D shmem_falloc->start && + vmf->pgoff < shmem_falloc->next) { + wait_queue_head_t *shmem_falloc_waitq; + DEFINE_WAIT_FUNC(shmem_fault_wait, synchronous_wake_function); + + ret =3D VM_FAULT_NOPAGE; + fpin =3D maybe_unlock_mmap_for_io(vmf, NULL); + shmem_falloc_waitq =3D shmem_falloc->waitq; + prepare_to_wait(shmem_falloc_waitq, &shmem_fault_wait, + TASK_UNINTERRUPTIBLE); + spin_unlock(&inode->i_lock); + schedule(); + + /* + * shmem_falloc_waitq points into the shmem_fallocate() + * stack of the hole-punching task: shmem_falloc_waitq + * is usually invalid by the time we reach here, but + * finish_wait() does not dereference it in that case; + * though i_lock needed lest racing with wake_up_all(). + */ + spin_lock(&inode->i_lock); + finish_wait(shmem_falloc_waitq, &shmem_fault_wait); + } + spin_unlock(&inode->i_lock); + if (fpin) { + fput(fpin); + ret =3D VM_FAULT_RETRY; + } + return ret; +} + static vm_fault_t shmem_fault(struct vm_fault *vmf) { - struct vm_area_struct *vma =3D vmf->vma; - struct inode *inode =3D file_inode(vma->vm_file); + struct inode *inode =3D file_inode(vmf->vma->vm_file); gfp_t gfp =3D mapping_gfp_mask(inode->i_mapping); struct folio *folio =3D NULL; + vm_fault_t ret =3D 0; int err; - vm_fault_t ret =3D VM_FAULT_LOCKED; =20 /* * Trinity finds that probing a hole which tmpfs is punching can - * prevent the hole-punch from ever completing: which in turn - * locks writers out with its hold on i_rwsem. So refrain from - * faulting pages into the hole while it's being punched. Although - * shmem_undo_range() does remove the additions, it may be unable to - * keep up, as each new page needs its own unmap_mapping_range() call, - * and the i_mmap tree grows ever slower to scan if new vmas are added. - * - * It does not matter if we sometimes reach this check just before the - * hole-punch begins, so that one fault then races with the punch: - * we just need to make racing faults a rare case. - * - * The implementation below would be much simpler if we just used a - * standard mutex or completion: but we cannot take i_rwsem in fault, - * and bloating every shmem inode for this unlikely case would be sad. + * prevent the hole-punch from ever completing: noted in i_private. */ if (unlikely(inode->i_private)) { - struct shmem_falloc *shmem_falloc; - - spin_lock(&inode->i_lock); - shmem_falloc =3D inode->i_private; - if (shmem_falloc && - shmem_falloc->waitq && - vmf->pgoff >=3D shmem_falloc->start && - vmf->pgoff < shmem_falloc->next) { - struct file *fpin; - wait_queue_head_t *shmem_falloc_waitq; - DEFINE_WAIT_FUNC(shmem_fault_wait, synchronous_wake_function); - - ret =3D VM_FAULT_NOPAGE; - fpin =3D maybe_unlock_mmap_for_io(vmf, NULL); - if (fpin) - ret =3D VM_FAULT_RETRY; - - shmem_falloc_waitq =3D shmem_falloc->waitq; - prepare_to_wait(shmem_falloc_waitq, &shmem_fault_wait, - TASK_UNINTERRUPTIBLE); - spin_unlock(&inode->i_lock); - schedule(); - - /* - * shmem_falloc_waitq points into the shmem_fallocate() - * stack of the hole-punching task: shmem_falloc_waitq - * is usually invalid by the time we reach here, but - * finish_wait() does not dereference it in that case; - * though i_lock needed lest racing with wake_up_all(). 
- */ - spin_lock(&inode->i_lock); - finish_wait(shmem_falloc_waitq, &shmem_fault_wait); - spin_unlock(&inode->i_lock); - - if (fpin) - fput(fpin); + ret =3D shmem_falloc_wait(vmf, inode); + if (ret) return ret; - } - spin_unlock(&inode->i_lock); } =20 + WARN_ON_ONCE(vmf->page !=3D NULL); err =3D shmem_get_folio_gfp(inode, vmf->pgoff, &folio, SGP_CACHE, gfp, vmf, &ret); if (err) return vmf_error(err); - if (folio) + if (folio) { vmf->page =3D folio_file_page(folio, vmf->pgoff); + ret |=3D VM_FAULT_LOCKED; + } return ret; } =20 --=20 2.35.3 From nobody Tue Dec 16 19:57:35 2025 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 8D3F1E77350 for ; Sat, 30 Sep 2023 03:29:01 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S233964AbjI3D3B (ORCPT ); Fri, 29 Sep 2023 23:29:01 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:53240 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S229526AbjI3D24 (ORCPT ); Fri, 29 Sep 2023 23:28:56 -0400 Received: from mail-yw1-x112c.google.com (mail-yw1-x112c.google.com [IPv6:2607:f8b0:4864:20::112c]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 883E4A4 for ; Fri, 29 Sep 2023 20:28:54 -0700 (PDT) Received: by mail-yw1-x112c.google.com with SMTP id 00721157ae682-59e77e4f707so179118197b3.0 for ; Fri, 29 Sep 2023 20:28:54 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20230601; t=1696044533; x=1696649333; darn=vger.kernel.org; h=mime-version:references:message-id:in-reply-to:subject:cc:to:from :date:from:to:cc:subject:date:message-id:reply-to; bh=/Dj6Oo+B5y8AHftgdToua+FX2M1F3oxEvkpI+qfcb0o=; b=GvSSQ1baWvjZh5vOwQnlQSM5W/zRa0BtOGJRuTis/L9FKEsrQg4cKAkHQWxMFrIU4f pVB3LaWpXGlzwnjdkwrieNRUWQap2aTaY3h8ZI4WSY0UFCPfuQ/uqfqiehNZniRpDQfv FEcoNjRljQ+F9iBJjoLDG+GAQuHI4TdVMzgpeLR9C6WybSQaMnmncyyk9KBN6/CVrQk9 iNAASiWuIGpxksF6ESrr3UazMXmftSNEqnTM3YlqNIiXCNOHdUuizexVuUzJ62YuWK5z XxFS1s9R/RqaNblxkSX7/+OxWUjzB3y0/zzIu1sl/0zDelEuc+h2SrqOVTDSykbzwaj9 zx7Q== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20230601; t=1696044533; x=1696649333; h=mime-version:references:message-id:in-reply-to:subject:cc:to:from :date:x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=/Dj6Oo+B5y8AHftgdToua+FX2M1F3oxEvkpI+qfcb0o=; b=cyrRdZtTgEiaLdMqoW6T+UiixjRMky/O1oTZhc9reb+RVicBRYtZ0a2PaBupvTmDrQ c2/9pHyTnhjTdUDGaTgBmtnEau6TPGnSU3Mnh4bQ2eTg37dCE+vqqFmP/kxCqHccgWhl JktjBUSHc/jwnGupuGhLZot4BhBM9yV4bhBOwUoBk47P0alGcgma+YcaofZ6giW9QbxM jx4rb82vVqBKtBZUio9+X0qNRZFYoWM/buG3HxKIpACXLI2qWuq1dWVcoqSreSK0fgzr vH4dgJebuJRAgImg0fOEUABuz2fg0nKkA6Gq1Pmgnjz0h/uk4LQNFxG59kFtF8N5Oivk X7jw== X-Gm-Message-State: AOJu0YxgonRRlc5l9OCI6N8OWve+U5CPFzj2uHycSoUEZmW83eZrzXqv AmMVz674+Yi7IYGkzTAJFQRBCw== X-Google-Smtp-Source: AGHT+IHPj4w9oZpttkU3FSY37D6TH9BiRNUeI9NUbTwdclt9Ncz+8LjmVJX7PwjhqGcgK9q2b2JpVA== X-Received: by 2002:a0d:df45:0:b0:595:9770:6914 with SMTP id i66-20020a0ddf45000000b0059597706914mr6356530ywe.35.1696044533585; Fri, 29 Sep 2023 20:28:53 -0700 (PDT) Received: from ripple.attlocal.net (172-10-233-147.lightspeed.sntcca.sbcglobal.net. 
[172.10.233.147]) by smtp.gmail.com with ESMTPSA id s185-20020a8182c2000000b00597e912e67esm6008532ywf.131.2023.09.29.20.28.51 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Fri, 29 Sep 2023 20:28:52 -0700 (PDT) Date: Fri, 29 Sep 2023 20:28:50 -0700 (PDT) From: Hugh Dickins X-X-Sender: hugh@ripple.attlocal.net To: Andrew Morton cc: Christian Brauner , Carlos Maiolino , Chuck Lever , Jan Kara , Matthew Wilcox , Johannes Weiner , Axel Rasmussen , linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org, linux-mm@kvack.org Subject: [PATCH 4/8] shmem: trivial tidyups, removing extra blank lines, etc In-Reply-To: Message-ID: References: MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Mostly removing a few superfluous blank lines, joining short arglines, imposing some 80-column observance, correcting a couple of comments. None of it more interesting than deleting a repeated INIT_LIST_HEAD(). Signed-off-by: Hugh Dickins Reviewed-by: Jan Kara --- mm/shmem.c | 56 ++++++++++++++++++++---------------------------------- 1 file changed, 21 insertions(+), 35 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index 5501a5bc8d8c..caee8ba841f7 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -756,7 +756,7 @@ static unsigned long shmem_unused_huge_shrink(struct sh= mem_sb_info *sbinfo, #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ =20 /* - * Like filemap_add_folio, but error if expected item has gone. + * Somewhat like filemap_add_folio, but error if expected item has gone. */ static int shmem_add_to_page_cache(struct folio *folio, struct address_space *mapping, @@ -825,7 +825,7 @@ static int shmem_add_to_page_cache(struct folio *folio, } =20 /* - * Like delete_from_page_cache, but substitutes swap for @folio. + * Somewhat like filemap_remove_folio, but substitutes swap for @folio. 
*/ static void shmem_delete_from_page_cache(struct folio *folio, void *radswa= p) { @@ -887,7 +887,6 @@ unsigned long shmem_partial_swap_usage(struct address_s= pace *mapping, cond_resched_rcu(); } } - rcu_read_unlock(); =20 return swapped << PAGE_SHIFT; @@ -1213,7 +1212,6 @@ static int shmem_setattr(struct mnt_idmap *idmap, if (i_uid_needs_update(idmap, attr, inode) || i_gid_needs_update(idmap, attr, inode)) { error =3D dquot_transfer(idmap, inode, attr); - if (error) return error; } @@ -2456,7 +2454,6 @@ static struct inode *__shmem_get_inode(struct mnt_idm= ap *idmap, if (err) return ERR_PTR(err); =20 - inode =3D new_inode(sb); if (!inode) { shmem_free_inode(sb, 0); @@ -2481,11 +2478,10 @@ static struct inode *__shmem_get_inode(struct mnt_i= dmap *idmap, shmem_set_inode_flags(inode, info->fsflags); INIT_LIST_HEAD(&info->shrinklist); INIT_LIST_HEAD(&info->swaplist); - INIT_LIST_HEAD(&info->swaplist); - if (sbinfo->noswap) - mapping_set_unevictable(inode->i_mapping); simple_xattrs_init(&info->xattrs); cache_no_acl(inode); + if (sbinfo->noswap) + mapping_set_unevictable(inode->i_mapping); mapping_set_large_folios(inode->i_mapping); =20 switch (mode & S_IFMT) { @@ -2697,7 +2693,6 @@ shmem_write_begin(struct file *file, struct address_s= pace *mapping, } =20 ret =3D shmem_get_folio(inode, index, &folio, SGP_WRITE); - if (ret) return ret; =20 @@ -3229,8 +3224,7 @@ shmem_mknod(struct mnt_idmap *idmap, struct inode *di= r, error =3D simple_acl_create(dir, inode); if (error) goto out_iput; - error =3D security_inode_init_security(inode, dir, - &dentry->d_name, + error =3D security_inode_init_security(inode, dir, &dentry->d_name, shmem_initxattrs, NULL); if (error && error !=3D -EOPNOTSUPP) goto out_iput; @@ -3259,14 +3253,11 @@ shmem_tmpfile(struct mnt_idmap *idmap, struct inode= *dir, int error; =20 inode =3D shmem_get_inode(idmap, dir->i_sb, dir, mode, 0, VM_NORESERVE); - if (IS_ERR(inode)) { error =3D PTR_ERR(inode); goto err_out; } - - error =3D security_inode_init_security(inode, dir, - NULL, + error =3D security_inode_init_security(inode, dir, NULL, shmem_initxattrs, NULL); if (error && error !=3D -EOPNOTSUPP) goto out_iput; @@ -3303,7 +3294,8 @@ static int shmem_create(struct mnt_idmap *idmap, stru= ct inode *dir, /* * Link a file.. 
*/ -static int shmem_link(struct dentry *old_dentry, struct inode *dir, struct= dentry *dentry) +static int shmem_link(struct dentry *old_dentry, struct inode *dir, + struct dentry *dentry) { struct inode *inode =3D d_inode(old_dentry); int ret =3D 0; @@ -3334,7 +3326,7 @@ static int shmem_link(struct dentry *old_dentry, stru= ct inode *dir, struct dentr inode_inc_iversion(dir); inc_nlink(inode); ihold(inode); /* New dentry reference */ - dget(dentry); /* Extra pinning count for the created dentry */ + dget(dentry); /* Extra pinning count for the created dentry */ d_instantiate(dentry, inode); out: return ret; @@ -3354,7 +3346,7 @@ static int shmem_unlink(struct inode *dir, struct den= try *dentry) inode_set_ctime_current(inode)); inode_inc_iversion(dir); drop_nlink(inode); - dput(dentry); /* Undo the count from "create" - this does all the work */ + dput(dentry); /* Undo the count from "create" - does all the work */ return 0; } =20 @@ -3464,7 +3456,6 @@ static int shmem_symlink(struct mnt_idmap *idmap, str= uct inode *dir, =20 inode =3D shmem_get_inode(idmap, dir->i_sb, dir, S_IFLNK | 0777, 0, VM_NORESERVE); - if (IS_ERR(inode)) return PTR_ERR(inode); =20 @@ -3518,8 +3509,7 @@ static void shmem_put_link(void *arg) folio_put(arg); } =20 -static const char *shmem_get_link(struct dentry *dentry, - struct inode *inode, +static const char *shmem_get_link(struct dentry *dentry, struct inode *ino= de, struct delayed_call *done) { struct folio *folio =3D NULL; @@ -3593,8 +3583,7 @@ static int shmem_fileattr_set(struct mnt_idmap *idmap, * Callback for security_inode_init_security() for acquiring xattrs. */ static int shmem_initxattrs(struct inode *inode, - const struct xattr *xattr_array, - void *fs_info) + const struct xattr *xattr_array, void *fs_info) { struct shmem_inode_info *info =3D SHMEM_I(inode); struct shmem_sb_info *sbinfo =3D SHMEM_SB(inode->i_sb); @@ -3778,7 +3767,6 @@ static struct dentry *shmem_find_alias(struct inode *= inode) return alias ?: d_find_any_alias(inode); } =20 - static struct dentry *shmem_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len, int fh_type) { @@ -4362,8 +4350,8 @@ static int shmem_fill_super(struct super_block *sb, s= truct fs_context *fc) } #endif /* CONFIG_TMPFS_QUOTA */ =20 - inode =3D shmem_get_inode(&nop_mnt_idmap, sb, NULL, S_IFDIR | sbinfo->mod= e, 0, - VM_NORESERVE); + inode =3D shmem_get_inode(&nop_mnt_idmap, sb, NULL, + S_IFDIR | sbinfo->mode, 0, VM_NORESERVE); if (IS_ERR(inode)) { error =3D PTR_ERR(inode); goto failed; @@ -4666,11 +4654,9 @@ static ssize_t shmem_enabled_show(struct kobject *ko= bj, =20 for (i =3D 0; i < ARRAY_SIZE(values); i++) { len +=3D sysfs_emit_at(buf, len, - shmem_huge =3D=3D values[i] ? "%s[%s]" : "%s%s", - i ? " " : "", - shmem_format_huge(values[i])); + shmem_huge =3D=3D values[i] ? "%s[%s]" : "%s%s", + i ? " " : "", shmem_format_huge(values[i])); } - len +=3D sysfs_emit_at(buf, len, "\n"); =20 return len; @@ -4767,8 +4753,9 @@ EXPORT_SYMBOL_GPL(shmem_truncate_range); #define shmem_acct_size(flags, size) 0 #define shmem_unacct_size(flags, size) do {} while (0) =20 -static inline struct inode *shmem_get_inode(struct mnt_idmap *idmap, struc= t super_block *sb, struct inode *dir, - umode_t mode, dev_t dev, unsigned long flags) +static inline struct inode *shmem_get_inode(struct mnt_idmap *idmap, + struct super_block *sb, struct inode *dir, + umode_t mode, dev_t dev, unsigned long flags) { struct inode *inode =3D ramfs_get_inode(sb, dir, mode, dev); return inode ? 
inode : ERR_PTR(-ENOSPC); @@ -4778,8 +4765,8 @@ static inline struct inode *shmem_get_inode(struct mn= t_idmap *idmap, struct supe =20 /* common code */ =20 -static struct file *__shmem_file_setup(struct vfsmount *mnt, const char *n= ame, loff_t size, - unsigned long flags, unsigned int i_flags) +static struct file *__shmem_file_setup(struct vfsmount *mnt, const char *n= ame, + loff_t size, unsigned long flags, unsigned int i_flags) { struct inode *inode; struct file *res; @@ -4798,7 +4785,6 @@ static struct file *__shmem_file_setup(struct vfsmoun= t *mnt, const char *name, l =20 inode =3D shmem_get_inode(&nop_mnt_idmap, mnt->mnt_sb, NULL, S_IFREG | S_IRWXUGO, 0, flags); - if (IS_ERR(inode)) { shmem_unacct_size(flags, size); return ERR_CAST(inode); --=20 2.35.3 From nobody Tue Dec 16 19:57:35 2025 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id CAB08E77352 for ; Sat, 30 Sep 2023 03:30:14 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S233980AbjI3DaO (ORCPT ); Fri, 29 Sep 2023 23:30:14 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:49434 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S233973AbjI3DaM (ORCPT ); Fri, 29 Sep 2023 23:30:12 -0400 Received: from mail-qt1-x82a.google.com (mail-qt1-x82a.google.com [IPv6:2607:f8b0:4864:20::82a]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id A447CE7 for ; Fri, 29 Sep 2023 20:30:07 -0700 (PDT) Received: by mail-qt1-x82a.google.com with SMTP id d75a77b69052e-419628fb297so26245821cf.1 for ; Fri, 29 Sep 2023 20:30:07 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20230601; t=1696044606; x=1696649406; darn=vger.kernel.org; h=mime-version:references:message-id:in-reply-to:subject:cc:to:from :date:from:to:cc:subject:date:message-id:reply-to; bh=F5lBL1dIqosEB13vnHNJcMu3nNFm9z6wAJ0sm8x1c/Y=; b=4pNbCMPfFspO8G7/MhfmAy9r3B7X4+80ptLvUHsca9nvtZ/21NwNQBRB56wAMz2ooj RqqXeRlj9niyjk3mAs9Opz6wBVJCZDAZYEETLb/cH9tf2+216tYEvoDTQJUJ7Te/Tlbr wGqr0cncxmDRR/XJvTR8OD8huvDrKCx85uhQrloiOY2QFq05wXVy5owIldkh0Nz+icO2 GSlnjlIN3e1R/1tsuF55J+yUFRx8YiVS6HAU8Zd5jZC9IwXvUkHKsTh7U7AIip6GbE46 8lE5dtajfRT1q1e+DO11VmRnVzTKYHp09w/O1+o94fsUa1VH4ZeoKgzygxpcriXftomr HVTg== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20230601; t=1696044606; x=1696649406; h=mime-version:references:message-id:in-reply-to:subject:cc:to:from :date:x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=F5lBL1dIqosEB13vnHNJcMu3nNFm9z6wAJ0sm8x1c/Y=; b=qIG+1PoIzBSgLj61iQNCbclr5PGgKMugXSuhw3lm2rBaDSfOGoBD46t4fSEB9meA+c BnMh+0/5Hd6pHa9VFVCPaZLqlkBLtt4ZDXQPNpZPy52nGdz/dhtjI6ygGg9xba1XX1Kg znm8p/z1U/a/TECat8ZWBLh4Pt9GBOhjhMK2HDwWcmHfqyXiN+GO8wezPphf7ONas1jt gfQZwiFEAUnG2wbav3PGDrNw9qnwGoGLwLX1mID1ellT2mPkFRNQi5FBnHl5sYspu0RD yeHD4DeoIzS55glEU4CW+txw/Hj8FDdbPsmWfyauOMh8PttdbeSYvtePrKzpEDpjijt/ CgQg== X-Gm-Message-State: AOJu0YyLbvYczX+jDh9rPYVcEIRSZHjWaxmLqxic9DslQHvrDeUiMU1Y M2rovAPf3UlJXNipvfVRPkg/lA== X-Google-Smtp-Source: AGHT+IGP2fxP6CWffX9Qi9wXG2I3wbOa1qZMTS36GprFKCZ9FBnqMrcBGIljlzMe7rrEDbitBUPjPw== X-Received: by 2002:a05:622a:5cb:b0:412:c2a:eaef with SMTP id d11-20020a05622a05cb00b004120c2aeaefmr6834494qtb.11.1696044606428; Fri, 29 Sep 2023 20:30:06 -0700 (PDT) Received: from ripple.attlocal.net (172-10-233-147.lightspeed.sntcca.sbcglobal.net. 
[172.10.233.147]) by smtp.gmail.com; Fri, 29 Sep 2023 20:30:05 -0700 (PDT)
Date: Fri, 29 Sep 2023 20:30:03 -0700 (PDT)
From: Hugh Dickins
To: Andrew Morton
cc: Christian Brauner, Carlos Maiolino, Chuck Lever, Jan Kara, Matthew Wilcox,
 Johannes Weiner, Axel Rasmussen, linux-fsdevel@vger.kernel.org,
 linux-kernel@vger.kernel.org, linux-mm@kvack.org
Subject: [PATCH 5/8] shmem: shmem_acct_blocks() and shmem_inode_acct_blocks()
Message-ID: <9124094-e4ab-8be7-ef80-9a87bdc2e4fc@google.com>

By historical accident, shmem_acct_block() and shmem_inode_acct_block()
were never pluralized when the pages argument was added, despite their
complements being shmem_unacct_blocks() and shmem_inode_unacct_blocks()
all along.  It has been an irritation: fix their naming at last.

Signed-off-by: Hugh Dickins
Reviewed-by: Jan Kara
---
 mm/shmem.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/mm/shmem.c b/mm/shmem.c
index caee8ba841f7..63ba6037b23a 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -189,10 +189,10 @@ static inline int shmem_reacct_size(unsigned long flags,
 /*
  * ... whereas tmpfs objects are accounted incrementally as
  * pages are allocated, in order to allow large sparse files.
- * shmem_get_folio reports shmem_acct_block failure as -ENOSPC not -ENOMEM,
+ * shmem_get_folio reports shmem_acct_blocks failure as -ENOSPC not -ENOMEM,
  * so that a failure on a sparse tmpfs mapping will give SIGBUS not OOM.
  */
-static inline int shmem_acct_block(unsigned long flags, long pages)
+static inline int shmem_acct_blocks(unsigned long flags, long pages)
 {
 	if (!(flags & VM_NORESERVE))
 		return 0;
@@ -207,13 +207,13 @@ static inline void shmem_unacct_blocks(unsigned long flags, long pages)
 		vm_unacct_memory(pages * VM_ACCT(PAGE_SIZE));
 }
 
-static int shmem_inode_acct_block(struct inode *inode, long pages)
+static int shmem_inode_acct_blocks(struct inode *inode, long pages)
 {
 	struct shmem_inode_info *info = SHMEM_I(inode);
 	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
 	int err = -ENOSPC;
 
-	if (shmem_acct_block(info->flags, pages))
+	if (shmem_acct_blocks(info->flags, pages))
 		return err;
 
 	might_sleep();	/* when quotas */
@@ -447,7 +447,7 @@ bool shmem_charge(struct inode *inode, long pages)
 {
 	struct address_space *mapping = inode->i_mapping;
 
-	if (shmem_inode_acct_block(inode, pages))
+	if (shmem_inode_acct_blocks(inode, pages))
 		return false;
 
 	/* nrpages adjustment first, then shmem_recalc_inode() when balanced */
@@ -1671,7 +1671,7 @@ static struct folio *shmem_alloc_and_acct_folio(gfp_t gfp, struct inode *inode,
 		huge = false;
 	nr = huge ? HPAGE_PMD_NR : 1;
 
-	err = shmem_inode_acct_block(inode, nr);
+	err = shmem_inode_acct_blocks(inode, nr);
 	if (err)
 		goto failed;
 
@@ -2572,7 +2572,7 @@ int shmem_mfill_atomic_pte(pmd_t *dst_pmd,
 	int ret;
 	pgoff_t max_off;
 
-	if (shmem_inode_acct_block(inode, 1)) {
+	if (shmem_inode_acct_blocks(inode, 1)) {
 		/*
 		 * We may have got a page, returned -ENOENT triggering a retry,
 		 * and now we find ourselves with -ENOMEM. Release the page, to
-- 
2.35.3

From nobody Tue Dec 16 19:57:35 2025
Received: from ripple.attlocal.net (172-10-233-147.lightspeed.sntcca.sbcglobal.net.
[172.10.233.147]) by smtp.gmail.com with ESMTPSA id d71-20020a814f4a000000b0059be3d854b1sm5928458ywb.109.2023.09.29.20.31.28 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Fri, 29 Sep 2023 20:31:29 -0700 (PDT) Date: Fri, 29 Sep 2023 20:31:27 -0700 (PDT) From: Hugh Dickins X-X-Sender: hugh@ripple.attlocal.net To: Andrew Morton cc: Christian Brauner , Carlos Maiolino , Chuck Lever , Jan Kara , Matthew Wilcox , Johannes Weiner , Axel Rasmussen , linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org, linux-mm@kvack.org Subject: [PATCH 6/8] shmem: move memcg charge out of shmem_add_to_page_cache() In-Reply-To: Message-ID: <4b2143c5-bf32-64f0-841-81a81158dac@google.com> References: MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Extract shmem's memcg charging out of shmem_add_to_page_cache(): it's misleading done there, because many calls are dealing with a swapcache page, whose memcg is nowadays always remembered while swapped out, then the charge re-levied when it's brought back into swapcache. Temporarily move it back up to the shmem_get_folio_gfp() level, where the memcg was charged before v5.8; but the next commit goes on to move it back down to a new home. In making this change, it becomes clear that shmem_swapin_folio() does not need to know the vma, just the fault mm (if any): call it fault_mm rather than charge_mm - let mem_cgroup_charge() decide whom to charge. Signed-off-by: Hugh Dickins Reviewed-by: Jan Kara --- mm/shmem.c | 68 +++++++++++++++++++++++------------------------------- 1 file changed, 29 insertions(+), 39 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index 63ba6037b23a..0a7f7b567b80 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -146,9 +146,8 @@ static unsigned long shmem_default_max_inodes(void) #endif =20 static int shmem_swapin_folio(struct inode *inode, pgoff_t index, - struct folio **foliop, enum sgp_type sgp, - gfp_t gfp, struct vm_area_struct *vma, - vm_fault_t *fault_type); + struct folio **foliop, enum sgp_type sgp, gfp_t gfp, + struct mm_struct *fault_mm, vm_fault_t *fault_type); =20 static inline struct shmem_sb_info *SHMEM_SB(struct super_block *sb) { @@ -760,12 +759,10 @@ static unsigned long shmem_unused_huge_shrink(struct = shmem_sb_info *sbinfo, */ static int shmem_add_to_page_cache(struct folio *folio, struct address_space *mapping, - pgoff_t index, void *expected, gfp_t gfp, - struct mm_struct *charge_mm) + pgoff_t index, void *expected, gfp_t gfp) { XA_STATE_ORDER(xas, &mapping->i_pages, index, folio_order(folio)); long nr =3D folio_nr_pages(folio); - int error; =20 VM_BUG_ON_FOLIO(index !=3D round_down(index, nr), folio); VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); @@ -776,16 +773,7 @@ static int shmem_add_to_page_cache(struct folio *folio, folio->mapping =3D mapping; folio->index =3D index; =20 - if (!folio_test_swapcache(folio)) { - error =3D mem_cgroup_charge(folio, charge_mm, gfp); - if (error) { - if (folio_test_pmd_mappable(folio)) { - count_vm_event(THP_FILE_FALLBACK); - count_vm_event(THP_FILE_FALLBACK_CHARGE); - } - goto error; - } - } + gfp &=3D GFP_RECLAIM_MASK; folio_throttle_swaprate(folio, gfp); =20 do { @@ -813,15 +801,12 @@ static int shmem_add_to_page_cache(struct folio *foli= o, } while (xas_nomem(&xas, gfp)); =20 if (xas_error(&xas)) { - error =3D xas_error(&xas); - goto error; + folio->mapping =3D NULL; + folio_ref_sub(folio, nr); + return xas_error(&xas); } =20 return 0; -error: 
- folio->mapping =3D NULL; - folio_ref_sub(folio, nr); - return error; } =20 /* @@ -1324,10 +1309,8 @@ static int shmem_unuse_swap_entries(struct inode *in= ode, =20 if (!xa_is_value(folio)) continue; - error =3D shmem_swapin_folio(inode, indices[i], - &folio, SGP_CACHE, - mapping_gfp_mask(mapping), - NULL, NULL); + error =3D shmem_swapin_folio(inode, indices[i], &folio, SGP_CACHE, + mapping_gfp_mask(mapping), NULL, NULL); if (error =3D=3D 0) { folio_unlock(folio); folio_put(folio); @@ -1810,12 +1793,11 @@ static void shmem_set_folio_swapin_error(struct ino= de *inode, pgoff_t index, */ static int shmem_swapin_folio(struct inode *inode, pgoff_t index, struct folio **foliop, enum sgp_type sgp, - gfp_t gfp, struct vm_area_struct *vma, + gfp_t gfp, struct mm_struct *fault_mm, vm_fault_t *fault_type) { struct address_space *mapping =3D inode->i_mapping; struct shmem_inode_info *info =3D SHMEM_I(inode); - struct mm_struct *charge_mm =3D vma ? vma->vm_mm : NULL; struct swap_info_struct *si; struct folio *folio =3D NULL; swp_entry_t swap; @@ -1843,7 +1825,7 @@ static int shmem_swapin_folio(struct inode *inode, pg= off_t index, if (fault_type) { *fault_type |=3D VM_FAULT_MAJOR; count_vm_event(PGMAJFAULT); - count_memcg_event_mm(charge_mm, PGMAJFAULT); + count_memcg_event_mm(fault_mm, PGMAJFAULT); } /* Here we actually start the io */ folio =3D shmem_swapin(swap, gfp, info, index); @@ -1880,8 +1862,7 @@ static int shmem_swapin_folio(struct inode *inode, pg= off_t index, } =20 error =3D shmem_add_to_page_cache(folio, mapping, index, - swp_to_radix_entry(swap), gfp, - charge_mm); + swp_to_radix_entry(swap), gfp); if (error) goto failed; =20 @@ -1929,7 +1910,7 @@ static int shmem_get_folio_gfp(struct inode *inode, p= goff_t index, struct address_space *mapping =3D inode->i_mapping; struct shmem_inode_info *info =3D SHMEM_I(inode); struct shmem_sb_info *sbinfo; - struct mm_struct *charge_mm; + struct mm_struct *fault_mm; struct folio *folio; pgoff_t hindex; gfp_t huge_gfp; @@ -1946,7 +1927,7 @@ static int shmem_get_folio_gfp(struct inode *inode, p= goff_t index, } =20 sbinfo =3D SHMEM_SB(inode->i_sb); - charge_mm =3D vma ? vma->vm_mm : NULL; + fault_mm =3D vma ? 
vma->vm_mm : NULL; =20 folio =3D filemap_get_entry(mapping, index); if (folio && vma && userfaultfd_minor(vma)) { @@ -1958,7 +1939,7 @@ static int shmem_get_folio_gfp(struct inode *inode, p= goff_t index, =20 if (xa_is_value(folio)) { error =3D shmem_swapin_folio(inode, index, &folio, - sgp, gfp, vma, fault_type); + sgp, gfp, fault_mm, fault_type); if (error =3D=3D -EEXIST) goto repeat; =20 @@ -2044,9 +2025,16 @@ static int shmem_get_folio_gfp(struct inode *inode, = pgoff_t index, if (sgp =3D=3D SGP_WRITE) __folio_set_referenced(folio); =20 - error =3D shmem_add_to_page_cache(folio, mapping, hindex, - NULL, gfp & GFP_RECLAIM_MASK, - charge_mm); + error =3D mem_cgroup_charge(folio, fault_mm, gfp); + if (error) { + if (folio_test_pmd_mappable(folio)) { + count_vm_event(THP_FILE_FALLBACK); + count_vm_event(THP_FILE_FALLBACK_CHARGE); + } + goto unacct; + } + + error =3D shmem_add_to_page_cache(folio, mapping, hindex, NULL, gfp); if (error) goto unacct; =20 @@ -2644,8 +2632,10 @@ int shmem_mfill_atomic_pte(pmd_t *dst_pmd, if (unlikely(pgoff >=3D max_off)) goto out_release; =20 - ret =3D shmem_add_to_page_cache(folio, mapping, pgoff, NULL, - gfp & GFP_RECLAIM_MASK, dst_vma->vm_mm); + ret =3D mem_cgroup_charge(folio, dst_vma->vm_mm, gfp); + if (ret) + goto out_release; + ret =3D shmem_add_to_page_cache(folio, mapping, pgoff, NULL, gfp); if (ret) goto out_release; =20 --=20 2.35.3 From nobody Tue Dec 16 19:57:35 2025 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id E38DFE7734F for ; Sat, 30 Sep 2023 03:32:52 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S233981AbjI3Dcw (ORCPT ); Fri, 29 Sep 2023 23:32:52 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:56038 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S229764AbjI3Dcq (ORCPT ); Fri, 29 Sep 2023 23:32:46 -0400 Received: from mail-yw1-x1136.google.com (mail-yw1-x1136.google.com [IPv6:2607:f8b0:4864:20::1136]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 962ECDE for ; Fri, 29 Sep 2023 20:32:43 -0700 (PDT) Received: by mail-yw1-x1136.google.com with SMTP id 00721157ae682-59f7f2b1036so124395167b3.3 for ; Fri, 29 Sep 2023 20:32:43 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20230601; t=1696044763; x=1696649563; darn=vger.kernel.org; h=mime-version:references:message-id:in-reply-to:subject:cc:to:from :date:from:to:cc:subject:date:message-id:reply-to; bh=LLFDDir+UoXbMx1glevMcvIlnqgdvYAatnW1ChgCi9k=; b=vLiMM/bxoa1vVYz9Hjqlg5djLADgF8njjFDJU1Ah1oEhcXAxomJch08jBg5JpdTIyY 4swZHLFd3rTy9vL3JKPnkhiIEKqeaNUrZXr/OdJ2KX3pO/1JkNL/E9mpMaAur/owBSvD CSKNd6ByIWyNPmJRJEHHXIJ8N+zS2XC4dTo61IDGAtB4PSE4yooRLDjbPKIRNXGaSBfv VguCC8tsi0uCJK0Wx7xtuSzWMErjBvfcd1d6VEOo/X8aheL3C5ss1rk+fXUGV1qBhnNm 90jOVa9BTxryt4M7ipEeVpQkaFfzBzrS+YJJXrLMnH1vx+bAVt0Xlr6Qnt9gUXOhXf6+ xP/w== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20230601; t=1696044763; x=1696649563; h=mime-version:references:message-id:in-reply-to:subject:cc:to:from :date:x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=LLFDDir+UoXbMx1glevMcvIlnqgdvYAatnW1ChgCi9k=; b=Y+Vi6OjTHBzbERXP/S3S5wre+MT+hUK7fMJLrvTHVdleTW5AUd2LmRfLpe9NWOJWhz +wSaBLkuzGnJAnrG1L5xV4GPF9l5SBiyeENRBmafcUrDs/LZpvhth3WoAT8RIIxiz89N 
Date: Fri, 29 Sep 2023 20:32:40 -0700 (PDT)
From: Hugh Dickins
To: Andrew Morton
cc: Christian Brauner, Carlos Maiolino, Chuck Lever, Jan Kara, Matthew Wilcox,
 Johannes Weiner, Axel Rasmussen, linux-fsdevel@vger.kernel.org,
 linux-kernel@vger.kernel.org, linux-mm@kvack.org
Subject: [PATCH 7/8] shmem: _add_to_page_cache() before shmem_inode_acct_blocks()
Message-ID: <22ddd06-d919-33b-1219-56335c1bf28e@google.com>

There has been a recurring problem, that when a tmpfs volume is being
filled by racing threads, some fail with ENOSPC (or consequent SIGBUS
or EFAULT) even though all allocations were within the permitted size.

This was a problem since early days, but magnified and complicated by
the addition of huge pages.  We have often worked around it by adding
some slop to the tmpfs size, but it's hard to say how much is needed,
and some users prefer not to do that e.g. keeping sparse files in a
tightly tailored tmpfs helps to prevent accidental writing to holes.

This comes from the allocation sequence:
1. check page cache for existing folio
2. check and reserve from vm_enough_memory
3. check and account from size of tmpfs
4. if huge, check page cache for overlapping folio
5. allocate physical folio, huge or small
6. check and charge from mem cgroup limit
7. add to page cache (but maybe another folio already got in).

Concurrent tasks allocating at the same position could deplete the size
allowance and fail.  Doing vm_enough_memory and size checks before the
folio allocation was intentional (to limit the load on the page allocator
from this source) and still has some virtue; but memory cgroup never did
that, so I think it's better reordered to favour predictable behaviour.

1. check page cache for existing folio
2. if huge, check page cache for overlapping folio
3. allocate physical folio, huge or small
4. check and charge from mem cgroup limit
5. add to page cache (but maybe another folio already got in)
6. check and reserve from vm_enough_memory
7. check and account from size of tmpfs.

The folio lock held from allocation onwards ensures that the !uptodate
folio cannot be used by others, and can safely be deleted from the cache
if checks 6 or 7 subsequently fail (and those waiting on folio lock
already check that the folio was not truncated once they get the lock);
and the early addition to page cache ensures that racers find it before
they try to duplicate the accounting.
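To make the reordering concrete, here is a condensed, standalone C sketch
of the new control flow, treating each step as a black box.  Every function
below is a hypothetical stand-in stubbed to return a fixed value; it is not
the kernel code, only an illustration of "insert into page cache first,
account last, and on a late failure delete the still-locked folio".

#include <stdio.h>
#include <errno.h>
#include <stdbool.h>

static bool cache_has_conflict(void)    { return false; }   /* steps 1-2 */
static int  alloc_locked_folio(void)    { return 0; }        /* step 3 */
static int  charge_memcg(void)          { return 0; }        /* step 4 */
static int  add_to_page_cache(void)     { return 0; }        /* step 5 */
static int  reserve_and_account(void)   { return -ENOSPC; }  /* steps 6-7 */
static void remove_from_page_cache(void){ }

static int get_folio(void)
{
	if (cache_has_conflict())
		return -EEXIST;
	if (alloc_locked_folio())
		return -ENOMEM;
	if (charge_memcg())
		return -ENOMEM;
	if (add_to_page_cache())	/* racers now find this locked folio... */
		return -EEXIST;
	if (reserve_and_account()) {	/* ...so a late ENOSPC is recoverable: */
		remove_from_page_cache();	/* just delete the !uptodate folio */
		return -ENOSPC;
	}
	return 0;
}

int main(void)
{
	printf("get_folio() = %d\n", get_folio());	/* -ENOSPC in this stubbed run */
	return 0;
}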
Seize the opportunity to tidy up shmem_get_folio_gfp()'s ENOSPC
retrying, which can be combined inside the new
shmem_alloc_and_add_folio(): doing 2 splits twice (once huge, once
nonhuge) is not exactly equivalent to trying 5 splits (and giving up
early on huge), but let's keep it simple unless more complication
proves necessary.

Userfaultfd is a foreign country: they do things differently there, and
for good reason - to avoid mmap_lock deadlock.  Leave ordering in
shmem_mfill_atomic_pte() untouched for now, but I would rather like to
mesh it better with shmem_get_folio_gfp() in the future.

Signed-off-by: Hugh Dickins
---
 mm/shmem.c | 235 +++++++++++++++++++++++++++--------------------------
 1 file changed, 121 insertions(+), 114 deletions(-)

diff --git a/mm/shmem.c b/mm/shmem.c
index 0a7f7b567b80..4f4ab26bc58a 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -789,13 +789,11 @@ static int shmem_add_to_page_cache(struct folio *folio,
 		xas_store(&xas, folio);
 		if (xas_error(&xas))
 			goto unlock;
-		if (folio_test_pmd_mappable(folio)) {
-			count_vm_event(THP_FILE_ALLOC);
+		if (folio_test_pmd_mappable(folio))
 			__lruvec_stat_mod_folio(folio, NR_SHMEM_THPS, nr);
-		}
-		mapping->nrpages += nr;
 		__lruvec_stat_mod_folio(folio, NR_FILE_PAGES, nr);
 		__lruvec_stat_mod_folio(folio, NR_SHMEM, nr);
+		mapping->nrpages += nr;
 unlock:
 		xas_unlock_irq(&xas);
 	} while (xas_nomem(&xas, gfp));
@@ -1612,25 +1610,17 @@ static struct folio *shmem_alloc_hugefolio(gfp_t gfp,
 		struct shmem_inode_info *info, pgoff_t index)
 {
 	struct vm_area_struct pvma;
-	struct address_space *mapping = info->vfs_inode.i_mapping;
-	pgoff_t hindex;
 	struct folio *folio;
 
-	hindex = round_down(index, HPAGE_PMD_NR);
-	if (xa_find(&mapping->i_pages, &hindex, hindex + HPAGE_PMD_NR - 1,
-			XA_PRESENT))
-		return NULL;
-
-	shmem_pseudo_vma_init(&pvma, info, hindex);
+	shmem_pseudo_vma_init(&pvma, info, index);
 	folio = vma_alloc_folio(gfp, HPAGE_PMD_ORDER, &pvma, 0, true);
 	shmem_pseudo_vma_destroy(&pvma);
-	if (!folio)
-		count_vm_event(THP_FILE_FALLBACK);
+
 	return folio;
 }
 
 static struct folio *shmem_alloc_folio(gfp_t gfp,
-			struct shmem_inode_info *info, pgoff_t index)
+		struct shmem_inode_info *info, pgoff_t index)
 {
 	struct vm_area_struct pvma;
 	struct folio *folio;
@@ -1642,36 +1632,101 @@ static struct folio *shmem_alloc_folio(gfp_t gfp,
 	return folio;
 }
 
-static struct folio *shmem_alloc_and_acct_folio(gfp_t gfp, struct inode *inode,
-		pgoff_t index, bool huge)
+static struct folio *shmem_alloc_and_add_folio(gfp_t gfp,
+		struct inode *inode, pgoff_t index,
+		struct mm_struct *fault_mm, bool huge)
 {
+	struct address_space *mapping = inode->i_mapping;
 	struct shmem_inode_info *info = SHMEM_I(inode);
 	struct folio *folio;
-	int nr;
-	int err;
+	long pages;
+	int error;
 
 	if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
 		huge = false;
-	nr = huge ? HPAGE_PMD_NR : 1;
 
-	err = shmem_inode_acct_blocks(inode, nr);
-	if (err)
-		goto failed;
+	if (huge) {
+		pages = HPAGE_PMD_NR;
+		index = round_down(index, HPAGE_PMD_NR);
+
+		/*
+		 * Check for conflict before waiting on a huge allocation.
+		 * Conflict might be that a huge page has just been allocated
+		 * and added to page cache by a racing thread, or that there
+		 * is already at least one small page in the huge extent.
+		 * Be careful to retry when appropriate, but not forever!
+		 * Elsewhere -EEXIST would be the right code, but not here.
+		 */
+		if (xa_find(&mapping->i_pages, &index,
+				index + HPAGE_PMD_NR - 1, XA_PRESENT))
+			return ERR_PTR(-E2BIG);
 
-	if (huge)
 		folio = shmem_alloc_hugefolio(gfp, info, index);
-	else
+		if (!folio)
+			count_vm_event(THP_FILE_FALLBACK);
+	} else {
+		pages = 1;
 		folio = shmem_alloc_folio(gfp, info, index);
-	if (folio) {
-		__folio_set_locked(folio);
-		__folio_set_swapbacked(folio);
-		return folio;
+	}
+	if (!folio)
+		return ERR_PTR(-ENOMEM);
+
+	__folio_set_locked(folio);
+	__folio_set_swapbacked(folio);
+
+	gfp &= GFP_RECLAIM_MASK;
+	error = mem_cgroup_charge(folio, fault_mm, gfp);
+	if (error) {
+		if (xa_find(&mapping->i_pages, &index,
+				index + pages - 1, XA_PRESENT)) {
+			error = -EEXIST;
+		} else if (huge) {
+			count_vm_event(THP_FILE_FALLBACK);
+			count_vm_event(THP_FILE_FALLBACK_CHARGE);
+		}
+		goto unlock;
 	}
 
-	err = -ENOMEM;
-	shmem_inode_unacct_blocks(inode, nr);
-failed:
-	return ERR_PTR(err);
+	error = shmem_add_to_page_cache(folio, mapping, index, NULL, gfp);
+	if (error)
+		goto unlock;
+
+	error = shmem_inode_acct_blocks(inode, pages);
+	if (error) {
+		struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
+		long freed;
+		/*
+		 * Try to reclaim some space by splitting a few
+		 * large folios beyond i_size on the filesystem.
+		 */
+		shmem_unused_huge_shrink(sbinfo, NULL, 2);
+		/*
+		 * And do a shmem_recalc_inode() to account for freed pages:
+		 * except our folio is there in cache, so not quite balanced.
+		 */
+		spin_lock(&info->lock);
+		freed = pages + info->alloced - info->swapped -
+			READ_ONCE(mapping->nrpages);
+		if (freed > 0)
+			info->alloced -= freed;
+		spin_unlock(&info->lock);
+		if (freed > 0)
+			shmem_inode_unacct_blocks(inode, freed);
+		error = shmem_inode_acct_blocks(inode, pages);
+		if (error) {
+			filemap_remove_folio(folio);
+			goto unlock;
+		}
+	}
+
+	shmem_recalc_inode(inode, pages, 0);
+	folio_add_lru(folio);
+	return folio;
+
+unlock:
+	folio_unlock(folio);
+	folio_put(folio);
+	return ERR_PTR(error);
 }
 
 /*
@@ -1907,29 +1962,22 @@ static int shmem_get_folio_gfp(struct inode *inode, pgoff_t index,
 		struct vm_fault *vmf, vm_fault_t *fault_type)
 {
 	struct vm_area_struct *vma = vmf ? vmf->vma : NULL;
-	struct address_space *mapping = inode->i_mapping;
-	struct shmem_inode_info *info = SHMEM_I(inode);
-	struct shmem_sb_info *sbinfo;
 	struct mm_struct *fault_mm;
 	struct folio *folio;
-	pgoff_t hindex;
-	gfp_t huge_gfp;
 	int error;
-	int once = 0;
-	int alloced = 0;
+	bool alloced;
 
 	if (index > (MAX_LFS_FILESIZE >> PAGE_SHIFT))
 		return -EFBIG;
 repeat:
 	if (sgp <= SGP_CACHE &&
-	    ((loff_t)index << PAGE_SHIFT) >= i_size_read(inode)) {
+	    ((loff_t)index << PAGE_SHIFT) >= i_size_read(inode))
 		return -EINVAL;
-	}
 
-	sbinfo = SHMEM_SB(inode->i_sb);
+	alloced = false;
 	fault_mm = vma ? vma->vm_mm : NULL;
 
-	folio = filemap_get_entry(mapping, index);
+	folio = filemap_get_entry(inode->i_mapping, index);
 	if (folio && vma && userfaultfd_minor(vma)) {
 		if (!xa_is_value(folio))
 			folio_put(folio);
@@ -1951,7 +1999,7 @@ static int shmem_get_folio_gfp(struct inode *inode, pgoff_t index,
 	folio_lock(folio);
 
 	/* Has the folio been truncated or swapped out? */
-	if (unlikely(folio->mapping != mapping)) {
+	if (unlikely(folio->mapping != inode->i_mapping)) {
 		folio_unlock(folio);
 		folio_put(folio);
 		goto repeat;
@@ -1986,65 +2034,38 @@ static int shmem_get_folio_gfp(struct inode *inode, pgoff_t index,
 		return 0;
 	}
 
-	if (!shmem_is_huge(inode, index, false,
-			   vma ? vma->vm_mm : NULL, vma ? vma->vm_flags : 0))
-		goto alloc_nohuge;
+	if (shmem_is_huge(inode, index, false, fault_mm,
+			  vma ? vma->vm_flags : 0)) {
+		gfp_t huge_gfp;
 
-	huge_gfp = vma_thp_gfp_mask(vma);
-	huge_gfp = limit_gfp_mask(huge_gfp, gfp);
-	folio = shmem_alloc_and_acct_folio(huge_gfp, inode, index, true);
-	if (IS_ERR(folio)) {
-alloc_nohuge:
-		folio = shmem_alloc_and_acct_folio(gfp, inode, index, false);
-	}
-	if (IS_ERR(folio)) {
-		int retry = 5;
-
-		error = PTR_ERR(folio);
-		folio = NULL;
-		if (error != -ENOSPC)
-			goto unlock;
-		/*
-		 * Try to reclaim some space by splitting a large folio
-		 * beyond i_size on the filesystem.
-		 */
-		while (retry--) {
-			int ret;
-
-			ret = shmem_unused_huge_shrink(sbinfo, NULL, 1);
-			if (ret == SHRINK_STOP)
-				break;
-			if (ret)
-				goto alloc_nohuge;
+		huge_gfp = vma_thp_gfp_mask(vma);
+		huge_gfp = limit_gfp_mask(huge_gfp, gfp);
+		folio = shmem_alloc_and_add_folio(huge_gfp,
+				inode, index, fault_mm, true);
+		if (!IS_ERR(folio)) {
+			count_vm_event(THP_FILE_ALLOC);
+			goto alloced;
 		}
+		if (PTR_ERR(folio) == -EEXIST)
+			goto repeat;
+	}
+
+	folio = shmem_alloc_and_add_folio(gfp, inode, index, fault_mm, false);
+	if (IS_ERR(folio)) {
+		error = PTR_ERR(folio);
+		if (error == -EEXIST)
+			goto repeat;
+		folio = NULL;
 		goto unlock;
 	}
 
-	hindex = round_down(index, folio_nr_pages(folio));
-
-	if (sgp == SGP_WRITE)
-		__folio_set_referenced(folio);
-
-	error = mem_cgroup_charge(folio, fault_mm, gfp);
-	if (error) {
-		if (folio_test_pmd_mappable(folio)) {
-			count_vm_event(THP_FILE_FALLBACK);
-			count_vm_event(THP_FILE_FALLBACK_CHARGE);
-		}
-		goto unacct;
-	}
-
-	error = shmem_add_to_page_cache(folio, mapping, hindex, NULL, gfp);
-	if (error)
-		goto unacct;
-
-	folio_add_lru(folio);
-	shmem_recalc_inode(inode, folio_nr_pages(folio), 0);
+alloced:
 	alloced = true;
-
 	if (folio_test_pmd_mappable(folio) &&
 	    DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE) <
 					folio_next_index(folio) - 1) {
+		struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
+		struct shmem_inode_info *info = SHMEM_I(inode);
 		/*
 		 * Part of the large folio is beyond i_size: subject
 		 * to shrink under memory pressure.
@@ -2062,6 +2083,8 @@ static int shmem_get_folio_gfp(struct inode *inode, pgoff_t index,
 		spin_unlock(&sbinfo->shrinklist_lock);
 	}
 
+	if (sgp == SGP_WRITE)
+		folio_set_referenced(folio);
 	/*
 	 * Let SGP_FALLOC use the SGP_WRITE optimization on a new folio.
 	 */
@@ -2085,11 +2108,6 @@ static int shmem_get_folio_gfp(struct inode *inode, pgoff_t index,
 	/* Perhaps the file has been truncated since we checked */
 	if (sgp <= SGP_CACHE &&
 	    ((loff_t)index << PAGE_SHIFT) >= i_size_read(inode)) {
-		if (alloced) {
-			folio_clear_dirty(folio);
-			filemap_remove_folio(folio);
-			shmem_recalc_inode(inode, 0, 0);
-		}
 		error = -EINVAL;
 		goto unlock;
 	}
@@ -2100,25 +2118,14 @@ static int shmem_get_folio_gfp(struct inode *inode, pgoff_t index,
 	/*
 	 * Error recovery.
 	 */
-unacct:
-	shmem_inode_unacct_blocks(inode, folio_nr_pages(folio));
-
-	if (folio_test_large(folio)) {
-		folio_unlock(folio);
-		folio_put(folio);
-		goto alloc_nohuge;
-	}
 unlock:
+	if (alloced)
+		filemap_remove_folio(folio);
+	shmem_recalc_inode(inode, 0, 0);
 	if (folio) {
 		folio_unlock(folio);
 		folio_put(folio);
 	}
-	if (error == -ENOSPC && !once++) {
-		shmem_recalc_inode(inode, 0, 0);
-		goto repeat;
-	}
-	if (error == -EEXIST)
-		goto repeat;
 	return error;
 }
-- 
2.35.3

From nobody Tue Dec 16 19:57:35 2025
Date: Fri, 29 Sep 2023 20:42:45 -0700 (PDT)
From: Hugh Dickins
To: Andrew Morton
cc: Tim Chen, Dave Chinner, "Darrick J. Wong", Christian Brauner,
    Carlos Maiolino, Chuck Lever, Jan Kara, Matthew Wilcox,
    Johannes Weiner, Axel Rasmussen, linux-fsdevel@vger.kernel.org,
    linux-kernel@vger.kernel.org, linux-mm@kvack.org
Subject: [PATCH 8/8] shmem,percpu_counter: add _limited_add(fbc, limit, amount)

Percpu counter's compare and add are separate functions: without
locking around them (which would defeat their purpose), it has been
possible to overflow the intended limit.  Imagine all the other CPUs
fallocating tmpfs huge pages to the limit, in between this CPU's
compare and its add.

I have not seen reports of that happening; but tmpfs's recent addition
of dquot_alloc_block_nodirty() in between the compare and the add makes
it even more likely, and I'd be uncomfortable to leave it unfixed.

Introduce percpu_counter_limited_add(fbc, limit, amount) to prevent it.

I believe this implementation is correct, and slightly more efficient
than the combination of compare and add (taking the lock once rather
than twice when nearing full - the last 128MiB of a tmpfs volume on a
machine with 128 CPUs and 4KiB pages); but it does beg for a better
design - when nearing full, there is no new batching, but the costly
percpu counter sum across CPUs still has to be done, while locked.

Follow __percpu_counter_sum()'s example, including cpu_dying_mask as
well as cpu_online_mask: but shouldn't __percpu_counter_compare() and
__percpu_counter_limited_add() then be adding a num_dying_cpus() to
num_online_cpus(), when they calculate the maximum which could be held
across CPUs?  But the times when it matters would be vanishingly rare.

Signed-off-by: Hugh Dickins
Cc: Tim Chen
Cc: Dave Chinner
Cc: Darrick J. Wong
Reviewed-by: Jan Kara
---
Tim, Dave, Darrick: I didn't want to waste your time on patches 1-7,
which are just internal to shmem, and do not affect this patch (which
applies to v6.6-rc and linux-next as is): but want to run this by you.
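For the tmpfs caller, the difference looks like this (an illustrative
fragment condensed from the mm/shmem.c hunk at the end of this patch;
the dquot_alloc_block_nodirty() call in between is elided):

	/* Before: compare and add are separate, so two racing CPUs can
	 * both pass the compare and then both add, overshooting max_blocks.
	 */
	if (percpu_counter_compare(&sbinfo->used_blocks,
				   sbinfo->max_blocks - pages) > 0)
		goto unacct;
	...
	percpu_counter_add(&sbinfo->used_blocks, pages);

	/* After: the compare and the add are a single decision, so the
	 * limit cannot be overshot by racing limited adds.
	 */
	if (!percpu_counter_limited_add(&sbinfo->used_blocks,
					sbinfo->max_blocks, pages))
		goto unacct;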
 include/linux/percpu_counter.h | 23 +++++++++++++++
 lib/percpu_counter.c           | 53 ++++++++++++++++++++++++++++++++++
 mm/shmem.c                     | 10 +++----
 3 files changed, 81 insertions(+), 5 deletions(-)

diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h
index d01351b1526f..8cb7c071bd5c 100644
--- a/include/linux/percpu_counter.h
+++ b/include/linux/percpu_counter.h
@@ -57,6 +57,8 @@ void percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount,
 			      s32 batch);
 s64 __percpu_counter_sum(struct percpu_counter *fbc);
 int __percpu_counter_compare(struct percpu_counter *fbc, s64 rhs, s32 batch);
+bool __percpu_counter_limited_add(struct percpu_counter *fbc, s64 limit,
+				  s64 amount, s32 batch);
 void percpu_counter_sync(struct percpu_counter *fbc);
 
 static inline int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs)
@@ -69,6 +71,13 @@ static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount)
 	percpu_counter_add_batch(fbc, amount, percpu_counter_batch);
 }
 
+static inline bool
+percpu_counter_limited_add(struct percpu_counter *fbc, s64 limit, s64 amount)
+{
+	return __percpu_counter_limited_add(fbc, limit, amount,
+					    percpu_counter_batch);
+}
+
 /*
  * With percpu_counter_add_local() and percpu_counter_sub_local(), counts
  * are accumulated in local per cpu counter and not in fbc->count until
@@ -185,6 +194,20 @@ percpu_counter_add(struct percpu_counter *fbc, s64 amount)
 	local_irq_restore(flags);
 }
 
+static inline bool
+percpu_counter_limited_add(struct percpu_counter *fbc, s64 limit, s64 amount)
+{
+	unsigned long flags;
+	s64 count;
+
+	local_irq_save(flags);
+	count = fbc->count + amount;
+	if (count <= limit)
+		fbc->count = count;
+	local_irq_restore(flags);
+	return count <= limit;
+}
+
 /* non-SMP percpu_counter_add_local is the same with percpu_counter_add */
 static inline void
 percpu_counter_add_local(struct percpu_counter *fbc, s64 amount)
diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c
index 9073430dc865..58a3392f471b 100644
--- a/lib/percpu_counter.c
+++ b/lib/percpu_counter.c
@@ -278,6 +278,59 @@ int __percpu_counter_compare(struct percpu_counter *fbc, s64 rhs, s32 batch)
 }
 EXPORT_SYMBOL(__percpu_counter_compare);
 
+/*
+ * Compare counter, and add amount if the total is within limit.
+ * Return true if amount was added, false if it would exceed limit.
+ */
+bool __percpu_counter_limited_add(struct percpu_counter *fbc,
+				  s64 limit, s64 amount, s32 batch)
+{
+	s64 count;
+	s64 unknown;
+	unsigned long flags;
+	bool good;
+
+	if (amount > limit)
+		return false;
+
+	local_irq_save(flags);
+	unknown = batch * num_online_cpus();
+	count = __this_cpu_read(*fbc->counters);
+
+	/* Skip taking the lock when safe */
+	if (abs(count + amount) <= batch &&
+	    fbc->count + unknown <= limit) {
+		this_cpu_add(*fbc->counters, amount);
+		local_irq_restore(flags);
+		return true;
+	}
+
+	raw_spin_lock(&fbc->lock);
+	count = fbc->count + amount;
+
+	/* Skip percpu_counter_sum() when safe */
+	if (count + unknown > limit) {
+		s32 *pcount;
+		int cpu;
+
+		for_each_cpu_or(cpu, cpu_online_mask, cpu_dying_mask) {
+			pcount = per_cpu_ptr(fbc->counters, cpu);
+			count += *pcount;
+		}
+	}
+
+	good = count <= limit;
+	if (good) {
+		count = __this_cpu_read(*fbc->counters);
+		fbc->count += count + amount;
+		__this_cpu_sub(*fbc->counters, count);
+	}
+
+	raw_spin_unlock(&fbc->lock);
+	local_irq_restore(flags);
+	return good;
+}
+
 static int __init percpu_counter_startup(void)
 {
 	int ret;
diff --git a/mm/shmem.c b/mm/shmem.c
index 4f4ab26bc58a..7cb72c747954 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -217,15 +217,15 @@ static int shmem_inode_acct_blocks(struct inode *inode, long pages)
 
 	might_sleep();	/* when quotas */
 	if (sbinfo->max_blocks) {
-		if (percpu_counter_compare(&sbinfo->used_blocks,
-					   sbinfo->max_blocks - pages) > 0)
+		if (!percpu_counter_limited_add(&sbinfo->used_blocks,
+						sbinfo->max_blocks, pages))
 			goto unacct;
 
 		err = dquot_alloc_block_nodirty(inode, pages);
-		if (err)
+		if (err) {
+			percpu_counter_sub(&sbinfo->used_blocks, pages);
 			goto unacct;
-
-		percpu_counter_add(&sbinfo->used_blocks, pages);
+		}
 	} else {
 		err = dquot_alloc_block_nodirty(inode, pages);
 		if (err)
-- 
2.35.3