From nobody Tue Dec 16 06:14:42 2025 Received: from out30-100.freemail.mail.aliyun.com (out30-100.freemail.mail.aliyun.com [115.124.30.100]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id C8A1E1EB2F; Thu, 29 May 2025 08:29:25 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=115.124.30.100 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1748507369; cv=none; b=XAEihMibYZScncSGFg5XFAV1wVZguY3bsKeXE0qKMU0jAz0cX2R9fNMwKTTapae7aLWgBG6aBzsWqy+kXtAVepGrXHUqie3Kovjmk4+RUhdeGa57loBB9CxVQqopkYU7UEMq/dv/j3ky1aaBY3KBsZu6HE0XAxEbUtNNS8HX5Vk= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1748507369; c=relaxed/simple; bh=oWMfWr38IyfL2mt6nbNwh36u88niY/zrghcvsR120Hk=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=r9izBJWwQaYyG5EUzMAhGEC+T+wNsvetgtU3YfM8D1ZKS/iBANR7D3UjHsW4k/+LavR+jKuO4UIT2pVb/VsEKKmiECi5H0vrZVxFYLVxJyOha29o3Wt7fqtiyeJOobVtmMuEqgV9pcDnE481/1jlqJXUdQ5ug3zqaAMcP3dZhxY= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=linux.alibaba.com; spf=pass smtp.mailfrom=linux.alibaba.com; dkim=pass (1024-bit key) header.d=linux.alibaba.com header.i=@linux.alibaba.com header.b=SiAyZXoL; arc=none smtp.client-ip=115.124.30.100 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=linux.alibaba.com Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=linux.alibaba.com Authentication-Results: smtp.subspace.kernel.org; dkim=pass (1024-bit key) header.d=linux.alibaba.com header.i=@linux.alibaba.com header.b="SiAyZXoL" DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=linux.alibaba.com; s=default; t=1748507363; h=From:To:Subject:Date:Message-ID:MIME-Version; bh=0SELs1h7jaP7V5rEcYlTVB8+PGf5ZsGKYSIxuyINI8U=; 
b=SiAyZXoLVGCBW9XtJtM56PYuJUmcQnOUzClvfgM7tVIvyomYf297q7TpO6DKb1Q2GJUwXihKME2rlSKKVrMvZkv7CpkB3ChLURi+duiyzY78CH0ld/ZrCKrVtrjgEwqBTK7m9kpeBZ0ePgCGg5LCwoaDjje6EC9IID8XbgO3Kz4= Received: from localhost(mailfrom:baolin.wang@linux.alibaba.com fp:SMTPD_---0WcGls7-_1748507043 cluster:ay36) by smtp.aliyun-inc.com; Thu, 29 May 2025 16:24:03 +0800 From: Baolin Wang To: akpm@linux-foundation.org, hughd@google.com, david@redhat.com Cc: lorenzo.stoakes@oracle.com, Liam.Howlett@oracle.com, npache@redhat.com, ryan.roberts@arm.com, dev.jain@arm.com, ziy@nvidia.com, baolin.wang@linux.alibaba.com, linux-mm@kvack.org, linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org Subject: [PATCH 1/2] mm: huge_memory: disallow hugepages if the system-wide THP sysfs settings are disabled Date: Thu, 29 May 2025 16:23:54 +0800 Message-ID: X-Mailer: git-send-email 2.43.5 In-Reply-To: References: Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" The MADV_COLLAPSE will ignore the system-wide Anon THP sysfs settings, which means that even though we have disabled the Anon THP configuration, MADV_COLLAPSE will still attempt to collapse into an Anon THP. This violates the rule we have agreed upon: never means never. To address this issue, we should check whether the Anon THP configuration is disabled in thp_vma_allowable_orders(), even when the TVA_ENFORCE_SYSFS flag is not set. 
Signed-off-by: Baolin Wang Reviewed-by: Zi Yan --- include/linux/huge_mm.h | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 2f190c90192d..199ddc9f04a1 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -287,20 +287,35 @@ unsigned long thp_vma_allowable_orders(struct vm_area= _struct *vma, unsigned long orders) { /* Optimization to check if required orders are enabled early. */ - if ((tva_flags & TVA_ENFORCE_SYSFS) && vma_is_anonymous(vma)) { - unsigned long mask =3D READ_ONCE(huge_anon_orders_always); + if (vma_is_anonymous(vma)) { + unsigned long always =3D READ_ONCE(huge_anon_orders_always); + unsigned long madvise =3D READ_ONCE(huge_anon_orders_madvise); + unsigned long inherit =3D READ_ONCE(huge_anon_orders_inherit); + unsigned long mask =3D always | madvise; + + /* + * If the system-wide THP/mTHP sysfs settings are disabled, + * then we should never allow hugepages. + */ + if (!(mask & orders) && !(hugepage_global_enabled() && (inherit & orders= ))) + return 0; + + if (!(tva_flags & TVA_ENFORCE_SYSFS)) + goto skip; =20 + mask =3D always; if (vm_flags & VM_HUGEPAGE) - mask |=3D READ_ONCE(huge_anon_orders_madvise); + mask |=3D madvise; if (hugepage_global_always() || ((vm_flags & VM_HUGEPAGE) && hugepage_global_enabled())) - mask |=3D READ_ONCE(huge_anon_orders_inherit); + mask |=3D inherit; =20 orders &=3D mask; if (!orders) return 0; } =20 +skip: return __thp_vma_allowable_orders(vma, vm_flags, tva_flags, orders); } =20 --=20 2.43.5 From nobody Tue Dec 16 06:14:42 2025 Received: from out30-100.freemail.mail.aliyun.com (out30-100.freemail.mail.aliyun.com [115.124.30.100]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id CE54B1EB2F; Thu, 29 May 2025 08:24:08 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none 
smtp.client-ip=115.124.30.100 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1748507052; cv=none; b=oBNDbokM/LJHYXFb8tdkoTNzFPbX8CjlQn7wGhjVoRY5+cwExhg0TkGN5+z24kaWHN3lZmuHTrF705W5iv/yaPi3LtNKETpt1qhUu5mAkprWSSUVsX/+SBwoioBWSIEScnbYfpWUTjAlzLp+fsOYyyUVHx+nHTO86d1/CIulO0o= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1748507052; c=relaxed/simple; bh=RIJKgny14IykmPjf5Bf9kyBbM164mxDIhs0UrJ22eDM=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=Qp9+vCFU1kExuY2OkVHOtRqo9ci9fB0Ff2/UE/EOHqvxSBQs4J/iEBS6yiFVrgKDNXB3jEZx/f4upZxeeN1ugEDLMzCLlrI3H5vtyw8xZjFgqPnVVnnsuHr+Z4t00JDUwP+ji4HpxclzXQXTRTegcahhWWlEuPLLxwTaKWySE3Y= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=linux.alibaba.com; spf=pass smtp.mailfrom=linux.alibaba.com; dkim=pass (1024-bit key) header.d=linux.alibaba.com header.i=@linux.alibaba.com header.b=G8E6sIz7; arc=none smtp.client-ip=115.124.30.100 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=linux.alibaba.com Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=linux.alibaba.com Authentication-Results: smtp.subspace.kernel.org; dkim=pass (1024-bit key) header.d=linux.alibaba.com header.i=@linux.alibaba.com header.b="G8E6sIz7" DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=linux.alibaba.com; s=default; t=1748507046; h=From:To:Subject:Date:Message-ID:MIME-Version; bh=HepIRzU9yVfyc0yQbJg8CFCDS1fs2gXvJNO7QTz5Ovs=; b=G8E6sIz7puwJ/OZer7Hdvi/DmUH9PaCh3Vh0Cr0DywuXQvUiqRq+GgLws7DKfzr+99rhSFN3J4DrdEGzMCRbHSUubCS8eAXWDXTmRB9XKS/kbreDiGCryU3vmKMZEsDvM8yAyMVjSj5DUlWXRSB6SHYeYwqlZpDUIiek+eeXUSk= Received: from localhost(mailfrom:baolin.wang@linux.alibaba.com fp:SMTPD_---0WcGcFkd_1748507044 cluster:ay36) by smtp.aliyun-inc.com; Thu, 29 May 2025 16:24:05 +0800 From: Baolin Wang To: akpm@linux-foundation.org, hughd@google.com, david@redhat.com Cc: 
lorenzo.stoakes@oracle.com, Liam.Howlett@oracle.com, npache@redhat.com, ryan.roberts@arm.com, dev.jain@arm.com, ziy@nvidia.com, baolin.wang@linux.alibaba.com, linux-mm@kvack.org, linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org Subject: [PATCH 2/2] mm: shmem: disallow hugepages if the system-wide shmem THP sysfs settings are disabled Date: Thu, 29 May 2025 16:23:55 +0800 Message-ID: X-Mailer: git-send-email 2.43.5 In-Reply-To: References: Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" The MADV_COLLAPSE will ignore the system-wide shmem THP sysfs settings, which means that even though we have disabled the shmem THP configuration, MADV_COLLAPSE will still attempt to collapse into a shmem THP. This violates the rule we have agreed upon: never means never. With this change, the strategy is: For shmem, if none of always, madvise, within_size, and inherit have enabled PMD-sized mTHP, then MADV_COLLAPSE will be prohibited from collapsing PMD-sized mTHP. For tmpfs, if the mount option is set with the 'huge=3Dnever' parameter, then MADV_COLLAPSE will be prohibited from collapsing PMD-sized mTHP. Signed-off-by: Baolin Wang Acked-by: Zi Yan --- mm/huge_memory.c | 2 +- mm/shmem.c | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index d3e66136e41a..a8cfa37cae72 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -166,7 +166,7 @@ unsigned long __thp_vma_allowable_orders(struct vm_area= _struct *vma, * own flags. 
*/ if (!in_pf && shmem_file(vma->vm_file)) - return shmem_allowable_huge_orders(file_inode(vma->vm_file), + return orders & shmem_allowable_huge_orders(file_inode(vma->vm_file), vma, vma->vm_pgoff, 0, !enforce_sysfs); =20 diff --git a/mm/shmem.c b/mm/shmem.c index 4b42419ce6b2..4dbb28d85cd9 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -613,7 +613,7 @@ static unsigned int shmem_get_orders_within_size(struct= inode *inode, } =20 static unsigned int shmem_huge_global_enabled(struct inode *inode, pgoff_t= index, - loff_t write_end, bool shmem_huge_force, + loff_t write_end, struct vm_area_struct *vma, unsigned long vm_flags) { @@ -625,7 +625,7 @@ static unsigned int shmem_huge_global_enabled(struct in= ode *inode, pgoff_t index return 0; if (shmem_huge =3D=3D SHMEM_HUGE_DENY) return 0; - if (shmem_huge_force || shmem_huge =3D=3D SHMEM_HUGE_FORCE) + if (shmem_huge =3D=3D SHMEM_HUGE_FORCE) return maybe_pmd_order; =20 /* @@ -860,7 +860,7 @@ static unsigned long shmem_unused_huge_shrink(struct sh= mem_sb_info *sbinfo, } =20 static unsigned int shmem_huge_global_enabled(struct inode *inode, pgoff_t= index, - loff_t write_end, bool shmem_huge_force, + loff_t write_end, struct vm_area_struct *vma, unsigned long vm_flags) { @@ -1261,7 +1261,7 @@ static int shmem_getattr(struct mnt_idmap *idmap, STATX_ATTR_NODUMP); generic_fillattr(idmap, request_mask, inode, stat); =20 - if (shmem_huge_global_enabled(inode, 0, 0, false, NULL, 0)) + if (shmem_huge_global_enabled(inode, 0, 0, NULL, 0)) stat->blksize =3D HPAGE_PMD_SIZE; =20 if (request_mask & STATX_BTIME) { @@ -1768,7 +1768,7 @@ unsigned long shmem_allowable_huge_orders(struct inod= e *inode, return 0; =20 global_orders =3D shmem_huge_global_enabled(inode, index, write_end, - shmem_huge_force, vma, vm_flags); + vma, vm_flags); /* Tmpfs huge pages allocation */ if (!vma || !vma_is_anon_shmem(vma)) return global_orders; @@ -1790,7 +1790,7 @@ unsigned long shmem_allowable_huge_orders(struct inod= e *inode, /* Allow mTHP that will be 
fully within i_size. */ mask |=3D shmem_get_orders_within_size(inode, within_size_orders, index, = 0); =20 - if (vm_flags & VM_HUGEPAGE) + if (shmem_huge_force || (vm_flags & VM_HUGEPAGE)) mask |=3D READ_ONCE(huge_shmem_orders_madvise); =20 if (global_orders > 0) --=20 2.43.5