From nobody Sat Feb 7 13:41:11 2026 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id A14FC230BF8 for ; Mon, 10 Mar 2025 17:23:25 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1741627405; cv=none; b=R9AUVl7jNb7d9xcllD7KgfF0S3sFj/RSkpG94CZ+1r/PHpi9+qzSuC4/Bwd7j1FIxQ8tw+DLpTKP7Mk/FbjqI7tpxAS5U4LJVTJnjozBlpLjcR9/kBaGE1Im26LjaEnpvxBGlir7q2oW+7pocIpzrwEZrteasYWHJyuJFdwf8hw= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1741627405; c=relaxed/simple; bh=aKpudoyGMo+z+VXYP9y3gu4MvjeTt1j3Cq/sd3oN+kg=; h=From:To:Cc:Subject:Date:Message-Id:In-Reply-To:References: MIME-Version; b=Jgopt+3IvAu0n9Nxtbr/vb+5c89VqzmS+rgDwd5UGIqljuoq3LoyE5qWmhx5+kALz0eheoseOWCjVVMCeVQBr6cxyDcrSQ7dYQgvjBOiyZrWZ4nTvmOcqfyvR3GL/gyuNBrivbGk+mXmb0OuavntNQw+qAPgVXeuFdGlyPNslp0= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=e1BK93rR; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="e1BK93rR" Received: by smtp.kernel.org (Postfix) with ESMTPSA id EF349C4CEE5; Mon, 10 Mar 2025 17:23:24 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1741627405; bh=aKpudoyGMo+z+VXYP9y3gu4MvjeTt1j3Cq/sd3oN+kg=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=e1BK93rRkUd3fN3j1E7lnnyaxNRQGTW5BEIjfLCMLxhVvoBz+4jSN85j3VHnm26J6 3ZruHED9gH/KJIR2LwXrwm83oumL2S0dnHxf56U2+HlbZZlVPURpjRX5+KJ0dzTjAY WVGyOAvhEvEh2nwRjdApfUatSmV5bdVGpbVs0cfxEwEuf6SyXlKjjW3gtiKAByCuy7 
coXHx5hVZzpuPzyjBbvt+Mgxj6vZqqm30Kxl8DKnklVs7qfC/xDvmHJ6KzSBHe0x4z vtTHOMLgSkugJ/9YYnTS2e0AY38HDjWOgp4TvS5tfLyt9gE/ruVGDyrKVtAbKFX7RO GkthFUtl2sOFQ== From: SeongJae Park To: Andrew Morton Cc: SeongJae Park , "Liam R. Howlett" , David Hildenbrand , Lorenzo Stoakes , Shakeel Butt , Vlastimil Babka , kernel-team@meta.com, linux-kernel@vger.kernel.org, linux-mm@kvack.org Subject: [PATCH 1/9] mm/madvise: use is_memory_failure() from madvise_do_behavior() Date: Mon, 10 Mar 2025 10:23:10 -0700 Message-Id: <20250310172318.653630-2-sj@kernel.org> X-Mailer: git-send-email 2.39.5 In-Reply-To: <20250310172318.653630-1-sj@kernel.org> References: <20250310172318.653630-1-sj@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" To reduce redundant open-coded checks of CONFIG_MEMORY_FAILURE and MADV_{HWPOISON,SOFT_OFFLINE} in madvise_[un]lock(), is_memory_failure() has been introduced. madvise_do_behavior() is still doing the same open-coded check, though. Use is_memory_failure() instead. To avoid build failure on !CONFIG_MEMORY_FAILURE case, implement an empty madvise_inject_error() under the config. Also move the definition of is_memory_failure() inside #ifdef CONFIG_MEMORY_FAILURE clause for madvise_inject_error() definition, to reduce duplicated ifdef clauses. 
Signed-off-by: SeongJae Park Reviewed-by: Lorenzo Stoakes --- mm/madvise.c | 49 +++++++++++++++++++++++++++---------------------- 1 file changed, 27 insertions(+), 22 deletions(-) diff --git a/mm/madvise.c b/mm/madvise.c index 388dc289b5d1..c3ab1f283b18 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -1392,7 +1392,32 @@ static int madvise_inject_error(int behavior, =20 return 0; } -#endif + +static bool is_memory_failure(int behavior) +{ + switch (behavior) { + case MADV_HWPOISON: + case MADV_SOFT_OFFLINE: + return true; + default: + return false; + } +} + +#else + +static int madvise_inject_error(int behavior, + unsigned long start, unsigned long end) +{ + return 0; +} + +static bool is_memory_failure(int behavior) +{ + return false; +} + +#endif /* CONFIG_MEMORY_FAILURE */ =20 static bool madvise_behavior_valid(int behavior) @@ -1569,24 +1594,6 @@ int madvise_set_anon_name(struct mm_struct *mm, unsi= gned long start, } #endif /* CONFIG_ANON_VMA_NAME */ =20 -#ifdef CONFIG_MEMORY_FAILURE -static bool is_memory_failure(int behavior) -{ - switch (behavior) { - case MADV_HWPOISON: - case MADV_SOFT_OFFLINE: - return true; - default: - return false; - } -} -#else -static bool is_memory_failure(int behavior) -{ - return false; -} -#endif - static int madvise_lock(struct mm_struct *mm, int behavior) { if (is_memory_failure(behavior)) @@ -1640,10 +1647,8 @@ static int madvise_do_behavior(struct mm_struct *mm, unsigned long end; int error; =20 -#ifdef CONFIG_MEMORY_FAILURE - if (behavior =3D=3D MADV_HWPOISON || behavior =3D=3D MADV_SOFT_OFFLINE) + if (is_memory_failure(behavior)) return madvise_inject_error(behavior, start, start + len_in); -#endif start =3D untagged_addr_remote(mm, start); end =3D start + len; =20 --=20 2.39.5 From nobody Sat Feb 7 13:41:11 2026 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by 
smtp.subspace.kernel.org (Postfix) with ESMTPS id 69051232363 for ; Mon, 10 Mar 2025 17:23:26 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1741627406; cv=none; b=kM2JzJAU6WLi7qozuoWE0xztSedKP9KcyV2DBHOr6/gBgSMpZjygV/C1BtdqVCmoTvi8dhZxxR96NdP1AsXzSzhpz/X8EpM750K9gvkDjP7qGeFTICPiRRceineMEMy+wuVxswFmpSKzdnQ8Ic1twmoWVjywTc4KkBHIJEZIrVA= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1741627406; c=relaxed/simple; bh=M9/wAyNFr4MzyO45yCxu/y9IRXgOWJVU4DhwCYnIYO8=; h=From:To:Cc:Subject:Date:Message-Id:In-Reply-To:References: MIME-Version; b=jkknpryWo0jP4p0S29Xo+bbCYjC+cQhYXmps/gbRFodIE8JxoeWGLmyAtIO4a9EwEuIOJMFzzZjEonWJldYpJ7WdRSKOg3jZHla/0c3jy2sdFTfH+q6JdUgcXLdXXykBot4uRwZoA/S157ZkcF9wphIpsRJ0elCYJIAO5076D+k= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=QLodKnMG; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="QLodKnMG" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 15460C4CEE5; Mon, 10 Mar 2025 17:23:26 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1741627406; bh=M9/wAyNFr4MzyO45yCxu/y9IRXgOWJVU4DhwCYnIYO8=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=QLodKnMGunZymOsXhiHwJL3CZN56ph7GyHUBQSnqvbfDZM4FdddyE6UwjyywHRHS1 cQupnW2BYiokTpHo7gTlm1+TlGjayuwf2a+xu80NDLVwzT7QoRMat5a/pBCcLPviUG I2u/9J8exOAV+qOehvNS3kdd/WwNK1e2uTbFksC+1ZA2Dd+i1z2axiZf4RF4O72SYs rRuDT2HOdcp+kQiTqlDhnnFIhMVXzyLGCSCcIu6/KGDv+TdsZi4dur6hETHZOeiWVw q0K7MtgfIqjtRTafPny8uDtlEfc/dSMYN9Pd++aK1UcAwdsXxrPKtRSMlm5C0KuB18 Lwmc/eOHDhANA== From: SeongJae Park To: Andrew Morton Cc: SeongJae Park , "Liam R. 
Howlett" , David Hildenbrand , Lorenzo Stoakes , Shakeel Butt , Vlastimil Babka , kernel-team@meta.com, linux-kernel@vger.kernel.org, linux-mm@kvack.org Subject: [PATCH 2/9] mm/madvise: split out populate behavior check logic Date: Mon, 10 Mar 2025 10:23:11 -0700 Message-Id: <20250310172318.653630-3-sj@kernel.org> X-Mailer: git-send-email 2.39.5 In-Reply-To: <20250310172318.653630-1-sj@kernel.org> References: <20250310172318.653630-1-sj@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" madvise_do_behavior() has a long open-coded 'behavior' check for MADV_POPULATE_{READ,WRITE}. It adds multiple layers[1] and makes the code arguably take longer to read. Like is_memory_failure(), split out the check to a separate function. This is not technically removing the additional layer but discourages further extending the switch-case. Also it makes madvise_do_behavior() code shorter and therefore easier to read. 
[1] https://lore.kernel.org/bd6d0bf1-c79e-46bd-a810-9791efb9ad73@lucifer.lo= cal Signed-off-by: SeongJae Park Reviewed-by: Lorenzo Stoakes --- mm/madvise.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/mm/madvise.c b/mm/madvise.c index c3ab1f283b18..611db868ae38 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -1640,6 +1640,17 @@ static bool is_valid_madvise(unsigned long start, si= ze_t len_in, int behavior) return true; } =20 +static bool is_madvise_populate(int behavior) +{ + switch (behavior) { + case MADV_POPULATE_READ: + case MADV_POPULATE_WRITE: + return true; + default: + return false; + } +} + static int madvise_do_behavior(struct mm_struct *mm, unsigned long start, size_t len_in, size_t len, int behavior) { @@ -1653,16 +1664,11 @@ static int madvise_do_behavior(struct mm_struct *mm, end =3D start + len; =20 blk_start_plug(&plug); - switch (behavior) { - case MADV_POPULATE_READ: - case MADV_POPULATE_WRITE: + if (is_madvise_populate(behavior)) error =3D madvise_populate(mm, start, end, behavior); - break; - default: + else error =3D madvise_walk_vmas(mm, start, end, behavior, madvise_vma_behavior); - break; - } blk_finish_plug(&plug); return error; } --=20 2.39.5 From nobody Sat Feb 7 13:41:11 2026 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id D6F53231CB0 for ; Mon, 10 Mar 2025 17:23:27 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1741627407; cv=none; b=rPhKIRDEogVoZlXfWXODCzYm2yN+zdUGDJT0NSvRjYAy4MStWjwDeglPRZumbd702ody9z+kCuMl1KGhUPkKqP6adJSeBdGKOkjrPacwX/EmYhJHb5MtCQzUEqaUFOoXZX3rFaUgynUMwmQIJjPYrZldpuoo/sJmFGwVaNwx5Y4= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; 
s=arc-20240116; t=1741627407; c=relaxed/simple; bh=wUeeTxoF6/W5BNmB2wyQPZvoKyORH7hO5pM78LtrSAM=; h=From:To:Cc:Subject:Date:Message-Id:In-Reply-To:References: MIME-Version; b=C6suPgmxVUUJpGa+mfbkKlMIeyNbbWpwS9G7IGIqLDBPjMXZMCUG0ct7c/CByqfhA9el9aKjVdsQ6yRchj12CGu0+ddw7jwkutlCSKJetpXUTWAoFg7vYx2hiE3HM9mY8Du+D8rsBZK1atPmh2cq7wzi7lEpRomSx+Ek5DrWFZI= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=cfK2CND5; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="cfK2CND5" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 3427AC4CEEC; Mon, 10 Mar 2025 17:23:27 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1741627407; bh=wUeeTxoF6/W5BNmB2wyQPZvoKyORH7hO5pM78LtrSAM=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=cfK2CND5xWYz9nn8nUacYGS/fBI3F77IP5i/AnejOywNLUUScO7lFdcvtIqseomMj EeJnrxKN9JefLc0Rx5KoOLV0nimE3wn8gMM9f93f+CvKNBenCp2BXokS8fjdiwdmf3 R0qA0M9s5Fe9S+kBRKu5nS7Pzh4vE4wTRWzZe8RA28TE9ujg9vagLeypltyUywXpui DWQjz+kPfXe/Ghjo7OpB2glKV/S/A72l3FZVrrNFJR0QOlQKQAjp7Tz2V6EgPm4ql9 yASG4MgVpytpi5cvrfDIYPvKpJ55rstceXLqloJMGS4JTTgxOFrEeZzirMoDK+wrpc pvGTGCY2URD0g== From: SeongJae Park To: Andrew Morton Cc: SeongJae Park , "Liam R. 
Howlett" , David Hildenbrand , Lorenzo Stoakes , Shakeel Butt , Vlastimil Babka , kernel-team@meta.com, linux-kernel@vger.kernel.org, linux-mm@kvack.org Subject: [PATCH 3/9] mm/madvise: deduplicate madvise_do_behavior() skip case handlings Date: Mon, 10 Mar 2025 10:23:12 -0700 Message-Id: <20250310172318.653630-4-sj@kernel.org> X-Mailer: git-send-email 2.39.5 In-Reply-To: <20250310172318.653630-1-sj@kernel.org> References: <20250310172318.653630-1-sj@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" The logic for checking if a given madvise() request for a single memory range can skip real work, namely madvise_do_behavior(), is duplicated in do_madvise() and vector_madvise(). Split out the logic to a function and reuse it. Signed-off-by: SeongJae Park Reviewed-by: Lorenzo Stoakes --- mm/madvise.c | 53 +++++++++++++++++++++++++++++----------------------- 1 file changed, 30 insertions(+), 23 deletions(-) diff --git a/mm/madvise.c b/mm/madvise.c index 611db868ae38..764ec1f2475b 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -1640,6 +1640,27 @@ static bool is_valid_madvise(unsigned long start, si= ze_t len_in, int behavior) return true; } =20 +/* + * madvise_should_skip() - Return if an madivse request can skip real work= s. + * @start: Start address of madvise-requested address range. + * @len_in: Length of madvise-requested address range. + * @behavior: Requested madvise behavor. + * @err: Pointer to store an error code from the check. 
+ */ +static bool madvise_should_skip(unsigned long start, size_t len_in, + int behavior, int *err) +{ + if (!is_valid_madvise(start, len_in, behavior)) { + *err =3D -EINVAL; + return true; + } + if (start + PAGE_ALIGN(len_in) =3D=3D start) { + *err =3D 0; + return true; + } + return false; +} + static bool is_madvise_populate(int behavior) { switch (behavior) { @@ -1747,23 +1768,15 @@ static int madvise_do_behavior(struct mm_struct *mm, */ int do_madvise(struct mm_struct *mm, unsigned long start, size_t len_in, i= nt behavior) { - unsigned long end; int error; - size_t len; - - if (!is_valid_madvise(start, len_in, behavior)) - return -EINVAL; - - len =3D PAGE_ALIGN(len_in); - end =3D start + len; - - if (end =3D=3D start) - return 0; =20 + if (madvise_should_skip(start, len_in, behavior, &error)) + return error; error =3D madvise_lock(mm, behavior); if (error) return error; - error =3D madvise_do_behavior(mm, start, len_in, len, behavior); + error =3D madvise_do_behavior(mm, start, len_in, PAGE_ALIGN(len_in), + behavior); madvise_unlock(mm, behavior); =20 return error; @@ -1790,19 +1803,13 @@ static ssize_t vector_madvise(struct mm_struct *mm,= struct iov_iter *iter, while (iov_iter_count(iter)) { unsigned long start =3D (unsigned long)iter_iov_addr(iter); size_t len_in =3D iter_iov_len(iter); - size_t len; - - if (!is_valid_madvise(start, len_in, behavior)) { - ret =3D -EINVAL; - break; - } + int error; =20 - len =3D PAGE_ALIGN(len_in); - if (start + len =3D=3D start) - ret =3D 0; + if (madvise_should_skip(start, len_in, behavior, &error)) + ret =3D error; else - ret =3D madvise_do_behavior(mm, start, len_in, len, - behavior); + ret =3D madvise_do_behavior(mm, start, len_in, + PAGE_ALIGN(len_in), behavior); /* * An madvise operation is attempting to restart the syscall, * but we cannot proceed as it would not be correct to repeat --=20 2.39.5 From nobody Sat Feb 7 13:41:11 2026 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org 
[10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 2A1192343C9 for ; Mon, 10 Mar 2025 17:23:28 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1741627409; cv=none; b=O2hjtiSDzkl0CZIvAsoNdEBoz+HGTd4F6qC3aGFfv7/ui4bf8N5h1kch1xh1zXeRA/jr5OPd5wX07t1VK5+ynsxNthJ6qAByBTXS6t2Pc68J9YzFgWZ1/jfzkJLaYEhssomzp70/M/twryCya2qvrZIbGgNlIMq/xymIiE1NYig= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1741627409; c=relaxed/simple; bh=Z3sSqaG1uhkCQqbFP7NKK185LV7FFPBVBGM4l1BKW9I=; h=From:To:Cc:Subject:Date:Message-Id:In-Reply-To:References: MIME-Version; b=iqgtaKHuSGpsvR+3esnN5O3u33YqFActA1xJdsuUE4RqIQ9Z8j16ihlFNif2n7idpMN2dAtWaKFa5wO83oWgcsP5EGSCTs7efl7z+BZCi8TwDP/EVx7sPQPTIb6ARKll+/dkjt7DRh1x8jB+hWd/cToPg2lB96vj8cAkD6jfIV0= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=GElwyusP; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="GElwyusP" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 6388BC4CEEC; Mon, 10 Mar 2025 17:23:28 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1741627408; bh=Z3sSqaG1uhkCQqbFP7NKK185LV7FFPBVBGM4l1BKW9I=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=GElwyusP/WtY+XI/onO9LKP/4XGLXdBGbtjzZdwAGOE+F2xfTt6R/VDsNLdlx+0oZ BBKfLvM7iAu/KTC55Szh02Mfi4ajdSBhMaMRPxk7Dju5LCrOFP4yvqRnEraUQNijrT 85tjmyujh2l7Yj4mEk5zlXDgXBw3pseUyDEbTrDQ7NCSP0ohbe/IpYetUZD5quIBFk WRytS3AOLmgHHaThSSqghYVqfm03VCfkdjN9OGQZ0M63Ipvt6NLtyTfhQNsTGzenTU eASBFEQCg+i6eFPetX/VEhwS5Qx/W4Quts1/SEA7O6vOaH/e+/YIqOC6CeMogn+Gmz 0+QB8QKk5/H8Q== From: SeongJae Park To: 
Andrew Morton Cc: SeongJae Park , "Liam R. Howlett" , David Hildenbrand , Lorenzo Stoakes , Shakeel Butt , Vlastimil Babka , kernel-team@meta.com, linux-kernel@vger.kernel.org, linux-mm@kvack.org Subject: [PATCH 4/9] mm/madvise: remove len parameter of madvise_do_behavior() Date: Mon, 10 Mar 2025 10:23:13 -0700 Message-Id: <20250310172318.653630-5-sj@kernel.org> X-Mailer: git-send-email 2.39.5 In-Reply-To: <20250310172318.653630-1-sj@kernel.org> References: <20250310172318.653630-1-sj@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Because madvise_should_skip() logic is factored out, making madvise_do_behavior() calculate 'len' on its own rather than receiving it as a parameter makes the code simpler. Remove the parameter. Signed-off-by: SeongJae Park Reviewed-by: Lorenzo Stoakes --- mm/madvise.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/mm/madvise.c b/mm/madvise.c index 764ec1f2475b..469c25690a0e 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -1673,7 +1673,7 @@ static bool is_madvise_populate(int behavior) } =20 static int madvise_do_behavior(struct mm_struct *mm, - unsigned long start, size_t len_in, size_t len, int behavior) + unsigned long start, size_t len_in, int behavior) { struct blk_plug plug; unsigned long end; @@ -1682,7 +1682,7 @@ static int madvise_do_behavior(struct mm_struct *mm, if (is_memory_failure(behavior)) return madvise_inject_error(behavior, start, start + len_in); start =3D untagged_addr_remote(mm, start); - end =3D start + len; + end =3D start + PAGE_ALIGN(len_in); =20 blk_start_plug(&plug); if (is_madvise_populate(behavior)) @@ -1775,8 +1775,7 @@ int do_madvise(struct mm_struct *mm, unsigned long st= art, size_t len_in, int beh error =3D madvise_lock(mm, behavior); if (error) return error; - error =3D madvise_do_behavior(mm, start, 
len_in, PAGE_ALIGN(len_in), - behavior); + error =3D madvise_do_behavior(mm, start, len_in, behavior); madvise_unlock(mm, behavior); =20 return error; @@ -1808,8 +1807,7 @@ static ssize_t vector_madvise(struct mm_struct *mm, s= truct iov_iter *iter, if (madvise_should_skip(start, len_in, behavior, &error)) ret =3D error; else - ret =3D madvise_do_behavior(mm, start, len_in, - PAGE_ALIGN(len_in), behavior); + ret =3D madvise_do_behavior(mm, start, len_in, behavior); /* * An madvise operation is attempting to restart the syscall, * but we cannot proceed as it would not be correct to repeat --=20 2.39.5 From nobody Sat Feb 7 13:41:11 2026 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id C042723534F for ; Mon, 10 Mar 2025 17:23:29 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1741627409; cv=none; b=NeWADjDKx9Dmxt6jBX1Hcjd5o+Kk53ecpx41o89QE485tgTf9ifxLkqRbapWD8HPBr+BypW3Uac2AQLdqt3L8glC/5TMygUJRQpGOgZiX1s/OSo4bHEtHMKTJOCVZH61Va1M21rIQVYHwzyP6ZNpCUJLDee87inksMU94/NdWL0= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1741627409; c=relaxed/simple; bh=4dhdwBK3uXi/DICcheMKZstN40gWxuAC0U5kXen1d3Q=; h=From:To:Cc:Subject:Date:Message-Id:In-Reply-To:References: MIME-Version; b=raBedTbWsx7mvusBbxyyzD7neGGiLTLiAeGrCI+pRhMTnioVi+2CS4vlHw4QLMsCfnE5eK/BNLpGVcZOZfORk/n87lar5eona2gn1Fi0UQUZbautMnOe8sHW8pry2yqDR3KrHVol/YfhkFFyXle5Dyr87EL4RcCo/IU78vvEB40= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=hQaulE8A; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org 
header.i=@kernel.org header.b="hQaulE8A" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 77A85C4CEF7; Mon, 10 Mar 2025 17:23:29 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1741627409; bh=4dhdwBK3uXi/DICcheMKZstN40gWxuAC0U5kXen1d3Q=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=hQaulE8A/QMb9rjKSApl6fMLGmwITsJNjmDE/wylUwceFMbDOjyBzaCG+ymEcYxtB yac+cSxxdxCG0sffnMnnapqPgVDofYfA7hCavPyXj62MObV73yNUVRcBVPpbbJ+pXp Ao1+ptYPErt93XP/gPcnFn4ayqyp1rMQRPBxYKY2tUtmV9FZGignk+VK9guNz7H9B2 Db0uGgEW7OgM/N+Dwnj8hWTSmB634uk7VEeM0J0r06rbD/IUwDR8mmoXV/y82Sgtj+ lO1ipuZJ8GoK4eZQJxxAMeDsw78WXXsY330Uy2c35Zl4OjMo9XCFhWMFlEFss4ZzH7 QeXzSHEmPnwyg== From: SeongJae Park To: Andrew Morton Cc: SeongJae Park , "Liam R. Howlett" , David Hildenbrand , Lorenzo Stoakes , Shakeel Butt , Vlastimil Babka , kernel-team@meta.com, linux-kernel@vger.kernel.org, linux-mm@kvack.org Subject: [PATCH 5/9] mm/madvise: define and use madvise_behavior struct for madvise_do_behavior() Date: Mon, 10 Mar 2025 10:23:14 -0700 Message-Id: <20250310172318.653630-6-sj@kernel.org> X-Mailer: git-send-email 2.39.5 In-Reply-To: <20250310172318.653630-1-sj@kernel.org> References: <20250310172318.653630-1-sj@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" To implement batched tlb flushes for MADV_DONTNEED[_LOCKED] and MADV_FREE, an mmu_gather object in addition to the behavior integer needs to be passed to the internal logic. Using a struct can make it easy without increasing the number of parameters of all code paths towards the internal logic. Define a struct for the purpose and use it on the code path that starts from madvise_do_behavior() and ends on madvise_dontneed_free(). 
Signed-off-by: SeongJae Park Reviewed-by: Lorenzo Stoakes --- mm/madvise.c | 36 ++++++++++++++++++++++++------------ 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/mm/madvise.c b/mm/madvise.c index 469c25690a0e..ba2a78795207 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -890,11 +890,16 @@ static bool madvise_dontneed_free_valid_vma(struct vm= _area_struct *vma, return true; } =20 +struct madvise_behavior { + int behavior; +}; + static long madvise_dontneed_free(struct vm_area_struct *vma, struct vm_area_struct **prev, unsigned long start, unsigned long end, - int behavior) + struct madvise_behavior *madv_behavior) { + int behavior =3D madv_behavior->behavior; struct mm_struct *mm =3D vma->vm_mm; =20 *prev =3D vma; @@ -1249,8 +1254,10 @@ static long madvise_guard_remove(struct vm_area_stru= ct *vma, static int madvise_vma_behavior(struct vm_area_struct *vma, struct vm_area_struct **prev, unsigned long start, unsigned long end, - unsigned long behavior) + void *behavior_arg) { + struct madvise_behavior *arg =3D behavior_arg; + int behavior =3D arg->behavior; int error; struct anon_vma_name *anon_name; unsigned long new_flags =3D vma->vm_flags; @@ -1270,7 +1277,7 @@ static int madvise_vma_behavior(struct vm_area_struct= *vma, case MADV_FREE: case MADV_DONTNEED: case MADV_DONTNEED_LOCKED: - return madvise_dontneed_free(vma, prev, start, end, behavior); + return madvise_dontneed_free(vma, prev, start, end, arg); case MADV_NORMAL: new_flags =3D new_flags & ~VM_RAND_READ & ~VM_SEQ_READ; break; @@ -1487,10 +1494,10 @@ static bool process_madvise_remote_valid(int behavi= or) */ static int madvise_walk_vmas(struct mm_struct *mm, unsigned long start, - unsigned long end, unsigned long arg, + unsigned long end, void *arg, int (*visit)(struct vm_area_struct *vma, struct vm_area_struct **prev, unsigned long start, - unsigned long end, unsigned long arg)) + unsigned long end, void *arg)) { struct vm_area_struct *vma; struct vm_area_struct *prev; @@ -1548,7 
+1555,7 @@ int madvise_walk_vmas(struct mm_struct *mm, unsigned = long start, static int madvise_vma_anon_name(struct vm_area_struct *vma, struct vm_area_struct **prev, unsigned long start, unsigned long end, - unsigned long anon_name) + void *anon_name) { int error; =20 @@ -1557,7 +1564,7 @@ static int madvise_vma_anon_name(struct vm_area_struc= t *vma, return -EBADF; =20 error =3D madvise_update_vma(vma, prev, start, end, vma->vm_flags, - (struct anon_vma_name *)anon_name); + anon_name); =20 /* * madvise() returns EAGAIN if kernel resources, such as @@ -1589,7 +1596,7 @@ int madvise_set_anon_name(struct mm_struct *mm, unsig= ned long start, if (end =3D=3D start) return 0; =20 - return madvise_walk_vmas(mm, start, end, (unsigned long)anon_name, + return madvise_walk_vmas(mm, start, end, anon_name, madvise_vma_anon_name); } #endif /* CONFIG_ANON_VMA_NAME */ @@ -1673,8 +1680,10 @@ static bool is_madvise_populate(int behavior) } =20 static int madvise_do_behavior(struct mm_struct *mm, - unsigned long start, size_t len_in, int behavior) + unsigned long start, size_t len_in, + struct madvise_behavior *madv_behavior) { + int behavior =3D madv_behavior->behavior; struct blk_plug plug; unsigned long end; int error; @@ -1688,7 +1697,7 @@ static int madvise_do_behavior(struct mm_struct *mm, if (is_madvise_populate(behavior)) error =3D madvise_populate(mm, start, end, behavior); else - error =3D madvise_walk_vmas(mm, start, end, behavior, + error =3D madvise_walk_vmas(mm, start, end, madv_behavior, madvise_vma_behavior); blk_finish_plug(&plug); return error; @@ -1769,13 +1778,14 @@ static int madvise_do_behavior(struct mm_struct *mm, int do_madvise(struct mm_struct *mm, unsigned long start, size_t len_in, i= nt behavior) { int error; + struct madvise_behavior madv_behavior =3D {.behavior =3D behavior}; =20 if (madvise_should_skip(start, len_in, behavior, &error)) return error; error =3D madvise_lock(mm, behavior); if (error) return error; - error =3D madvise_do_behavior(mm, 
start, len_in, behavior); + error =3D madvise_do_behavior(mm, start, len_in, &madv_behavior); madvise_unlock(mm, behavior); =20 return error; @@ -1792,6 +1802,7 @@ static ssize_t vector_madvise(struct mm_struct *mm, s= truct iov_iter *iter, { ssize_t ret =3D 0; size_t total_len; + struct madvise_behavior madv_behavior =3D {.behavior =3D behavior}; =20 total_len =3D iov_iter_count(iter); =20 @@ -1807,7 +1818,8 @@ static ssize_t vector_madvise(struct mm_struct *mm, s= truct iov_iter *iter, if (madvise_should_skip(start, len_in, behavior, &error)) ret =3D error; else - ret =3D madvise_do_behavior(mm, start, len_in, behavior); + ret =3D madvise_do_behavior(mm, start, len_in, + &madv_behavior); /* * An madvise operation is attempting to restart the syscall, * but we cannot proceed as it would not be correct to repeat --=20 2.39.5 From nobody Sat Feb 7 13:41:11 2026 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id D33A52356CA for ; Mon, 10 Mar 2025 17:23:30 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1741627410; cv=none; b=AvpwlDSeJnh0dxj+0RMv3MJFfa6VJOz0PIzA13AdqD9pGeeTDhzg6bcxTU0zuW4P7LvZ1ERJ+zuiAX+mNybIAIM1i8trKdMdVPWG2m/OVLGxsFhgslVr8YPtJLrGVcG5Xi3H13zZPzxLn7nkBAK2J0uyodTOHJJEsDKV/vvNDWw= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1741627410; c=relaxed/simple; bh=zqoCyf4FJzGNezZ0ikot3jkjvp+Q80Kl3ZyQNT+Fx4A=; h=From:To:Cc:Subject:Date:Message-Id:In-Reply-To:References: MIME-Version; b=SF9o4la9aUsAUTi+T6okXEXFKD9QRChnDa5cc7v5b32OSQeb/jEtj5+Pwg3IiEEarSnhrH0mpwACFMqK53ctFqS1/YqQDUP/qYeMIcyP41XQipxuaj5Vk8MyF7z+IBxDBAhWUXOEFYG/7G+Vqu90vJ30I/SRT5BJp3SvAOrRXbM= ARC-Authentication-Results: i=1; 
smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=hYzz/5cX; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="hYzz/5cX" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 8AA1FC4CEF5; Mon, 10 Mar 2025 17:23:30 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1741627410; bh=zqoCyf4FJzGNezZ0ikot3jkjvp+Q80Kl3ZyQNT+Fx4A=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=hYzz/5cXRxoUPQlKUgpos/cWLuZ2KWn45gHyU3U9M75uGnPaM5UcQwf/511qy1Tp2 glCushNsOICZVKgFqNF987iZhCb+LBCWfScQQSy2z8vFAQqxetIkwYZsaB6A7BDeqM tUv73/Id2lgNh4yQOttjQdClVnd7xQMbsU5nybe6xig4aN7Lf/bXIYsmv2PxyGBXfh VLAnoMfZ/TE7CD0N3y+FbIZ+Cx8wkKXwlIWOKgmHD+Jj6vJ35J55zMYCNqUIcHqfZ6 jUqkG1jkGhq0i23slle4gJZMbR5KcqoAVdll1VNfk8SQZre7lOfG2QaUJnROV1GTI6 QX/2LPUg6599g== From: SeongJae Park To: Andrew Morton Cc: SeongJae Park , "Liam R. Howlett" , David Hildenbrand , Lorenzo Stoakes , Shakeel Butt , Vlastimil Babka , kernel-team@meta.com, linux-kernel@vger.kernel.org, linux-mm@kvack.org Subject: [PATCH 6/9] mm/memory: split non-tlb flushing part from zap_page_range_single() Date: Mon, 10 Mar 2025 10:23:15 -0700 Message-Id: <20250310172318.653630-7-sj@kernel.org> X-Mailer: git-send-email 2.39.5 In-Reply-To: <20250310172318.653630-1-sj@kernel.org> References: <20250310172318.653630-1-sj@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Some of zap_page_range_single() callers such as [process_]madvise() with MADV_DONTNEED[_LOCKED] cannot batch tlb flushes because zap_page_range_single() does tlb flushing for each invocation. 
Split out the body of zap_page_range_single() except mmu_gather object initialization and gathered tlb entries flushing parts for such batched tlb flushing usage. Signed-off-by: SeongJae Park --- mm/memory.c | 36 ++++++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index 78c7ee62795e..88c478e2ed1a 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1995,38 +1995,46 @@ void unmap_vmas(struct mmu_gather *tlb, struct ma_s= tate *mas, mmu_notifier_invalidate_range_end(&range); } =20 -/** - * zap_page_range_single - remove user pages in a given range - * @vma: vm_area_struct holding the applicable pages - * @address: starting address of pages to zap - * @size: number of bytes to zap - * @details: details of shared cache invalidation - * - * The range must fit into one VMA. - */ -void zap_page_range_single(struct vm_area_struct *vma, unsigned long addre= ss, +static void unmap_vma_single(struct mmu_gather *tlb, + struct vm_area_struct *vma, unsigned long address, unsigned long size, struct zap_details *details) { const unsigned long end =3D address + size; struct mmu_notifier_range range; - struct mmu_gather tlb; =20 mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma->vm_mm, address, end); hugetlb_zap_begin(vma, &range.start, &range.end); - tlb_gather_mmu(&tlb, vma->vm_mm); update_hiwater_rss(vma->vm_mm); mmu_notifier_invalidate_range_start(&range); /* * unmap 'address-end' not 'range.start-range.end' as range * could have been expanded for hugetlb pmd sharing. 
*/ - unmap_single_vma(&tlb, vma, address, end, details, false); + unmap_single_vma(tlb, vma, address, end, details, false); mmu_notifier_invalidate_range_end(&range); - tlb_finish_mmu(&tlb); hugetlb_zap_end(vma, details); } =20 +/** + * zap_page_range_single - remove user pages in a given range + * @vma: vm_area_struct holding the applicable pages + * @address: starting address of pages to zap + * @size: number of bytes to zap + * @details: details of shared cache invalidation + * + * The range must fit into one VMA. + */ +void zap_page_range_single(struct vm_area_struct *vma, unsigned long addre= ss, + unsigned long size, struct zap_details *details) +{ + struct mmu_gather tlb; + + tlb_gather_mmu(&tlb, vma->vm_mm); + unmap_vma_single(&tlb, vma, address, size, details); + tlb_finish_mmu(&tlb); +} + /** * zap_vma_ptes - remove ptes mapping the vma * @vma: vm_area_struct holding ptes to be zapped --=20 2.39.5 From nobody Sat Feb 7 13:41:11 2026 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id F223A231A2A for ; Mon, 10 Mar 2025 17:23:31 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1741627412; cv=none; b=NXCCxDCnSHYYvitjIeCt0dr6fSgxDU8Mt+JxcI6/+qB/3WRP6eauVXhpZQt67mymLx0fxVANMsTT15swrYOxFouOf8PYR8/mY5XdLPo3gUBgmeuxyyLmQMCjLna1esxHgoXZdgbkjMZwG8Ck1IA3nGH9UBRsOHPRtpm0ya7Rthg= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1741627412; c=relaxed/simple; bh=RSFUpifN3HuVWoEj944CoZ0VVGpww5B6SbD8iILWaq8=; h=From:To:Cc:Subject:Date:Message-Id:In-Reply-To:References: MIME-Version; 
b=B8QvEnJ2h+dvZsCjPLsNuegJJiGnNx1iMcaex2of296UMmPQbqzP4mkMbirvc9fzjDDnx9BxZ2TFwI4s3pJynUE0ULl7yM5FdD7nIek0Ty4GO/pssj9RwjJy3pdhgzZqBT/LdJ5uV7MqGt8c3TOitG+rkRMiu8fLjLDXaziWvbQ= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=hSx76SP1; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="hSx76SP1" Received: by smtp.kernel.org (Postfix) with ESMTPSA id A5EDEC4CEED; Mon, 10 Mar 2025 17:23:31 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1741627411; bh=RSFUpifN3HuVWoEj944CoZ0VVGpww5B6SbD8iILWaq8=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=hSx76SP1aMWn5lYQ40SpWdThFZAVcwsocl3ap/sjfMRB7710mfqiSRU6pLK6zibY+ /JVK/IYLx1GfYEXDCgblSfJbiMNgX96eIfr3qx8F+pOU+KqVLcvH3sqjmx4o8rUgS2 8xu2CsWKaUtKwmy6yCSvJLoHKNOkTkLlB25SLTWdsjZ4GzC0OaJhZtNXn798S5yRAE HHwcXczG6UftPqSdNetvITTqiLoPeMA3WDlTu4ynL3Wkd7HhCMVxVOvpYEGTmTkSf5 Dwo21kDvLQYj+Y2+Ka8/D3qsv0CE7h3+tuFfOS42mpzYM3XRqlPTHO4EyILJQjMfvr oP5hoQkQjgILA== From: SeongJae Park To: Andrew Morton Cc: SeongJae Park , "Liam R. 
Howlett" , David Hildenbrand , Lorenzo Stoakes , Shakeel Butt , Vlastimil Babka , kernel-team@meta.com, linux-kernel@vger.kernel.org, linux-mm@kvack.org Subject: [PATCH 7/9] mm/madvise: let madvise_{dontneed,free}_single_vma() caller batches tlb flushes Date: Mon, 10 Mar 2025 10:23:16 -0700 Message-Id: <20250310172318.653630-8-sj@kernel.org> X-Mailer: git-send-email 2.39.5 In-Reply-To: <20250310172318.653630-1-sj@kernel.org> References: <20250310172318.653630-1-sj@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Update madvise_dontneed_single_vma() and madvise_free_single_vma() functions so that the caller can pass an mmu_gather object that should be initialized and will be finished outside, for batched tlb flushes. Also modify their internal code to support such usage by skipping the initialization and finishing of the self-allocated mmu_gather object if they received a valid mmu_gather object.
Signed-off-by: SeongJae Park --- mm/internal.h | 3 +++ mm/madvise.c | 37 +++++++++++++++++++++++++------------ mm/memory.c | 16 +++++++++++++--- 3 files changed, 41 insertions(+), 15 deletions(-) diff --git a/mm/internal.h b/mm/internal.h index 0caa64dc2cb7..ce7fb2383f65 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -438,6 +438,9 @@ void unmap_page_range(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long addr, unsigned long end, struct zap_details *details); +void unmap_vma_single(struct mmu_gather *tlb, struct vm_area_struct *vma, + unsigned long addr, unsigned long size, + struct zap_details *details); int folio_unmap_invalidate(struct address_space *mapping, struct folio *fo= lio, gfp_t gfp); =20 diff --git a/mm/madvise.c b/mm/madvise.c index ba2a78795207..d7ea71c6422c 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -794,12 +794,19 @@ static const struct mm_walk_ops madvise_free_walk_ops= =3D { .walk_lock =3D PGWALK_RDLOCK, }; =20 -static int madvise_free_single_vma(struct vm_area_struct *vma, - unsigned long start_addr, unsigned long end_addr) +static int madvise_free_single_vma( + struct mmu_gather *caller_tlb, struct vm_area_struct *vma, + unsigned long start_addr, unsigned long end_addr) { struct mm_struct *mm =3D vma->vm_mm; struct mmu_notifier_range range; - struct mmu_gather tlb; + struct mmu_gather self_tlb; + struct mmu_gather *tlb; + + if (caller_tlb) + tlb =3D caller_tlb; + else + tlb =3D &self_tlb; =20 /* MADV_FREE works for only anon vma at the moment */ if (!vma_is_anonymous(vma)) @@ -815,16 +822,18 @@ static int madvise_free_single_vma(struct vm_area_str= uct *vma, range.start, range.end); =20 lru_add_drain(); - tlb_gather_mmu(&tlb, mm); + if (!caller_tlb) + tlb_gather_mmu(tlb, mm); update_hiwater_rss(mm); =20 mmu_notifier_invalidate_range_start(&range); - tlb_start_vma(&tlb, vma); + tlb_start_vma(tlb, vma); walk_page_range(vma->vm_mm, range.start, range.end, - &madvise_free_walk_ops, &tlb); - tlb_end_vma(&tlb, vma); + 
&madvise_free_walk_ops, tlb); + tlb_end_vma(tlb, vma); mmu_notifier_invalidate_range_end(&range); - tlb_finish_mmu(&tlb); + if (!caller_tlb) + tlb_finish_mmu(tlb); =20 return 0; } @@ -848,7 +857,8 @@ static int madvise_free_single_vma(struct vm_area_struc= t *vma, * An interface that causes the system to free clean pages and flush * dirty pages is already available as msync(MS_INVALIDATE). */ -static long madvise_dontneed_single_vma(struct vm_area_struct *vma, +static long madvise_dontneed_single_vma(struct mmu_gather *tlb, + struct vm_area_struct *vma, unsigned long start, unsigned long end) { struct zap_details details =3D { @@ -856,7 +866,10 @@ static long madvise_dontneed_single_vma(struct vm_area= _struct *vma, .even_cows =3D true, }; =20 - zap_page_range_single(vma, start, end - start, &details); + if (!tlb) + zap_page_range_single(vma, start, end - start, &details); + else + unmap_vma_single(tlb, vma, start, end - start, &details); return 0; } =20 @@ -951,9 +964,9 @@ static long madvise_dontneed_free(struct vm_area_struct= *vma, } =20 if (behavior =3D=3D MADV_DONTNEED || behavior =3D=3D MADV_DONTNEED_LOCKED) - return madvise_dontneed_single_vma(vma, start, end); + return madvise_dontneed_single_vma(NULL, vma, start, end); else if (behavior =3D=3D MADV_FREE) - return madvise_free_single_vma(vma, start, end); + return madvise_free_single_vma(NULL, vma, start, end); else return -EINVAL; } diff --git a/mm/memory.c b/mm/memory.c index 88c478e2ed1a..3256b9713cbd 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1995,9 +1995,19 @@ void unmap_vmas(struct mmu_gather *tlb, struct ma_st= ate *mas, mmu_notifier_invalidate_range_end(&range); } =20 -static void unmap_vma_single(struct mmu_gather *tlb, - struct vm_area_struct *vma, unsigned long address, - unsigned long size, struct zap_details *details) +/** + * unmap_vma_single - remove user pages in a given range + * @tlb: pointer to the caller's struct mmu_gather + * @vma: vm_area_struct holding the applicable pages + * 
@address: starting address of the pages + * @size: number of bytes to remove + * @details: details of shared cache invalidation + * + * @tlb shouldn't be NULL. The range must fit into one VMA. + */ +void unmap_vma_single(struct mmu_gather *tlb, struct vm_area_struct *vma, + unsigned long address, unsigned long size, + struct zap_details *details) { const unsigned long end =3D address + size; struct mmu_notifier_range range; --=20 2.39.5 From nobody Sat Feb 7 13:41:11 2026 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 79651236A6A for ; Mon, 10 Mar 2025 17:23:33 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1741627413; cv=none; b=bnTJOnk7okOCTKuyLBkFEnIvMqzldkK24SF8KyhUYQXwbmvt3CAV3b3zEfvwjwJRUCP670lb6W9ozoF4axP6D+P/deglPO5CpWTHkO0ffzV6zLc1fShAemY/Z8heStdZGLzaaKmBj0rpE9jeveVArSyOHmVIiC8xkF+DqHKYL88= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1741627413; c=relaxed/simple; bh=XwNyPWkuo3x8UEQgYUucQN44sSUMxDXOQT6wzHvLom0=; h=From:To:Cc:Subject:Date:Message-Id:In-Reply-To:References: MIME-Version; b=d84ItyLXBmMQegyKWh/BYGIS4C0o2pxRodsiGEspcTmmnxMl1gld4wUuW4E1dfyH5mF2HFYi5ZMH5F9+13ZwcRav6ieWNxZ+T1uDR963X0VET/0+qB+02+fwC9jTFASA2Jd3izgUJpAiAFlNzH1T2fcgfNskFLw/sLx7ivKuuJY= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=krWQwApA; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="krWQwApA" Received: by smtp.kernel.org (Postfix) with ESMTPSA id BC673C4CEEC; Mon, 10 Mar 2025 17:23:32 +0000 (UTC) DKIM-Signature: v=1; 
a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1741627412; bh=XwNyPWkuo3x8UEQgYUucQN44sSUMxDXOQT6wzHvLom0=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=krWQwApAFooJw8RDR+VQklpBSPp/jb0qvzqLu5pCDpb7eW76XF5JyZwxHg4cy4FEU XvMGRSStteW7kyuxgIk/BsxYaqWeRdLQaa6PTij9bEWwE2+rClOI+uMaCmLkdK2M8K 8MzU+FiOthWHwlK/bdFrs0qShtkdkmaAE2UaZYZM8XMrmJtSnawjt/mLWf6uLnDD0d C+kVoxCRMP9beh4TlBf27mnPlUXccx8TLOwatzKscv0CoCZgrpLWWu7zmxnnpVRCyE Liha99fJhrQLYsMoDyQNiPeJZ4C//wNOTCnt8/6+cNVwvl2dr6Jp1ifIZUVfh+ZNIy g/6Q0LWdHYpWA== From: SeongJae Park To: Andrew Morton Cc: SeongJae Park , "Liam R. Howlett" , David Hildenbrand , Lorenzo Stoakes , Shakeel Butt , Vlastimil Babka , kernel-team@meta.com, linux-kernel@vger.kernel.org, linux-mm@kvack.org Subject: [PATCH 8/9] mm/madvise: batch tlb flushes for [process_]madvise(MADV_{DONTNEED[_LOCKED],FREE}) Date: Mon, 10 Mar 2025 10:23:17 -0700 Message-Id: <20250310172318.653630-9-sj@kernel.org> X-Mailer: git-send-email 2.39.5 In-Reply-To: <20250310172318.653630-1-sj@kernel.org> References: <20250310172318.653630-1-sj@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" MADV_DONTNEED[_LOCKED] and MADV_FREE internal logics for [process_]madvise() can be invoked with batched tlb flushes. Update vector_madvise() and do_madvise(), which are called for the two system calls respectively, to use those in the efficient way. Initialize an mmu_gather object before starting the internal works, and flush the gathered tlb entries at once after all the internal works are done. 
Signed-off-by: SeongJae Park --- mm/madvise.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 47 insertions(+), 4 deletions(-) diff --git a/mm/madvise.c b/mm/madvise.c index d7ea71c6422c..d5f4ce3041a4 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -905,6 +905,7 @@ static bool madvise_dontneed_free_valid_vma(struct vm_a= rea_struct *vma, =20 struct madvise_behavior { int behavior; + struct mmu_gather *tlb; }; =20 static long madvise_dontneed_free(struct vm_area_struct *vma, @@ -964,9 +965,11 @@ static long madvise_dontneed_free(struct vm_area_struc= t *vma, } =20 if (behavior =3D=3D MADV_DONTNEED || behavior =3D=3D MADV_DONTNEED_LOCKED) - return madvise_dontneed_single_vma(NULL, vma, start, end); + return madvise_dontneed_single_vma( + madv_behavior->tlb, vma, start, end); else if (behavior =3D=3D MADV_FREE) - return madvise_free_single_vma(NULL, vma, start, end); + return madvise_free_single_vma( + madv_behavior->tlb, vma, start, end); else return -EINVAL; } @@ -1639,6 +1642,32 @@ static void madvise_unlock(struct mm_struct *mm, int= behavior) mmap_read_unlock(mm); } =20 +static bool madvise_batch_tlb_flush(int behavior) +{ + switch (behavior) { + case MADV_DONTNEED: + case MADV_DONTNEED_LOCKED: + return true; + default: + return false; + } +} + +static void madvise_init_tlb(struct madvise_behavior *madv_behavior, + struct mm_struct *mm) +{ + if (!madvise_batch_tlb_flush(madv_behavior->behavior)) + return; + tlb_gather_mmu(madv_behavior->tlb, mm); +} + +static void madvise_finish_tlb(struct madvise_behavior *madv_behavior) +{ + if (!madvise_batch_tlb_flush(madv_behavior->behavior)) + return; + tlb_finish_mmu(madv_behavior->tlb); +} + static bool is_valid_madvise(unsigned long start, size_t len_in, int behav= ior) { size_t len; @@ -1791,14 +1820,20 @@ static int madvise_do_behavior(struct mm_struct *mm, int do_madvise(struct mm_struct *mm, unsigned long start, size_t len_in, i= nt behavior) { int error; - struct madvise_behavior 
madv_behavior =3D {.behavior =3D behavior}; + struct mmu_gather tlb; + struct madvise_behavior madv_behavior =3D { + .behavior =3D behavior, + .tlb =3D &tlb, + }; =20 if (madvise_should_skip(start, len_in, behavior, &error)) return error; error =3D madvise_lock(mm, behavior); if (error) return error; + madvise_init_tlb(&madv_behavior, mm); error =3D madvise_do_behavior(mm, start, len_in, &madv_behavior); + madvise_finish_tlb(&madv_behavior); madvise_unlock(mm, behavior); =20 return error; @@ -1815,13 +1850,18 @@ static ssize_t vector_madvise(struct mm_struct *mm,= struct iov_iter *iter, { ssize_t ret =3D 0; size_t total_len; - struct madvise_behavior madv_behavior =3D {.behavior =3D behavior}; + struct mmu_gather tlb; + struct madvise_behavior madv_behavior =3D { + .behavior =3D behavior, + .tlb =3D &tlb, + }; =20 total_len =3D iov_iter_count(iter); =20 ret =3D madvise_lock(mm, behavior); if (ret) return ret; + madvise_init_tlb(&madv_behavior, mm); =20 while (iov_iter_count(iter)) { unsigned long start =3D (unsigned long)iter_iov_addr(iter); @@ -1850,14 +1890,17 @@ static ssize_t vector_madvise(struct mm_struct *mm,= struct iov_iter *iter, } =20 /* Drop and reacquire lock to unwind race. */ + madvise_finish_tlb(&madv_behavior); madvise_unlock(mm, behavior); madvise_lock(mm, behavior); + madvise_init_tlb(&madv_behavior, mm); continue; } if (ret < 0) break; iov_iter_advance(iter, iter_iov_len(iter)); } + madvise_finish_tlb(&madv_behavior); madvise_unlock(mm, behavior); =20 ret =3D (total_len - iov_iter_count(iter)) ? 
: ret; --=20 2.39.5 From nobody Sat Feb 7 13:41:11 2026 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 29118236A98 for ; Mon, 10 Mar 2025 17:23:34 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1741627414; cv=none; b=U/IEmKgGssL0K4o7l5E341GFp2g0q+LYlwSgMyrRmphfwVn3p3uXHFkFZf8DCX0gApuvPhhSxX0/VJVV1NuQ/fgtWAkwzN4pxrZ6hwJN/5GxHY8EKZIVhgdvvDDmYo1pxPmYx7GU8pZ0Ky4ecUidEw+fcu2vNj3XzrzPT1HZm6E= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1741627414; c=relaxed/simple; bh=P3FkpaakjLXsflqPzPkcSxDHFwcvtfrO/BuhT1Vlk/c=; h=From:To:Cc:Subject:Date:Message-Id:In-Reply-To:References: MIME-Version; b=qxUFztfYw9JOnirpH/pLeINQFAKjovHpiIy1BitgRqkUxY1qPxqLvFgkmzCF+6jQQdjkvntCG2xz0aXSsEeZxee2F9dPYY4Sp/3c+rZHOb9HpsE2E3f0K7DC6gbW3g8l8wI8MrH02FTrjnAj7q4yCGA+TzZrPa3s08blZE84N30= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=Set4HVYi; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="Set4HVYi" Received: by smtp.kernel.org (Postfix) with ESMTPSA id D4A8CC4CEF3; Mon, 10 Mar 2025 17:23:33 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1741627414; bh=P3FkpaakjLXsflqPzPkcSxDHFwcvtfrO/BuhT1Vlk/c=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=Set4HVYiJXagZv6AHBunmJA2YaxUOJNXk/xAV/vhDbFjpU1vNEeI+SBBALuj/M05i KURLdN1pNktQabof22sMSLvUlHkADTNX1hIHgdSslYYSh8X5MurEWENbg0as8amXZ8 3j67efJT7/8ngHcGzDhAqL3JWDpqLWmrNa3eE93RbLgP3ZaGBOJN+U3MkkZUcfemAX 
j2w0y5eq4tpmZ16QqA+T7gOEJOtMnGL7tyQX/IHV0A4AE+tJiAxejseb4ofZt0hmKP 64a4hJBNiaED6o2r144J+62wh3s3o/d4eb9OVTVKRL8Ck59jNEoJ81sBI1Qo/AZK4L t4BlhlGEzf4NQ== From: SeongJae Park To: Andrew Morton Cc: SeongJae Park , "Liam R. Howlett" , David Hildenbrand , Lorenzo Stoakes , Shakeel Butt , Vlastimil Babka , kernel-team@meta.com, linux-kernel@vger.kernel.org, linux-mm@kvack.org Subject: [PATCH 9/9] mm/madvise: remove !tlb support from madvise_{dontneed,free}_single_vma() Date: Mon, 10 Mar 2025 10:23:18 -0700 Message-Id: <20250310172318.653630-10-sj@kernel.org> X-Mailer: git-send-email 2.39.5 In-Reply-To: <20250310172318.653630-1-sj@kernel.org> References: <20250310172318.653630-1-sj@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" madvise_dontneed_single_vma() and madvise_free_single_vma() support both batched and unbatched tlb flush use cases, depending on the value of the received tlb parameter. That support was for a safe and smooth transition of the callers from unbatched flushes to batched ones. Now the transition is done, and therefore there is no real unbatched tlb flush use case. Remove the code that supported the no-longer-used cases.
Signed-off-by: SeongJae Park Reviewed-by: Lorenzo Stoakes --- mm/madvise.c | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) diff --git a/mm/madvise.c b/mm/madvise.c index d5f4ce3041a4..25af0a24c00b 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -795,18 +795,11 @@ static const struct mm_walk_ops madvise_free_walk_ops= =3D { }; =20 static int madvise_free_single_vma( - struct mmu_gather *caller_tlb, struct vm_area_struct *vma, + struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long start_addr, unsigned long end_addr) { struct mm_struct *mm =3D vma->vm_mm; struct mmu_notifier_range range; - struct mmu_gather self_tlb; - struct mmu_gather *tlb; - - if (caller_tlb) - tlb =3D caller_tlb; - else - tlb =3D &self_tlb; =20 /* MADV_FREE works for only anon vma at the moment */ if (!vma_is_anonymous(vma)) @@ -822,8 +815,6 @@ static int madvise_free_single_vma( range.start, range.end); =20 lru_add_drain(); - if (!caller_tlb) - tlb_gather_mmu(tlb, mm); update_hiwater_rss(mm); =20 mmu_notifier_invalidate_range_start(&range); @@ -832,9 +823,6 @@ static int madvise_free_single_vma( &madvise_free_walk_ops, tlb); tlb_end_vma(tlb, vma); mmu_notifier_invalidate_range_end(&range); - if (!caller_tlb) - tlb_finish_mmu(tlb); - return 0; } =20 @@ -866,10 +854,7 @@ static long madvise_dontneed_single_vma(struct mmu_gat= her *tlb, .even_cows =3D true, }; =20 - if (!tlb) - zap_page_range_single(vma, start, end - start, &details); - else - unmap_vma_single(tlb, vma, start, end - start, &details); + unmap_vma_single(tlb, vma, start, end - start, &details); return 0; } =20 --=20 2.39.5