[PATCH v5 1/7] mm: Refactor MM_CP_PROT_NUMA skipping case into new function

Posted by Dev Jain 2 months, 2 weeks ago
Reduce indentation by refactoring the prot_numa case into a new function.
No functional change intended.

Signed-off-by: Dev Jain <dev.jain@arm.com>
---
 mm/mprotect.c | 101 +++++++++++++++++++++++++++-----------------------
 1 file changed, 55 insertions(+), 46 deletions(-)

diff --git a/mm/mprotect.c b/mm/mprotect.c
index 88709c01177b..2a9c73bd0778 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -83,6 +83,59 @@ bool can_change_pte_writable(struct vm_area_struct *vma, unsigned long addr,
 	return pte_dirty(pte);
 }
 
+static bool prot_numa_skip(struct vm_area_struct *vma, unsigned long addr,
+			   pte_t oldpte, pte_t *pte, int target_node)
+{
+	struct folio *folio;
+	bool toptier;
+	int nid;
+
+	/* Avoid TLB flush if possible */
+	if (pte_protnone(oldpte))
+		return true;
+
+	folio = vm_normal_folio(vma, addr, oldpte);
+	if (!folio)
+		return true;
+
+	if (folio_is_zone_device(folio) || folio_test_ksm(folio))
+		return true;
+
+	/* Also skip shared copy-on-write pages */
+	if (is_cow_mapping(vma->vm_flags) &&
+	    (folio_maybe_dma_pinned(folio) || folio_maybe_mapped_shared(folio)))
+		return true;
+
+	/*
+	 * While migration can move some dirty pages,
+	 * it cannot move them all from MIGRATE_ASYNC
+	 * context.
+	 */
+	if (folio_is_file_lru(folio) && folio_test_dirty(folio))
+		return true;
+
+	/*
+	 * Don't mess with PTEs if page is already on the node
+	 * a single-threaded process is running on.
+	 */
+	nid = folio_nid(folio);
+	if (target_node == nid)
+		return true;
+
+	toptier = node_is_toptier(nid);
+
+	/*
+	 * Skip scanning top tier node if normal numa
+	 * balancing is disabled
+	 */
+	if (!(sysctl_numa_balancing_mode & NUMA_BALANCING_NORMAL) && toptier)
+		return true;
+
+	if (folio_use_access_time(folio))
+		folio_xchg_access_time(folio, jiffies_to_msecs(jiffies));
+	return false;
+}
+
 static long change_pte_range(struct mmu_gather *tlb,
 		struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr,
 		unsigned long end, pgprot_t newprot, unsigned long cp_flags)
@@ -117,53 +170,9 @@ static long change_pte_range(struct mmu_gather *tlb,
 			 * pages. See similar comment in change_huge_pmd.
 			 */
 			if (prot_numa) {
-				struct folio *folio;
-				int nid;
-				bool toptier;
-
-				/* Avoid TLB flush if possible */
-				if (pte_protnone(oldpte))
-					continue;
-
-				folio = vm_normal_folio(vma, addr, oldpte);
-				if (!folio || folio_is_zone_device(folio) ||
-				    folio_test_ksm(folio))
-					continue;
-
-				/* Also skip shared copy-on-write pages */
-				if (is_cow_mapping(vma->vm_flags) &&
-				    (folio_maybe_dma_pinned(folio) ||
-				     folio_maybe_mapped_shared(folio)))
-					continue;
-
-				/*
-				 * While migration can move some dirty pages,
-				 * it cannot move them all from MIGRATE_ASYNC
-				 * context.
-				 */
-				if (folio_is_file_lru(folio) &&
-				    folio_test_dirty(folio))
-					continue;
-
-				/*
-				 * Don't mess with PTEs if page is already on the node
-				 * a single-threaded process is running on.
-				 */
-				nid = folio_nid(folio);
-				if (target_node == nid)
-					continue;
-				toptier = node_is_toptier(nid);
-
-				/*
-				 * Skip scanning top tier node if normal numa
-				 * balancing is disabled
-				 */
-				if (!(sysctl_numa_balancing_mode & NUMA_BALANCING_NORMAL) &&
-				    toptier)
+				if (prot_numa_skip(vma, addr, oldpte, pte,
+						   target_node))
 					continue;
-				if (folio_use_access_time(folio))
-					folio_xchg_access_time(folio,
-						jiffies_to_msecs(jiffies));
 			}
 
 			oldpte = ptep_modify_prot_start(vma, addr, pte);
-- 
2.30.2
Re: [PATCH v5 1/7] mm: Refactor MM_CP_PROT_NUMA skipping case into new function
Posted by Zi Yan 2 months, 2 weeks ago
On 18 Jul 2025, at 5:02, Dev Jain wrote:

> Reduce indentation by refactoring the prot_numa case into a new function.
> No functional change intended.
>
> Signed-off-by: Dev Jain <dev.jain@arm.com>
> ---
>  mm/mprotect.c | 101 +++++++++++++++++++++++++++-----------------------
>  1 file changed, 55 insertions(+), 46 deletions(-)
>
LGTM. Reviewed-by: Zi Yan <ziy@nvidia.com>

Best Regards,
Yan, Zi
Re: [PATCH v5 1/7] mm: Refactor MM_CP_PROT_NUMA skipping case into new function
Posted by Ryan Roberts 2 months, 2 weeks ago
On 18/07/2025 10:02, Dev Jain wrote:
> Reduce indentation by refactoring the prot_numa case into a new function.
> No functional change intended.
> 
> Signed-off-by: Dev Jain <dev.jain@arm.com>
> ---
>  mm/mprotect.c | 101 +++++++++++++++++++++++++++-----------------------
>  1 file changed, 55 insertions(+), 46 deletions(-)
> 
> diff --git a/mm/mprotect.c b/mm/mprotect.c
> index 88709c01177b..2a9c73bd0778 100644
> --- a/mm/mprotect.c
> +++ b/mm/mprotect.c
> @@ -83,6 +83,59 @@ bool can_change_pte_writable(struct vm_area_struct *vma, unsigned long addr,
>  	return pte_dirty(pte);
>  }
>  
> +static bool prot_numa_skip(struct vm_area_struct *vma, unsigned long addr,
> +			   pte_t oldpte, pte_t *pte, int target_node)
> +{
> +	struct folio *folio;
> +	bool toptier;
> +	int nid;
> +
> +	/* Avoid TLB flush if possible */
> +	if (pte_protnone(oldpte))
> +		return true;
> +
> +	folio = vm_normal_folio(vma, addr, oldpte);
> +	if (!folio)
> +		return true;
> +
> +	if (folio_is_zone_device(folio) || folio_test_ksm(folio))
> +		return true;
> +
> +	/* Also skip shared copy-on-write pages */
> +	if (is_cow_mapping(vma->vm_flags) &&
> +	    (folio_maybe_dma_pinned(folio) || folio_maybe_mapped_shared(folio)))
> +		return true;
> +
> +	/*
> +	 * While migration can move some dirty pages,
> +	 * it cannot move them all from MIGRATE_ASYNC
> +	 * context.
> +	 */
> +	if (folio_is_file_lru(folio) && folio_test_dirty(folio))
> +		return true;
> +
> +	/*
> +	 * Don't mess with PTEs if page is already on the node
> +	 * a single-threaded process is running on.
> +	 */
> +	nid = folio_nid(folio);
> +	if (target_node == nid)
> +		return true;
> +
> +	toptier = node_is_toptier(nid);
> +
> +	/*
> +	 * Skip scanning top tier node if normal numa
> +	 * balancing is disabled
> +	 */
> +	if (!(sysctl_numa_balancing_mode & NUMA_BALANCING_NORMAL) && toptier)
> +		return true;
> +
> +	if (folio_use_access_time(folio))
> +		folio_xchg_access_time(folio, jiffies_to_msecs(jiffies));

Perhaps this bit should be kept in the original location? It's got nothing to do
with determining if the pte should be skipped.

Thanks,
Ryan

Re: [PATCH v5 1/7] mm: Refactor MM_CP_PROT_NUMA skipping case into new function
Posted by Barry Song 2 months, 2 weeks ago
On Fri, Jul 18, 2025 at 5:03 PM Dev Jain <dev.jain@arm.com> wrote:
>
> Reduce indentation by refactoring the prot_numa case into a new function.
> No functional change intended.
>
> Signed-off-by: Dev Jain <dev.jain@arm.com>

Reviewed-by: Barry Song <baohua@kernel.org>

> ---
>  mm/mprotect.c | 101 +++++++++++++++++++++++++++-----------------------
>  1 file changed, 55 insertions(+), 46 deletions(-)
>
> diff --git a/mm/mprotect.c b/mm/mprotect.c
> index 88709c01177b..2a9c73bd0778 100644
> --- a/mm/mprotect.c
> +++ b/mm/mprotect.c
> @@ -83,6 +83,59 @@ bool can_change_pte_writable(struct vm_area_struct *vma, unsigned long addr,
>         return pte_dirty(pte);
>  }
>
> +static bool prot_numa_skip(struct vm_area_struct *vma, unsigned long addr,
> +                          pte_t oldpte, pte_t *pte, int target_node)
> +{

[...]

> +       /*
> +        * Skip scanning top tier node if normal numa
> +        * balancing is disabled
> +        */
> +       if (!(sysctl_numa_balancing_mode & NUMA_BALANCING_NORMAL) && toptier)
> +               return true;
> +
> +       if (folio_use_access_time(folio))
> +               folio_xchg_access_time(folio, jiffies_to_msecs(jiffies));

Nit: I wonder if this should be moved elsewhere, since this isn't
actually about 'skipping', even though the function is named
`prot_numa_skip()`.

> +       return false;
> +}
> +

Thanks
Barry
Re: [PATCH v5 1/7] mm: Refactor MM_CP_PROT_NUMA skipping case into new function
Posted by Dev Jain 2 months, 2 weeks ago
On 21/07/25 5:14 am, Barry Song wrote:
> On Fri, Jul 18, 2025 at 5:03 PM Dev Jain <dev.jain@arm.com> wrote:
>> Reduce indentation by refactoring the prot_numa case into a new function.
>> No functional change intended.
>>
>> Signed-off-by: Dev Jain <dev.jain@arm.com>
> Reviewed-by: Barry Song <baohua@kernel.org>
>
>> ---
>>   mm/mprotect.c | 101 +++++++++++++++++++++++++++-----------------------
>>   1 file changed, 55 insertions(+), 46 deletions(-)
>>
>> diff --git a/mm/mprotect.c b/mm/mprotect.c
>> index 88709c01177b..2a9c73bd0778 100644
>> --- a/mm/mprotect.c
>> +++ b/mm/mprotect.c
>> @@ -83,6 +83,59 @@ bool can_change_pte_writable(struct vm_area_struct *vma, unsigned long addr,
>>          return pte_dirty(pte);
>>   }
>>
>> +static bool prot_numa_skip(struct vm_area_struct *vma, unsigned long addr,
>> +                          pte_t oldpte, pte_t *pte, int target_node)
>> +{
> [...]
>
>> +       /*
>> +        * Skip scanning top tier node if normal numa
>> +        * balancing is disabled
>> +        */
>> +       if (!(sysctl_numa_balancing_mode & NUMA_BALANCING_NORMAL) && toptier)
>> +               return true;
>> +
>> +       if (folio_use_access_time(folio))
>> +               folio_xchg_access_time(folio, jiffies_to_msecs(jiffies));
> Nit: I wonder if this should be moved elsewhere, since this isn't
> actually about 'skipping', even though the function is named
> `prot_numa_skip()`.

Agreed, thanks. We can have prot_numa_skip() only return true
or false, and if it returns false, the caller can do the
folio_xchg_access_time() update.
I will wait for 2-3 days for any more comments and respin.
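
For the respin I have in mind something along these lines (untested
sketch only, just to illustrate the split; handing the folio back
through a new out-parameter, called foliop here purely for
illustration, is one way to do it):

static bool prot_numa_skip(struct vm_area_struct *vma, unsigned long addr,
			   pte_t oldpte, pte_t *pte, int target_node,
			   struct folio **foliop)
{
	struct folio *folio;
	bool toptier;
	int nid;

	*foliop = NULL;

	/* Avoid TLB flush if possible */
	if (pte_protnone(oldpte))
		return true;

	folio = vm_normal_folio(vma, addr, oldpte);
	if (!folio)
		return true;
	*foliop = folio;

	if (folio_is_zone_device(folio) || folio_test_ksm(folio))
		return true;

	/* Also skip shared copy-on-write pages */
	if (is_cow_mapping(vma->vm_flags) &&
	    (folio_maybe_dma_pinned(folio) || folio_maybe_mapped_shared(folio)))
		return true;

	/*
	 * While migration can move some dirty pages,
	 * it cannot move them all from MIGRATE_ASYNC
	 * context.
	 */
	if (folio_is_file_lru(folio) && folio_test_dirty(folio))
		return true;

	/*
	 * Don't mess with PTEs if page is already on the node
	 * a single-threaded process is running on.
	 */
	nid = folio_nid(folio);
	if (target_node == nid)
		return true;

	toptier = node_is_toptier(nid);

	/*
	 * Skip scanning top tier node if normal numa
	 * balancing is disabled
	 */
	if (!(sysctl_numa_balancing_mode & NUMA_BALANCING_NORMAL) && toptier)
		return true;

	/* Not skipping: the access-time update moves to the caller */
	return false;
}

with change_pte_range() keeping the access-time update at the call site:

			if (prot_numa) {
				struct folio *folio;

				if (prot_numa_skip(vma, addr, oldpte, pte,
						   target_node, &folio))
					continue;
				if (folio_use_access_time(folio))
					folio_xchg_access_time(folio,
						jiffies_to_msecs(jiffies));
			}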

>
>> +       return false;
>> +}
>> +
> Thanks
> Barry
Re: [PATCH v5 1/7] mm: Refactor MM_CP_PROT_NUMA skipping case into new function
Posted by Dev Jain 2 months, 2 weeks ago
On 21/07/25 9:14 am, Dev Jain wrote:
>
> On 21/07/25 5:14 am, Barry Song wrote:
>> On Fri, Jul 18, 2025 at 5:03 PM Dev Jain <dev.jain@arm.com> wrote:
>>> Reduce indentation by refactoring the prot_numa case into a new 
>>> function.
>>> No functional change intended.
>>>
>>> Signed-off-by: Dev Jain <dev.jain@arm.com>
>> Reviewed-by: Barry Song <baohua@kernel.org>
>>
>>> ---
>>>   mm/mprotect.c | 101 
>>> +++++++++++++++++++++++++++-----------------------
>>>   1 file changed, 55 insertions(+), 46 deletions(-)
>>>
>>> diff --git a/mm/mprotect.c b/mm/mprotect.c
>>> index 88709c01177b..2a9c73bd0778 100644
>>> --- a/mm/mprotect.c
>>> +++ b/mm/mprotect.c
>>> @@ -83,6 +83,59 @@ bool can_change_pte_writable(struct 
>>> vm_area_struct *vma, unsigned long addr,
>>>          return pte_dirty(pte);
>>>   }
>>>
>>> +static bool prot_numa_skip(struct vm_area_struct *vma, unsigned 
>>> long addr,
>>> +                          pte_t oldpte, pte_t *pte, int target_node)
>>> +{
>> [...]
>>
>>> +       /*
>>> +        * Skip scanning top tier node if normal numa
>>> +        * balancing is disabled
>>> +        */
>>> +       if (!(sysctl_numa_balancing_mode & NUMA_BALANCING_NORMAL) && 
>>> toptier)
>>> +               return true;
>>> +
>>> +       if (folio_use_access_time(folio))
>>> +               folio_xchg_access_time(folio, 
>>> jiffies_to_msecs(jiffies));
>> Nit: I wonder if this should be moved elsewhere, since this isn't
>> actually about 'skipping', even though the function is named
>> `prot_numa_skip()`.
>
> Agreed, thanks. We can have prot_numa_skip() only return true
> or false, and if it returns false, the caller can do the
> folio_xchg_access_time() update.
> I will wait for 2-3 days for any more comments and respin.

Since Andrew has already pulled this and we are quite late into the
release cycle, I'll do this cleanup in the next cycle.

>
>>
>>> +       return false;
>>> +}
>>> +
>> Thanks
>> Barry
>
Re: [PATCH v5 1/7] mm: Refactor MM_CP_PROT_NUMA skipping case into new function
Posted by Lorenzo Stoakes 2 months, 2 weeks ago
On Fri, Jul 18, 2025 at 02:32:38PM +0530, Dev Jain wrote:
> Reduce indentation by refactoring the prot_numa case into a new function.
> No functional change intended.
>
> Signed-off-by: Dev Jain <dev.jain@arm.com>

LGTM, so:

Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
