In case of prot_numa, there are various cases in which we can skip to the
next iteration. Since the skip condition is based on the folio and not
the PTEs, we can skip a PTE batch.
Signed-off-by: Dev Jain <dev.jain@arm.com>
---
mm/mprotect.c | 36 +++++++++++++++++++++++++++++-------
1 file changed, 29 insertions(+), 7 deletions(-)
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 88608d0dc2c2..1ee160ed0b14 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -83,6 +83,18 @@ bool can_change_pte_writable(struct vm_area_struct *vma, unsigned long addr,
return pte_dirty(pte);
}
+static int mprotect_batch(struct folio *folio, unsigned long addr, pte_t *ptep,
+ pte_t pte, int max_nr_ptes)
+{
+ const fpb_t flags = FPB_IGNORE_DIRTY | FPB_IGNORE_SOFT_DIRTY;
+
+ if (!folio_test_large(folio) || (max_nr_ptes == 1))
+ return 1;
+
+ return folio_pte_batch(folio, addr, ptep, pte, max_nr_ptes, flags,
+ NULL, NULL, NULL);
+}
+
static long change_pte_range(struct mmu_gather *tlb,
struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr,
unsigned long end, pgprot_t newprot, unsigned long cp_flags)
@@ -94,6 +106,7 @@ static long change_pte_range(struct mmu_gather *tlb,
bool prot_numa = cp_flags & MM_CP_PROT_NUMA;
bool uffd_wp = cp_flags & MM_CP_UFFD_WP;
bool uffd_wp_resolve = cp_flags & MM_CP_UFFD_WP_RESOLVE;
+ int nr_ptes;
tlb_change_page_size(tlb, PAGE_SIZE);
pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
@@ -108,8 +121,10 @@ static long change_pte_range(struct mmu_gather *tlb,
flush_tlb_batched_pending(vma->vm_mm);
arch_enter_lazy_mmu_mode();
do {
+ nr_ptes = 1;
oldpte = ptep_get(pte);
if (pte_present(oldpte)) {
+ int max_nr_ptes = (end - addr) >> PAGE_SHIFT;
pte_t ptent;
/*
@@ -126,15 +141,18 @@ static long change_pte_range(struct mmu_gather *tlb,
continue;
folio = vm_normal_folio(vma, addr, oldpte);
- if (!folio || folio_is_zone_device(folio) ||
- folio_test_ksm(folio))
+ if (!folio)
continue;
+ if (folio_is_zone_device(folio) ||
+ folio_test_ksm(folio))
+ goto skip_batch;
+
/* Also skip shared copy-on-write pages */
if (is_cow_mapping(vma->vm_flags) &&
(folio_maybe_dma_pinned(folio) ||
folio_maybe_mapped_shared(folio)))
- continue;
+ goto skip_batch;
/*
* While migration can move some dirty pages,
@@ -143,7 +161,7 @@ static long change_pte_range(struct mmu_gather *tlb,
*/
if (folio_is_file_lru(folio) &&
folio_test_dirty(folio))
- continue;
+ goto skip_batch;
/*
* Don't mess with PTEs if page is already on the node
@@ -151,7 +169,7 @@ static long change_pte_range(struct mmu_gather *tlb,
*/
nid = folio_nid(folio);
if (target_node == nid)
- continue;
+ goto skip_batch;
toptier = node_is_toptier(nid);
/*
@@ -159,8 +177,12 @@ static long change_pte_range(struct mmu_gather *tlb,
* balancing is disabled
*/
if (!(sysctl_numa_balancing_mode & NUMA_BALANCING_NORMAL) &&
- toptier)
+ toptier) {
+skip_batch:
+ nr_ptes = mprotect_batch(folio, addr, pte,
+ oldpte, max_nr_ptes);
continue;
+ }
if (folio_use_access_time(folio))
folio_xchg_access_time(folio,
jiffies_to_msecs(jiffies));
@@ -280,7 +302,7 @@ static long change_pte_range(struct mmu_gather *tlb,
pages++;
}
}
- } while (pte++, addr += PAGE_SIZE, addr != end);
+ } while (pte += nr_ptes, addr += nr_ptes * PAGE_SIZE, addr != end);
arch_leave_lazy_mmu_mode();
pte_unmap_unlock(pte - 1, ptl);
--
2.30.2
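
For readers less familiar with the batching pattern, the essence of the change
is the new loop advance at the bottom of the diff: instead of stepping one PTE
at a time, the loop steps by however many consecutive PTEs map into the same
large folio. The small userspace sketch below illustrates just that control
flow; the types and helpers (mock_pte, mock_pte_batch) and the 4 KiB PAGE_SIZE
are mock-ups for illustration only, not kernel code.

#include <stdio.h>

#define PAGE_SIZE 4096UL

/* Mock "PTE": each entry just records which folio it maps into. */
struct mock_pte {
	int folio_id;
};

/*
 * Count how many consecutive entries map the same folio, capped at max_nr.
 * This only mimics the role folio_pte_batch() plays in the patch.
 */
static int mock_pte_batch(const struct mock_pte *pte, int max_nr)
{
	int nr = 1;

	while (nr < max_nr && pte[nr].folio_id == pte[0].folio_id)
		nr++;
	return nr;
}

int main(void)
{
	/* Eight PTEs: a 4-page folio, a single page, then a 3-page folio. */
	struct mock_pte ptes[] = { {1}, {1}, {1}, {1}, {2}, {3}, {3}, {3} };
	int total = sizeof(ptes) / sizeof(ptes[0]);
	unsigned long addr = 0, end = total * PAGE_SIZE;
	struct mock_pte *pte = ptes;
	int nr_ptes;

	do {
		int max_nr_ptes = (end - addr) / PAGE_SIZE;

		nr_ptes = mock_pte_batch(pte, max_nr_ptes);
		printf("addr %#lx: handling %d PTE(s) of folio %d in one go\n",
		       addr, nr_ptes, pte->folio_id);
	} while (pte += nr_ptes, addr += nr_ptes * PAGE_SIZE, addr != end);

	return 0;
}

The do-while advances by nr_ptes instead of one, so a whole run of PTEs that
map the same large folio is skipped in a single iteration; in the kernel,
folio_pte_batch() does the real work of deciding how long such a run is.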
On 19.05.25 09:48, Dev Jain wrote:
Please highlight in the subject that this is only about MM_CP_PROT_NUMA
handling.
> In case of prot_numa, there are various cases in which we can skip to the
> next iteration. Since the skip condition is based on the folio and not
> the PTEs, we can skip a PTE batch.
>
> Signed-off-by: Dev Jain <dev.jain@arm.com>
> ---
> mm/mprotect.c | 36 +++++++++++++++++++++++++++++-------
> 1 file changed, 29 insertions(+), 7 deletions(-)
>
> diff --git a/mm/mprotect.c b/mm/mprotect.c
> index 88608d0dc2c2..1ee160ed0b14 100644
> --- a/mm/mprotect.c
> +++ b/mm/mprotect.c
> @@ -83,6 +83,18 @@ bool can_change_pte_writable(struct vm_area_struct *vma, unsigned long addr,
> return pte_dirty(pte);
> }
>
> +static int mprotect_batch(struct folio *folio, unsigned long addr, pte_t *ptep,
> + pte_t pte, int max_nr_ptes)
> +{
> + const fpb_t flags = FPB_IGNORE_DIRTY | FPB_IGNORE_SOFT_DIRTY;
> +
> + if (!folio_test_large(folio) || (max_nr_ptes == 1))
> + return 1;
> +
> + return folio_pte_batch(folio, addr, ptep, pte, max_nr_ptes, flags,
> + NULL, NULL, NULL);
> +}
> +
> static long change_pte_range(struct mmu_gather *tlb,
> struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr,
> unsigned long end, pgprot_t newprot, unsigned long cp_flags)
> @@ -94,6 +106,7 @@ static long change_pte_range(struct mmu_gather *tlb,
> bool prot_numa = cp_flags & MM_CP_PROT_NUMA;
> bool uffd_wp = cp_flags & MM_CP_UFFD_WP;
> bool uffd_wp_resolve = cp_flags & MM_CP_UFFD_WP_RESOLVE;
> + int nr_ptes;
>
> tlb_change_page_size(tlb, PAGE_SIZE);
> pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
> @@ -108,8 +121,10 @@ static long change_pte_range(struct mmu_gather *tlb,
> flush_tlb_batched_pending(vma->vm_mm);
> arch_enter_lazy_mmu_mode();
> do {
> + nr_ptes = 1;
> oldpte = ptep_get(pte);
> if (pte_present(oldpte)) {
> + int max_nr_ptes = (end - addr) >> PAGE_SHIFT;
> pte_t ptent;
>
> /*
> @@ -126,15 +141,18 @@ static long change_pte_range(struct mmu_gather *tlb,
> continue;
>
> folio = vm_normal_folio(vma, addr, oldpte);
> - if (!folio || folio_is_zone_device(folio) ||
> - folio_test_ksm(folio))
> + if (!folio)
> continue;
>
> + if (folio_is_zone_device(folio) ||
> + folio_test_ksm(folio))
> + goto skip_batch;
> +
> /* Also skip shared copy-on-write pages */
> if (is_cow_mapping(vma->vm_flags) &&
> (folio_maybe_dma_pinned(folio) ||
> folio_maybe_mapped_shared(folio)))
> - continue;
> + goto skip_batch;
>
> /*
> * While migration can move some dirty pages,
> @@ -143,7 +161,7 @@ static long change_pte_range(struct mmu_gather *tlb,
> */
> if (folio_is_file_lru(folio) &&
> folio_test_dirty(folio))
> - continue;
> + goto skip_batch;
>
> /*
> * Don't mess with PTEs if page is already on the node
> @@ -151,7 +169,7 @@ static long change_pte_range(struct mmu_gather *tlb,
> */
> nid = folio_nid(folio);
> if (target_node == nid)
> - continue;
> + goto skip_batch;
> toptier = node_is_toptier(nid);
>
> /*
> @@ -159,8 +177,12 @@ static long change_pte_range(struct mmu_gather *tlb,
> * balancing is disabled
> */
> if (!(sysctl_numa_balancing_mode & NUMA_BALANCING_NORMAL) &&
> - toptier)
> + toptier) {
> +skip_batch:
> + nr_ptes = mprotect_batch(folio, addr, pte,
> + oldpte, max_nr_ptes);
> continue;
> + }
I suggest
a) not burying that skip_batch label in another if condition
b) looking into factoring out prot_numa handling into a separate
function first.
Likely we want something like
if (prot_numa) {
nr_ptes = prot_numa_pte_range_skip_ptes(vma, addr, oldpte);
if (nr_ptes)
continue;
}
... likely with a better function name,
--
Cheers,
David / dhildenb
On 21/05/25 5:36 pm, David Hildenbrand wrote:
> On 19.05.25 09:48, Dev Jain wrote:
>
> Please highlight in the subject that this is only about
> MM_CP_PROT_NUMA handling.
Sure.
>
>> In case of prot_numa, there are various cases in which we can skip to
>> the
>> next iteration. Since the skip condition is based on the folio and not
>> the PTEs, we can skip a PTE batch.
>>
>> Signed-off-by: Dev Jain <dev.jain@arm.com>
>> ---
>> mm/mprotect.c | 36 +++++++++++++++++++++++++++++-------
>> 1 file changed, 29 insertions(+), 7 deletions(-)
>>
>> diff --git a/mm/mprotect.c b/mm/mprotect.c
>> index 88608d0dc2c2..1ee160ed0b14 100644
>> --- a/mm/mprotect.c
>> +++ b/mm/mprotect.c
>> @@ -83,6 +83,18 @@ bool can_change_pte_writable(struct vm_area_struct
>> *vma, unsigned long addr,
>> return pte_dirty(pte);
>> }
>> +static int mprotect_batch(struct folio *folio, unsigned long addr,
>> pte_t *ptep,
>> + pte_t pte, int max_nr_ptes)
>> +{
>> + const fpb_t flags = FPB_IGNORE_DIRTY | FPB_IGNORE_SOFT_DIRTY;
>> +
>> + if (!folio_test_large(folio) || (max_nr_ptes == 1))
>> + return 1;
>> +
>> + return folio_pte_batch(folio, addr, ptep, pte, max_nr_ptes, flags,
>> + NULL, NULL, NULL);
>> +}
>> +
>> static long change_pte_range(struct mmu_gather *tlb,
>> struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr,
>> unsigned long end, pgprot_t newprot, unsigned long cp_flags)
>> @@ -94,6 +106,7 @@ static long change_pte_range(struct mmu_gather *tlb,
>> bool prot_numa = cp_flags & MM_CP_PROT_NUMA;
>> bool uffd_wp = cp_flags & MM_CP_UFFD_WP;
>> bool uffd_wp_resolve = cp_flags & MM_CP_UFFD_WP_RESOLVE;
>> + int nr_ptes;
>> tlb_change_page_size(tlb, PAGE_SIZE);
>> pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
>> @@ -108,8 +121,10 @@ static long change_pte_range(struct mmu_gather
>> *tlb,
>> flush_tlb_batched_pending(vma->vm_mm);
>> arch_enter_lazy_mmu_mode();
>> do {
>> + nr_ptes = 1;
>> oldpte = ptep_get(pte);
>> if (pte_present(oldpte)) {
>> + int max_nr_ptes = (end - addr) >> PAGE_SHIFT;
>> pte_t ptent;
>> /*
>> @@ -126,15 +141,18 @@ static long change_pte_range(struct mmu_gather
>> *tlb,
>> continue;
>> folio = vm_normal_folio(vma, addr, oldpte);
>> - if (!folio || folio_is_zone_device(folio) ||
>> - folio_test_ksm(folio))
>> + if (!folio)
>> continue;
>> + if (folio_is_zone_device(folio) ||
>> + folio_test_ksm(folio))
>> + goto skip_batch;
>> +
>> /* Also skip shared copy-on-write pages */
>> if (is_cow_mapping(vma->vm_flags) &&
>> (folio_maybe_dma_pinned(folio) ||
>> folio_maybe_mapped_shared(folio)))
>> - continue;
>> + goto skip_batch;
>> /*
>> * While migration can move some dirty pages,
>> @@ -143,7 +161,7 @@ static long change_pte_range(struct mmu_gather *tlb,
>> */
>> if (folio_is_file_lru(folio) &&
>> folio_test_dirty(folio))
>> - continue;
>> + goto skip_batch;
>> /*
>> * Don't mess with PTEs if page is already on the node
>> @@ -151,7 +169,7 @@ static long change_pte_range(struct mmu_gather *tlb,
>> */
>> nid = folio_nid(folio);
>> if (target_node == nid)
>> - continue;
>> + goto skip_batch;
>> toptier = node_is_toptier(nid);
>> /*
>> @@ -159,8 +177,12 @@ static long change_pte_range(struct mmu_gather
>> *tlb,
>> * balancing is disabled
>> */
>> if (!(sysctl_numa_balancing_mode &
>> NUMA_BALANCING_NORMAL) &&
>> - toptier)
>> + toptier) {
>> +skip_batch:
>> + nr_ptes = mprotect_batch(folio, addr, pte,
>> + oldpte, max_nr_ptes);
>> continue;
>> + }
>
>
> I suggest
>
> a) not burying that skip_batch label in another if condition
>
> b) looking into factoring out prot_numa handling into a separate
> function first.
>
>
> Likely we want something like
>
> if (prot_numa) {
> nr_ptes = prot_numa_pte_range_skip_ptes(vma, addr, oldpte);
> if (nr_ptes)
> continue;
> }
>
> ... likely with a better function name,
I want to be able to reuse the folio from vm_normal_folio(), and we also
need
nr_ptes to know how much to skip, so if there is no objection in passing
int *nr_ptes,
or struct folio **foliop to this new function, then I'll carry on with
your suggestion :)
>
>
>> ... likely with a better function name,
>
>
> I want to be able to reuse the folio from vm_normal_folio(), and we also
> need
>
> nr_ptes to know how much to skip, so if there is no objection in passing
> int *nr_ptes,
>
> or struct folio **foliop to this new function, then I'll carry on with
> your suggestion :)

Can you quickly prototype what you have in mind and paste it here?
Will make it easier :)

--
Cheers,
David / dhildenb
On 22/05/25 12:43 pm, David Hildenbrand wrote:
>
>>> ... likely with a better function name,
>>
>>
>> I want to be able to reuse the folio from vm_normal_folio(), and we also
>> need
>>
>> nr_ptes to know how much to skip, so if there is no objection in passing
>> int *nr_ptes,
>>
>> or struct folio **foliop to this new function, then I'll carry on with
>> your suggestion :)
>
> Can you quickly prototype what you have in mind and paste it here?
> Will make it easier :)
if (prot_numa)
func(vma, addr, oldpte, &nr);
struct folio *func(vma, addr, oldpte, int *nr)
{
	if (pte_protnone(oldpte)) {
		*nr = 1;
		return NULL;
	}

	folio = vm_normal_folio();
	if (!folio) {
		*nr = 1;
		return NULL;
	}

	/* if the other skipping conditions happen, goto skip_batch, i.e.
	 * return folio and set *nr = batch */

	if (sysctl.....) {
skip_batch:
		*nr = mprotect_batch();
	}

	if (folio_use_access_time)....

	return folio;
}
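
To make the sketch above a little more concrete, here is one possible
(untested) rendering in real C, using only the checks that already appear in
the posted diff. The function name, the exact parameter list, and the
convention that a NULL return together with *nr_ptes means "skip this many
PTEs" are illustrative choices for this sketch, not what was posted or merged.

static struct folio *prot_numa_skip_ptes(struct vm_area_struct *vma,
		unsigned long addr, pte_t oldpte, pte_t *pte,
		int max_nr_ptes, int target_node, int *nr_ptes)
{
	struct folio *folio;
	int nid;

	*nr_ptes = 1;

	/* PTE is already PROT_NONE; nothing to do for this entry. */
	if (pte_protnone(oldpte))
		return NULL;

	folio = vm_normal_folio(vma, addr, oldpte);
	if (!folio)
		return NULL;

	if (folio_is_zone_device(folio) || folio_test_ksm(folio))
		goto skip_batch;

	/* Also skip shared copy-on-write pages. */
	if (is_cow_mapping(vma->vm_flags) &&
	    (folio_maybe_dma_pinned(folio) ||
	     folio_maybe_mapped_shared(folio)))
		goto skip_batch;

	/* Dirty file-backed folios can be expensive to migrate. */
	if (folio_is_file_lru(folio) && folio_test_dirty(folio))
		goto skip_batch;

	/* Don't mess with PTEs if the folio is already on the target node. */
	nid = folio_nid(folio);
	if (target_node == nid)
		goto skip_batch;

	/* Skip top-tier folios when normal NUMA balancing is disabled. */
	if (!(sysctl_numa_balancing_mode & NUMA_BALANCING_NORMAL) &&
	    node_is_toptier(nid))
		goto skip_batch;

	if (folio_use_access_time(folio))
		folio_xchg_access_time(folio, jiffies_to_msecs(jiffies));

	/* Caller goes ahead and changes the protection for this folio. */
	return folio;

skip_batch:
	*nr_ptes = mprotect_batch(folio, addr, pte, oldpte, max_nr_ptes);
	return NULL;
}

The call site in change_pte_range() would then be roughly
"folio = prot_numa_skip_ptes(vma, addr, oldpte, pte, max_nr_ptes,
target_node, &nr_ptes); if (!folio) continue;", with the existing
"pte += nr_ptes" advance at the end of the loop doing the actual skipping.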
On 22.05.25 09:47, Dev Jain wrote:
>
> On 22/05/25 12:43 pm, David Hildenbrand wrote:
>>
>>>> ... likely with a better function name,
>>>
>>>
>>> I want to be able to reuse the folio from vm_normal_folio(), and we also
>>> need
>>>
>>> nr_ptes to know how much to skip, so if there is no objection in passing
>>> int *nr_ptes,
>>>
>>> or struct folio **foliop to this new function, then I'll carry on with
>>> your suggestion :)
>>
>> Can you quickly prototype what you have in mind and paste it here?
>> Will make it easier :)
>
>
> if (prot_numa)
>
> func(vma, addr, oldpte, &nr);

I'd probably return "nr_ptes" and return the folio using a &folio
instead.

That way, you can easily extend the function to return the folio in
the patch where you really need it (not this patch IIUR :) )

--
Cheers,
David / dhildenb
On 22/05/25 9:48 pm, David Hildenbrand wrote:
> On 22.05.25 09:47, Dev Jain wrote:
>>
>> On 22/05/25 12:43 pm, David Hildenbrand wrote:
>>>
>>>>> ... likely with a better function name,
>>>>
>>>>
>>>> I want to be able to reuse the folio from vm_normal_folio(), and we
>>>> also
>>>> need
>>>>
>>>> nr_ptes to know how much to skip, so if there is no objection in
>>>> passing
>>>> int *nr_ptes,
>>>>
>>>> or struct folio **foliop to this new function, then I'll carry on with
>>>> your suggestion :)
>>>
>>> Can you quickly prototype what you have in mind and paste it here?
>>> Will make it easier :)
>>
>>
>> if (prot_numa)
>>
>> func(vma, addr, oldpte, &nr);
>
> I'd probably return "nr_ptes" and return the folio using a &folio
> instead.
>
> That way, you can easily extend the function to return the folio in
> the patch where you really need it (not this patch IIUR :) )

Just confirming, you mean to return nr_ptes and get the folio by passing
&folio, and the function parameter will be struct folio **foliop?
On 04.06.25 12:38, Dev Jain wrote:
>
> On 22/05/25 9:48 pm, David Hildenbrand wrote:
>> On 22.05.25 09:47, Dev Jain wrote:
>>>
>>> On 22/05/25 12:43 pm, David Hildenbrand wrote:
>>>>
>>>>>> ... likely with a better function name,
>>>>>
>>>>>
>>>>> I want to be able to reuse the folio from vm_normal_folio(), and we
>>>>> also
>>>>> need
>>>>>
>>>>> nr_ptes to know how much to skip, so if there is no objection in
>>>>> passing
>>>>> int *nr_ptes,
>>>>>
>>>>> or struct folio **foliop to this new function, then I'll carry on with
>>>>> your suggestion :)
>>>>
>>>> Can you quickly prototype what you have in mind and paste it here?
>>>> Will make it easier :)
>>>
>>>
>>> if (prot_numa)
>>>
>>> func(vma, addr, oldpte, &nr);
>>
>> I'd probably return "nr_ptes" and return the folio using a &folio
>> instead.
>>
>> That way, you can easily extend the function to return the folio in
>> the patch where you really need it (not this patch IIUR :) )
>
> Just confirming, you mean to return nr_ptes and get the folio by passing
> &folio, and the function parameter will be struct folio **foliop?

Yes.

--
Cheers,
David / dhildenb
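
In terms of code, this agreement turns the earlier sketch's interface around:
the helper returns the number of PTEs to skip (0 meaning "do not skip, go
ahead and change the protection") and hands the folio back through a double
pointer. A rough, untested fragment of what the call site could then look
like, with the helper name still hypothetical:

	/* Inside change_pte_range()'s loop, right after reading oldpte. */
	if (prot_numa) {
		int skip = prot_numa_skip_ptes(vma, addr, oldpte, pte,
					       max_nr_ptes, target_node,
					       &folio);
		if (skip) {
			nr_ptes = skip;	/* advance past the whole batch */
			continue;
		}
		/* skip == 0: folio has been set, carry on below. */
	}

Using a separate local for the return value keeps nr_ptes at its initial
value of 1 on the non-skip path, so the loop advance at the bottom stays
correct.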
On 19/05/2025 08:48, Dev Jain wrote:
> In case of prot_numa, there are various cases in which we can skip to the
> next iteration. Since the skip condition is based on the folio and not
> the PTEs, we can skip a PTE batch.
>
> Signed-off-by: Dev Jain <dev.jain@arm.com>
> ---
> mm/mprotect.c | 36 +++++++++++++++++++++++++++++-------
> 1 file changed, 29 insertions(+), 7 deletions(-)
>
> diff --git a/mm/mprotect.c b/mm/mprotect.c
> index 88608d0dc2c2..1ee160ed0b14 100644
> --- a/mm/mprotect.c
> +++ b/mm/mprotect.c
> @@ -83,6 +83,18 @@ bool can_change_pte_writable(struct vm_area_struct *vma, unsigned long addr,
> return pte_dirty(pte);
> }
>
> +static int mprotect_batch(struct folio *folio, unsigned long addr, pte_t *ptep,
Perhaps it should be called mprotect_folio_pte_batch() to match the existing
madvise_folio_pte_batch()?
> + pte_t pte, int max_nr_ptes)
> +{
> + const fpb_t flags = FPB_IGNORE_DIRTY | FPB_IGNORE_SOFT_DIRTY;
> +
> + if (!folio_test_large(folio) || (max_nr_ptes == 1))
> + return 1;
> +
> + return folio_pte_batch(folio, addr, ptep, pte, max_nr_ptes, flags,
> + NULL, NULL, NULL);
> +}
> +
> static long change_pte_range(struct mmu_gather *tlb,
> struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr,
> unsigned long end, pgprot_t newprot, unsigned long cp_flags)
> @@ -94,6 +106,7 @@ static long change_pte_range(struct mmu_gather *tlb,
> bool prot_numa = cp_flags & MM_CP_PROT_NUMA;
> bool uffd_wp = cp_flags & MM_CP_UFFD_WP;
> bool uffd_wp_resolve = cp_flags & MM_CP_UFFD_WP_RESOLVE;
> + int nr_ptes;
>
> tlb_change_page_size(tlb, PAGE_SIZE);
> pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
> @@ -108,8 +121,10 @@ static long change_pte_range(struct mmu_gather *tlb,
> flush_tlb_batched_pending(vma->vm_mm);
> arch_enter_lazy_mmu_mode();
> do {
> + nr_ptes = 1;
> oldpte = ptep_get(pte);
> if (pte_present(oldpte)) {
> + int max_nr_ptes = (end - addr) >> PAGE_SHIFT;
> pte_t ptent;
>
> /*
> @@ -126,15 +141,18 @@ static long change_pte_range(struct mmu_gather *tlb,
> continue;
>
> folio = vm_normal_folio(vma, addr, oldpte);
> - if (!folio || folio_is_zone_device(folio) ||
> - folio_test_ksm(folio))
> + if (!folio)
> continue;
You modify mprotect_batch() to handle folio == NULL later, perhaps just add that
here, then you don't need to unpick this conditional and can just goto
skip_batch, even for the !folio case.
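
An untested sketch of that idea, keeping the names from the patch (only the
NULL check is new):

static int mprotect_batch(struct folio *folio, unsigned long addr, pte_t *ptep,
		pte_t pte, int max_nr_ptes)
{
	const fpb_t flags = FPB_IGNORE_DIRTY | FPB_IGNORE_SOFT_DIRTY;

	/* No folio is treated like a small folio: a batch of one. */
	if (!folio || !folio_test_large(folio) || max_nr_ptes == 1)
		return 1;

	return folio_pte_batch(folio, addr, ptep, pte, max_nr_ptes, flags,
			       NULL, NULL, NULL);
}

With that, the !folio case in change_pte_range() can also just goto skip_batch
and fall out of the iteration with nr_ptes == 1.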
Thanks,
Ryan
>
> + if (folio_is_zone_device(folio) ||
> + folio_test_ksm(folio))
> + goto skip_batch;
> +
> /* Also skip shared copy-on-write pages */
> if (is_cow_mapping(vma->vm_flags) &&
> (folio_maybe_dma_pinned(folio) ||
> folio_maybe_mapped_shared(folio)))
> - continue;
> + goto skip_batch;
>
> /*
> * While migration can move some dirty pages,
> @@ -143,7 +161,7 @@ static long change_pte_range(struct mmu_gather *tlb,
> */
> if (folio_is_file_lru(folio) &&
> folio_test_dirty(folio))
> - continue;
> + goto skip_batch;
>
> /*
> * Don't mess with PTEs if page is already on the node
> @@ -151,7 +169,7 @@ static long change_pte_range(struct mmu_gather *tlb,
> */
> nid = folio_nid(folio);
> if (target_node == nid)
> - continue;
> + goto skip_batch;
> toptier = node_is_toptier(nid);
>
> /*
> @@ -159,8 +177,12 @@ static long change_pte_range(struct mmu_gather *tlb,
> * balancing is disabled
> */
> if (!(sysctl_numa_balancing_mode & NUMA_BALANCING_NORMAL) &&
> - toptier)
> + toptier) {
> +skip_batch:
> + nr_ptes = mprotect_batch(folio, addr, pte,
> + oldpte, max_nr_ptes);
> continue;
> + }
> if (folio_use_access_time(folio))
> folio_xchg_access_time(folio,
> jiffies_to_msecs(jiffies));
> @@ -280,7 +302,7 @@ static long change_pte_range(struct mmu_gather *tlb,
> pages++;
> }
> }
> - } while (pte++, addr += PAGE_SIZE, addr != end);
> + } while (pte += nr_ptes, addr += nr_ptes * PAGE_SIZE, addr != end);
> arch_leave_lazy_mmu_mode();
> pte_unmap_unlock(pte - 1, ptl);
>
On 21/05/25 5:28 pm, Ryan Roberts wrote:
> On 19/05/2025 08:48, Dev Jain wrote:
>> In case of prot_numa, there are various cases in which we can skip to the
>> next iteration. Since the skip condition is based on the folio and not
>> the PTEs, we can skip a PTE batch.
>>
>> Signed-off-by: Dev Jain <dev.jain@arm.com>
>> ---
>> mm/mprotect.c | 36 +++++++++++++++++++++++++++++-------
>> 1 file changed, 29 insertions(+), 7 deletions(-)
>>
>> diff --git a/mm/mprotect.c b/mm/mprotect.c
>> index 88608d0dc2c2..1ee160ed0b14 100644
>> --- a/mm/mprotect.c
>> +++ b/mm/mprotect.c
>> @@ -83,6 +83,18 @@ bool can_change_pte_writable(struct vm_area_struct *vma, unsigned long addr,
>> return pte_dirty(pte);
>> }
>>
>> +static int mprotect_batch(struct folio *folio, unsigned long addr, pte_t *ptep,
> Perhaps it should be called mprotect_folio_pte_batch() to match the existing
> madvise_folio_pte_batch()?
Thanks, this is better.
>
>> + pte_t pte, int max_nr_ptes)
>> +{
>> + const fpb_t flags = FPB_IGNORE_DIRTY | FPB_IGNORE_SOFT_DIRTY;
>> +
>> + if (!folio_test_large(folio) || (max_nr_ptes == 1))
>> + return 1;
>> +
>> + return folio_pte_batch(folio, addr, ptep, pte, max_nr_ptes, flags,
>> + NULL, NULL, NULL);
>> +}
>> +
>> static long change_pte_range(struct mmu_gather *tlb,
>> struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr,
>> unsigned long end, pgprot_t newprot, unsigned long cp_flags)
>> @@ -94,6 +106,7 @@ static long change_pte_range(struct mmu_gather *tlb,
>> bool prot_numa = cp_flags & MM_CP_PROT_NUMA;
>> bool uffd_wp = cp_flags & MM_CP_UFFD_WP;
>> bool uffd_wp_resolve = cp_flags & MM_CP_UFFD_WP_RESOLVE;
>> + int nr_ptes;
>>
>> tlb_change_page_size(tlb, PAGE_SIZE);
>> pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
>> @@ -108,8 +121,10 @@ static long change_pte_range(struct mmu_gather *tlb,
>> flush_tlb_batched_pending(vma->vm_mm);
>> arch_enter_lazy_mmu_mode();
>> do {
>> + nr_ptes = 1;
>> oldpte = ptep_get(pte);
>> if (pte_present(oldpte)) {
>> + int max_nr_ptes = (end - addr) >> PAGE_SHIFT;
>> pte_t ptent;
>>
>> /*
>> @@ -126,15 +141,18 @@ static long change_pte_range(struct mmu_gather *tlb,
>> continue;
>>
>> folio = vm_normal_folio(vma, addr, oldpte);
>> - if (!folio || folio_is_zone_device(folio) ||
>> - folio_test_ksm(folio))
>> + if (!folio)
>> continue;
> You modify mprotect_batch() to handle folio == NULL later, perhaps just add that
> here, then you don't need to unpick this conditional and can just goto
> skip_batch, even for the !folio case.
I'll check this.
>
> Thanks,
> Ryan
>
>>
>> + if (folio_is_zone_device(folio) ||
>> + folio_test_ksm(folio))
>> + goto skip_batch;
>> +
>> /* Also skip shared copy-on-write pages */
>> if (is_cow_mapping(vma->vm_flags) &&
>> (folio_maybe_dma_pinned(folio) ||
>> folio_maybe_mapped_shared(folio)))
>> - continue;
>> + goto skip_batch;
>>
>> /*
>> * While migration can move some dirty pages,
>> @@ -143,7 +161,7 @@ static long change_pte_range(struct mmu_gather *tlb,
>> */
>> if (folio_is_file_lru(folio) &&
>> folio_test_dirty(folio))
>> - continue;
>> + goto skip_batch;
>>
>> /*
>> * Don't mess with PTEs if page is already on the node
>> @@ -151,7 +169,7 @@ static long change_pte_range(struct mmu_gather *tlb,
>> */
>> nid = folio_nid(folio);
>> if (target_node == nid)
>> - continue;
>> + goto skip_batch;
>> toptier = node_is_toptier(nid);
>>
>> /*
>> @@ -159,8 +177,12 @@ static long change_pte_range(struct mmu_gather *tlb,
>> * balancing is disabled
>> */
>> if (!(sysctl_numa_balancing_mode & NUMA_BALANCING_NORMAL) &&
>> - toptier)
>> + toptier) {
>> +skip_batch:
>> + nr_ptes = mprotect_batch(folio, addr, pte,
>> + oldpte, max_nr_ptes);
>> continue;
>> + }
>> if (folio_use_access_time(folio))
>> folio_xchg_access_time(folio,
>> jiffies_to_msecs(jiffies));
>> @@ -280,7 +302,7 @@ static long change_pte_range(struct mmu_gather *tlb,
>> pages++;
>> }
>> }
>> - } while (pte++, addr += PAGE_SIZE, addr != end);
>> + } while (pte += nr_ptes, addr += nr_ptes * PAGE_SIZE, addr != end);
>> arch_leave_lazy_mmu_mode();
>> pte_unmap_unlock(pte - 1, ptl);
>>
On 19/05/2025 08:48, Dev Jain wrote:
> In case of prot_numa, there are various cases in which we can skip to the
> next iteration. Since the skip condition is based on the folio and not
> the PTEs, we can skip a PTE batch.
>
> Signed-off-by: Dev Jain <dev.jain@arm.com>
LGTM; a lot less churn than before.
Reviewed-by: Ryan Roberts <ryan.roberts@arm.com>
> ---
> mm/mprotect.c | 36 +++++++++++++++++++++++++++++-------
> 1 file changed, 29 insertions(+), 7 deletions(-)
>
> diff --git a/mm/mprotect.c b/mm/mprotect.c
> index 88608d0dc2c2..1ee160ed0b14 100644
> --- a/mm/mprotect.c
> +++ b/mm/mprotect.c
> @@ -83,6 +83,18 @@ bool can_change_pte_writable(struct vm_area_struct *vma, unsigned long addr,
> return pte_dirty(pte);
> }
>
> +static int mprotect_batch(struct folio *folio, unsigned long addr, pte_t *ptep,
> + pte_t pte, int max_nr_ptes)
> +{
> + const fpb_t flags = FPB_IGNORE_DIRTY | FPB_IGNORE_SOFT_DIRTY;
> +
> + if (!folio_test_large(folio) || (max_nr_ptes == 1))
> + return 1;
> +
> + return folio_pte_batch(folio, addr, ptep, pte, max_nr_ptes, flags,
> + NULL, NULL, NULL);
> +}
> +
> static long change_pte_range(struct mmu_gather *tlb,
> struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr,
> unsigned long end, pgprot_t newprot, unsigned long cp_flags)
> @@ -94,6 +106,7 @@ static long change_pte_range(struct mmu_gather *tlb,
> bool prot_numa = cp_flags & MM_CP_PROT_NUMA;
> bool uffd_wp = cp_flags & MM_CP_UFFD_WP;
> bool uffd_wp_resolve = cp_flags & MM_CP_UFFD_WP_RESOLVE;
> + int nr_ptes;
>
> tlb_change_page_size(tlb, PAGE_SIZE);
> pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
> @@ -108,8 +121,10 @@ static long change_pte_range(struct mmu_gather *tlb,
> flush_tlb_batched_pending(vma->vm_mm);
> arch_enter_lazy_mmu_mode();
> do {
> + nr_ptes = 1;
> oldpte = ptep_get(pte);
> if (pte_present(oldpte)) {
> + int max_nr_ptes = (end - addr) >> PAGE_SHIFT;
> pte_t ptent;
>
> /*
> @@ -126,15 +141,18 @@ static long change_pte_range(struct mmu_gather *tlb,
> continue;
>
> folio = vm_normal_folio(vma, addr, oldpte);
> - if (!folio || folio_is_zone_device(folio) ||
> - folio_test_ksm(folio))
> + if (!folio)
> continue;
>
> + if (folio_is_zone_device(folio) ||
> + folio_test_ksm(folio))
> + goto skip_batch;
> +
> /* Also skip shared copy-on-write pages */
> if (is_cow_mapping(vma->vm_flags) &&
> (folio_maybe_dma_pinned(folio) ||
> folio_maybe_mapped_shared(folio)))
> - continue;
> + goto skip_batch;
>
> /*
> * While migration can move some dirty pages,
> @@ -143,7 +161,7 @@ static long change_pte_range(struct mmu_gather *tlb,
> */
> if (folio_is_file_lru(folio) &&
> folio_test_dirty(folio))
> - continue;
> + goto skip_batch;
>
> /*
> * Don't mess with PTEs if page is already on the node
> @@ -151,7 +169,7 @@ static long change_pte_range(struct mmu_gather *tlb,
> */
> nid = folio_nid(folio);
> if (target_node == nid)
> - continue;
> + goto skip_batch;
> toptier = node_is_toptier(nid);
>
> /*
> @@ -159,8 +177,12 @@ static long change_pte_range(struct mmu_gather *tlb,
> * balancing is disabled
> */
> if (!(sysctl_numa_balancing_mode & NUMA_BALANCING_NORMAL) &&
> - toptier)
> + toptier) {
> +skip_batch:
> + nr_ptes = mprotect_batch(folio, addr, pte,
> + oldpte, max_nr_ptes);
> continue;
> + }
> if (folio_use_access_time(folio))
> folio_xchg_access_time(folio,
> jiffies_to_msecs(jiffies));
> @@ -280,7 +302,7 @@ static long change_pte_range(struct mmu_gather *tlb,
> pages++;
> }
> }
> - } while (pte++, addr += PAGE_SIZE, addr != end);
> + } while (pte += nr_ptes, addr += nr_ptes * PAGE_SIZE, addr != end);
> arch_leave_lazy_mmu_mode();
> pte_unmap_unlock(pte - 1, ptl);
>