Currently arm64 does an unconditional TLB flush in mprotect(). This is not
required for some cases, for example, when changing from PROT_NONE to
PROT_READ | PROT_WRITE (a real usecase - glibc malloc does this to emulate
growing into the non-main heaps), and unsetting uffd-wp in a range.
Therefore, implement pte_needs_flush() for arm64, which is already
implemented by some other arches as well.
Running a userspace program changing permissions back and forth between
PROT_NONE and PROT_READ | PROT_WRITE, and measuring the average time taken
for the none->rw transition, I get a reduction from 3.2 microseconds to
2.95 microseconds, giving an 8.5% improvement.
Signed-off-by: Dev Jain <dev.jain@arm.com>
---
mm-selftests pass. Based on 6.17-rc6.
arch/arm64/include/asm/tlbflush.h | 29 +++++++++++++++++++++++++++++
1 file changed, 29 insertions(+)
diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index 18a5dc0c9a54..4a566d589100 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -524,6 +524,35 @@ static inline void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *b
{
__flush_tlb_range_nosync(mm, start, end, PAGE_SIZE, true, 3);
}
+
+static inline bool __pte_flags_need_flush(pteval_t oldval, pteval_t newval)
+{
+	pteval_t diff = oldval ^ newval;
+
+	/* invalid to valid transition requires no flush */
+	if (!(oldval & PTE_VALID) || (oldval & PTE_PRESENT_INVALID))
+		return false;
+
+	/* Transition in the SW bits and access flag requires no flush */
+	diff &= ~(PTE_SWBITS_MASK | PTE_AF);
+
+	if (!diff)
+		return false;
+	return true;
+}
+
+static inline bool pte_needs_flush(pte_t oldpte, pte_t newpte)
+{
+	return __pte_flags_need_flush(pte_val(oldpte), pte_val(newpte));
+}
+#define pte_needs_flush pte_needs_flush
+
+static inline bool huge_pmd_needs_flush(pmd_t oldpmd, pmd_t newpmd)
+{
+	return __pte_flags_need_flush(pmd_val(oldpmd), pmd_val(newpmd));
+}
+#define huge_pmd_needs_flush huge_pmd_needs_flush
+
#endif
#endif
--
2.30.2
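
The test program referenced in the commit message was not posted. A minimal sketch of such a none->rw timing loop might look like the one below; the region size, iteration count and overall structure are illustrative assumptions, not the program actually used for the quoted numbers.

```c
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <time.h>

#define LEN	(4UL << 20)	/* illustrative: 4 MiB region */
#define ITERS	100000

int main(void)
{
	/* Populate the region first so the PTEs actually exist. */
	char *p = mmap(NULL, LEN, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	struct timespec t0, t1;
	double total_ns = 0;

	if (p == MAP_FAILED)
		return 1;
	memset(p, 1, LEN);

	for (int i = 0; i < ITERS; i++) {
		mprotect(p, LEN, PROT_NONE);

		/* Time only the none -> rw transition. */
		clock_gettime(CLOCK_MONOTONIC, &t0);
		mprotect(p, LEN, PROT_READ | PROT_WRITE);
		clock_gettime(CLOCK_MONOTONIC, &t1);

		total_ns += (t1.tv_sec - t0.tv_sec) * 1e9 +
			    (t1.tv_nsec - t0.tv_nsec);
	}

	printf("avg none->rw: %.3f us\n", total_ns / ITERS / 1000.0);
	munmap(p, LEN);
	return 0;
}
```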
On 18/09/2025 11:36, Dev Jain wrote:
> Currently arm64 does an unconditional TLB flush in mprotect(). This is not
> required for some cases, for example, when changing from PROT_NONE to
> PROT_READ | PROT_WRITE (a real usecase - glibc malloc does this to emulate
> growing into the non-main heaps), and unsetting uffd-wp in a range.
>
> Therefore, implement pte_needs_flush() for arm64, which is already
> implemented by some other arches as well.
[...]
> +static inline bool __pte_flags_need_flush(pteval_t oldval, pteval_t newval)

ptdesc_t is the preferred any-level type.

> +{
> +	pteval_t diff = oldval ^ newval;
> +
> +	/* invalid to valid transition requires no flush */
> +	if (!(oldval & PTE_VALID) || (oldval & PTE_PRESENT_INVALID))

Is the PTE_PRESENT_INVALID really required? If the oldval was invalid, there
can't be a TLB entry for it, so no flush is required; that's it, I think?

In fact, PTE_PRESENT_INVALID is overlaid with PTE_NG; it only means
PTE_PRESENT_INVALID when PTE_INVALID=0, so I think this is broken as is. Valid
user-space PTEs always have PTE_NG set, so you will never flush.

> +		return false;
> +
> +	/* Transition in the SW bits and access flag requires no flush */
> +	diff &= ~(PTE_SWBITS_MASK | PTE_AF);

Could you explain your thinking on why PTE_AF changes don't need a flush? I
would have thought if we want to clear the access flag, that would definitely
require a flush? Otherwise how would the MMU know to set the access bit on
next access if it already has a TLB entry?

> +
> +	if (!diff)
> +		return false;
> +	return true;

Perhaps just "return !!diff;" here?

Thanks,
Ryan

> +}
> +
> +static inline bool pte_needs_flush(pte_t oldpte, pte_t newpte)
> +{
> +	return __pte_flags_need_flush(pte_val(oldpte), pte_val(newpte));
> +}
> +#define pte_needs_flush pte_needs_flush
> +
> +static inline bool huge_pmd_needs_flush(pmd_t oldpmd, pmd_t newpmd)
> +{
> +	return __pte_flags_need_flush(pmd_val(oldpmd), pmd_val(newpmd));
> +}
> +#define huge_pmd_needs_flush huge_pmd_needs_flush
> +
> #endif
>
> #endif
On 18/09/25 6:19 pm, Ryan Roberts wrote:
> On 18/09/2025 11:36, Dev Jain wrote:
[...]
>> +static inline bool __pte_flags_need_flush(pteval_t oldval, pteval_t newval)
>
> ptdesc_t is the preferred any-level type.

I keep forgetting this :)

>> +	/* invalid to valid transition requires no flush */
>> +	if (!(oldval & PTE_VALID) || (oldval & PTE_PRESENT_INVALID))
>
> Is the PTE_PRESENT_INVALID really required? If the oldval was invalid, there
> can't be a TLB entry for it, so no flush is required; that's it, I think?
>
> In fact, PTE_PRESENT_INVALID is overlaid with PTE_NG; it only means
> PTE_PRESENT_INVALID when PTE_INVALID=0, so I think this is broken as is. Valid
> user-space PTEs always have PTE_NG set, so you will never flush.

Not sure I get you. The condition as I wrote means

1. If PTE_VALID is not set, then do not flush.
2. If PTE_VALID is set, *and* PTE_PRESENT_INVALID is set, then do not flush.

So when you say "it only means PTE_PRESENT_INVALID when PTE_INVALID=0", the
second condition meets that.

>> +	/* Transition in the SW bits and access flag requires no flush */
>> +	diff &= ~(PTE_SWBITS_MASK | PTE_AF);
>
> Could you explain your thinking on why PTE_AF changes don't need a flush? I
> would have thought if we want to clear the access flag, that would definitely
> require a flush? Otherwise how would the MMU know to set the access bit on
> next access if it already has a TLB entry?

You are correct, but AFAIK losing access bit information is not fatal, it will
only mess with page aging. So potentially reclaim will lose some accuracy.

>> +	if (!diff)
>> +		return false;
>> +	return true;
>
> Perhaps just "return !!diff;" here?

Sure.

> Thanks,
> Ryan
[...]
On 18/09/2025 16:04, Dev Jain wrote:
> On 18/09/25 6:19 pm, Ryan Roberts wrote:
>> On 18/09/2025 11:36, Dev Jain wrote:
[...]
>>>> +	/* invalid to valid transition requires no flush */
>>>> +	if (!(oldval & PTE_VALID) || (oldval & PTE_PRESENT_INVALID))
>>>
>>> Is the PTE_PRESENT_INVALID really required? If the oldval was invalid, there
>>> can't be a TLB entry for it, so no flush is required; that's it, I think?
>>>
>>> In fact, PTE_PRESENT_INVALID is overlaid with PTE_NG; it only means
>>> PTE_PRESENT_INVALID when PTE_INVALID=0, so I think this is broken as is.
>>> Valid user-space PTEs always have PTE_NG set, so you will never flush.
>>
>> Not sure I get you. The condition as I wrote means
>>
>> 1. If PTE_VALID is not set, then do not flush.
>> 2. If PTE_VALID is set, *and* PTE_PRESENT_INVALID is set, then do not flush.
>>
>> So when you say "it only means PTE_PRESENT_INVALID when PTE_INVALID=0", the
>> second condition meets that.

Sorry I meant PTE_VALID=0. Your second condition is wrong; PTE_PRESENT_INVALID
is only defined when PTE_VALID=0.

Think about it; the PTE is valid from the HW's perspective if and only if
PTE_VALID=1. So that's the only condition that needs to be checked.

See this comment in the code for more info:

/*
 * PTE_PRESENT_INVALID=1 & PTE_VALID=0 indicates that the pte's fields should be
 * interpreted according to the HW layout by SW but any attempted HW access to
 * the address will result in a fault. pte_present() returns true.
 */
#define PTE_PRESENT_INVALID	(PTE_NG)	/* only when !PTE_VALID */

>>>> +	/* Transition in the SW bits and access flag requires no flush */
>>>> +	diff &= ~(PTE_SWBITS_MASK | PTE_AF);
>>>
>>> Could you explain your thinking on why PTE_AF changes don't need a flush? I
>>> would have thought if we want to clear the access flag, that would definitely
>>> require a flush? Otherwise how would the MMU know to set the access bit on
>>> next access if it already has a TLB entry?
>>
>> You are correct, but AFAIK losing access bit information is not fatal, it
>> will only mess with page aging. So potentially reclaim will lose some
>> accuracy.

Sure, but it means that your change has a cost; reduced page aging accuracy.
That part of the change should at least be separated into its own commit and
probably backed up with performance numbers. Otherwise, I think we should
retain the original behaviour.

[...]
On 18/09/25 8:52 pm, Ryan Roberts wrote:
> On 18/09/2025 16:04, Dev Jain wrote:
>> On 18/09/25 6:19 pm, Ryan Roberts wrote:
[...]
> Sorry I meant PTE_VALID=0. Your second condition is wrong; PTE_PRESENT_INVALID
> is only defined when PTE_VALID=0.
>
> Think about it; the PTE is valid from the HW's perspective if and only if
> PTE_VALID=1. So that's the only condition that needs to be checked.
>
> See this comment in the code for more info:
>
> /*
>  * PTE_PRESENT_INVALID=1 & PTE_VALID=0 indicates that the pte's fields should be
>  * interpreted according to the HW layout by SW but any attempted HW access to
>  * the address will result in a fault. pte_present() returns true.
>  */
> #define PTE_PRESENT_INVALID	(PTE_NG)	/* only when !PTE_VALID */

Got it, thanks.

[...]
>> You are correct, but AFAIK losing access bit information is not fatal, it
>> will only mess with page aging. So potentially reclaim will lose some
>> accuracy.
>
> Sure, but it means that your change has a cost; reduced page aging accuracy.
> That part of the change should at least be separated into its own commit and
> probably backed up with performance numbers. Otherwise, I think we should
> retain the original behaviour.

You are right, I checked other callsites clearing the access bit and they do
some sort of TLB maintenance. I'll drop this.

[...]
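
Putting the review feedback together, a respin would plausibly look something like the sketch below: ptdesc_t instead of pteval_t, only the PTE_VALID check (PTE_PRESENT_INVALID is meaningful only when PTE_VALID=0), access-flag changes still flushed, and the !!diff simplification. This is an illustration of the agreed direction, not the actual v2.

```c
/*
 * Sketch of a possible respin incorporating the review comments above.
 * Assumes the existing arm64 definitions of PTE_VALID, PTE_SWBITS_MASK
 * and ptdesc_t; the real v2 may differ.
 */
static inline bool __pte_flags_need_flush(ptdesc_t oldval, ptdesc_t newval)
{
	ptdesc_t diff = oldval ^ newval;

	/* If the old entry was invalid, the TLB cannot be caching it. */
	if (!(oldval & PTE_VALID))
		return false;

	/* Changes confined to the SW bits need no flush; AF changes now do. */
	diff &= ~PTE_SWBITS_MASK;

	return !!diff;
}
```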
On 2025/9/18 18:36, Dev Jain wrote:
> Currently arm64 does an unconditional TLB flush in mprotect(). This is not
> required for some cases, for example, when changing from PROT_NONE to
> PROT_READ | PROT_WRITE (a real usecase - glibc malloc does this to emulate
> growing into the non-main heaps), and unsetting uffd-wp in a range.
>
> Running a userspace program changing permissions back and forth between
> PROT_NONE and PROT_READ | PROT_WRITE, and measuring the average time taken
> for the none->rw transition, I get a reduction from 3.2 microseconds to
> 2.95 microseconds, giving an 8.5% improvement.
>

Hi Dev,

[...]
> +static inline bool __pte_flags_need_flush(pteval_t oldval, pteval_t newval)
> +{
> +	pteval_t diff = oldval ^ newval;
> +
> +	/* invalid to valid transition requires no flush */
> +	if (!(oldval & PTE_VALID) || (oldval & PTE_PRESENT_INVALID))
> +		return false;
> +
> +	/* Transition in the SW bits and access flag requires no flush */
> +	diff &= ~(PTE_SWBITS_MASK | PTE_AF);
> +
> +	if (!diff)
> +		return false;
> +	return true;
> +}
> +

The LibMicro mprotect test case shows a 3~5% improvement with different sizes
on an older kernel (we did this before, but only checked PTE_VALID and
PTE_PROT_NONE in our kernel); it seems that nothing changes the other SW bits
via mprotect?

Anyway, Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com>

[...]
On 18/09/25 5:52 pm, Kefeng Wang wrote:
> On 2025/9/18 18:36, Dev Jain wrote:
[...]
> The LibMicro mprotect test case shows a 3~5% improvement with different sizes
> on an older kernel (we did this before, but only checked PTE_VALID and
> PTE_PROT_NONE in our kernel); it seems that nothing changes the other SW bits
> via mprotect?

Not mprotect, but when unsetting uffd-wp, we do mwriteprotect_range ->
uffd_wp_range -> change_protection() with MM_CP_UFFD_WP_RESOLVE set.

> Anyway, Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com>

Thanks!

[...]
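
For context on where the hook takes effect: when an architecture does not define pte_needs_flush(), the generic code falls back to always flushing, and the change_protection() path (used by both mprotect and the uffd-wp resolve case above) consults it per PTE. Roughly, simplified from asm-generic/tlb.h and mm/mprotect.c; the exact code differs across kernel versions:

```c
/* asm-generic fallback (simplified): without an arch override, always flush. */
#ifndef pte_needs_flush
static inline bool pte_needs_flush(pte_t oldpte, pte_t newpte)
{
	return true;
}
#endif

/* Caller in change_pte_range() (simplified): only flush when the arch says so. */
ptent = pte_modify(oldpte, newprot);
/* ... uffd-wp and dirty/write handling elided ... */
ptep_modify_prot_commit(vma, addr, pte, oldpte, ptent);
if (pte_needs_flush(oldpte, ptent))
	tlb_flush_pte_range(tlb, addr, PAGE_SIZE);
```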