Flushing a page from the tlb is just a special case of flushing a range.
So let's rework flush_tlb_page() so that it simply wraps
___flush_tlb_range(). While at it, let's also update the API to take the
same flags that we use when flushing a range. This allows us to delete
all the ugly "_nosync", "_local" and "_nonotify" variants.
Thanks to constant folding, all of the complex looping and tlbi-by-range
options get eliminated so that the generated code for flush_tlb_page()
looks very similar to the previous version.
Reviewed-by: Linu Cherian <linu.cherian@arm.com>
Signed-off-by: Ryan Roberts <ryan.roberts@arm.com>
---
arch/arm64/include/asm/pgtable.h | 6 +--
arch/arm64/include/asm/tlbflush.h | 81 ++++++++++---------------------
arch/arm64/mm/fault.c | 2 +-
3 files changed, 29 insertions(+), 60 deletions(-)
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 736747fbc843..b96a7ca465a1 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -136,10 +136,10 @@ static inline void arch_leave_lazy_mmu_mode(void)
* entries exist.
*/
#define flush_tlb_fix_spurious_fault(vma, address, ptep) \
- local_flush_tlb_page_nonotify(vma, address)
+ __flush_tlb_page(vma, address, TLBF_NOBROADCAST | TLBF_NONOTIFY)
#define flush_tlb_fix_spurious_fault_pmd(vma, address, pmdp) \
- local_flush_tlb_page_nonotify(vma, address)
+ __flush_tlb_page(vma, address, TLBF_NOBROADCAST | TLBF_NONOTIFY)
/*
* ZERO_PAGE is a global shared page that is always zero: used
@@ -1351,7 +1351,7 @@ static inline int __ptep_clear_flush_young(struct vm_area_struct *vma,
* context-switch, which provides a DSB to complete the TLB
* invalidation.
*/
- flush_tlb_page_nosync(vma, address);
+ __flush_tlb_page(vma, address, TLBF_NOSYNC);
}
return young;
diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index f03831cd8719..88f46760e2c2 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -255,10 +255,7 @@ static inline void __tlbi_level(tlbi_op op, u64 addr, u32 level)
* unmapping pages from vmalloc/io space.
*
* flush_tlb_page(vma, addr)
- * Invalidate a single user mapping for address 'addr' in the
- * address space corresponding to 'vma->mm'. Note that this
- * operation only invalidates a single, last-level page-table
- * entry and therefore does not affect any walk-caches.
+ * Equivalent to __flush_tlb_page(..., flags=TLBF_NONE)
*
*
* Next, we have some undocumented invalidation routines that you probably
@@ -286,13 +283,14 @@ static inline void __tlbi_level(tlbi_op op, u64 addr, u32 level)
* TLBF_NOSYNC (don't issue trailing dsb) and TLBF_NOBROADCAST
* (only perform the invalidation for the local cpu).
*
- * local_flush_tlb_page(vma, addr)
- * Local variant of flush_tlb_page(). Stale TLB entries may
- * remain in remote CPUs.
- *
- * local_flush_tlb_page_nonotify(vma, addr)
- * Same as local_flush_tlb_page() except MMU notifier will not be
- * called.
+ * __flush_tlb_page(vma, addr, flags)
+ * Invalidate a single user mapping for address 'addr' in the
+ * address space corresponding to 'vma->mm'. Note that this
+ * operation only invalidates a single, last-level page-table entry
+ * and therefore does not affect any walk-caches. flags may contain
+ * any combination of TLBF_NONOTIFY (don't call mmu notifiers),
+ * TLBF_NOSYNC (don't issue trailing dsb) and TLBF_NOBROADCAST
+ * (only perform the invalidation for the local cpu).
*
* Finally, take a look at asm/tlb.h to see how tlb_flush() is implemented
* on top of these routines, since that is our interface to the mmu_gather
@@ -326,51 +324,6 @@ static inline void flush_tlb_mm(struct mm_struct *mm)
mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL);
}
-static inline void __local_flush_tlb_page_nonotify_nosync(struct mm_struct *mm,
- unsigned long uaddr)
-{
- dsb(nshst);
- __tlbi_level_asid(vale1, uaddr, TLBI_TTL_UNKNOWN, ASID(mm));
-}
-
-static inline void local_flush_tlb_page_nonotify(struct vm_area_struct *vma,
- unsigned long uaddr)
-{
- __local_flush_tlb_page_nonotify_nosync(vma->vm_mm, uaddr);
- dsb(nsh);
-}
-
-static inline void local_flush_tlb_page(struct vm_area_struct *vma,
- unsigned long uaddr)
-{
- __local_flush_tlb_page_nonotify_nosync(vma->vm_mm, uaddr);
- mmu_notifier_arch_invalidate_secondary_tlbs(vma->vm_mm, uaddr & PAGE_MASK,
- (uaddr & PAGE_MASK) + PAGE_SIZE);
- dsb(nsh);
-}
-
-static inline void __flush_tlb_page_nosync(struct mm_struct *mm,
- unsigned long uaddr)
-{
- dsb(ishst);
- __tlbi_level_asid(vale1is, uaddr, TLBI_TTL_UNKNOWN, ASID(mm));
- mmu_notifier_arch_invalidate_secondary_tlbs(mm, uaddr & PAGE_MASK,
- (uaddr & PAGE_MASK) + PAGE_SIZE);
-}
-
-static inline void flush_tlb_page_nosync(struct vm_area_struct *vma,
- unsigned long uaddr)
-{
- return __flush_tlb_page_nosync(vma->vm_mm, uaddr);
-}
-
-static inline void flush_tlb_page(struct vm_area_struct *vma,
- unsigned long uaddr)
-{
- flush_tlb_page_nosync(vma, uaddr);
- dsb(ish);
-}
-
static inline bool arch_tlbbatch_should_defer(struct mm_struct *mm)
{
/*
@@ -633,6 +586,22 @@ static inline void flush_tlb_range(struct vm_area_struct *vma,
__flush_tlb_range(vma, start, end, PAGE_SIZE, TLBI_TTL_UNKNOWN, TLBF_NONE);
}
+static inline void __flush_tlb_page(struct vm_area_struct *vma,
+ unsigned long uaddr, tlbf_t flags)
+{
+ unsigned long start = round_down(uaddr, PAGE_SIZE);
+ unsigned long end = start + PAGE_SIZE;
+
+ ___flush_tlb_range(vma, start, end, PAGE_SIZE, TLBI_TTL_UNKNOWN,
+ TLBF_NOWALKCACHE | flags);
+}
+
+static inline void flush_tlb_page(struct vm_area_struct *vma,
+ unsigned long uaddr)
+{
+ __flush_tlb_page(vma, uaddr, TLBF_NONE);
+}
+
static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
const unsigned long stride = PAGE_SIZE;
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index be9dab2c7d6a..f91aa686f142 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -239,7 +239,7 @@ int __ptep_set_access_flags(struct vm_area_struct *vma,
* flush_tlb_fix_spurious_fault().
*/
if (dirty)
- local_flush_tlb_page(vma, address);
+ __flush_tlb_page(vma, address, TLBF_NOBROADCAST);
return 1;
}
--
2.43.0
On Mon, 19 Jan 2026 17:21:59 +0000
Ryan Roberts <ryan.roberts@arm.com> wrote:
> Flushing a page from the tlb is just a special case of flushing a range.
> So let's rework flush_tlb_page() so that it simply wraps
> ___flush_tlb_range(). While at it, let's also update the API to take the
> same flags that we use when flushing a range. This allows us to delete
> all the ugly "_nosync", "_local" and "_nonotify" variants.
>
> Thanks to constant folding, all of the complex looping and tlbi-by-range
> options get eliminated so that the generated code for flush_tlb_page()
> looks very similar to the previous version.
>
> Reviewed-by: Linu Cherian <linu.cherian@arm.com>
> Signed-off-by: Ryan Roberts <ryan.roberts@arm.com>
So this does include the use of the TLBF_NOBROADCAST case from the previous
patch, but only whilst (I think) slightly changing behavior.
Gah. I'm regretting looking at this series. The original code is really hard to
read :) Rather you than me to fix it!
> static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end)
> {
> const unsigned long stride = PAGE_SIZE;
> diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
> index be9dab2c7d6a..f91aa686f142 100644
> --- a/arch/arm64/mm/fault.c
> +++ b/arch/arm64/mm/fault.c
> @@ -239,7 +239,7 @@ int __ptep_set_access_flags(struct vm_area_struct *vma,
> * flush_tlb_fix_spurious_fault().
> */
> if (dirty)
> - local_flush_tlb_page(vma, address);
> + __flush_tlb_page(vma, address, TLBF_NOBROADCAST);
Ultimately I think this previously did __tlbi(vale1) and now does __tlbi(vae1).
The original call was to __local_flush_tlb_page_nonotify_nosync().
I'd like to see that sort of change called out and explained in the patch description.
It's a broader scoped flush so not a bug, but still a functional change.
> return 1;
> }
>
On 27/01/2026 12:59, Jonathan Cameron wrote:
> On Mon, 19 Jan 2026 17:21:59 +0000
> Ryan Roberts <ryan.roberts@arm.com> wrote:
>
>> Flushing a page from the tlb is just a special case of flushing a range.
>> So let's rework flush_tlb_page() so that it simply wraps
>> ___flush_tlb_range(). While at it, let's also update the API to take the
>> same flags that we use when flushing a range. This allows us to delete
>> all the ugly "_nosync", "_local" and "_nonotify" variants.
>>
>> Thanks to constant folding, all of the complex looping and tlbi-by-range
>> options get eliminated so that the generated code for flush_tlb_page()
>> looks very similar to the previous version.
>>
>> Reviewed-by: Linu Cherian <linu.cherian@arm.com>
>> Signed-off-by: Ryan Roberts <ryan.roberts@arm.com>
>
> So this does include the use of the
>
> Case TLBF_NOBROADCAST from previous patch, but only whilst (I think)
> slightly changing behavior.
>
> Gah. I'm regretting looking at this series. The original code is really hard to
> read :) Rather you than me to fix it!
>
>> static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end)
>> {
>> const unsigned long stride = PAGE_SIZE;
>> diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
>> index be9dab2c7d6a..f91aa686f142 100644
>> --- a/arch/arm64/mm/fault.c
>> +++ b/arch/arm64/mm/fault.c
>> @@ -239,7 +239,7 @@ int __ptep_set_access_flags(struct vm_area_struct *vma,
>> * flush_tlb_fix_spurious_fault().
>> */
>> if (dirty)
>> - local_flush_tlb_page(vma, address);
>> + __flush_tlb_page(vma, address, TLBF_NOBROADCAST);
>
> Ultimately I think this previously did __tlbi(vale1) and now does __tlbi(vae1)
> Original call was to __local_flush_tlb_page_notify_nosync()
No, not quite; the new code is still doing __tlbi(vale1).
The trick is that the __flush_tlb_page() wrapper unconditionally adds
TLBF_NOWALKCACHE to the flags. Since this API is operating on a *page* it is
implicit that we should only be evicting a leaf entry (as per the old
implementation).
You'll see I've also updated the documentation to make that clear in tlbflush.h.
Now that you have raised it, I can see how it might be confusing though, since
__flush_tlb_page() does not explicitly have TLBF_NOWALKCACHE. We could require
all __flush_tlb_page() callers to explicitly pass TLBF_NOWALKCACHE if you think
that helps? It would still be implicit for flush_tlb_page() (the generic kernel
API) though.
>
> I'd like to see that sort of change called out and explained in the patch description.
> It's a broader scoped flush so not a bug, but still a functional change.
As I say, the emitted code is the same. It's my new API that's the problem here...
Thanks,
Ryan
>
>> return 1;
>> }
>>
>
On Tue, 27 Jan 2026 14:03:43 +0000
Ryan Roberts <ryan.roberts@arm.com> wrote:
> On 27/01/2026 12:59, Jonathan Cameron wrote:
> > On Mon, 19 Jan 2026 17:21:59 +0000
> > Ryan Roberts <ryan.roberts@arm.com> wrote:
> >
> >> Flushing a page from the tlb is just a special case of flushing a range.
> >> So let's rework flush_tlb_page() so that it simply wraps
> >> ___flush_tlb_range(). While at it, let's also update the API to take the
> >> same flags that we use when flushing a range. This allows us to delete
> >> all the ugly "_nosync", "_local" and "_nonotify" variants.
> >>
> >> Thanks to constant folding, all of the complex looping and tlbi-by-range
> >> options get eliminated so that the generated code for flush_tlb_page()
> >> looks very similar to the previous version.
> >>
> >> Reviewed-by: Linu Cherian <linu.cherian@arm.com>
> >> Signed-off-by: Ryan Roberts <ryan.roberts@arm.com>
> >
> > So this does include the use of the
> >
> > Case TLBF_NOBROADCAST from previous patch, but only whilst (I think)
> > slightly changing behavior.
> >
> > Gah. I'm regretting looking at this series. The original code is really hard to
> > read :) Rather you than me to fix it!
> >
> >> static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end)
> >> {
> >> const unsigned long stride = PAGE_SIZE;
> >> diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
> >> index be9dab2c7d6a..f91aa686f142 100644
> >> --- a/arch/arm64/mm/fault.c
> >> +++ b/arch/arm64/mm/fault.c
> >> @@ -239,7 +239,7 @@ int __ptep_set_access_flags(struct vm_area_struct *vma,
> >> * flush_tlb_fix_spurious_fault().
> >> */
> >> if (dirty)
> >> - local_flush_tlb_page(vma, address);
> >> + __flush_tlb_page(vma, address, TLBF_NOBROADCAST);
> >
> > Ultimately I think this previously did __tlbi(vale1) and now does __tlbi(vae1)
> > Original call was to __local_flush_tlb_page_notify_nosync()
>
> No not quite; the new code is still doing __tlbi(vale1).
>
> The trick is that the __flush_tlb_page() wrapper unconditionally adds
> TLBF_NOWALKCACHE to the flags. Since this API is operating on a *page* it is
> implicit that we should only be evicting a leaf entry (as per the old
> implementation).
>
> You'll see I've also updated the documentation to make that clear in tlbflush.h.
>
> Now that you have raised it, I can see how it might be confusing though, since
> __flush_tlb_page() does not explicitly have TLBF_NOWALKCACHE. We could require
> all __flush_tlb_page() callers to explicitly pass TLBF_NOWALKCACHE if you think
> that helps? It would still be implicit for flush_tlb_page() (the generic kernel
> API) though.
Ah. I'd indeed missed that tweaking of the flags.
Not sure. You probably have a better feel for this API than I do, and for the
likely expectations of its users.
J
>
> >
> > I'd like to see that sort of change called out and explained in the patch description.
> > It's a broader scoped flush so not a bug, but still a functional change.
>
> As I say, the emitted code is the same. It's my new API that's the problem here...
>
> Thanks,
> Ryan
>
> >
> >> return 1;
> >> }
> >>
> >
>
>
© 2016 - 2026 Red Hat, Inc.