[PATCH v2 12/13] arm64: mm: Wrap flush_tlb_page() around ___flush_tlb_range()

Ryan Roberts posted 13 patches 2 weeks, 6 days ago
[PATCH v2 12/13] arm64: mm: Wrap flush_tlb_page() around ___flush_tlb_range()
Posted by Ryan Roberts 2 weeks, 6 days ago
Flushing a page from the tlb is just a special case of flushing a range.
So let's rework flush_tlb_page() so that it simply wraps
___flush_tlb_range(). While at it, let's also update the API to take the
same flags that we use when flushing a range. This allows us to delete
all the ugly "_nosync", "_local" and "_nonotify" variants.

Thanks to constant folding, all of the complex looping and tlbi-by-range
options get eliminated so that the generated code for flush_tlb_page()
looks very similar to the previous version.

Reviewed-by: Linu Cherian <linu.cherian@arm.com>
Signed-off-by: Ryan Roberts <ryan.roberts@arm.com>
---
 arch/arm64/include/asm/pgtable.h  |  6 +--
 arch/arm64/include/asm/tlbflush.h | 81 ++++++++++---------------------
 arch/arm64/mm/fault.c             |  2 +-
 3 files changed, 29 insertions(+), 60 deletions(-)

diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 736747fbc843..b96a7ca465a1 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -136,10 +136,10 @@ static inline void arch_leave_lazy_mmu_mode(void)
  * entries exist.
  */
 #define flush_tlb_fix_spurious_fault(vma, address, ptep)	\
-	local_flush_tlb_page_nonotify(vma, address)
+	__flush_tlb_page(vma, address, TLBF_NOBROADCAST | TLBF_NONOTIFY)
 
 #define flush_tlb_fix_spurious_fault_pmd(vma, address, pmdp)	\
-	local_flush_tlb_page_nonotify(vma, address)
+	__flush_tlb_page(vma, address, TLBF_NOBROADCAST | TLBF_NONOTIFY)
 
 /*
  * ZERO_PAGE is a global shared page that is always zero: used
@@ -1351,7 +1351,7 @@ static inline int __ptep_clear_flush_young(struct vm_area_struct *vma,
 		 * context-switch, which provides a DSB to complete the TLB
 		 * invalidation.
 		 */
-		flush_tlb_page_nosync(vma, address);
+		__flush_tlb_page(vma, address, TLBF_NOSYNC);
 	}
 
 	return young;
diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index f03831cd8719..88f46760e2c2 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -255,10 +255,7 @@ static inline void __tlbi_level(tlbi_op op, u64 addr, u32 level)
  *		unmapping pages from vmalloc/io space.
  *
  *	flush_tlb_page(vma, addr)
- *		Invalidate a single user mapping for address 'addr' in the
- *		address space corresponding to 'vma->mm'.  Note that this
- *		operation only invalidates a single, last-level page-table
- *		entry and therefore does not affect any walk-caches.
+ *		Equivalent to __flush_tlb_page(..., flags=TLBF_NONE)
  *
  *
  *	Next, we have some undocumented invalidation routines that you probably
@@ -286,13 +283,14 @@ static inline void __tlbi_level(tlbi_op op, u64 addr, u32 level)
  *		TLBF_NOSYNC (don't issue trailing dsb) and TLBF_NOBROADCAST
  *		(only perform the invalidation for the local cpu).
  *
- *	local_flush_tlb_page(vma, addr)
- *		Local variant of flush_tlb_page().  Stale TLB entries may
- *		remain in remote CPUs.
- *
- *	local_flush_tlb_page_nonotify(vma, addr)
- *		Same as local_flush_tlb_page() except MMU notifier will not be
- *		called.
+ *	__flush_tlb_page(vma, addr, flags)
+ *		Invalidate a single user mapping for address 'addr' in the
+ *		address space corresponding to 'vma->mm'.  Note that this
+ *		operation only invalidates a single, last-level page-table entry
+ *		and therefore does not affect any walk-caches. flags may contain
+ *		any combination of TLBF_NONOTIFY (don't call mmu notifiers),
+ *		TLBF_NOSYNC (don't issue trailing dsb) and TLBF_NOBROADCAST
+ *		(only perform the invalidation for the local cpu).
  *
  *	Finally, take a look at asm/tlb.h to see how tlb_flush() is implemented
  *	on top of these routines, since that is our interface to the mmu_gather
@@ -326,51 +324,6 @@ static inline void flush_tlb_mm(struct mm_struct *mm)
 	mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL);
 }
 
-static inline void __local_flush_tlb_page_nonotify_nosync(struct mm_struct *mm,
-							  unsigned long uaddr)
-{
-	dsb(nshst);
-	__tlbi_level_asid(vale1, uaddr, TLBI_TTL_UNKNOWN, ASID(mm));
-}
-
-static inline void local_flush_tlb_page_nonotify(struct vm_area_struct *vma,
-						 unsigned long uaddr)
-{
-	__local_flush_tlb_page_nonotify_nosync(vma->vm_mm, uaddr);
-	dsb(nsh);
-}
-
-static inline void local_flush_tlb_page(struct vm_area_struct *vma,
-					unsigned long uaddr)
-{
-	__local_flush_tlb_page_nonotify_nosync(vma->vm_mm, uaddr);
-	mmu_notifier_arch_invalidate_secondary_tlbs(vma->vm_mm, uaddr & PAGE_MASK,
-						(uaddr & PAGE_MASK) + PAGE_SIZE);
-	dsb(nsh);
-}
-
-static inline void __flush_tlb_page_nosync(struct mm_struct *mm,
-					   unsigned long uaddr)
-{
-	dsb(ishst);
-	__tlbi_level_asid(vale1is, uaddr, TLBI_TTL_UNKNOWN, ASID(mm));
-	mmu_notifier_arch_invalidate_secondary_tlbs(mm, uaddr & PAGE_MASK,
-						(uaddr & PAGE_MASK) + PAGE_SIZE);
-}
-
-static inline void flush_tlb_page_nosync(struct vm_area_struct *vma,
-					 unsigned long uaddr)
-{
-	return __flush_tlb_page_nosync(vma->vm_mm, uaddr);
-}
-
-static inline void flush_tlb_page(struct vm_area_struct *vma,
-				  unsigned long uaddr)
-{
-	flush_tlb_page_nosync(vma, uaddr);
-	dsb(ish);
-}
-
 static inline bool arch_tlbbatch_should_defer(struct mm_struct *mm)
 {
 	/*
@@ -633,6 +586,22 @@ static inline void flush_tlb_range(struct vm_area_struct *vma,
 	__flush_tlb_range(vma, start, end, PAGE_SIZE, TLBI_TTL_UNKNOWN, TLBF_NONE);
 }
 
+static inline void __flush_tlb_page(struct vm_area_struct *vma,
+				    unsigned long uaddr, tlbf_t flags)
+{
+	unsigned long start = round_down(uaddr, PAGE_SIZE);
+	unsigned long end = start + PAGE_SIZE;
+
+	___flush_tlb_range(vma, start, end, PAGE_SIZE, TLBI_TTL_UNKNOWN,
+			   TLBF_NOWALKCACHE | flags);
+}
+
+static inline void flush_tlb_page(struct vm_area_struct *vma,
+				  unsigned long uaddr)
+{
+	__flush_tlb_page(vma, uaddr, TLBF_NONE);
+}
+
 static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end)
 {
 	const unsigned long stride = PAGE_SIZE;
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index be9dab2c7d6a..f91aa686f142 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -239,7 +239,7 @@ int __ptep_set_access_flags(struct vm_area_struct *vma,
 	 * flush_tlb_fix_spurious_fault().
 	 */
 	if (dirty)
-		local_flush_tlb_page(vma, address);
+		__flush_tlb_page(vma, address, TLBF_NOBROADCAST);
 	return 1;
 }
 
-- 
2.43.0
Re: [PATCH v2 12/13] arm64: mm: Wrap flush_tlb_page() around ___flush_tlb_range()
Posted by Jonathan Cameron 1 week, 5 days ago
On Mon, 19 Jan 2026 17:21:59 +0000
Ryan Roberts <ryan.roberts@arm.com> wrote:

> Flushing a page from the tlb is just a special case of flushing a range.
> So let's rework flush_tlb_page() so that it simply wraps
> ___flush_tlb_range(). While at it, let's also update the API to take the
> same flags that we use when flushing a range. This allows us to delete
> all the ugly "_nosync", "_local" and "_nonotify" variants.
> 
> Thanks to constant folding, all of the complex looping and tlbi-by-range
> options get eliminated so that the generated code for flush_tlb_page()
> looks very similar to the previous version.
> 
> Reviewed-by: Linu Cherian <linu.cherian@arm.com>
> Signed-off-by: Ryan Roberts <ryan.roberts@arm.com>

So this does include the use of the TLBF_NOBROADCAST case from the
previous patch, but only whilst (I think) slightly changing behavior.

Gah.  I'm regretting looking at this series. The original code is really hard to
read :)  Rather you than me to fix it!

>  static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end)
>  {
>  	const unsigned long stride = PAGE_SIZE;
> diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
> index be9dab2c7d6a..f91aa686f142 100644
> --- a/arch/arm64/mm/fault.c
> +++ b/arch/arm64/mm/fault.c
> @@ -239,7 +239,7 @@ int __ptep_set_access_flags(struct vm_area_struct *vma,
>  	 * flush_tlb_fix_spurious_fault().
>  	 */
>  	if (dirty)
> -		local_flush_tlb_page(vma, address);
> +		__flush_tlb_page(vma, address, TLBF_NOBROADCAST);

Ultimately I think this previously did __tlbi(vale1) and now does __tlbi(vae1)
Original call was to __local_flush_tlb_page_nonotify_nosync()

I'd like to see that sort of change called out and explained in the patch description.
It's a broader scoped flush so not a bug, but still a functional change.

>  	return 1;
>  }
>
Re: [PATCH v2 12/13] arm64: mm: Wrap flush_tlb_page() around ___flush_tlb_range()
Posted by Ryan Roberts 1 week, 5 days ago
On 27/01/2026 12:59, Jonathan Cameron wrote:
> On Mon, 19 Jan 2026 17:21:59 +0000
> Ryan Roberts <ryan.roberts@arm.com> wrote:
> 
>> Flushing a page from the tlb is just a special case of flushing a range.
>> So let's rework flush_tlb_page() so that it simply wraps
>> ___flush_tlb_range(). While at it, let's also update the API to take the
>> same flags that we use when flushing a range. This allows us to delete
>> all the ugly "_nosync", "_local" and "_nonotify" variants.
>>
>> Thanks to constant folding, all of the complex looping and tlbi-by-range
>> options get eliminated so that the generated code for flush_tlb_page()
>> looks very similar to the previous version.
>>
>> Reviewed-by: Linu Cherian <linu.cherian@arm.com>
>> Signed-off-by: Ryan Roberts <ryan.roberts@arm.com>
> 
> So this does include the use of the TLBF_NOBROADCAST case from the
> previous patch, but only whilst (I think) slightly changing behavior.
> 
> Gah.  I'm regretting looking at this series. The original code is really hard to
> read :)  Rather you than me to fix it!
> 
>>  static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end)
>>  {
>>  	const unsigned long stride = PAGE_SIZE;
>> diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
>> index be9dab2c7d6a..f91aa686f142 100644
>> --- a/arch/arm64/mm/fault.c
>> +++ b/arch/arm64/mm/fault.c
>> @@ -239,7 +239,7 @@ int __ptep_set_access_flags(struct vm_area_struct *vma,
>>  	 * flush_tlb_fix_spurious_fault().
>>  	 */
>>  	if (dirty)
>> -		local_flush_tlb_page(vma, address);
>> +		__flush_tlb_page(vma, address, TLBF_NOBROADCAST);
> 
> Ultimately I think this previously did __tlbi(vale1) and now does __tlbi(vae1)
> Original call was to __local_flush_tlb_page_nonotify_nosync()

No, not quite; the new code is still doing __tlbi(vale1).

The trick is that the __flush_tlb_page() wrapper unconditionally adds
TLBF_NOWALKCACHE to the flags. Since this API is operating on a *page* it is
implicit that we should only be evicting a leaf entry (as per the old
implementation).

You'll see I've also updated the documentation to make that clear in tlbflush.h.

Now that you have raised it, I can see how it might be confusing though, since
callers of __flush_tlb_page() do not explicitly pass TLBF_NOWALKCACHE (the
wrapper adds it implicitly). We could require
all __flush_tlb_page() callers to explicitly pass TLBF_NOWALKCACHE if you think
that helps? It would still be implicit for flush_tlb_page() (the generic kernel
API) though.

> 
> I'd like to see that sort of change called out and explained in the patch description.
> It's a broader scoped flush so not a bug, but still a functional change.

As I say, the emitted code is the same. It's my new API that's the problem here...

Thanks,
Ryan

> 
>>  	return 1;
>>  }
>>  
>
Re: [PATCH v2 12/13] arm64: mm: Wrap flush_tlb_page() around ___flush_tlb_range()
Posted by Jonathan Cameron 1 week, 5 days ago
On Tue, 27 Jan 2026 14:03:43 +0000
Ryan Roberts <ryan.roberts@arm.com> wrote:

> On 27/01/2026 12:59, Jonathan Cameron wrote:
> > On Mon, 19 Jan 2026 17:21:59 +0000
> > Ryan Roberts <ryan.roberts@arm.com> wrote:
> >   
> >> Flushing a page from the tlb is just a special case of flushing a range.
> >> So let's rework flush_tlb_page() so that it simply wraps
> >> ___flush_tlb_range(). While at it, let's also update the API to take the
> >> same flags that we use when flushing a range. This allows us to delete
> >> all the ugly "_nosync", "_local" and "_nonotify" variants.
> >>
> >> Thanks to constant folding, all of the complex looping and tlbi-by-range
> >> options get eliminated so that the generated code for flush_tlb_page()
> >> looks very similar to the previous version.
> >>
> >> Reviewed-by: Linu Cherian <linu.cherian@arm.com>
> >> Signed-off-by: Ryan Roberts <ryan.roberts@arm.com>  
> > 
> > So this does include the use of the TLBF_NOBROADCAST case from the
> > previous patch, but only whilst (I think) slightly changing behavior.
> > 
> > Gah.  I'm regretting looking at this series. The original code is really hard to
> > read :)  Rather you than me to fix it!
> >   
> >>  static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end)
> >>  {
> >>  	const unsigned long stride = PAGE_SIZE;
> >> diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
> >> index be9dab2c7d6a..f91aa686f142 100644
> >> --- a/arch/arm64/mm/fault.c
> >> +++ b/arch/arm64/mm/fault.c
> >> @@ -239,7 +239,7 @@ int __ptep_set_access_flags(struct vm_area_struct *vma,
> >>  	 * flush_tlb_fix_spurious_fault().
> >>  	 */
> >>  	if (dirty)
> >> -		local_flush_tlb_page(vma, address);
> >> +		__flush_tlb_page(vma, address, TLBF_NOBROADCAST);  
> > 
> > Ultimately I think this previously did __tlbi(vale1) and now does __tlbi(vae1)
> > Original call was to __local_flush_tlb_page_nonotify_nosync()
> 
> No, not quite; the new code is still doing __tlbi(vale1).
> 
> The trick is that the __flush_tlb_page() wrapper unconditionally adds
> TLBF_NOWALKCACHE to the flags. Since this API is operating on a *page* it is
> implicit that we should only be evicting a leaf entry (as per the old
> implementation).
> 
> You'll see I've also updated the documentation to make that clear in tlbflush.h.
> 
> Now that you have raised it, I can see how it might be confusing though, since
> __flush_tlb_page() does not explicitly have TLBF_NOWALKCACHE. We could require
> all __flush_tlb_page() callers to explicitly pass TLBF_NOWALKCACHE if you think
> that helps? It would still be implicit for flush_tlb_page() (the generic kernel
> API) though.

Ah. I'd indeed missed that tweaking of the flags.

Not sure. You probably have a better feel for this API than I do and the likely
expectations of users.

J

> 
> > 
> > I'd like to see that sort of change called out and explained in the patch description.
> > It's a broader scoped flush so not a bug, but still a functional change.  
> 
> As I say, the emitted code is the same. It's my new API that's the problem here...

> 
> Thanks,
> Ryan
> 
> >   
> >>  	return 1;
> >>  }
> >>    
> >   
> 
>