[RESEND PATCH v5 05/11] arm64: tlb: Refactor the core flush algorithm of __flush_tlb_range

Raghavendra Rao Ananta posted 11 patches 2 years, 7 months ago
There is a newer version of this series
[RESEND PATCH v5 05/11] arm64: tlb: Refactor the core flush algorithm of __flush_tlb_range
Posted by Raghavendra Rao Ananta 2 years, 7 months ago
Currently, the core TLB flush functionality of __flush_tlb_range()
hardcodes vae1is (and variants) for the flush operation. In the
upcoming patches, the KVM code reuses this core algorithm with
ipas2e1is for range based TLB invalidations based on the IPA.
Hence, extract the core flush functionality of __flush_tlb_range()
into its own macro that accepts an 'op' argument to pass any
TLBI operation, such that other callers (KVM) can benefit.

No functional changes intended.

Signed-off-by: Raghavendra Rao Ananta <rananta@google.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/tlbflush.h | 108 +++++++++++++++---------------
 1 file changed, 55 insertions(+), 53 deletions(-)

diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index 412a3b9a3c25d..4775378b6da1b 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -278,14 +278,61 @@ static inline void flush_tlb_page(struct vm_area_struct *vma,
  */
 #define MAX_TLBI_OPS	PTRS_PER_PTE
 
+/* When the CPU does not support TLB range operations, flush the TLB
+ * entries one by one at the granularity of 'stride'. If the TLB
+ * range ops are supported, then:
+ *
+ * 1. If 'pages' is odd, flush the first page through non-range
+ *    operations;
+ *
+ * 2. For remaining pages: the minimum range granularity is decided
+ *    by 'scale', so multiple range TLBI operations may be required.
+ *    Start from scale = 0, flush the corresponding number of pages
+ *    ((num+1)*2^(5*scale+1) starting from 'addr'), then increase it
+ *    until no pages left.
+ *
+ * Note that certain ranges can be represented by either num = 31 and
+ * scale or num = 0 and scale + 1. The loop below favours the latter
+ * since num is limited to 30 by the __TLBI_RANGE_NUM() macro.
+ */
+#define __flush_tlb_range_op(op, start, pages, stride,			\
+				asid, tlb_level, tlbi_user) do {	\
+	int num = 0;							\
+	int scale = 0;							\
+	unsigned long addr;						\
+									\
+	while (pages > 0) {						\
+		if (!system_supports_tlb_range() ||			\
+		    pages % 2 == 1) {					\
+			addr = __TLBI_VADDR(start, asid);		\
+			__tlbi_level(op, addr, tlb_level);		\
+			if (tlbi_user)					\
+				__tlbi_user_level(op, addr, tlb_level);	\
+			start += stride;				\
+			pages -= stride >> PAGE_SHIFT;			\
+			continue;					\
+		}							\
+									\
+		num = __TLBI_RANGE_NUM(pages, scale);			\
+		if (num >= 0) {						\
+			addr = __TLBI_VADDR_RANGE(start, asid, scale,	\
+						  num, tlb_level);	\
+			__tlbi(r##op, addr);				\
+			if (tlbi_user)					\
+				__tlbi_user(r##op, addr);		\
+			start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT; \
+			pages -= __TLBI_RANGE_PAGES(num, scale);	\
+		}							\
+		scale++;						\
+	}								\
+} while (0)
+
 static inline void __flush_tlb_range(struct vm_area_struct *vma,
 				     unsigned long start, unsigned long end,
 				     unsigned long stride, bool last_level,
 				     int tlb_level)
 {
-	int num = 0;
-	int scale = 0;
-	unsigned long asid, addr, pages;
+	unsigned long asid, pages;
 
 	start = round_down(start, stride);
 	end = round_up(end, stride);
@@ -307,56 +354,11 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma,
 	dsb(ishst);
 	asid = ASID(vma->vm_mm);
 
-	/*
-	 * When the CPU does not support TLB range operations, flush the TLB
-	 * entries one by one at the granularity of 'stride'. If the TLB
-	 * range ops are supported, then:
-	 *
-	 * 1. If 'pages' is odd, flush the first page through non-range
-	 *    operations;
-	 *
-	 * 2. For remaining pages: the minimum range granularity is decided
-	 *    by 'scale', so multiple range TLBI operations may be required.
-	 *    Start from scale = 0, flush the corresponding number of pages
-	 *    ((num+1)*2^(5*scale+1) starting from 'addr'), then increase it
-	 *    until no pages left.
-	 *
-	 * Note that certain ranges can be represented by either num = 31 and
-	 * scale or num = 0 and scale + 1. The loop below favours the latter
-	 * since num is limited to 30 by the __TLBI_RANGE_NUM() macro.
-	 */
-	while (pages > 0) {
-		if (!system_supports_tlb_range() ||
-		    pages % 2 == 1) {
-			addr = __TLBI_VADDR(start, asid);
-			if (last_level) {
-				__tlbi_level(vale1is, addr, tlb_level);
-				__tlbi_user_level(vale1is, addr, tlb_level);
-			} else {
-				__tlbi_level(vae1is, addr, tlb_level);
-				__tlbi_user_level(vae1is, addr, tlb_level);
-			}
-			start += stride;
-			pages -= stride >> PAGE_SHIFT;
-			continue;
-		}
-
-		num = __TLBI_RANGE_NUM(pages, scale);
-		if (num >= 0) {
-			addr = __TLBI_VADDR_RANGE(start, asid, scale,
-						  num, tlb_level);
-			if (last_level) {
-				__tlbi(rvale1is, addr);
-				__tlbi_user(rvale1is, addr);
-			} else {
-				__tlbi(rvae1is, addr);
-				__tlbi_user(rvae1is, addr);
-			}
-			start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT;
-			pages -= __TLBI_RANGE_PAGES(num, scale);
-		}
-		scale++;
-	}
+	if (last_level)
+		__flush_tlb_range_op(vale1is, start, pages, stride, asid, tlb_level, true);
+	else
+		__flush_tlb_range_op(vae1is, start, pages, stride, asid, tlb_level, true);
+
 	dsb(ish);
 }
 
-- 
2.41.0.162.gfafddb0af9-goog
Re: [RESEND PATCH v5 05/11] arm64: tlb: Refactor the core flush algorithm of __flush_tlb_range
Posted by Gavin Shan 2 years, 7 months ago
On 6/22/23 03:49, Raghavendra Rao Ananta wrote:
> Currently, the core TLB flush functionality of __flush_tlb_range()
> hardcodes vae1is (and variants) for the flush operation. In the
> upcoming patches, the KVM code reuses this core algorithm with
> ipas2e1is for range based TLB invalidations based on the IPA.
> Hence, extract the core flush functionality of __flush_tlb_range()
> into its own macro that accepts an 'op' argument to pass any
> TLBI operation, such that other callers (KVM) can benefit.
> 
> No functional changes intended.
> 
> Signed-off-by: Raghavendra Rao Ananta <rananta@google.com>
> Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
> ---
>   arch/arm64/include/asm/tlbflush.h | 108 +++++++++++++++---------------
>   1 file changed, 55 insertions(+), 53 deletions(-)
> 

With the following nits addressed:

Reviewed-by: Gavin Shan <gshan@redhat.com>

> diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
> index 412a3b9a3c25d..4775378b6da1b 100644
> --- a/arch/arm64/include/asm/tlbflush.h
> +++ b/arch/arm64/include/asm/tlbflush.h
> @@ -278,14 +278,61 @@ static inline void flush_tlb_page(struct vm_area_struct *vma,
>    */
>   #define MAX_TLBI_OPS	PTRS_PER_PTE
>   
> +/* When the CPU does not support TLB range operations, flush the TLB
> + * entries one by one at the granularity of 'stride'. If the TLB
> + * range ops are supported, then:
> + *
> + * 1. If 'pages' is odd, flush the first page through non-range
> + *    operations;
> + *
> + * 2. For remaining pages: the minimum range granularity is decided
> + *    by 'scale', so multiple range TLBI operations may be required.
> + *    Start from scale = 0, flush the corresponding number of pages
> + *    ((num+1)*2^(5*scale+1) starting from 'addr'), then increase it
> + *    until no pages left.
> + *
> + * Note that certain ranges can be represented by either num = 31 and
> + * scale or num = 0 and scale + 1. The loop below favours the latter
> + * since num is limited to 30 by the __TLBI_RANGE_NUM() macro.
> + */
> +#define __flush_tlb_range_op(op, start, pages, stride,			\
> +				asid, tlb_level, tlbi_user) do {	\
> +	int num = 0;							\
> +	int scale = 0;							\
> +	unsigned long addr;						\
> +									\
> +	while (pages > 0) {						\
> +		if (!system_supports_tlb_range() ||			\
> +		    pages % 2 == 1) {					\
> +			addr = __TLBI_VADDR(start, asid);		\
> +			__tlbi_level(op, addr, tlb_level);		\
> +			if (tlbi_user)					\
> +				__tlbi_user_level(op, addr, tlb_level);	\
> +			start += stride;				\
> +			pages -= stride >> PAGE_SHIFT;			\
> +			continue;					\
> +		}							\
> +									\
> +		num = __TLBI_RANGE_NUM(pages, scale);			\
> +		if (num >= 0) {						\
> +			addr = __TLBI_VADDR_RANGE(start, asid, scale,	\
> +						  num, tlb_level);	\
> +			__tlbi(r##op, addr);				\
> +			if (tlbi_user)					\
> +				__tlbi_user(r##op, addr);		\
> +			start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT; \
> +			pages -= __TLBI_RANGE_PAGES(num, scale);	\
> +		}							\
> +		scale++;						\
> +	}								\
> +} while (0)
> +

There is a warning reported from 'checkpatch.pl'.

     WARNING: suspect code indent for conditional statements (32, 8)
     #52: FILE: arch/arm64/include/asm/tlbflush.h:299:
     +				asid, tlb_level, tlbi_user) do {	\
     [...]
     +	unsigned long addr;						\

     total: 0 errors, 1 warnings, 125 lines checked

You probably need to tweak it as below, to avoid the warning.

     #define __flush_tlb_range_op(op, start, pages, stride,                \
                                  asid, tlb_level, tlbi_user)              \
     do {                                                                  \


>   static inline void __flush_tlb_range(struct vm_area_struct *vma,
>   				     unsigned long start, unsigned long end,
>   				     unsigned long stride, bool last_level,
>   				     int tlb_level)
>   {
> -	int num = 0;
> -	int scale = 0;
> -	unsigned long asid, addr, pages;
> +	unsigned long asid, pages;
>   
>   	start = round_down(start, stride);
>   	end = round_up(end, stride);
> @@ -307,56 +354,11 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma,
>   	dsb(ishst);
>   	asid = ASID(vma->vm_mm);
>   
> -	/*
> -	 * When the CPU does not support TLB range operations, flush the TLB
> -	 * entries one by one at the granularity of 'stride'. If the TLB
> -	 * range ops are supported, then:
> -	 *
> -	 * 1. If 'pages' is odd, flush the first page through non-range
> -	 *    operations;
> -	 *
> -	 * 2. For remaining pages: the minimum range granularity is decided
> -	 *    by 'scale', so multiple range TLBI operations may be required.
> -	 *    Start from scale = 0, flush the corresponding number of pages
> -	 *    ((num+1)*2^(5*scale+1) starting from 'addr'), then increase it
> -	 *    until no pages left.
> -	 *
> -	 * Note that certain ranges can be represented by either num = 31 and
> -	 * scale or num = 0 and scale + 1. The loop below favours the latter
> -	 * since num is limited to 30 by the __TLBI_RANGE_NUM() macro.
> -	 */
> -	while (pages > 0) {
> -		if (!system_supports_tlb_range() ||
> -		    pages % 2 == 1) {
> -			addr = __TLBI_VADDR(start, asid);
> -			if (last_level) {
> -				__tlbi_level(vale1is, addr, tlb_level);
> -				__tlbi_user_level(vale1is, addr, tlb_level);
> -			} else {
> -				__tlbi_level(vae1is, addr, tlb_level);
> -				__tlbi_user_level(vae1is, addr, tlb_level);
> -			}
> -			start += stride;
> -			pages -= stride >> PAGE_SHIFT;
> -			continue;
> -		}
> -
> -		num = __TLBI_RANGE_NUM(pages, scale);
> -		if (num >= 0) {
> -			addr = __TLBI_VADDR_RANGE(start, asid, scale,
> -						  num, tlb_level);
> -			if (last_level) {
> -				__tlbi(rvale1is, addr);
> -				__tlbi_user(rvale1is, addr);
> -			} else {
> -				__tlbi(rvae1is, addr);
> -				__tlbi_user(rvae1is, addr);
> -			}
> -			start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT;
> -			pages -= __TLBI_RANGE_PAGES(num, scale);
> -		}
> -		scale++;
> -	}
> +	if (last_level)
> +		__flush_tlb_range_op(vale1is, start, pages, stride, asid, tlb_level, true);
> +	else
> +		__flush_tlb_range_op(vae1is, start, pages, stride, asid, tlb_level, true);
> +
>   	dsb(ish);
>   }
>   

Thanks,
Gavin
Re: [RESEND PATCH v5 05/11] arm64: tlb: Refactor the core flush algorithm of __flush_tlb_range
Posted by Raghavendra Rao Ananta 2 years, 7 months ago
On Tue, Jul 4, 2023 at 5:11 PM Gavin Shan <gshan@redhat.com> wrote:
>
> On 6/22/23 03:49, Raghavendra Rao Ananta wrote:
> > Currently, the core TLB flush functionality of __flush_tlb_range()
> > hardcodes vae1is (and variants) for the flush operation. In the
> > upcoming patches, the KVM code reuses this core algorithm with
> > ipas2e1is for range based TLB invalidations based on the IPA.
> > Hence, extract the core flush functionality of __flush_tlb_range()
> > into its own macro that accepts an 'op' argument to pass any
> > TLBI operation, such that other callers (KVM) can benefit.
> >
> > No functional changes intended.
> >
> > Signed-off-by: Raghavendra Rao Ananta <rananta@google.com>
> > Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
> > ---
> >   arch/arm64/include/asm/tlbflush.h | 108 +++++++++++++++---------------
> >   1 file changed, 55 insertions(+), 53 deletions(-)
> >
>
> With the following nits addressed:
>
> Reviewed-by: Gavin Shan <gshan@redhat.com>
>
> > diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
> > index 412a3b9a3c25d..4775378b6da1b 100644
> > --- a/arch/arm64/include/asm/tlbflush.h
> > +++ b/arch/arm64/include/asm/tlbflush.h
> > @@ -278,14 +278,61 @@ static inline void flush_tlb_page(struct vm_area_struct *vma,
> >    */
> >   #define MAX_TLBI_OPS        PTRS_PER_PTE
> >
> > +/* When the CPU does not support TLB range operations, flush the TLB
> > + * entries one by one at the granularity of 'stride'. If the TLB
> > + * range ops are supported, then:
> > + *
> > + * 1. If 'pages' is odd, flush the first page through non-range
> > + *    operations;
> > + *
> > + * 2. For remaining pages: the minimum range granularity is decided
> > + *    by 'scale', so multiple range TLBI operations may be required.
> > + *    Start from scale = 0, flush the corresponding number of pages
> > + *    ((num+1)*2^(5*scale+1) starting from 'addr'), then increase it
> > + *    until no pages left.
> > + *
> > + * Note that certain ranges can be represented by either num = 31 and
> > + * scale or num = 0 and scale + 1. The loop below favours the latter
> > + * since num is limited to 30 by the __TLBI_RANGE_NUM() macro.
> > + */
> > +#define __flush_tlb_range_op(op, start, pages, stride,                       \
> > +                             asid, tlb_level, tlbi_user) do {        \
> > +     int num = 0;                                                    \
> > +     int scale = 0;                                                  \
> > +     unsigned long addr;                                             \
> > +                                                                     \
> > +     while (pages > 0) {                                             \
> > +             if (!system_supports_tlb_range() ||                     \
> > +                 pages % 2 == 1) {                                   \
> > +                     addr = __TLBI_VADDR(start, asid);               \
> > +                     __tlbi_level(op, addr, tlb_level);              \
> > +                     if (tlbi_user)                                  \
> > +                             __tlbi_user_level(op, addr, tlb_level); \
> > +                     start += stride;                                \
> > +                     pages -= stride >> PAGE_SHIFT;                  \
> > +                     continue;                                       \
> > +             }                                                       \
> > +                                                                     \
> > +             num = __TLBI_RANGE_NUM(pages, scale);                   \
> > +             if (num >= 0) {                                         \
> > +                     addr = __TLBI_VADDR_RANGE(start, asid, scale,   \
> > +                                               num, tlb_level);      \
> > +                     __tlbi(r##op, addr);                            \
> > +                     if (tlbi_user)                                  \
> > +                             __tlbi_user(r##op, addr);               \
> > +                     start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT; \
> > +                     pages -= __TLBI_RANGE_PAGES(num, scale);        \
> > +             }                                                       \
> > +             scale++;                                                \
> > +     }                                                               \
> > +} while (0)
> > +
>
> There is a warning reported from 'checkpatch.pl'.
>
>      WARNING: suspect code indent for conditional statements (32, 8)
>      #52: FILE: arch/arm64/include/asm/tlbflush.h:299:
>      +                          asid, tlb_level, tlbi_user) do {        \
>      [...]
>      +  unsigned long addr;                                             \
>
>      total: 0 errors, 1 warnings, 125 lines checked
>
> You probably need to tweak it as below, to avoid the warning.
>
>      #define __flush_tlb_range_op(op, start, pages, stride,                \
>                                   asid, tlb_level, tlbi_user)              \
>      do {                                                                  \
>
Thanks for the suggestion, Gavin. I'll fix this.

- Raghavendra
>
> >   static inline void __flush_tlb_range(struct vm_area_struct *vma,
> >                                    unsigned long start, unsigned long end,
> >                                    unsigned long stride, bool last_level,
> >                                    int tlb_level)
> >   {
> > -     int num = 0;
> > -     int scale = 0;
> > -     unsigned long asid, addr, pages;
> > +     unsigned long asid, pages;
> >
> >       start = round_down(start, stride);
> >       end = round_up(end, stride);
> > @@ -307,56 +354,11 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma,
> >       dsb(ishst);
> >       asid = ASID(vma->vm_mm);
> >
> > -     /*
> > -      * When the CPU does not support TLB range operations, flush the TLB
> > -      * entries one by one at the granularity of 'stride'. If the TLB
> > -      * range ops are supported, then:
> > -      *
> > -      * 1. If 'pages' is odd, flush the first page through non-range
> > -      *    operations;
> > -      *
> > -      * 2. For remaining pages: the minimum range granularity is decided
> > -      *    by 'scale', so multiple range TLBI operations may be required.
> > -      *    Start from scale = 0, flush the corresponding number of pages
> > -      *    ((num+1)*2^(5*scale+1) starting from 'addr'), then increase it
> > -      *    until no pages left.
> > -      *
> > -      * Note that certain ranges can be represented by either num = 31 and
> > -      * scale or num = 0 and scale + 1. The loop below favours the latter
> > -      * since num is limited to 30 by the __TLBI_RANGE_NUM() macro.
> > -      */
> > -     while (pages > 0) {
> > -             if (!system_supports_tlb_range() ||
> > -                 pages % 2 == 1) {
> > -                     addr = __TLBI_VADDR(start, asid);
> > -                     if (last_level) {
> > -                             __tlbi_level(vale1is, addr, tlb_level);
> > -                             __tlbi_user_level(vale1is, addr, tlb_level);
> > -                     } else {
> > -                             __tlbi_level(vae1is, addr, tlb_level);
> > -                             __tlbi_user_level(vae1is, addr, tlb_level);
> > -                     }
> > -                     start += stride;
> > -                     pages -= stride >> PAGE_SHIFT;
> > -                     continue;
> > -             }
> > -
> > -             num = __TLBI_RANGE_NUM(pages, scale);
> > -             if (num >= 0) {
> > -                     addr = __TLBI_VADDR_RANGE(start, asid, scale,
> > -                                               num, tlb_level);
> > -                     if (last_level) {
> > -                             __tlbi(rvale1is, addr);
> > -                             __tlbi_user(rvale1is, addr);
> > -                     } else {
> > -                             __tlbi(rvae1is, addr);
> > -                             __tlbi_user(rvae1is, addr);
> > -                     }
> > -                     start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT;
> > -                     pages -= __TLBI_RANGE_PAGES(num, scale);
> > -             }
> > -             scale++;
> > -     }
> > +     if (last_level)
> > +             __flush_tlb_range_op(vale1is, start, pages, stride, asid, tlb_level, true);
> > +     else
> > +             __flush_tlb_range_op(vae1is, start, pages, stride, asid, tlb_level, true);
> > +
> >       dsb(ish);
> >   }
> >
>
> Thanks,
> Gavin
>