[PATCH 3/3] xen/arm32: add CPU capability for IPA-based TLBI

Haseeb Ashraf posted 3 patches 4 days, 5 hours ago
[PATCH 3/3] xen/arm32: add CPU capability for IPA-based TLBI
Posted by Haseeb Ashraf 4 days, 5 hours ago
From: Haseeb Ashraf <haseeb.ashraf@siemens.com>

IPA-based TLB invalidation (TLBIIPAS2IS and friends) is available on
AArch32 since Armv8. In presence of nTLBPA, use it on arm32 to
invalidate guest TLB entries by IPA instead of performing a TLBIALL
each time. XENMEM_remove_from_physmap performs this invalidation in
each hypercall, so this code path will be optimized.

Suggested-by: Julien Grall <julien@xen.org>
Signed-off-by: Haseeb Ashraf <haseeb.ashraf@siemens.com>

Changes in v3:
- There are no functional changes in this version. There are minor
  code updates and comment updates as per the feedback on v2.
- The cpregs are defined in order as per Coprocessor-> CRn-> Opcode 1
  -> CRm-> Opcode 2.
- Added comment to explain why IPA-based TLBI is added only in
  presence of FEAT_nTLBPA.
- Replaced `goto default_tlbi` with if...else.
- Removed extra definitions of MM32_UNITLB_* macros which were not
  being used.

Changes in v2:
- This commit is implemented in v2 as per the feedback to implement
  IPA-based TLBI for Arm32 in addition to Arm64.
---
 xen/arch/arm/cpufeature.c                 | 12 +++++++
 xen/arch/arm/include/asm/arm32/flushtlb.h | 42 ++++++++++++++++++++---
 xen/arch/arm/include/asm/cpregs.h         |  4 +++
 xen/arch/arm/include/asm/cpufeature.h     | 15 ++++----
 xen/arch/arm/include/asm/processor.h      |  3 ++
 5 files changed, 65 insertions(+), 11 deletions(-)

diff --git a/xen/arch/arm/cpufeature.c b/xen/arch/arm/cpufeature.c
index 9fa1c45869..d18c6449c6 100644
--- a/xen/arch/arm/cpufeature.c
+++ b/xen/arch/arm/cpufeature.c
@@ -18,6 +18,11 @@ DECLARE_BITMAP(cpu_hwcaps, ARM_NCAPS);
 struct cpuinfo_arm __read_mostly domain_cpuinfo;
 
 #ifdef CONFIG_ARM_32
+static bool has_tlb_ipa_instruction(const struct arm_cpu_capabilities *entry)
+{
+    return system_cpuinfo.mm32.unitlb == MM32_UNITLB_BY_IPA;
+}
+
 static bool has_ntlbpa(const struct arm_cpu_capabilities *entry)
 {
     return system_cpuinfo.mm32.ntlbpa == MM32_NTLBPA_SUPPORT_IMP;
@@ -37,6 +42,13 @@ static bool has_sb_instruction(const struct arm_cpu_capabilities *entry)
 #endif
 
 static const struct arm_cpu_capabilities arm_features[] = {
+#ifdef CONFIG_ARM_32
+    {
+        .desc = "IPA-based TLB Invalidation",
+        .capability = ARM32_HAS_TLB_IPA,
+        .matches = has_tlb_ipa_instruction,
+    },
+#endif
 #if defined(CONFIG_ARM_32) || defined(CONFIG_ARM_64)
     {
         .desc = "Intermediate caching of translation table walks (nTLBPA)",
diff --git a/xen/arch/arm/include/asm/arm32/flushtlb.h b/xen/arch/arm/include/asm/arm32/flushtlb.h
index 7cff042508..3e6f86f6d2 100644
--- a/xen/arch/arm/include/asm/arm32/flushtlb.h
+++ b/xen/arch/arm/include/asm/arm32/flushtlb.h
@@ -1,6 +1,8 @@
 #ifndef __ASM_ARM_ARM32_FLUSHTLB_H__
 #define __ASM_ARM_ARM32_FLUSHTLB_H__
 
+#include <xen/sizes.h> /* For SZ_* macros. */
+
 /*
  * Every invalidation operation use the following patterns:
  *
@@ -104,12 +106,45 @@ static inline void flush_guest_tlb_range_ipa(paddr_t ipa,
                                              unsigned long size)
 {
     /*
-     * Following can invalidate both stage-1 and stage-2 TLBs depending upon
-     * the execution mode.
-     *
-     * See ARMv8 (DDI 0487L.b): G5-11698 Table G5-23.
+     * IPA-based TLBI is used only in presence of nTLBPA, otherwise, stage-1
+     * invalidation would still be required and there is no separate TLBI for
+     * stage-1 on Arm32. So in absence of nTLBPA, it is pointless to flush by
+     * IPA.
      */
-    flush_guest_tlb();
+    if ( cpus_have_const_cap(ARM_HAS_NTLBPA) &&
+         cpus_have_const_cap(ARM32_HAS_TLB_IPA) )
+    {
+        /*
+         * If IPA range is too big (empirically found to be 256M), then
+         * fall back to full TLB flush
+         */
+        if ( size > SZ_256M )
+            /*
+             * Following can invalidate both stage-1 and stage-2 TLBs depending
+             * upon the execution mode.
+             *
+             * See ARMv8 (DDI 0487L.b): G5-11698 Table G5-23.
+             */
+            flush_guest_tlb();
+        else
+        {
+            paddr_t end = ipa + size;
+
+            dsb(ishst); /* Ensure prior page-tables updates have completed */
+            while ( ipa < end )
+            {
+                /* Flush stage-2 TLBs for ipa address. */
+                asm volatile(STORE_CP32(0, TLBIIPAS2IS)
+                             : : "r" (ipa >> PAGE_SHIFT) : "memory");
+                ipa += PAGE_SIZE;
+            }
+            dsb(ish);
+            isb();
+        }
+    }
+    else
+        /* IPA-based TLBI cannot be used: keep the previous full flush. */
+        flush_guest_tlb();
 }
 
 #endif /* __ASM_ARM_ARM32_FLUSHTLB_H__ */
diff --git a/xen/arch/arm/include/asm/cpregs.h b/xen/arch/arm/include/asm/cpregs.h
index a7503a190f..51f091dace 100644
--- a/xen/arch/arm/include/asm/cpregs.h
+++ b/xen/arch/arm/include/asm/cpregs.h
@@ -223,9 +223,13 @@
 #define TLBIMVA         p15,0,c8,c7,1   /* invalidate unified TLB entry by MVA */
 #define TLBIASID        p15,0,c8,c7,2   /* invalid unified TLB by ASID match */
 #define TLBIMVAA        p15,0,c8,c7,3   /* invalidate unified TLB entries by MVA all ASID */
+#define TLBIIPAS2IS     p15,4,c8,c0,1   /* Invalidate unified TLB entry for stage 2 by IPA inner shareable */
+#define TLBIIPAS2LIS    p15,4,c8,c0,5   /* Invalidate unified TLB entry for stage 2 last level by IPA inner shareable */
 #define TLBIALLHIS      p15,4,c8,c3,0   /* Invalidate Entire Hyp. Unified TLB inner shareable */
 #define TLBIMVAHIS      p15,4,c8,c3,1   /* Invalidate Unified Hyp. TLB by MVA inner shareable */
 #define TLBIALLNSNHIS   p15,4,c8,c3,4   /* Invalidate Entire Non-Secure Non-Hyp. Unified TLB inner shareable */
+#define TLBIIPAS2       p15,4,c8,c4,1   /* Invalidate unified TLB entry for stage 2 by IPA */
+#define TLBIIPAS2L      p15,4,c8,c4,5   /* Invalidate unified TLB entry for stage 2 last level by IPA */
 #define TLBIALLH        p15,4,c8,c7,0   /* Invalidate Entire Hyp. Unified TLB */
 #define TLBIMVAH        p15,4,c8,c7,1   /* Invalidate Unified Hyp. TLB by MVA */
 #define TLBIALLNSNH     p15,4,c8,c7,4   /* Invalidate Entire Non-Secure Non-Hyp. Unified TLB */
diff --git a/xen/arch/arm/include/asm/cpufeature.h b/xen/arch/arm/include/asm/cpufeature.h
index 9f796ed4c1..07f1d770b3 100644
--- a/xen/arch/arm/include/asm/cpufeature.h
+++ b/xen/arch/arm/include/asm/cpufeature.h
@@ -77,8 +77,9 @@
 #define ARM_HAS_SB 16
 #define ARM64_WORKAROUND_1508412 17
 #define ARM_HAS_NTLBPA 18
+#define ARM32_HAS_TLB_IPA 19
 
-#define ARM_NCAPS           19
+#define ARM_NCAPS           20
 
 #ifndef __ASSEMBLER__
 
@@ -440,15 +441,17 @@ struct cpuinfo_arm {
             /* MMFR1 */
             unsigned long __res1:32;
             /* MMFR2 */
-            unsigned long __res2:32;
+            unsigned long __res2:16;
+            unsigned long unitlb:4;
+            unsigned long __res3:12;
             /* MMFR3 */
-            unsigned long __res3:32;
-            /* MMFR4 */
             unsigned long __res4:32;
+            /* MMFR4 */
+            unsigned long __res5:32;
             /* MMFR5 */
-            unsigned long __res5:4;
+            unsigned long __res6:4;
             unsigned long ntlbpa:4;
-            unsigned long __res6:24;
+            unsigned long __res7:24;
         };
     } mm32;
 
diff --git a/xen/arch/arm/include/asm/processor.h b/xen/arch/arm/include/asm/processor.h
index 85f3b643a0..eda39566e1 100644
--- a/xen/arch/arm/include/asm/processor.h
+++ b/xen/arch/arm/include/asm/processor.h
@@ -460,6 +460,9 @@
 #define FSRL_STATUS_DEBUG       (_AC(0x22,UL)<<0)
 
 #ifdef CONFIG_ARM_32
+#define MM32_UNITLB_NI              0x0
+#define MM32_UNITLB_BY_IPA          0x6
+
 #define MM32_NTLBPA_SUPPORT_NI      0x0
 #define MM32_NTLBPA_SUPPORT_IMP     0x1
 #endif
-- 
2.43.0