From: Haseeb Ashraf <haseeb.ashraf@siemens.com>
FEAT_nTLBPA (quoting its definition) introduces a mechanism to identify
whether the intermediate caching of translation table walks includes
non-coherent caches of previous valid translation table entries since
the last completed TLBI applicable to the PE. When the feature is
implemented there are no such non-coherent caches, so a stage-1 TLBI is
not required when performing a stage-2 TLBI. The feature is optional on
both arm32 and arm64.
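To illustrate, on arm64 the feature is advertised in
ID_AA64MMFR1_EL1.nTLBPA, bits [51:48] (on arm32 the equivalent field
lives in ID_MMFR5). The patch reads the field through the decoded
system_cpuinfo bitfields and the cpufeature framework; a minimal
standalone sketch of the check (cpu_supports_ntlbpa() is a hypothetical
helper, not part of this patch) would be:

    static bool cpu_supports_ntlbpa(void)
    {
        uint64_t mmfr1 = READ_SYSREG64(ID_AA64MMFR1_EL1);

        /*
         * nTLBPA, ID_AA64MMFR1_EL1[51:48]: 0b0001 means no non-coherent
         * intermediate walk caches exist since the last completed TLBI.
         */
        return ((mmfr1 >> 48) & 0xf) == 0x1;
    }

Once ARM_HAS_NTLBPA is established from this field, the stage-1-only
flush helpers below can simply do nothing.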
Suggested-by: Mohamed Mediouni <mohamed@unpredictable.fr>
Signed-off-by: Haseeb Ashraf <haseeb.ashraf@siemens.com>
Changes in v3:
- No functional change; only rebasing changes due to updates in
  commit-1.
Changes in v2:
- This commit is new in v2, split out from commit-1 of v1. It is
  implemented using a CPU capability.
---
xen/arch/arm/cpufeature.c | 19 ++++++
xen/arch/arm/include/asm/arm32/flushtlb.h | 14 +++--
xen/arch/arm/include/asm/arm64/flushtlb.h | 77 ++++++++++++++++-------
xen/arch/arm/include/asm/cpufeature.h | 24 ++++++-
xen/arch/arm/include/asm/processor.h | 7 +++
5 files changed, 109 insertions(+), 32 deletions(-)
diff --git a/xen/arch/arm/cpufeature.c b/xen/arch/arm/cpufeature.c
index 1a80738571..9fa1c45869 100644
--- a/xen/arch/arm/cpufeature.c
+++ b/xen/arch/arm/cpufeature.c
@@ -17,7 +17,19 @@ DECLARE_BITMAP(cpu_hwcaps, ARM_NCAPS);
struct cpuinfo_arm __read_mostly domain_cpuinfo;
+#ifdef CONFIG_ARM_32
+static bool has_ntlbpa(const struct arm_cpu_capabilities *entry)
+{
+ return system_cpuinfo.mm32.ntlbpa == MM32_NTLBPA_SUPPORT_IMP;
+}
+#endif
+
#ifdef CONFIG_ARM_64
+static bool has_ntlbpa(const struct arm_cpu_capabilities *entry)
+{
+ return system_cpuinfo.mm64.ntlbpa == MM64_NTLBPA_SUPPORT_IMP;
+}
+
static bool has_sb_instruction(const struct arm_cpu_capabilities *entry)
{
return system_cpuinfo.isa64.sb;
@@ -25,6 +37,13 @@ static bool has_sb_instruction(const struct arm_cpu_capabilities *entry)
#endif
static const struct arm_cpu_capabilities arm_features[] = {
+#if defined(CONFIG_ARM_32) || defined(CONFIG_ARM_64)
+ {
+ .desc = "Intermediate caching of translation table walks (nTLBPA)",
+ .capability = ARM_HAS_NTLBPA,
+ .matches = has_ntlbpa,
+ },
+#endif
#ifdef CONFIG_ARM_64
{
.desc = "Speculation barrier instruction (SB)",
diff --git a/xen/arch/arm/include/asm/arm32/flushtlb.h b/xen/arch/arm/include/asm/arm32/flushtlb.h
index 3c0c2123d4..7cff042508 100644
--- a/xen/arch/arm/include/asm/arm32/flushtlb.h
+++ b/xen/arch/arm/include/asm/arm32/flushtlb.h
@@ -49,8 +49,8 @@ TLB_HELPER(flush_xen_tlb_local, TLBIALLH, nsh)
* Flush TLB of local processor. Use when flush for only stage-1 is intended.
*
* The following function should be used where intention is to clear only
- * stage-1 TLBs. This would be helpful in future in identifying which stage-1
- * TLB flushes can be skipped such as in present of FEAT_nTLBPA.
+ * stage-1 TLBs. This would be helpful in identifying which stage-1 TLB flushes
+ * can be skipped, such as in the presence of FEAT_nTLBPA.
*/
static inline void flush_guest_tlb_s1_local(void)
{
@@ -60,7 +60,8 @@ static inline void flush_guest_tlb_s1_local(void)
*
* See ARMv8 (DDI 0487L.b): G5-11698 Table G5-23.
*/
- return flush_guest_tlb_local();
+ if ( !cpus_have_const_cap(ARM_HAS_NTLBPA) )
+ flush_guest_tlb_local();
}
/*
@@ -68,8 +69,8 @@ static inline void flush_guest_tlb_s1_local(void)
* stage-1 is intended.
*
* The following function should be used where intention is to clear only
- * stage-1 TLBs. This would be helpful in future in identifying which stage-1
- * TLB flushes can be skipped such as in present of FEAT_nTLBPA.
+ * stage-1 TLBs. This would be helpful in identifying which stage-1 TLB flushes
+ * can be skipped, such as in the presence of FEAT_nTLBPA.
*/
static inline void flush_guest_tlb_s1(void)
{
@@ -79,7 +80,8 @@ static inline void flush_guest_tlb_s1(void)
*
* See ARMv8 (DDI 0487L.b): G5-11698 Table G5-23.
*/
- return flush_guest_tlb();
+ if ( !cpus_have_const_cap(ARM_HAS_NTLBPA) )
+ flush_guest_tlb();
}
/* Flush TLB of local processor for address va. */
diff --git a/xen/arch/arm/include/asm/arm64/flushtlb.h b/xen/arch/arm/include/asm/arm64/flushtlb.h
index 67ae616993..0f0d5050e5 100644
--- a/xen/arch/arm/include/asm/arm64/flushtlb.h
+++ b/xen/arch/arm/include/asm/arm64/flushtlb.h
@@ -47,6 +47,24 @@ static inline void name(void) \
: : : "memory"); \
}
+#define TLB_HELPER_NTLBPA(name, tlbop, sh) \
+static inline void name(void) \
+{ \
+ if ( !cpus_have_const_cap(ARM_HAS_NTLBPA) ) \
+ asm_inline volatile ( \
+ "dsb " # sh "st;" \
+ "tlbi " # tlbop ";" \
+ ALTERNATIVE( \
+ "nop; nop;", \
+ "dsb ish;" \
+ "tlbi " # tlbop ";", \
+ ARM64_WORKAROUND_REPEAT_TLBI, \
+ CONFIG_ARM64_WORKAROUND_REPEAT_TLBI) \
+ "dsb " # sh ";" \
+ "isb;" \
+ : : : "memory"); \
+}
+
/*
* FLush TLB by VA. This will likely be used in a loop, so the caller
* is responsible to use the appropriate memory barriers before/after
@@ -75,10 +93,10 @@ TLB_HELPER(flush_guest_tlb_local, vmalls12e1, nsh)
TLB_HELPER(flush_guest_tlb, vmalls12e1is, ish)
/* Flush local TLBs, current VMID, stage-1 only */
-TLB_HELPER(flush_guest_tlb_s1_local, vmalle1, nsh)
+TLB_HELPER_NTLBPA(flush_guest_tlb_s1_local, vmalle1, nsh)
/* Flush innershareable TLBs, current VMID, stage-1 only */
-TLB_HELPER(flush_guest_tlb_s1, vmalle1is, ish)
+TLB_HELPER_NTLBPA(flush_guest_tlb_s1, vmalle1is, ish)
/* Flush local TLBs, all VMIDs, non-hypervisor mode */
TLB_HELPER(flush_all_guests_tlb_local, alle1, nsh)
@@ -104,8 +122,6 @@ TLB_HELPER_VA(__flush_xen_tlb_one, vae2is)
*/
static inline void flush_guest_tlb_range_ipa(paddr_t ipa, unsigned long size)
{
- paddr_t end;
-
/*
* If IPA range is too big (empirically found to be 256M), then fallback to
* full TLB flush.
@@ -113,27 +129,42 @@ static inline void flush_guest_tlb_range_ipa(paddr_t ipa, unsigned long size)
if ( size > SZ_256M )
return flush_guest_tlb();
- end = ipa + size;
-
- /*
- * See ARM ARM DDI 0487L.b D8.17.6.1 (Invalidating TLB entries from stage 2
- * translations) for details of TLBI sequence.
- */
- dsb(ishst); /* Ensure prior page-tables updates have completed */
- while ( ipa < end )
+ else if ( size > 0 )
{
- /* Flush stage-2 TLBs for ipa address */
- asm_inline volatile (
- "tlbi ipas2e1is, %0;" : : "r" (ipa >> PAGE_SHIFT) : "memory" );
- ipa += PAGE_SIZE;
+ paddr_t end = ipa + size;
+
+ /*
+ * See ARM ARM DDI 0487L.b D8.17.6.1 (Invalidating TLB entries from
+ * stage 2 translations) for details on the TLBI sequence.
+ */
+ dsb(ishst); /* Ensure prior page-table updates have completed */
+ while ( ipa < end )
+ {
+ /* Flush stage-2 TLBs for ipa address */
+ asm_inline volatile (
+ "tlbi ipas2e1is, %0;" : : "r" (ipa >> PAGE_SHIFT) : "memory" );
+ ipa += PAGE_SIZE;
+ }
+ if ( cpus_have_const_cap(ARM_HAS_NTLBPA) )
+ asm_inline volatile (
+ ALTERNATIVE(
+ "nop; nop;",
+ "dsb ish;"
+ "tlbi ipas2e1is, %0;",
+ ARM64_WORKAROUND_REPEAT_TLBI,
+ CONFIG_ARM64_WORKAROUND_REPEAT_TLBI)
+ "dsb ish;"
+ "isb;"
+ : : "r" ((ipa - PAGE_SIZE) >> PAGE_SHIFT) : "memory" );
+ else
+ /*
+ * As ARM64_WORKAROUND_REPEAT_TLBI must be applied to the last TLBI
+ * of the sequence, it only needs to be handled in the following
+ * invocation, which also issues the final dsb() and isb().
+ */
+ flush_guest_tlb_s1();
}
- /*
- * As ARM64_WORKAROUND_REPEAT_TLBI is required to be applied to last TLBI
- * of the sequence, it is only needed to be handled in the following
- * invocation. Final dsb() and isb() are also applied in the following
- * invocation.
- */
- flush_guest_tlb_s1();
}
#endif /* __ASM_ARM_ARM64_FLUSHTLB_H__ */
diff --git a/xen/arch/arm/include/asm/cpufeature.h b/xen/arch/arm/include/asm/cpufeature.h
index 13353c8e1a..9f796ed4c1 100644
--- a/xen/arch/arm/include/asm/cpufeature.h
+++ b/xen/arch/arm/include/asm/cpufeature.h
@@ -76,8 +76,9 @@
#define ARM_WORKAROUND_BHB_SMCC_3 15
#define ARM_HAS_SB 16
#define ARM64_WORKAROUND_1508412 17
+#define ARM_HAS_NTLBPA 18
-#define ARM_NCAPS 18
+#define ARM_NCAPS 19
#ifndef __ASSEMBLER__
@@ -269,7 +270,8 @@ struct cpuinfo_arm {
unsigned long ets:4;
unsigned long __res1:4;
unsigned long afp:4;
- unsigned long __res2:12;
+ unsigned long ntlbpa:4;
+ unsigned long __res2:8;
unsigned long ecbhb:4;
/* MMFR2 */
@@ -430,8 +432,24 @@ struct cpuinfo_arm {
register_t bits[1];
} aux32;
- struct {
+ union {
register_t bits[6];
+ struct {
+ /* MMFR0 */
+ unsigned long __res0:32;
+ /* MMFR1 */
+ unsigned long __res1:32;
+ /* MMFR2 */
+ unsigned long __res2:32;
+ /* MMFR3 */
+ unsigned long __res3:32;
+ /* MMFR4 */
+ unsigned long __res4:32;
+ /* MMFR5 */
+ unsigned long __res5:4;
+ unsigned long ntlbpa:4;
+ unsigned long __res6:24;
+ };
} mm32;
struct {
diff --git a/xen/arch/arm/include/asm/processor.h b/xen/arch/arm/include/asm/processor.h
index 1a48c9ff3b..85f3b643a0 100644
--- a/xen/arch/arm/include/asm/processor.h
+++ b/xen/arch/arm/include/asm/processor.h
@@ -459,9 +459,16 @@
/* FSR long format */
#define FSRL_STATUS_DEBUG (_AC(0x22,UL)<<0)
+#ifdef CONFIG_ARM_32
+#define MM32_NTLBPA_SUPPORT_NI 0x0
+#define MM32_NTLBPA_SUPPORT_IMP 0x1
+#endif
+
#ifdef CONFIG_ARM_64
#define MM64_VMID_8_BITS_SUPPORT 0x0
#define MM64_VMID_16_BITS_SUPPORT 0x2
+#define MM64_NTLBPA_SUPPORT_NI 0x0
+#define MM64_NTLBPA_SUPPORT_IMP 0x1
#endif
#ifndef __ASSEMBLER__
--
2.43.0