[PATCH] xen/arm: Add workaround for Cortex-A76/Neoverse-N1 erratum #1286807

Michal Orzel posted 1 patch 3 years, 5 months ago
Patches applied successfully (tree, apply log)
git fetch https://github.com/patchew-project/xen tags/patchew/20201116072422.17400-1-michal.orzel@arm.com
Maintainers: Stefano Stabellini <sstabellini@kernel.org>, Volodymyr Babchuk <Volodymyr_Babchuk@epam.com>, Julien Grall <julien@xen.org>
There is a newer version of this series
docs/misc/arm/silicon-errata.txt     |  2 ++
xen/arch/arm/Kconfig                 | 18 +++++++++++++++++
xen/arch/arm/cpuerrata.c             | 14 ++++++++++++++
xen/include/asm-arm/arm64/flushtlb.h | 29 +++++++++++++++++++---------
xen/include/asm-arm/cpufeature.h     |  3 ++-
5 files changed, 56 insertions(+), 10 deletions(-)
[PATCH] xen/arm: Add workaround for Cortex-A76/Neoverse-N1 erratum #1286807
Posted by Michal Orzel 3 years, 5 months ago
On the affected Cortex-A76/Neoverse-N1 cores (r0p0 to r3p0),
if a virtual address for a cacheable mapping of a location is being
accessed by a core while another core is remapping the virtual
address to a new physical page using the recommended break-before-make
sequence, then under very rare circumstances TLBI+DSB completes before
a read using the translation being invalidated has been observed by
other observers. The workaround repeats the TLBI+DSB operation.

Signed-off-by: Michal Orzel <michal.orzel@arm.com>
---
 docs/misc/arm/silicon-errata.txt     |  2 ++
 xen/arch/arm/Kconfig                 | 18 +++++++++++++++++
 xen/arch/arm/cpuerrata.c             | 14 ++++++++++++++
 xen/include/asm-arm/arm64/flushtlb.h | 29 +++++++++++++++++++---------
 xen/include/asm-arm/cpufeature.h     |  3 ++-
 5 files changed, 56 insertions(+), 10 deletions(-)

diff --git a/docs/misc/arm/silicon-errata.txt b/docs/misc/arm/silicon-errata.txt
index 552c4151d3..d183ba543f 100644
--- a/docs/misc/arm/silicon-errata.txt
+++ b/docs/misc/arm/silicon-errata.txt
@@ -53,5 +53,7 @@ stable hypervisors.
 | ARM            | Cortex-A72      | #853709         | N/A                     |
 | ARM            | Cortex-A73      | #858921         | ARM_ERRATUM_858921      |
 | ARM            | Cortex-A76      | #1165522        | N/A                     |
+| ARM            | Cortex-A76      | #1286807        | ARM64_ERRATUM_1286807   |
 | ARM            | Neoverse-N1     | #1165522        | N/A
+| ARM            | Neoverse-N1     | #1286807        | ARM64_ERRATUM_1286807   |
 | ARM            | MMU-500         | #842869         | N/A                     |
diff --git a/xen/arch/arm/Kconfig b/xen/arch/arm/Kconfig
index f938dd21bd..5d6d906d72 100644
--- a/xen/arch/arm/Kconfig
+++ b/xen/arch/arm/Kconfig
@@ -244,6 +244,24 @@ config ARM_ERRATUM_858921
 
 	  If unsure, say Y.
 
+config ARM64_ERRATUM_1286807
+	bool "Cortex-A76/Neoverse-N1: 1286807: Modification of the translation table for a virtual address might lead to read-after-read ordering violation"
+	default y
+	depends on ARM_64
+	help
+	  This option adds a workaround for ARM Cortex-A76/Neoverse-N1 erratum 1286807.
+
+	  On the affected Cortex-A76/Neoverse-N1 cores (r0p0 to r3p0), if a virtual
+	  address for a cacheable mapping of a location is being
+	  accessed by a core while another core is remapping the virtual
+	  address to a new physical page using the recommended
+	  break-before-make sequence, then under very rare circumstances
+	  TLBI+DSB completes before a read using the translation being
+	  invalidated has been observed by other observers. The
+	  workaround repeats the TLBI+DSB operation.
+
+	  If unsure, say Y.
+
 endmenu
 
 config ARM64_HARDEN_BRANCH_PREDICTOR
diff --git a/xen/arch/arm/cpuerrata.c b/xen/arch/arm/cpuerrata.c
index 567911d380..cb4795beec 100644
--- a/xen/arch/arm/cpuerrata.c
+++ b/xen/arch/arm/cpuerrata.c
@@ -424,6 +424,20 @@ static const struct arm_cpu_capabilities arm_errata[] = {
                    (1 << MIDR_VARIANT_SHIFT) | 2),
     },
 #endif
+#ifdef CONFIG_ARM64_ERRATUM_1286807
+    {
+        /* Cortex-A76 r0p0 - r3p0 */
+        .desc = "ARM erratum 1286807",
+        .capability = ARM64_WORKAROUND_REPEAT_TLBI,
+        MIDR_RANGE(MIDR_CORTEX_A76, 0, 3 << MIDR_VARIANT_SHIFT),
+    },
+    {
+        /* Neoverse-N1 r0p0 - r3p0 */
+        .desc = "ARM erratum 1286807",
+        .capability = ARM64_WORKAROUND_REPEAT_TLBI,
+        MIDR_RANGE(MIDR_NEOVERSE_N1, 0, 3 << MIDR_VARIANT_SHIFT),
+    },
+#endif
 #ifdef CONFIG_ARM64_HARDEN_BRANCH_PREDICTOR
     {
         .capability = ARM_HARDEN_BRANCH_PREDICTOR,
diff --git a/xen/include/asm-arm/arm64/flushtlb.h b/xen/include/asm-arm/arm64/flushtlb.h
index ceec59542e..6726362211 100644
--- a/xen/include/asm-arm/arm64/flushtlb.h
+++ b/xen/include/asm-arm/arm64/flushtlb.h
@@ -9,6 +9,11 @@
  * DSB ISH          // Ensure the TLB invalidation has completed
  * ISB              // See explanation below
  *
+ * ARM64_WORKAROUND_REPEAT_TLBI:
+ * Modification of the translation table for a virtual address might lead to
+ * read-after-read ordering violation.
+ * The workaround repeats TLBI+DSB operation.
+ *
  * For Xen page-tables the ISB will discard any instructions fetched
  * from the old mappings.
  *
@@ -16,15 +21,21 @@
  * (and therefore the TLB invalidation) before continuing. So we know
  * the TLBs cannot contain an entry for a mapping we may have removed.
  */
-#define TLB_HELPER(name, tlbop) \
-static inline void name(void)   \
-{                               \
-    asm volatile(               \
-        "dsb  ishst;"           \
-        "tlbi "  # tlbop  ";"   \
-        "dsb  ish;"             \
-        "isb;"                  \
-        : : : "memory");        \
+#define TLB_HELPER(name, tlbop)       \
+static inline void name(void)         \
+{                                     \
+    asm volatile(                     \
+        "dsb  ishst;"                 \
+        "tlbi "  # tlbop  ";"         \
+        ALTERNATIVE(                  \
+        "nop; nop;",                  \
+        "dsb  ish;"                   \
+        "tlbi "  # tlbop  ";",        \
+        ARM64_WORKAROUND_REPEAT_TLBI, \
+        CONFIG_ARM64_ERRATUM_1286807) \
+        "dsb  ish;"                   \
+        "isb;"                        \
+        : : : "memory");              \
 }
 
 /* Flush local TLBs, current VMID only. */
diff --git a/xen/include/asm-arm/cpufeature.h b/xen/include/asm-arm/cpufeature.h
index 016a9fe203..c7b5052992 100644
--- a/xen/include/asm-arm/cpufeature.h
+++ b/xen/include/asm-arm/cpufeature.h
@@ -46,8 +46,9 @@
 #define ARM_SMCCC_1_1 8
 #define ARM64_WORKAROUND_AT_SPECULATE 9
 #define ARM_WORKAROUND_858921 10
+#define ARM64_WORKAROUND_REPEAT_TLBI 11
 
-#define ARM_NCAPS           11
+#define ARM_NCAPS           12
 
 #ifndef __ASSEMBLY__
 
-- 
2.28.0


Re: [PATCH] xen/arm: Add workaround for Cortex-A76/Neoverse-N1 erratum #1286807
Posted by Bertrand Marquis 3 years, 5 months ago
Hi,

> On 16 Nov 2020, at 07:24, Michal Orzel <Michal.Orzel@arm.com> wrote:
> 
> On the affected Cortex-A76/Neoverse-N1 cores (r0p0 to r3p0),
> if a virtual address for a cacheable mapping of a location is being
> accessed by a core while another core is remapping the virtual
> address to a new physical page using the recommended break-before-make
> sequence, then under very rare circumstances TLBI+DSB completes before
> a read using the translation being invalidated has been observed by
> other observers. The workaround repeats the TLBI+DSB operation.
> 
> Signed-off-by: Michal Orzel <michal.orzel@arm.com>
Reviewed-by: Bertrand Marquis <bertrand.marquis@arm.com>

Cheers
Bertrand

> ---
> docs/misc/arm/silicon-errata.txt     |  2 ++
> xen/arch/arm/Kconfig                 | 18 +++++++++++++++++
> xen/arch/arm/cpuerrata.c             | 14 ++++++++++++++
> xen/include/asm-arm/arm64/flushtlb.h | 29 +++++++++++++++++++---------
> xen/include/asm-arm/cpufeature.h     |  3 ++-
> 5 files changed, 56 insertions(+), 10 deletions(-)
> 
> diff --git a/docs/misc/arm/silicon-errata.txt b/docs/misc/arm/silicon-errata.txt
> index 552c4151d3..d183ba543f 100644
> --- a/docs/misc/arm/silicon-errata.txt
> +++ b/docs/misc/arm/silicon-errata.txt
> @@ -53,5 +53,7 @@ stable hypervisors.
> | ARM            | Cortex-A72      | #853709         | N/A                     |
> | ARM            | Cortex-A73      | #858921         | ARM_ERRATUM_858921      |
> | ARM            | Cortex-A76      | #1165522        | N/A                     |
> +| ARM            | Cortex-A76      | #1286807        | ARM64_ERRATUM_1286807   |
> | ARM            | Neoverse-N1     | #1165522        | N/A
> +| ARM            | Neoverse-N1     | #1286807        | ARM64_ERRATUM_1286807   |
> | ARM            | MMU-500         | #842869         | N/A                     |
> diff --git a/xen/arch/arm/Kconfig b/xen/arch/arm/Kconfig
> index f938dd21bd..5d6d906d72 100644
> --- a/xen/arch/arm/Kconfig
> +++ b/xen/arch/arm/Kconfig
> @@ -244,6 +244,24 @@ config ARM_ERRATUM_858921
> 
> 	  If unsure, say Y.
> 
> +config ARM64_ERRATUM_1286807
> +	bool "Cortex-A76/Neoverse-N1: 1286807: Modification of the translation table for a virtual address might lead to read-after-read ordering violation"
> +	default y
> +	depends on ARM_64
> +	help
> +	  This option adds a workaround for ARM Cortex-A76/Neoverse-N1 erratum 1286807.
> +
> +	  On the affected Cortex-A76/Neoverse-N1 cores (r0p0 to r3p0), if a virtual
> +	  address for a cacheable mapping of a location is being
> +	  accessed by a core while another core is remapping the virtual
> +	  address to a new physical page using the recommended
> +	  break-before-make sequence, then under very rare circumstances
> +	  TLBI+DSB completes before a read using the translation being
> +	  invalidated has been observed by other observers. The
> +	  workaround repeats the TLBI+DSB operation.
> +
> +	  If unsure, say Y.
> +
> endmenu
> 
> config ARM64_HARDEN_BRANCH_PREDICTOR
> diff --git a/xen/arch/arm/cpuerrata.c b/xen/arch/arm/cpuerrata.c
> index 567911d380..cb4795beec 100644
> --- a/xen/arch/arm/cpuerrata.c
> +++ b/xen/arch/arm/cpuerrata.c
> @@ -424,6 +424,20 @@ static const struct arm_cpu_capabilities arm_errata[] = {
>                    (1 << MIDR_VARIANT_SHIFT) | 2),
>     },
> #endif
> +#ifdef CONFIG_ARM64_ERRATUM_1286807
> +    {
> +        /* Cortex-A76 r0p0 - r3p0 */
> +        .desc = "ARM erratum 1286807",
> +        .capability = ARM64_WORKAROUND_REPEAT_TLBI,
> +        MIDR_RANGE(MIDR_CORTEX_A76, 0, 3 << MIDR_VARIANT_SHIFT),
> +    },
> +    {
> +        /* Neoverse-N1 r0p0 - r3p0 */
> +        .desc = "ARM erratum 1286807",
> +        .capability = ARM64_WORKAROUND_REPEAT_TLBI,
> +        MIDR_RANGE(MIDR_NEOVERSE_N1, 0, 3 << MIDR_VARIANT_SHIFT),
> +    },
> +#endif
> #ifdef CONFIG_ARM64_HARDEN_BRANCH_PREDICTOR
>     {
>         .capability = ARM_HARDEN_BRANCH_PREDICTOR,
> diff --git a/xen/include/asm-arm/arm64/flushtlb.h b/xen/include/asm-arm/arm64/flushtlb.h
> index ceec59542e..6726362211 100644
> --- a/xen/include/asm-arm/arm64/flushtlb.h
> +++ b/xen/include/asm-arm/arm64/flushtlb.h
> @@ -9,6 +9,11 @@
>  * DSB ISH          // Ensure the TLB invalidation has completed
>  * ISB              // See explanation below
>  *
> + * ARM64_WORKAROUND_REPEAT_TLBI:
> + * Modification of the translation table for a virtual address might lead to
> + * read-after-read ordering violation.
> + * The workaround repeats TLBI+DSB operation.
> + *
>  * For Xen page-tables the ISB will discard any instructions fetched
>  * from the old mappings.
>  *
> @@ -16,15 +21,21 @@
>  * (and therefore the TLB invalidation) before continuing. So we know
>  * the TLBs cannot contain an entry for a mapping we may have removed.
>  */
> -#define TLB_HELPER(name, tlbop) \
> -static inline void name(void)   \
> -{                               \
> -    asm volatile(               \
> -        "dsb  ishst;"           \
> -        "tlbi "  # tlbop  ";"   \
> -        "dsb  ish;"             \
> -        "isb;"                  \
> -        : : : "memory");        \
> +#define TLB_HELPER(name, tlbop)       \
> +static inline void name(void)         \
> +{                                     \
> +    asm volatile(                     \
> +        "dsb  ishst;"                 \
> +        "tlbi "  # tlbop  ";"         \
> +        ALTERNATIVE(                  \
> +        "nop; nop;",                  \
> +        "dsb  ish;"                   \
> +        "tlbi "  # tlbop  ";",        \
> +        ARM64_WORKAROUND_REPEAT_TLBI, \
> +        CONFIG_ARM64_ERRATUM_1286807) \
> +        "dsb  ish;"                   \
> +        "isb;"                        \
> +        : : : "memory");              \
> }
> 
> /* Flush local TLBs, current VMID only. */
> diff --git a/xen/include/asm-arm/cpufeature.h b/xen/include/asm-arm/cpufeature.h
> index 016a9fe203..c7b5052992 100644
> --- a/xen/include/asm-arm/cpufeature.h
> +++ b/xen/include/asm-arm/cpufeature.h
> @@ -46,8 +46,9 @@
> #define ARM_SMCCC_1_1 8
> #define ARM64_WORKAROUND_AT_SPECULATE 9
> #define ARM_WORKAROUND_858921 10
> +#define ARM64_WORKAROUND_REPEAT_TLBI 11
> 
> -#define ARM_NCAPS           11
> +#define ARM_NCAPS           12
> 
> #ifndef __ASSEMBLY__
> 
> -- 
> 2.28.0
> 


RE: [PATCH] xen/arm: Add workaround for Cortex-A76/Neoverse-N1 erratum #1286807
Posted by Wei Chen 3 years, 5 months ago
Hi,

> -----Original Message-----
> From: Xen-devel <xen-devel-bounces@lists.xenproject.org> On Behalf Of
> Bertrand Marquis
> Sent: 2020年11月16日 16:48
> To: Michal Orzel <Michal.Orzel@arm.com>
> Cc: open list:X86 <xen-devel@lists.xenproject.org>; Stefano Stabellini
> <sstabellini@kernel.org>; Julien Grall <julien@xen.org>; Volodymyr Babchuk
> <Volodymyr_Babchuk@epam.com>
> Subject: Re: [PATCH] xen/arm: Add workaround for Cortex-A76/Neoverse-N1
> erratum #1286807
> 
> Hi,
> 
> > On 16 Nov 2020, at 07:24, Michal Orzel <Michal.Orzel@arm.com> wrote:
> >
> > On the affected Cortex-A76/Neoverse-N1 cores (r0p0 to r3p0),
> > if a virtual address for a cacheable mapping of a location is being
> > accessed by a core while another core is remapping the virtual
> > address to a new physical page using the recommended break-before-make
> > sequence, then under very rare circumstances TLBI+DSB completes before
> > a read using the translation being invalidated has been observed by
> > other observers. The workaround repeats the TLBI+DSB operation.
> >
> > Signed-off-by: Michal Orzel <michal.orzel@arm.com>
> Reviewed-by: Bertrand Marquis <bertrand.marquis@arm.com>
> 

Reviewed-by: Wei Chen <Wei.Chen@arm.com>

> Cheers
> Bertrand
> 
> > ---
> > docs/misc/arm/silicon-errata.txt     |  2 ++
> > xen/arch/arm/Kconfig                 | 18 +++++++++++++++++
> > xen/arch/arm/cpuerrata.c             | 14 ++++++++++++++
> > xen/include/asm-arm/arm64/flushtlb.h | 29 +++++++++++++++++++---------
> > xen/include/asm-arm/cpufeature.h     |  3 ++-
> > 5 files changed, 56 insertions(+), 10 deletions(-)
> >
> > diff --git a/docs/misc/arm/silicon-errata.txt b/docs/misc/arm/silicon-
> errata.txt
> > index 552c4151d3..d183ba543f 100644
> > --- a/docs/misc/arm/silicon-errata.txt
> > +++ b/docs/misc/arm/silicon-errata.txt
> > @@ -53,5 +53,7 @@ stable hypervisors.
> > | ARM            | Cortex-A72      | #853709         | N/A                     |
> > | ARM            | Cortex-A73      | #858921         | ARM_ERRATUM_858921      |
> > | ARM            | Cortex-A76      | #1165522        | N/A                     |
> > +| ARM            | Cortex-A76      | #1286807        | ARM64_ERRATUM_1286807
> |
> > | ARM            | Neoverse-N1     | #1165522        | N/A
> > +| ARM            | Neoverse-N1     | #1286807        | ARM64_ERRATUM_1286807
> |
> > | ARM            | MMU-500         | #842869         | N/A                     |
> > diff --git a/xen/arch/arm/Kconfig b/xen/arch/arm/Kconfig
> > index f938dd21bd..5d6d906d72 100644
> > --- a/xen/arch/arm/Kconfig
> > +++ b/xen/arch/arm/Kconfig
> > @@ -244,6 +244,24 @@ config ARM_ERRATUM_858921
> >
> > 	  If unsure, say Y.
> >
> > +config ARM64_ERRATUM_1286807
> > +	bool "Cortex-A76/Neoverse-N1: 1286807: Modification of the
> translation table for a virtual address might lead to read-after-read ordering
> violation"
> > +	default y
> > +	depends on ARM_64
> > +	help
> > +	  This option adds a workaround for ARM Cortex-A76/Neoverse-N1
> erratum 1286807.
> > +
> > +	  On the affected Cortex-A76/Neoverse-N1 cores (r0p0 to r3p0), if a
> virtual
> > +	  address for a cacheable mapping of a location is being
> > +	  accessed by a core while another core is remapping the virtual
> > +	  address to a new physical page using the recommended
> > +	  break-before-make sequence, then under very rare circumstances
> > +	  TLBI+DSB completes before a read using the translation being
> > +	  invalidated has been observed by other observers. The
> > +	  workaround repeats the TLBI+DSB operation.
> > +
> > +	  If unsure, say Y.
> > +
> > endmenu
> >
> > config ARM64_HARDEN_BRANCH_PREDICTOR
> > diff --git a/xen/arch/arm/cpuerrata.c b/xen/arch/arm/cpuerrata.c
> > index 567911d380..cb4795beec 100644
> > --- a/xen/arch/arm/cpuerrata.c
> > +++ b/xen/arch/arm/cpuerrata.c
> > @@ -424,6 +424,20 @@ static const struct arm_cpu_capabilities arm_errata[]
> = {
> >                    (1 << MIDR_VARIANT_SHIFT) | 2),
> >     },
> > #endif
> > +#ifdef CONFIG_ARM64_ERRATUM_1286807
> > +    {
> > +        /* Cortex-A76 r0p0 - r3p0 */
> > +        .desc = "ARM erratum 1286807",
> > +        .capability = ARM64_WORKAROUND_REPEAT_TLBI,
> > +        MIDR_RANGE(MIDR_CORTEX_A76, 0, 3 << MIDR_VARIANT_SHIFT),
> > +    },
> > +    {
> > +        /* Neoverse-N1 r0p0 - r3p0 */
> > +        .desc = "ARM erratum 1286807",
> > +        .capability = ARM64_WORKAROUND_REPEAT_TLBI,
> > +        MIDR_RANGE(MIDR_NEOVERSE_N1, 0, 3 << MIDR_VARIANT_SHIFT),
> > +    },
> > +#endif
> > #ifdef CONFIG_ARM64_HARDEN_BRANCH_PREDICTOR
> >     {
> >         .capability = ARM_HARDEN_BRANCH_PREDICTOR,
> > diff --git a/xen/include/asm-arm/arm64/flushtlb.h b/xen/include/asm-
> arm/arm64/flushtlb.h
> > index ceec59542e..6726362211 100644
> > --- a/xen/include/asm-arm/arm64/flushtlb.h
> > +++ b/xen/include/asm-arm/arm64/flushtlb.h
> > @@ -9,6 +9,11 @@
> >  * DSB ISH          // Ensure the TLB invalidation has completed
> >  * ISB              // See explanation below
> >  *
> > + * ARM64_WORKAROUND_REPEAT_TLBI:
> > + * Modification of the translation table for a virtual address might lead to
> > + * read-after-read ordering violation.
> > + * The workaround repeats TLBI+DSB operation.
> > + *
> >  * For Xen page-tables the ISB will discard any instructions fetched
> >  * from the old mappings.
> >  *
> > @@ -16,15 +21,21 @@
> >  * (and therefore the TLB invalidation) before continuing. So we know
> >  * the TLBs cannot contain an entry for a mapping we may have removed.
> >  */
> > -#define TLB_HELPER(name, tlbop) \
> > -static inline void name(void)   \
> > -{                               \
> > -    asm volatile(               \
> > -        "dsb  ishst;"           \
> > -        "tlbi "  # tlbop  ";"   \
> > -        "dsb  ish;"             \
> > -        "isb;"                  \
> > -        : : : "memory");        \
> > +#define TLB_HELPER(name, tlbop)       \
> > +static inline void name(void)         \
> > +{                                     \
> > +    asm volatile(                     \
> > +        "dsb  ishst;"                 \
> > +        "tlbi "  # tlbop  ";"         \
> > +        ALTERNATIVE(                  \
> > +        "nop; nop;",                  \
> > +        "dsb  ish;"                   \
> > +        "tlbi "  # tlbop  ";",        \
> > +        ARM64_WORKAROUND_REPEAT_TLBI, \
> > +        CONFIG_ARM64_ERRATUM_1286807) \
> > +        "dsb  ish;"                   \
> > +        "isb;"                        \
> > +        : : : "memory");              \
> > }
> >
> > /* Flush local TLBs, current VMID only. */
> > diff --git a/xen/include/asm-arm/cpufeature.h b/xen/include/asm-
> arm/cpufeature.h
> > index 016a9fe203..c7b5052992 100644
> > --- a/xen/include/asm-arm/cpufeature.h
> > +++ b/xen/include/asm-arm/cpufeature.h
> > @@ -46,8 +46,9 @@
> > #define ARM_SMCCC_1_1 8
> > #define ARM64_WORKAROUND_AT_SPECULATE 9
> > #define ARM_WORKAROUND_858921 10
> > +#define ARM64_WORKAROUND_REPEAT_TLBI 11
> >
> > -#define ARM_NCAPS           11
> > +#define ARM_NCAPS           12
> >
> > #ifndef __ASSEMBLY__
> >
> > --
> > 2.28.0
> >
> 

Re: [PATCH] xen/arm: Add workaround for Cortex-A76/Neoverse-N1 erratum #1286807
Posted by Julien Grall 3 years, 5 months ago
Hi Michal,

On 16/11/2020 07:24, Michal Orzel wrote:
> On the affected Cortex-A76/Neoverse-N1 cores (r0p0 to r3p0),
> if a virtual address for a cacheable mapping of a location is being
> accessed by a core while another core is remapping the virtual
> address to a new physical page using the recommended break-before-make
> sequence, then under very rare circumstances TLBI+DSB completes before
> a read using the translation being invalidated has been observed by
> other observers. The workaround repeats the TLBI+DSB operation.

The commit message suggests the second TLBI+DSB operation is only 
necessary for innershearable TLBs.

I agree that it is probably be best to cover all the TLB flush 
operations. However, it would be good to clarify it in the commit 
message that this is done on purpose.

> 
> Signed-off-by: Michal Orzel <michal.orzel@arm.com>
> ---
>   docs/misc/arm/silicon-errata.txt     |  2 ++
>   xen/arch/arm/Kconfig                 | 18 +++++++++++++++++
>   xen/arch/arm/cpuerrata.c             | 14 ++++++++++++++
>   xen/include/asm-arm/arm64/flushtlb.h | 29 +++++++++++++++++++---------
>   xen/include/asm-arm/cpufeature.h     |  3 ++-
>   5 files changed, 56 insertions(+), 10 deletions(-)
> 
> diff --git a/docs/misc/arm/silicon-errata.txt b/docs/misc/arm/silicon-errata.txt
> index 552c4151d3..d183ba543f 100644
> --- a/docs/misc/arm/silicon-errata.txt
> +++ b/docs/misc/arm/silicon-errata.txt
> @@ -53,5 +53,7 @@ stable hypervisors.
>   | ARM            | Cortex-A72      | #853709         | N/A                     |
>   | ARM            | Cortex-A73      | #858921         | ARM_ERRATUM_858921      |
>   | ARM            | Cortex-A76      | #1165522        | N/A                     |
> +| ARM            | Cortex-A76      | #1286807        | ARM64_ERRATUM_1286807   |
>   | ARM            | Neoverse-N1     | #1165522        | N/A
> +| ARM            | Neoverse-N1     | #1286807        | ARM64_ERRATUM_1286807   |
>   | ARM            | MMU-500         | #842869         | N/A                     |
> diff --git a/xen/arch/arm/Kconfig b/xen/arch/arm/Kconfig
> index f938dd21bd..5d6d906d72 100644
> --- a/xen/arch/arm/Kconfig
> +++ b/xen/arch/arm/Kconfig
> @@ -244,6 +244,24 @@ config ARM_ERRATUM_858921
>   
>   	  If unsure, say Y.
>   
> +config ARM64_ERRATUM_1286807
> +	bool "Cortex-A76/Neoverse-N1: 1286807: Modification of the translation table for a virtual address might lead to read-after-read ordering violation"
> +	default y
> +	depends on ARM_64
> +	help
> +	  This option adds a workaround for ARM Cortex-A76/Neoverse-N1 erratum 1286807.
> +
> +	  On the affected Cortex-A76/Neoverse-N1 cores (r0p0 to r3p0), if a virtual
> +	  address for a cacheable mapping of a location is being
> +	  accessed by a core while another core is remapping the virtual
> +	  address to a new physical page using the recommended
> +	  break-before-make sequence, then under very rare circumstances
> +	  TLBI+DSB completes before a read using the translation being
> +	  invalidated has been observed by other observers. The
> +	  workaround repeats the TLBI+DSB operation.
> +
> +	  If unsure, say Y.
> +
>   endmenu
>   
>   config ARM64_HARDEN_BRANCH_PREDICTOR
> diff --git a/xen/arch/arm/cpuerrata.c b/xen/arch/arm/cpuerrata.c
> index 567911d380..cb4795beec 100644
> --- a/xen/arch/arm/cpuerrata.c
> +++ b/xen/arch/arm/cpuerrata.c
> @@ -424,6 +424,20 @@ static const struct arm_cpu_capabilities arm_errata[] = {
>                      (1 << MIDR_VARIANT_SHIFT) | 2),
>       },
>   #endif
> +#ifdef CONFIG_ARM64_ERRATUM_1286807
> +    {
> +        /* Cortex-A76 r0p0 - r3p0 */
> +        .desc = "ARM erratum 1286807",
> +        .capability = ARM64_WORKAROUND_REPEAT_TLBI,
> +        MIDR_RANGE(MIDR_CORTEX_A76, 0, 3 << MIDR_VARIANT_SHIFT),
> +    },
> +    {
> +        /* Neoverse-N1 r0p0 - r3p0 */
> +        .desc = "ARM erratum 1286807",
> +        .capability = ARM64_WORKAROUND_REPEAT_TLBI,
> +        MIDR_RANGE(MIDR_NEOVERSE_N1, 0, 3 << MIDR_VARIANT_SHIFT),
> +    },
> +#endif
>   #ifdef CONFIG_ARM64_HARDEN_BRANCH_PREDICTOR
>       {
>           .capability = ARM_HARDEN_BRANCH_PREDICTOR,
> diff --git a/xen/include/asm-arm/arm64/flushtlb.h b/xen/include/asm-arm/arm64/flushtlb.h
> index ceec59542e..6726362211 100644
> --- a/xen/include/asm-arm/arm64/flushtlb.h
> +++ b/xen/include/asm-arm/arm64/flushtlb.h
> @@ -9,6 +9,11 @@
>    * DSB ISH          // Ensure the TLB invalidation has completed
>    * ISB              // See explanation below
>    *
> + * ARM64_WORKAROUND_REPEAT_TLBI:
> + * Modification of the translation table for a virtual address might lead to
> + * read-after-read ordering violation.
> + * The workaround repeats TLBI+DSB operation.
> + *
>    * For Xen page-tables the ISB will discard any instructions fetched
>    * from the old mappings.
>    *
> @@ -16,15 +21,21 @@
>    * (and therefore the TLB invalidation) before continuing. So we know
>    * the TLBs cannot contain an entry for a mapping we may have removed.
>    */
> -#define TLB_HELPER(name, tlbop) \
> -static inline void name(void)   \
> -{                               \
> -    asm volatile(               \
> -        "dsb  ishst;"           \
> -        "tlbi "  # tlbop  ";"   \
> -        "dsb  ish;"             \
> -        "isb;"                  \
> -        : : : "memory");        \
> +#define TLB_HELPER(name, tlbop)       \
> +static inline void name(void)         \
> +{                                     \
> +    asm volatile(                     \
> +        "dsb  ishst;"                 \
> +        "tlbi "  # tlbop  ";"         \
> +        ALTERNATIVE(                  \
> +        "nop; nop;",                  \

This is a bit difficult to read. I would suggest to indent the arguments 
of ALTERNITIVE() by an extra soft tab.

> +        "dsb  ish;"                   \
> +        "tlbi "  # tlbop  ";",        \
> +        ARM64_WORKAROUND_REPEAT_TLBI, \
> +        CONFIG_ARM64_ERRATUM_1286807) \

I think it would be fine to have this code unconditionally built. But if 
you prefer to keep it conditionally, then I would suggest to introduce 
CONFIG_ARM64_WORKAROUND_REPEAT_TLBI and gate the ALTERNATIVE with it.

The new config would be selected by CONFIG_ARM64_ERRATUM_1286807. This 
will make easier for future workaround to use it (AFAICT there are other 
platforms require the same thing).

Cheers,

-- 
Julien Grall

Re: [PATCH] xen/arm: Add workaround for Cortex-A76/Neoverse-N1 erratum #1286807
Posted by Michal Orzel 3 years, 5 months ago

On 16.11.2020 11:12, Julien Grall wrote:
> Hi Michal,
> 
> On 16/11/2020 07:24, Michal Orzel wrote:
>> On the affected Cortex-A76/Neoverse-N1 cores (r0p0 to r3p0),
>> if a virtual address for a cacheable mapping of a location is being
>> accessed by a core while another core is remapping the virtual
>> address to a new physical page using the recommended break-before-make
>> sequence, then under very rare circumstances TLBI+DSB completes before
>> a read using the translation being invalidated has been observed by
>> other observers. The workaround repeats the TLBI+DSB operation.
> 
> The commit message suggests the second TLBI+DSB operation is only necessary for innershearable TLBs.
> 
> I agree that it is probably be best to cover all the TLB flush operations. However, it would be good to clarify it in the commit message that this is done on purpose.
> 
Hi Julien,

Ok I agree that such note can be added.
I will push v2 then.
>>
>> Signed-off-by: Michal Orzel <michal.orzel@arm.com>
>> ---
>>   docs/misc/arm/silicon-errata.txt     |  2 ++
>>   xen/arch/arm/Kconfig                 | 18 +++++++++++++++++
>>   xen/arch/arm/cpuerrata.c             | 14 ++++++++++++++
>>   xen/include/asm-arm/arm64/flushtlb.h | 29 +++++++++++++++++++---------
>>   xen/include/asm-arm/cpufeature.h     |  3 ++-
>>   5 files changed, 56 insertions(+), 10 deletions(-)
>>
>> diff --git a/docs/misc/arm/silicon-errata.txt b/docs/misc/arm/silicon-errata.txt
>> index 552c4151d3..d183ba543f 100644
>> --- a/docs/misc/arm/silicon-errata.txt
>> +++ b/docs/misc/arm/silicon-errata.txt
>> @@ -53,5 +53,7 @@ stable hypervisors.
>>   | ARM            | Cortex-A72      | #853709         | N/A                     |
>>   | ARM            | Cortex-A73      | #858921         | ARM_ERRATUM_858921      |
>>   | ARM            | Cortex-A76      | #1165522        | N/A                     |
>> +| ARM            | Cortex-A76      | #1286807        | ARM64_ERRATUM_1286807   |
>>   | ARM            | Neoverse-N1     | #1165522        | N/A
>> +| ARM            | Neoverse-N1     | #1286807        | ARM64_ERRATUM_1286807   |
>>   | ARM            | MMU-500         | #842869         | N/A                     |
>> diff --git a/xen/arch/arm/Kconfig b/xen/arch/arm/Kconfig
>> index f938dd21bd..5d6d906d72 100644
>> --- a/xen/arch/arm/Kconfig
>> +++ b/xen/arch/arm/Kconfig
>> @@ -244,6 +244,24 @@ config ARM_ERRATUM_858921
>>           If unsure, say Y.
>>   +config ARM64_ERRATUM_1286807
>> +    bool "Cortex-A76/Neoverse-N1: 1286807: Modification of the translation table for a virtual address might lead to read-after-read ordering violation"
>> +    default y
>> +    depends on ARM_64
>> +    help
>> +      This option adds a workaround for ARM Cortex-A76/Neoverse-N1 erratum 1286807.
>> +
>> +      On the affected Cortex-A76/Neoverse-N1 cores (r0p0 to r3p0), if a virtual
>> +      address for a cacheable mapping of a location is being
>> +      accessed by a core while another core is remapping the virtual
>> +      address to a new physical page using the recommended
>> +      break-before-make sequence, then under very rare circumstances
>> +      TLBI+DSB completes before a read using the translation being
>> +      invalidated has been observed by other observers. The
>> +      workaround repeats the TLBI+DSB operation.
>> +
>> +      If unsure, say Y.
>> +
>>   endmenu
>>     config ARM64_HARDEN_BRANCH_PREDICTOR
>> diff --git a/xen/arch/arm/cpuerrata.c b/xen/arch/arm/cpuerrata.c
>> index 567911d380..cb4795beec 100644
>> --- a/xen/arch/arm/cpuerrata.c
>> +++ b/xen/arch/arm/cpuerrata.c
>> @@ -424,6 +424,20 @@ static const struct arm_cpu_capabilities arm_errata[] = {
>>                      (1 << MIDR_VARIANT_SHIFT) | 2),
>>       },
>>   #endif
>> +#ifdef CONFIG_ARM64_ERRATUM_1286807
>> +    {
>> +        /* Cortex-A76 r0p0 - r3p0 */
>> +        .desc = "ARM erratum 1286807",
>> +        .capability = ARM64_WORKAROUND_REPEAT_TLBI,
>> +        MIDR_RANGE(MIDR_CORTEX_A76, 0, 3 << MIDR_VARIANT_SHIFT),
>> +    },
>> +    {
>> +        /* Neoverse-N1 r0p0 - r3p0 */
>> +        .desc = "ARM erratum 1286807",
>> +        .capability = ARM64_WORKAROUND_REPEAT_TLBI,
>> +        MIDR_RANGE(MIDR_NEOVERSE_N1, 0, 3 << MIDR_VARIANT_SHIFT),
>> +    },
>> +#endif
>>   #ifdef CONFIG_ARM64_HARDEN_BRANCH_PREDICTOR
>>       {
>>           .capability = ARM_HARDEN_BRANCH_PREDICTOR,
>> diff --git a/xen/include/asm-arm/arm64/flushtlb.h b/xen/include/asm-arm/arm64/flushtlb.h
>> index ceec59542e..6726362211 100644
>> --- a/xen/include/asm-arm/arm64/flushtlb.h
>> +++ b/xen/include/asm-arm/arm64/flushtlb.h
>> @@ -9,6 +9,11 @@
>>    * DSB ISH          // Ensure the TLB invalidation has completed
>>    * ISB              // See explanation below
>>    *
>> + * ARM64_WORKAROUND_REPEAT_TLBI:
>> + * Modification of the translation table for a virtual address might lead to
>> + * read-after-read ordering violation.
>> + * The workaround repeats TLBI+DSB operation.
>> + *
>>    * For Xen page-tables the ISB will discard any instructions fetched
>>    * from the old mappings.
>>    *
>> @@ -16,15 +21,21 @@
>>    * (and therefore the TLB invalidation) before continuing. So we know
>>    * the TLBs cannot contain an entry for a mapping we may have removed.
>>    */
>> -#define TLB_HELPER(name, tlbop) \
>> -static inline void name(void)   \
>> -{                               \
>> -    asm volatile(               \
>> -        "dsb  ishst;"           \
>> -        "tlbi "  # tlbop  ";"   \
>> -        "dsb  ish;"             \
>> -        "isb;"                  \
>> -        : : : "memory");        \
>> +#define TLB_HELPER(name, tlbop)       \
>> +static inline void name(void)         \
>> +{                                     \
>> +    asm volatile(                     \
>> +        "dsb  ishst;"                 \
>> +        "tlbi "  # tlbop  ";"         \
>> +        ALTERNATIVE(                  \
>> +        "nop; nop;",                  \
> 
> This is a bit difficult to read. I would suggest to indent the arguments of ALTERNITIVE() by an extra soft tab.
> 
>> +        "dsb  ish;"                   \
>> +        "tlbi "  # tlbop  ";",        \
>> +        ARM64_WORKAROUND_REPEAT_TLBI, \
>> +        CONFIG_ARM64_ERRATUM_1286807) \
> 
> I think it would be fine to have this code unconditionally built. But if you prefer to keep it conditionally, then I would suggest to introduce CONFIG_ARM64_WORKAROUND_REPEAT_TLBI and gate the ALTERNATIVE with it.
> 
> The new config would be selected by CONFIG_ARM64_ERRATUM_1286807. This will make easier for future workaround to use it (AFAICT there are other platforms require the same thing).
> 
> Cheers,
>