[PATCH] xen/arm: Add Cortex-A73 erratum 858921 workaround

Penny Zheng posted 1 patch 3 years, 5 months ago
Patches applied successfully (tree, apply log)
git fetch https://github.com/patchew-project/xen tags/patchew/20201109082110.1133996-1-penny.zheng@arm.com
Maintainers: Julien Grall <julien@xen.org>, Volodymyr Babchuk <Volodymyr_Babchuk@epam.com>, Stefano Stabellini <sstabellini@kernel.org>
docs/misc/arm/silicon-errata.txt |  1 +
xen/arch/arm/Kconfig             | 18 ++++++++++++++++++
xen/arch/arm/cpuerrata.c         |  8 ++++++++
xen/arch/arm/vtimer.c            |  2 +-
xen/include/asm-arm/cpuerrata.h  |  2 ++
xen/include/asm-arm/cpufeature.h |  3 ++-
xen/include/asm-arm/time.h       | 22 +++++++++++++++++++++-
7 files changed, 53 insertions(+), 3 deletions(-)
[PATCH] xen/arm: Add Cortex-A73 erratum 858921 workaround
Posted by Penny Zheng 3 years, 5 months ago
CNTVCT_EL0 or CNTPCT_EL0 counter read in Cortex-A73 (all versions)
might return a wrong value when the counter crosses a 32bit boundary.

Until now, there is no case for Xen itself to access CNTVCT_EL0,
and it also should be the Guest OS's responsibility to deal with
this part.

But for CNTPCT, there exists several cases in Xen involving reading
CNTPCT, so a possible workaround is that performing the read twice,
and to return one or the other depending on whether a transition has
taken place.

Signed-off-by: Penny Zheng <penny.zheng@arm.com>
---
 docs/misc/arm/silicon-errata.txt |  1 +
 xen/arch/arm/Kconfig             | 18 ++++++++++++++++++
 xen/arch/arm/cpuerrata.c         |  8 ++++++++
 xen/arch/arm/vtimer.c            |  2 +-
 xen/include/asm-arm/cpuerrata.h  |  2 ++
 xen/include/asm-arm/cpufeature.h |  3 ++-
 xen/include/asm-arm/time.h       | 22 +++++++++++++++++++++-
 7 files changed, 53 insertions(+), 3 deletions(-)

diff --git a/docs/misc/arm/silicon-errata.txt b/docs/misc/arm/silicon-errata.txt
index 1f18a9df58..552c4151d3 100644
--- a/docs/misc/arm/silicon-errata.txt
+++ b/docs/misc/arm/silicon-errata.txt
@@ -51,6 +51,7 @@ stable hypervisors.
 | ARM            | Cortex-A57      | #1319537        | N/A                     |
 | ARM            | Cortex-A72      | #1319367        | N/A                     |
 | ARM            | Cortex-A72      | #853709         | N/A                     |
+| ARM            | Cortex-A73      | #858921         | ARM_ERRATUM_858921      |
 | ARM            | Cortex-A76      | #1165522        | N/A                     |
 | ARM            | Neoverse-N1     | #1165522        | N/A
 | ARM            | MMU-500         | #842869         | N/A                     |
diff --git a/xen/arch/arm/Kconfig b/xen/arch/arm/Kconfig
index 2777388265..f938dd21bd 100644
--- a/xen/arch/arm/Kconfig
+++ b/xen/arch/arm/Kconfig
@@ -226,6 +226,24 @@ config ARM64_ERRATUM_834220
 
 	  If unsure, say Y.
 
+config ARM_ERRATUM_858921
+	bool "Cortex-A73: 858921: Possible wrong read value for CNTVCT or CNTPCT"
+	default y
+	help
+	  This option adds an alternative code sequence to work around ARM
+	  erratum 858921 on Cortex-A73 (all versions).
+
+	  Affected Cortex-A73 might return wrong read value for CNTVCT or CNTPCT
+	  when the counter crosses a 32bit boundary.
+
+	  The workaround involves performing the read twice, and to return
+	  one or the other value depending on whether a transition has taken place.
+	  Please note that this does not necessarily enable the workaround,
+	  as it depends on the alternative framework, which will only patch
+	  the kernel if an affected CPU is detected.
+
+	  If unsure, say Y.
+
 endmenu
 
 config ARM64_HARDEN_BRANCH_PREDICTOR
diff --git a/xen/arch/arm/cpuerrata.c b/xen/arch/arm/cpuerrata.c
index 6731d873e8..567911d380 100644
--- a/xen/arch/arm/cpuerrata.c
+++ b/xen/arch/arm/cpuerrata.c
@@ -469,6 +469,14 @@ static const struct arm_cpu_capabilities arm_errata[] = {
         .capability = ARM_SSBD,
         .matches = has_ssbd_mitigation,
     },
+#endif
+#ifdef CONFIG_ARM_ERRATUM_858921
+    {
+        /* Cortex-A73 (all versions) */
+        .desc = "ARM erratum 858921",
+        .capability = ARM_WORKAROUND_858921,
+        MIDR_ALL_VERSIONS(MIDR_CORTEX_A73),
+    },
 #endif
     {
         /* Neoverse r0p0 - r2p0 */
diff --git a/xen/arch/arm/vtimer.c b/xen/arch/arm/vtimer.c
index 6d39fc944f..c2b27915c6 100644
--- a/xen/arch/arm/vtimer.c
+++ b/xen/arch/arm/vtimer.c
@@ -62,7 +62,7 @@ static void virt_timer_expired(void *data)
 
 int domain_vtimer_init(struct domain *d, struct xen_arch_domainconfig *config)
 {
-    d->arch.virt_timer_base.offset = READ_SYSREG64(CNTPCT_EL0);
+    d->arch.virt_timer_base.offset = get_cycles();
     d->time_offset.seconds = ticks_to_ns(d->arch.virt_timer_base.offset - boot_count);
     do_div(d->time_offset.seconds, 1000000000);
 
diff --git a/xen/include/asm-arm/cpuerrata.h b/xen/include/asm-arm/cpuerrata.h
index 88ef3ca934..8d7e7b9375 100644
--- a/xen/include/asm-arm/cpuerrata.h
+++ b/xen/include/asm-arm/cpuerrata.h
@@ -28,6 +28,8 @@ static inline bool check_workaround_##erratum(void)             \
 CHECK_WORKAROUND_HELPER(766422, ARM32_WORKAROUND_766422, CONFIG_ARM_32)
 CHECK_WORKAROUND_HELPER(834220, ARM64_WORKAROUND_834220, CONFIG_ARM_64)
 CHECK_WORKAROUND_HELPER(ssbd, ARM_SSBD, CONFIG_ARM_SSBD)
+CHECK_WORKAROUND_HELPER(858921, ARM_WORKAROUND_858921,
+                        CONFIG_ARM_ERRATUM_858921)
 
 #undef CHECK_WORKAROUND_HELPER
 
diff --git a/xen/include/asm-arm/cpufeature.h b/xen/include/asm-arm/cpufeature.h
index 10878ead8a..016a9fe203 100644
--- a/xen/include/asm-arm/cpufeature.h
+++ b/xen/include/asm-arm/cpufeature.h
@@ -45,8 +45,9 @@
 #define ARM_SSBD 7
 #define ARM_SMCCC_1_1 8
 #define ARM64_WORKAROUND_AT_SPECULATE 9
+#define ARM_WORKAROUND_858921 10
 
-#define ARM_NCAPS           10
+#define ARM_NCAPS           11
 
 #ifndef __ASSEMBLY__
 
diff --git a/xen/include/asm-arm/time.h b/xen/include/asm-arm/time.h
index 9cb6f9b0b4..1b2c13614b 100644
--- a/xen/include/asm-arm/time.h
+++ b/xen/include/asm-arm/time.h
@@ -3,6 +3,7 @@
 
 #include <asm/sysregs.h>
 #include <asm/system.h>
+#include <asm/cpuerrata.h>
 
 #define DT_MATCH_TIMER                      \
     DT_MATCH_COMPATIBLE("arm,armv7-timer"), \
@@ -13,7 +14,26 @@ typedef uint64_t cycles_t;
 static inline cycles_t get_cycles (void)
 {
         isb();
-        return READ_SYSREG64(CNTPCT_EL0);
+        /*
+         * ARM_WORKAROUND_858921: Cortex-A73 (all versions) counter read
+         * can return a wrong value when the counter crosses a 32bit boundary.
+         */
+        if ( !check_workaround_858921() )
+            return READ_SYSREG64(CNTPCT_EL0);
+        else
+        {
+            /*
+             * A recommended workaround for erratum 858921 is to:
+             *  1- Read twice CNTPCT.
+             *  2- Compare bit[32] of the two read values.
+             *      - If bit[32] is different, keep the old value.
+             *      - If bit[32] is the same, keep the new value.
+             */
+            cycles_t old, new;
+            old = READ_SYSREG64(CNTPCT_EL0);
+            new = READ_SYSREG64(CNTPCT_EL0);
+            return (((old ^ new) >> 32) & 1) ? old : new;
+        }
 }
 
 /* List of timer's IRQ */
-- 
2.25.1


Re: [PATCH] xen/arm: Add Cortex-A73 erratum 858921 workaround
Posted by Bertrand Marquis 3 years, 5 months ago
Hi,

> On 9 Nov 2020, at 08:21, Penny Zheng <Penny.Zheng@arm.com> wrote:
> 
> CNTVCT_EL0 or CNTPCT_EL0 counter read in Cortex-A73 (all versions)
> might return a wrong value when the counter crosses a 32bit boundary.
> 
> Until now, there is no case for Xen itself to access CNTVCT_EL0,
> and it also should be the Guest OS's responsibility to deal with
> this part.
> 
> But for CNTPCT, there exists several cases in Xen involving reading
> CNTPCT, so a possible workaround is that performing the read twice,
> and to return one or the other depending on whether a transition has
> taken place.
> 
> Signed-off-by: Penny Zheng <penny.zheng@arm.com>
Reviewed-by: Bertrand Marquis <bertrand.marquis@arm.com>

Regards
Bertrand

> ---
> docs/misc/arm/silicon-errata.txt |  1 +
> xen/arch/arm/Kconfig             | 18 ++++++++++++++++++
> xen/arch/arm/cpuerrata.c         |  8 ++++++++
> xen/arch/arm/vtimer.c            |  2 +-
> xen/include/asm-arm/cpuerrata.h  |  2 ++
> xen/include/asm-arm/cpufeature.h |  3 ++-
> xen/include/asm-arm/time.h       | 22 +++++++++++++++++++++-
> 7 files changed, 53 insertions(+), 3 deletions(-)
> 
> diff --git a/docs/misc/arm/silicon-errata.txt b/docs/misc/arm/silicon-errata.txt
> index 1f18a9df58..552c4151d3 100644
> --- a/docs/misc/arm/silicon-errata.txt
> +++ b/docs/misc/arm/silicon-errata.txt
> @@ -51,6 +51,7 @@ stable hypervisors.
> | ARM            | Cortex-A57      | #1319537        | N/A                     |
> | ARM            | Cortex-A72      | #1319367        | N/A                     |
> | ARM            | Cortex-A72      | #853709         | N/A                     |
> +| ARM            | Cortex-A73      | #858921         | ARM_ERRATUM_858921      |
> | ARM            | Cortex-A76      | #1165522        | N/A                     |
> | ARM            | Neoverse-N1     | #1165522        | N/A
> | ARM            | MMU-500         | #842869         | N/A                     |
> diff --git a/xen/arch/arm/Kconfig b/xen/arch/arm/Kconfig
> index 2777388265..f938dd21bd 100644
> --- a/xen/arch/arm/Kconfig
> +++ b/xen/arch/arm/Kconfig
> @@ -226,6 +226,24 @@ config ARM64_ERRATUM_834220
> 
> 	  If unsure, say Y.
> 
> +config ARM_ERRATUM_858921
> +	bool "Cortex-A73: 858921: Possible wrong read value for CNTVCT or CNTPCT"
> +	default y
> +	help
> +	  This option adds an alternative code sequence to work around ARM
> +	  erratum 858921 on Cortex-A73 (all versions).
> +
> +	  Affected Cortex-A73 might return wrong read value for CNTVCT or CNTPCT
> +	  when the counter crosses a 32bit boundary.
> +
> +	  The workaround involves performing the read twice, and to return
> +	  one or the other value depending on whether a transition has taken place.
> +	  Please note that this does not necessarily enable the workaround,
> +	  as it depends on the alternative framework, which will only patch
> +	  the kernel if an affected CPU is detected.
> +
> +	  If unsure, say Y.
> +
> endmenu
> 
> config ARM64_HARDEN_BRANCH_PREDICTOR
> diff --git a/xen/arch/arm/cpuerrata.c b/xen/arch/arm/cpuerrata.c
> index 6731d873e8..567911d380 100644
> --- a/xen/arch/arm/cpuerrata.c
> +++ b/xen/arch/arm/cpuerrata.c
> @@ -469,6 +469,14 @@ static const struct arm_cpu_capabilities arm_errata[] = {
>         .capability = ARM_SSBD,
>         .matches = has_ssbd_mitigation,
>     },
> +#endif
> +#ifdef CONFIG_ARM_ERRATUM_858921
> +    {
> +        /* Cortex-A73 (all versions) */
> +        .desc = "ARM erratum 858921",
> +        .capability = ARM_WORKAROUND_858921,
> +        MIDR_ALL_VERSIONS(MIDR_CORTEX_A73),
> +    },
> #endif
>     {
>         /* Neoverse r0p0 - r2p0 */
> diff --git a/xen/arch/arm/vtimer.c b/xen/arch/arm/vtimer.c
> index 6d39fc944f..c2b27915c6 100644
> --- a/xen/arch/arm/vtimer.c
> +++ b/xen/arch/arm/vtimer.c
> @@ -62,7 +62,7 @@ static void virt_timer_expired(void *data)
> 
> int domain_vtimer_init(struct domain *d, struct xen_arch_domainconfig *config)
> {
> -    d->arch.virt_timer_base.offset = READ_SYSREG64(CNTPCT_EL0);
> +    d->arch.virt_timer_base.offset = get_cycles();
>     d->time_offset.seconds = ticks_to_ns(d->arch.virt_timer_base.offset - boot_count);
>     do_div(d->time_offset.seconds, 1000000000);
> 
> diff --git a/xen/include/asm-arm/cpuerrata.h b/xen/include/asm-arm/cpuerrata.h
> index 88ef3ca934..8d7e7b9375 100644
> --- a/xen/include/asm-arm/cpuerrata.h
> +++ b/xen/include/asm-arm/cpuerrata.h
> @@ -28,6 +28,8 @@ static inline bool check_workaround_##erratum(void)             \
> CHECK_WORKAROUND_HELPER(766422, ARM32_WORKAROUND_766422, CONFIG_ARM_32)
> CHECK_WORKAROUND_HELPER(834220, ARM64_WORKAROUND_834220, CONFIG_ARM_64)
> CHECK_WORKAROUND_HELPER(ssbd, ARM_SSBD, CONFIG_ARM_SSBD)
> +CHECK_WORKAROUND_HELPER(858921, ARM_WORKAROUND_858921,
> +                        CONFIG_ARM_ERRATUM_858921)
> 
> #undef CHECK_WORKAROUND_HELPER
> 
> diff --git a/xen/include/asm-arm/cpufeature.h b/xen/include/asm-arm/cpufeature.h
> index 10878ead8a..016a9fe203 100644
> --- a/xen/include/asm-arm/cpufeature.h
> +++ b/xen/include/asm-arm/cpufeature.h
> @@ -45,8 +45,9 @@
> #define ARM_SSBD 7
> #define ARM_SMCCC_1_1 8
> #define ARM64_WORKAROUND_AT_SPECULATE 9
> +#define ARM_WORKAROUND_858921 10
> 
> -#define ARM_NCAPS           10
> +#define ARM_NCAPS           11
> 
> #ifndef __ASSEMBLY__
> 
> diff --git a/xen/include/asm-arm/time.h b/xen/include/asm-arm/time.h
> index 9cb6f9b0b4..1b2c13614b 100644
> --- a/xen/include/asm-arm/time.h
> +++ b/xen/include/asm-arm/time.h
> @@ -3,6 +3,7 @@
> 
> #include <asm/sysregs.h>
> #include <asm/system.h>
> +#include <asm/cpuerrata.h>
> 
> #define DT_MATCH_TIMER                      \
>     DT_MATCH_COMPATIBLE("arm,armv7-timer"), \
> @@ -13,7 +14,26 @@ typedef uint64_t cycles_t;
> static inline cycles_t get_cycles (void)
> {
>         isb();
> -        return READ_SYSREG64(CNTPCT_EL0);
> +        /*
> +         * ARM_WORKAROUND_858921: Cortex-A73 (all versions) counter read
> +         * can return a wrong value when the counter crosses a 32bit boundary.
> +         */
> +        if ( !check_workaround_858921() )
> +            return READ_SYSREG64(CNTPCT_EL0);
> +        else
> +        {
> +            /*
> +             * A recommended workaround for erratum 858921 is to:
> +             *  1- Read twice CNTPCT.
> +             *  2- Compare bit[32] of the two read values.
> +             *      - If bit[32] is different, keep the old value.
> +             *      - If bit[32] is the same, keep the new value.
> +             */
> +            cycles_t old, new;
> +            old = READ_SYSREG64(CNTPCT_EL0);
> +            new = READ_SYSREG64(CNTPCT_EL0);
> +            return (((old ^ new) >> 32) & 1) ? old : new;
> +        }
> }
> 
> /* List of timer's IRQ */
> -- 
> 2.25.1
> 


Re: [PATCH] xen/arm: Add Cortex-A73 erratum 858921 workaround
Posted by Julien Grall 3 years, 5 months ago
Hi,

On 09/11/2020 08:21, Penny Zheng wrote:
> CNTVCT_EL0 or CNTPCT_EL0 counter read in Cortex-A73 (all versions)
> might return a wrong value when the counter crosses a 32bit boundary.
> 
> Until now, there is no case for Xen itself to access CNTVCT_EL0,
> and it also should be the Guest OS's responsibility to deal with
> this part.
> 
> But for CNTPCT, there exists several cases in Xen involving reading
> CNTPCT, so a possible workaround is that performing the read twice,
> and to return one or the other depending on whether a transition has
> taken place.
> 
> Signed-off-by: Penny Zheng <penny.zheng@arm.com>

Acked-by: Julien Grall <jgrall@amazon.com>

On a related topic, do we need a fix similar to Linux commit 
75a19a0202db "arm64: arch_timer: Ensure counter register reads occur 
with seqlock held"?

Cheers,

-- 
Julien Grall

RE: [PATCH] xen/arm: Add Cortex-A73 erratum 858921 workaround
Posted by Penny Zheng 3 years, 5 months ago

> -----Original Message-----
> From: Julien Grall <julien@xen.org>
> Sent: Monday, November 9, 2020 8:04 PM
> To: Penny Zheng <Penny.Zheng@arm.com>; xen-devel@lists.xenproject.org;
> sstabellini@kernel.org
> Cc: Andre Przywara <Andre.Przywara@arm.com>; Bertrand Marquis
> <Bertrand.Marquis@arm.com>; Wei Chen <Wei.Chen@arm.com>; Kaly Xin
> <Kaly.Xin@arm.com>; nd <nd@arm.com>
> Subject: Re: [PATCH] xen/arm: Add Cortex-A73 erratum 858921 workaround
> 
> Hi,
> 
> On 09/11/2020 08:21, Penny Zheng wrote:
> > CNTVCT_EL0 or CNTPCT_EL0 counter read in Cortex-A73 (all versions)
> > might return a wrong value when the counter crosses a 32bit boundary.
> >
> > Until now, there is no case for Xen itself to access CNTVCT_EL0, and
> > it also should be the Guest OS's responsibility to deal with this
> > part.
> >
> > But for CNTPCT, there exists several cases in Xen involving reading
> > CNTPCT, so a possible workaround is that performing the read twice,
> > and to return one or the other depending on whether a transition has
> > taken place.
> >
> > Signed-off-by: Penny Zheng <penny.zheng@arm.com>
> 
> Acked-by: Julien Grall <jgrall@amazon.com>
>
Thank you. 😉

> On a related topic, do we need a fix similar to Linux commit 75a19a0202db
> "arm64: arch_timer: Ensure counter register reads occur with seqlock held"?
> 
Sure, I'll check this commit and talk with my teams for further work.

Cheers

--
Penny Zheng
> Cheers,
> 
> --
> Julien Grall
RE: [PATCH] xen/arm: Add Cortex-A73 erratum 858921 workaround
Posted by Wei Chen 3 years, 5 months ago
HI Julien,

> -----Original Message-----
> From: Julien Grall <julien@xen.org>
> Sent: 2020年11月9日 20:04
> To: Penny Zheng <Penny.Zheng@arm.com>; xen-devel@lists.xenproject.org;
> sstabellini@kernel.org
> Cc: Andre Przywara <Andre.Przywara@arm.com>; Bertrand Marquis
> <Bertrand.Marquis@arm.com>; Wei Chen <Wei.Chen@arm.com>; Kaly Xin
> <Kaly.Xin@arm.com>; nd <nd@arm.com>
> Subject: Re: [PATCH] xen/arm: Add Cortex-A73 erratum 858921 workaround
> 
> Hi,
> 
> On 09/11/2020 08:21, Penny Zheng wrote:
> > CNTVCT_EL0 or CNTPCT_EL0 counter read in Cortex-A73 (all versions)
> > might return a wrong value when the counter crosses a 32bit boundary.
> >
> > Until now, there is no case for Xen itself to access CNTVCT_EL0,
> > and it also should be the Guest OS's responsibility to deal with
> > this part.
> >
> > But for CNTPCT, there exists several cases in Xen involving reading
> > CNTPCT, so a possible workaround is that performing the read twice,
> > and to return one or the other depending on whether a transition has
> > taken place.
> >
> > Signed-off-by: Penny Zheng <penny.zheng@arm.com>
> 
> Acked-by: Julien Grall <jgrall@amazon.com>
> 
> On a related topic, do we need a fix similar to Linux commit
> 75a19a0202db "arm64: arch_timer: Ensure counter register reads occur
> with seqlock held"?
> 

I take a look at this Linux commit, it seems to prevent the seq-lock to be
speculated.  Using an enforce ordering instead of ISB after the read counter
operation seems to be for performance reasons.

I have found that you had placed an ISB before read counter in get_cycles
to prevent counter value to be speculated. But you haven't placed the second
ISB after reading. Is it because we haven't used the get_cycles in seq lock
critical context (Maybe I didn't find the right place)? So should we need to fix it
now, or you prefer to fix it now for future usage?

Regards,
Wei Chen

> Cheers,
> 
> --
> Julien Grall
RE: [PATCH] xen/arm: Add Cortex-A73 erratum 858921 workaround
Posted by Stefano Stabellini 3 years, 4 months ago
Resuming this old thread.

On Fri, 13 Nov 2020, Wei Chen wrote:
> > Hi,
> > 
> > On 09/11/2020 08:21, Penny Zheng wrote:
> > > CNTVCT_EL0 or CNTPCT_EL0 counter read in Cortex-A73 (all versions)
> > > might return a wrong value when the counter crosses a 32bit boundary.
> > >
> > > Until now, there is no case for Xen itself to access CNTVCT_EL0,
> > > and it also should be the Guest OS's responsibility to deal with
> > > this part.
> > >
> > > But for CNTPCT, there exists several cases in Xen involving reading
> > > CNTPCT, so a possible workaround is that performing the read twice,
> > > and to return one or the other depending on whether a transition has
> > > taken place.
> > >
> > > Signed-off-by: Penny Zheng <penny.zheng@arm.com>
> > 
> > Acked-by: Julien Grall <jgrall@amazon.com>
> > 
> > On a related topic, do we need a fix similar to Linux commit
> > 75a19a0202db "arm64: arch_timer: Ensure counter register reads occur
> > with seqlock held"?
> > 
> 
> I take a look at this Linux commit, it seems to prevent the seq-lock to be
> speculated.  Using an enforce ordering instead of ISB after the read counter
> operation seems to be for performance reasons.
> 
> I have found that you had placed an ISB before read counter in get_cycles
> to prevent counter value to be speculated. But you haven't placed the second
> ISB after reading. Is it because we haven't used the get_cycles in seq lock
> critical context (Maybe I didn't find the right place)? So should we need to fix it
> now, or you prefer to fix it now for future usage?

Looking at the call sites, it doesn't look like we need any ISB after
get_cycles as it is not used in any critical context. There is also a
data dependency with the value returned by it.

So I am thinking we don't need any fix. At most we need an in-code comment?

RE: [PATCH] xen/arm: Add Cortex-A73 erratum 858921 workaround
Posted by Wei Chen 3 years, 4 months ago
Hi Stefano,

> -----Original Message-----
> From: Stefano Stabellini <sstabellini@kernel.org>
> Sent: 2020年11月26日 8:00
> To: Wei Chen <Wei.Chen@arm.com>
> Cc: Julien Grall <julien@xen.org>; Penny Zheng <Penny.Zheng@arm.com>; xen-
> devel@lists.xenproject.org; sstabellini@kernel.org; Andre Przywara
> <Andre.Przywara@arm.com>; Bertrand Marquis <Bertrand.Marquis@arm.com>;
> Kaly Xin <Kaly.Xin@arm.com>; nd <nd@arm.com>
> Subject: RE: [PATCH] xen/arm: Add Cortex-A73 erratum 858921 workaround
> 
> Resuming this old thread.
> 
> On Fri, 13 Nov 2020, Wei Chen wrote:
> > > Hi,
> > >
> > > On 09/11/2020 08:21, Penny Zheng wrote:
> > > > CNTVCT_EL0 or CNTPCT_EL0 counter read in Cortex-A73 (all versions)
> > > > might return a wrong value when the counter crosses a 32bit boundary.
> > > >
> > > > Until now, there is no case for Xen itself to access CNTVCT_EL0,
> > > > and it also should be the Guest OS's responsibility to deal with
> > > > this part.
> > > >
> > > > But for CNTPCT, there exists several cases in Xen involving reading
> > > > CNTPCT, so a possible workaround is that performing the read twice,
> > > > and to return one or the other depending on whether a transition has
> > > > taken place.
> > > >
> > > > Signed-off-by: Penny Zheng <penny.zheng@arm.com>
> > >
> > > Acked-by: Julien Grall <jgrall@amazon.com>
> > >
> > > On a related topic, do we need a fix similar to Linux commit
> > > 75a19a0202db "arm64: arch_timer: Ensure counter register reads occur
> > > with seqlock held"?
> > >
> >
> > I take a look at this Linux commit, it seems to prevent the seq-lock to be
> > speculated.  Using an enforce ordering instead of ISB after the read counter
> > operation seems to be for performance reasons.
> >
> > I have found that you had placed an ISB before read counter in get_cycles
> > to prevent counter value to be speculated. But you haven't placed the second
> > ISB after reading. Is it because we haven't used the get_cycles in seq lock
> > critical context (Maybe I didn't find the right place)? So should we need to fix it
> > now, or you prefer to fix it now for future usage?
> 
> Looking at the call sites, it doesn't look like we need any ISB after
> get_cycles as it is not used in any critical context. There is also a
> data dependency with the value returned by it.
> 
> So I am thinking we don't need any fix. At most we need an in-code comment?

I agree with you to add an in-code comment. It will remind us in future when we
use the get_cycles in critical context. Adding it now will probably only lead to
meaningless performance degradation. Because Xen may never use it in a similar
scenario.

BTW: 
Can we send a patch that just contains a pure comment : )

Regards,
Wei Chen
Re: [PATCH] xen/arm: Add Cortex-A73 erratum 858921 workaround
Posted by Julien Grall 3 years, 4 months ago
Hi Wei,

Your e-mail font seems to be different to the usual plain text one. Are 
you sending the e-mail using HTML by any chance?

On 26/11/2020 02:07, Wei Chen wrote:
> Hi Stefano,
> 
>> -----Original Message-----
>> From: Stefano Stabellini <sstabellini@kernel.org>
>> Sent: 2020年11月26日 8:00
>> To: Wei Chen <Wei.Chen@arm.com>
>> Cc: Julien Grall <julien@xen.org>; Penny Zheng <Penny.Zheng@arm.com>; xen-
>> devel@lists.xenproject.org; sstabellini@kernel.org; Andre Przywara
>> <Andre.Przywara@arm.com>; Bertrand Marquis <Bertrand.Marquis@arm.com>;
>> Kaly Xin <Kaly.Xin@arm.com>; nd <nd@arm.com>
>> Subject: RE: [PATCH] xen/arm: Add Cortex-A73 erratum 858921 workaround
>>
>> Resuming this old thread.
>>
>> On Fri, 13 Nov 2020, Wei Chen wrote:
>>>> Hi,
>>>>
>>>> On 09/11/2020 08:21, Penny Zheng wrote:
>>>>> CNTVCT_EL0 or CNTPCT_EL0 counter read in Cortex-A73 (all versions)
>>>>> might return a wrong value when the counter crosses a 32bit boundary.
>>>>>
>>>>> Until now, there is no case for Xen itself to access CNTVCT_EL0,
>>>>> and it also should be the Guest OS's responsibility to deal with
>>>>> this part.
>>>>>
>>>>> But for CNTPCT, there exists several cases in Xen involving reading
>>>>> CNTPCT, so a possible workaround is that performing the read twice,
>>>>> and to return one or the other depending on whether a transition has
>>>>> taken place.
>>>>>
>>>>> Signed-off-by: Penny Zheng <penny.zheng@arm.com>
>>>>
>>>> Acked-by: Julien Grall <jgrall@amazon.com>
>>>>
>>>> On a related topic, do we need a fix similar to Linux commit
>>>> 75a19a0202db "arm64: arch_timer: Ensure counter register reads occur
>>>> with seqlock held"?
>>>>
>>>
>>> I take a look at this Linux commit, it seems to prevent the seq-lock to be
>>> speculated.  Using an enforce ordering instead of ISB after the read counter
>>> operation seems to be for performance reasons.
>>>
>>> I have found that you had placed an ISB before read counter in get_cycles
>>> to prevent counter value to be speculated. But you haven't placed the second
>>> ISB after reading. Is it because we haven't used the get_cycles in seq lock
>>> critical context (Maybe I didn't find the right place)? So should we need to fix it
>>> now, or you prefer to fix it now for future usage?
>>
>> Looking at the call sites, it doesn't look like we need any ISB after
>> get_cycles as it is not used in any critical context. There is also a
>> data dependency with the value returned by it.

I am assuming you looked at all the users of NOW(). Is that right?

>>
>> So I am thinking we don't need any fix. At most we need an in-code comment?
> 
> I agree with you to add an in-code comment. It will remind us in future when we
> use the get_cycles in critical context. Adding it now will probably only lead to
> meaningless performance degradation.

I read this as there would be no perfomance impact if we add the 
ordering it now. Did you intend to say?

> Because Xen may never use it in a similar
> scenario.

Right, there are two potentials approach here:
   - Wait until there are a user
       * Pros: Doesn't impact performance
       * Cons: We rely on users/reviewers to catch any misuse
   - Harden the code
       * Pros: Less risk to introduce a bug inadvertently
       * Cons: May impact the performance

In general, I prefer that the code is hardened by default if the 
performance impact is limited. This may save us hours of 
debugging/reproducing bug.

In addition, AFAICT, the x86 version of get_cycles() is already able to 
provide that ordering. So there are chances that code may rely on it.

While I don't necessarily agree to add barriers everywhere by default 
(this may have big impact on the platform). I think it is better to have 
an accurate number of cycles.

Cheers,

-- 
Julien Grall

RE: [PATCH] xen/arm: Add Cortex-A73 erratum 858921 workaround
Posted by Wei Chen 3 years, 4 months ago
Hi Julien,

> -----Original Message-----
> From: Julien Grall <julien@xen.org>
> Sent: 2020年11月26日 18:55
> To: Wei Chen <Wei.Chen@arm.com>; Stefano Stabellini <sstabellini@kernel.org>
> Cc: Penny Zheng <Penny.Zheng@arm.com>; xen-devel@lists.xenproject.org;
> Andre Przywara <Andre.Przywara@arm.com>; Bertrand Marquis
> <Bertrand.Marquis@arm.com>; Kaly Xin <Kaly.Xin@arm.com>; nd
> <nd@arm.com>
> Subject: Re: [PATCH] xen/arm: Add Cortex-A73 erratum 858921 workaround
> 
> Hi Wei,
> 
> Your e-mail font seems to be different to the usual plain text one. Are
> you sending the e-mail using HTML by any chance?
> 

It's strange, I always use the plain text.

> On 26/11/2020 02:07, Wei Chen wrote:
> > Hi Stefano,
> >
> >> -----Original Message-----
> >> From: Stefano Stabellini <sstabellini@kernel.org>
> >> Sent: 2020��11��26�� 8:00
> >> To: Wei Chen <Wei.Chen@arm.com>
> >> Cc: Julien Grall <julien@xen.org>; Penny Zheng <Penny.Zheng@arm.com>;
> xen-
> >> devel@lists.xenproject.org; sstabellini@kernel.org; Andre Przywara
> >> <Andre.Przywara@arm.com>; Bertrand Marquis
> <Bertrand.Marquis@arm.com>;
> >> Kaly Xin <Kaly.Xin@arm.com>; nd <nd@arm.com>
> >> Subject: RE: [PATCH] xen/arm: Add Cortex-A73 erratum 858921 workaround
> >>
> >> Resuming this old thread.
> >>
> >> On Fri, 13 Nov 2020, Wei Chen wrote:
> >>>> Hi,
> >>>>
> >>>> On 09/11/2020 08:21, Penny Zheng wrote:
> >>>>> CNTVCT_EL0 or CNTPCT_EL0 counter read in Cortex-A73 (all versions)
> >>>>> might return a wrong value when the counter crosses a 32bit boundary.
> >>>>>
> >>>>> Until now, there is no case for Xen itself to access CNTVCT_EL0,
> >>>>> and it also should be the Guest OS's responsibility to deal with
> >>>>> this part.
> >>>>>
> >>>>> But for CNTPCT, there exists several cases in Xen involving reading
> >>>>> CNTPCT, so a possible workaround is that performing the read twice,
> >>>>> and to return one or the other depending on whether a transition has
> >>>>> taken place.
> >>>>>
> >>>>> Signed-off-by: Penny Zheng <penny.zheng@arm.com>
> >>>>
> >>>> Acked-by: Julien Grall <jgrall@amazon.com>
> >>>>
> >>>> On a related topic, do we need a fix similar to Linux commit
> >>>> 75a19a0202db "arm64: arch_timer: Ensure counter register reads occur
> >>>> with seqlock held"?
> >>>>
> >>>
> >>> I take a look at this Linux commit, it seems to prevent the seq-lock to be
> >>> speculated.  Using an enforce ordering instead of ISB after the read counter
> >>> operation seems to be for performance reasons.
> >>>
> >>> I have found that you had placed an ISB before read counter in get_cycles
> >>> to prevent counter value to be speculated. But you haven't placed the
> second
> >>> ISB after reading. Is it because we haven't used the get_cycles in seq lock
> >>> critical context (Maybe I didn't find the right place)? So should we need to
> fix it
> >>> now, or you prefer to fix it now for future usage?
> >>
> >> Looking at the call sites, it doesn't look like we need any ISB after
> >> get_cycles as it is not used in any critical context. There is also a
> >> data dependency with the value returned by it.
> 
> I am assuming you looked at all the users of NOW(). Is that right?
> 
> >>
> >> So I am thinking we don't need any fix. At most we need an in-code comment?
> >
> > I agree with you to add an in-code comment. It will remind us in future when
> we
> > use the get_cycles in critical context. Adding it now will probably only lead to
> > meaningless performance degradation.
> 
> I read this as there would be no perfomance impact if we add the
> ordering it now. Did you intend to say?

Sorry about my English. I intended to say "Adding it now may introduce some
performance cost. And this performance cost may be not worth. Because Xen
may never use it in a similar scenario "

> 
> > Because Xen may never use it in a similar
> > scenario.
> 
> Right, there are two potentials approach here:
>    - Wait until there are a user
>        * Pros: Doesn't impact performance
>        * Cons: We rely on users/reviewers to catch any misuse
>    - Harden the code
>        * Pros: Less risk to introduce a bug inadvertently
>        * Cons: May impact the performance
> 
> In general, I prefer that the code is hardened by default if the
> performance impact is limited. This may save us hours of
> debugging/reproducing bug.
> 

From a preventive point of view, you're right.

> In addition, AFAICT, the x86 version of get_cycles() is already able to
> provide that ordering. So there are chances that code may rely on it.
> 
> While I don't necessarily agree to add barriers everywhere by default
> (this may have big impact on the platform). I think it is better to have
> an accurate number of cycles.
> 

As x86 had done it, I think it’s ok to do it for Arm. This will keep a function 
behaves the same on different architectures.

Thanks,
Wei Chen

> Cheers,
> 
> --
> Julien Grall
Re: [PATCH] xen/arm: Add Cortex-A73 erratum 858921 workaround
Posted by Julien Grall 3 years, 4 months ago

On 26/11/2020 11:27, Wei Chen wrote:
> Hi Julien,

Hi Wei,

>> -----Original Message-----
>> From: Julien Grall <julien@xen.org>
>> Sent: 2020年11月26日 18:55
>> To: Wei Chen <Wei.Chen@arm.com>; Stefano Stabellini <sstabellini@kernel.org>
>> Cc: Penny Zheng <Penny.Zheng@arm.com>; xen-devel@lists.xenproject.org;
>> Andre Przywara <Andre.Przywara@arm.com>; Bertrand Marquis
>> <Bertrand.Marquis@arm.com>; Kaly Xin <Kaly.Xin@arm.com>; nd
>> <nd@arm.com>
>> Subject: Re: [PATCH] xen/arm: Add Cortex-A73 erratum 858921 workaround
>>
>> Hi Wei,
>>
>> Your e-mail font seems to be different to the usual plain text one. Are
>> you sending the e-mail using HTML by any chance?
>>
> 
> It's strange, I always use the plain text.

Maybe exchange decided to mangle the e-mail :). Anyway, this new message 
looks fine.

> 
>> On 26/11/2020 02:07, Wei Chen wrote:
>>> Hi Stefano,
>>>
>>>> -----Original Message-----
>>>> From: Stefano Stabellini <sstabellini@kernel.org>
>>>> Sent: 2020��11��26�� 8:00
>>>> To: Wei Chen <Wei.Chen@arm.com>
>>>> Cc: Julien Grall <julien@xen.org>; Penny Zheng <Penny.Zheng@arm.com>;
>> xen-
>>>> devel@lists.xenproject.org; sstabellini@kernel.org; Andre Przywara
>>>> <Andre.Przywara@arm.com>; Bertrand Marquis
>> <Bertrand.Marquis@arm.com>;
>>>> Kaly Xin <Kaly.Xin@arm.com>; nd <nd@arm.com>
>>>> Subject: RE: [PATCH] xen/arm: Add Cortex-A73 erratum 858921 workaround
>>>>
>>>> Resuming this old thread.
>>>>
>>>> On Fri, 13 Nov 2020, Wei Chen wrote:
>>>>>> Hi,
>>>>>>
>>>>>> On 09/11/2020 08:21, Penny Zheng wrote:
>>>>>>> CNTVCT_EL0 or CNTPCT_EL0 counter read in Cortex-A73 (all versions)
>>>>>>> might return a wrong value when the counter crosses a 32bit boundary.
>>>>>>>
>>>>>>> Until now, there is no case for Xen itself to access CNTVCT_EL0,
>>>>>>> and it also should be the Guest OS's responsibility to deal with
>>>>>>> this part.
>>>>>>>
>>>>>>> But for CNTPCT, there exists several cases in Xen involving reading
>>>>>>> CNTPCT, so a possible workaround is that performing the read twice,
>>>>>>> and to return one or the other depending on whether a transition has
>>>>>>> taken place.
>>>>>>>
>>>>>>> Signed-off-by: Penny Zheng <penny.zheng@arm.com>
>>>>>>
>>>>>> Acked-by: Julien Grall <jgrall@amazon.com>
>>>>>>
>>>>>> On a related topic, do we need a fix similar to Linux commit
>>>>>> 75a19a0202db "arm64: arch_timer: Ensure counter register reads occur
>>>>>> with seqlock held"?
>>>>>>
>>>>>
>>>>> I take a look at this Linux commit, it seems to prevent the seq-lock to be
>>>>> speculated.  Using an enforce ordering instead of ISB after the read counter
>>>>> operation seems to be for performance reasons.
>>>>>
>>>>> I have found that you had placed an ISB before read counter in get_cycles
>>>>> to prevent counter value to be speculated. But you haven't placed the
>> second
>>>>> ISB after reading. Is it because we haven't used the get_cycles in seq lock
>>>>> critical context (Maybe I didn't find the right place)? So should we need to
>> fix it
>>>>> now, or you prefer to fix it now for future usage?
>>>>
>>>> Looking at the call sites, it doesn't look like we need any ISB after
>>>> get_cycles as it is not used in any critical context. There is also a
>>>> data dependency with the value returned by it.
>>
>> I am assuming you looked at all the users of NOW(). Is that right?
>>
>>>>
>>>> So I am thinking we don't need any fix. At most we need an in-code comment?
>>>
>>> I agree with you to add an in-code comment. It will remind us in future when
>> we
>>> use the get_cycles in critical context. Adding it now will probably only lead to
>>> meaningless performance degradation.
>>
>> I read this as there would be no perfomance impact if we add the
>> ordering it now. Did you intend to say?
> 
> Sorry about my English. I intended to say "Adding it now may introduce some
> performance cost. And this performance cost may be not worth. Because Xen
> may never use it in a similar scenario "

Don't worry! I think the performance should not be noticeable if we use 
the same trick as Linux.

>> In addition, AFAICT, the x86 version of get_cycles() is already able to
>> provide that ordering. So there are chances that code may rely on it.
>>
>> While I don't necessarily agree to add barriers everywhere by default
>> (this may have big impact on the platform). I think it is better to have
>> an accurate number of cycles.
>>
> 
> As x86 had done it, I think it’s ok to do it for Arm. This will keep a function
> behaves the same on different architectures.

Just to be clear, I am not 100% sure this is what Intel is doing. 
Although this is my understanding of the comment in the code.

@Stefano, what do you think?

@Wei, assuming Stefano is happy with the proposal, would you be happy to 
send a patch for that?

Best regards,

-- 
Julien Grall

RE: [PATCH] xen/arm: Add Cortex-A73 erratum 858921 workaround
Posted by Wei Chen 3 years, 4 months ago
Hi Julien,

> -----Original Message-----
> From: Julien Grall <julien@xen.org>
> Sent: 2020年12月3日 2:11
> To: Wei Chen <Wei.Chen@arm.com>; Stefano Stabellini <sstabellini@kernel.org>
> Cc: Penny Zheng <Penny.Zheng@arm.com>; xen-devel@lists.xenproject.org;
> Andre Przywara <Andre.Przywara@arm.com>; Bertrand Marquis
> <Bertrand.Marquis@arm.com>; Kaly Xin <Kaly.Xin@arm.com>; nd
> <nd@arm.com>
> Subject: Re: [PATCH] xen/arm: Add Cortex-A73 erratum 858921 workaround
> 
> 
> 
> On 26/11/2020 11:27, Wei Chen wrote:
> > Hi Julien,
> 
> Hi Wei,
> 
> >> -----Original Message-----
> >> From: Julien Grall <julien@xen.org>
> >> Sent: 2020年11月26日 18:55
> >> To: Wei Chen <Wei.Chen@arm.com>; Stefano Stabellini
> <sstabellini@kernel.org>
> >> Cc: Penny Zheng <Penny.Zheng@arm.com>; xen-devel@lists.xenproject.org;
> >> Andre Przywara <Andre.Przywara@arm.com>; Bertrand Marquis
> >> <Bertrand.Marquis@arm.com>; Kaly Xin <Kaly.Xin@arm.com>; nd
> >> <nd@arm.com>
> >> Subject: Re: [PATCH] xen/arm: Add Cortex-A73 erratum 858921 workaround
> >>
> >> Hi Wei,
> >>
> >> Your e-mail font seems to be different to the usual plain text one. Are
> >> you sending the e-mail using HTML by any chance?
> >>
> >
> > It's strange, I always use the plain text.
> 
> Maybe exchange decided to mangle the e-mail :). Anyway, this new message
> looks fine.
> 
> >
> >> On 26/11/2020 02:07, Wei Chen wrote:
> >>> Hi Stefano,
> >>>
> >>>> -----Original Message-----
> >>>> From: Stefano Stabellini <sstabellini@kernel.org>
> >>>> Sent: 2020��11��26�� 8:00
> >>>> To: Wei Chen <Wei.Chen@arm.com>
> >>>> Cc: Julien Grall <julien@xen.org>; Penny Zheng <Penny.Zheng@arm.com>;
> >> xen-
> >>>> devel@lists.xenproject.org; sstabellini@kernel.org; Andre Przywara
> >>>> <Andre.Przywara@arm.com>; Bertrand Marquis
> >> <Bertrand.Marquis@arm.com>;
> >>>> Kaly Xin <Kaly.Xin@arm.com>; nd <nd@arm.com>
> >>>> Subject: RE: [PATCH] xen/arm: Add Cortex-A73 erratum 858921
> workaround
> >>>>
> >>>> Resuming this old thread.
> >>>>
> >>>> On Fri, 13 Nov 2020, Wei Chen wrote:
> >>>>>> Hi,
> >>>>>>
> >>>>>> On 09/11/2020 08:21, Penny Zheng wrote:
> >>>>>>> CNTVCT_EL0 or CNTPCT_EL0 counter read in Cortex-A73 (all versions)
> >>>>>>> might return a wrong value when the counter crosses a 32bit boundary.
> >>>>>>>
> >>>>>>> Until now, there is no case for Xen itself to access CNTVCT_EL0,
> >>>>>>> and it also should be the Guest OS's responsibility to deal with
> >>>>>>> this part.
> >>>>>>>
> >>>>>>> But for CNTPCT, there exists several cases in Xen involving reading
> >>>>>>> CNTPCT, so a possible workaround is that performing the read twice,
> >>>>>>> and to return one or the other depending on whether a transition has
> >>>>>>> taken place.
> >>>>>>>
> >>>>>>> Signed-off-by: Penny Zheng <penny.zheng@arm.com>
> >>>>>>
> >>>>>> Acked-by: Julien Grall <jgrall@amazon.com>
> >>>>>>
> >>>>>> On a related topic, do we need a fix similar to Linux commit
> >>>>>> 75a19a0202db "arm64: arch_timer: Ensure counter register reads occur
> >>>>>> with seqlock held"?
> >>>>>>
> >>>>>
> >>>>> I take a look at this Linux commit, it seems to prevent the seq-lock to be
> >>>>> speculated.  Using an enforce ordering instead of ISB after the read
> counter
> >>>>> operation seems to be for performance reasons.
> >>>>>
> >>>>> I have found that you had placed an ISB before read counter in get_cycles
> >>>>> to prevent counter value to be speculated. But you haven't placed the
> >> second
> >>>>> ISB after reading. Is it because we haven't used the get_cycles in seq lock
> >>>>> critical context (Maybe I didn't find the right place)? So should we need to
> >> fix it
> >>>>> now, or you prefer to fix it now for future usage?
> >>>>
> >>>> Looking at the call sites, it doesn't look like we need any ISB after
> >>>> get_cycles as it is not used in any critical context. There is also a
> >>>> data dependency with the value returned by it.
> >>
> >> I am assuming you looked at all the users of NOW(). Is that right?
> >>
> >>>>
> >>>> So I am thinking we don't need any fix. At most we need an in-code
> comment?
> >>>
> >>> I agree with you to add an in-code comment. It will remind us in future
> when
> >> we
> >>> use the get_cycles in critical context. Adding it now will probably only lead
> to
> >>> meaningless performance degradation.
> >>
> >> I read this as there would be no perfomance impact if we add the
> >> ordering it now. Did you intend to say?
> >
> > Sorry about my English. I intended to say "Adding it now may introduce some
> > performance cost. And this performance cost may be not worth. Because Xen
> > may never use it in a similar scenario "
> 
> Don't worry! I think the performance should not be noticeable if we use
> the same trick as Linux.
> 
> >> In addition, AFAICT, the x86 version of get_cycles() is already able to
> >> provide that ordering. So there are chances that code may rely on it.
> >>
> >> While I don't necessarily agree to add barriers everywhere by default
> >> (this may have big impact on the platform). I think it is better to have
> >> an accurate number of cycles.
> >>
> >
> > As x86 had done it, I think it’s ok to do it for Arm. This will keep a function
> > behaves the same on different architectures.
> 
> Just to be clear, I am not 100% sure this is what Intel is doing.
> Although this is my understanding of the comment in the code.
> 
> @Stefano, what do you think?
> 
> @Wei, assuming Stefano is happy with the proposal, would you be happy to
> send a patch for that?
> 

Of  course, I am willing to do that. It seems the enforce_ordering patch has been
merged. And Vincenzo reported the enforce_ordering method will have ~4.5
performance improvement[1] (Compare to ISB). So I will use enforce_ordering
method directly instead of using ISB.

[1]https://lkml.org/lkml/2020/3/13/645

> Best regards,
> 
> --
> Julien Grall
RE: [PATCH] xen/arm: Add Cortex-A73 erratum 858921 workaround
Posted by Stefano Stabellini 3 years, 4 months ago
On Thu, 3 Dec 2020, Wei Chen wrote:
> Hi Julien,
> 
> > -----Original Message-----
> > From: Julien Grall <julien@xen.org>
> > Sent: 2020年12月3日 2:11
> > To: Wei Chen <Wei.Chen@arm.com>; Stefano Stabellini <sstabellini@kernel.org>
> > Cc: Penny Zheng <Penny.Zheng@arm.com>; xen-devel@lists.xenproject.org;
> > Andre Przywara <Andre.Przywara@arm.com>; Bertrand Marquis
> > <Bertrand.Marquis@arm.com>; Kaly Xin <Kaly.Xin@arm.com>; nd
> > <nd@arm.com>
> > Subject: Re: [PATCH] xen/arm: Add Cortex-A73 erratum 858921 workaround
> > 
> > 
> > 
> > On 26/11/2020 11:27, Wei Chen wrote:
> > > Hi Julien,
> > 
> > Hi Wei,
> > 
> > >> -----Original Message-----
> > >> From: Julien Grall <julien@xen.org>
> > >> Sent: 2020年11月26日 18:55
> > >> To: Wei Chen <Wei.Chen@arm.com>; Stefano Stabellini
> > <sstabellini@kernel.org>
> > >> Cc: Penny Zheng <Penny.Zheng@arm.com>; xen-devel@lists.xenproject.org;
> > >> Andre Przywara <Andre.Przywara@arm.com>; Bertrand Marquis
> > >> <Bertrand.Marquis@arm.com>; Kaly Xin <Kaly.Xin@arm.com>; nd
> > >> <nd@arm.com>
> > >> Subject: Re: [PATCH] xen/arm: Add Cortex-A73 erratum 858921 workaround
> > >>
> > >> Hi Wei,
> > >>
> > >> Your e-mail font seems to be different to the usual plain text one. Are
> > >> you sending the e-mail using HTML by any chance?
> > >>
> > >
> > > It's strange, I always use the plain text.
> > 
> > Maybe exchange decided to mangle the e-mail :). Anyway, this new message
> > looks fine.
> > 
> > >
> > >> On 26/11/2020 02:07, Wei Chen wrote:
> > >>> Hi Stefano,
> > >>>
> > >>>> -----Original Message-----
> > >>>> From: Stefano Stabellini <sstabellini@kernel.org>
> > >>>> Sent: 2020??????11??????26?????? 8:00
> > >>>> To: Wei Chen <Wei.Chen@arm.com>
> > >>>> Cc: Julien Grall <julien@xen.org>; Penny Zheng <Penny.Zheng@arm.com>;
> > >> xen-
> > >>>> devel@lists.xenproject.org; sstabellini@kernel.org; Andre Przywara
> > >>>> <Andre.Przywara@arm.com>; Bertrand Marquis
> > >> <Bertrand.Marquis@arm.com>;
> > >>>> Kaly Xin <Kaly.Xin@arm.com>; nd <nd@arm.com>
> > >>>> Subject: RE: [PATCH] xen/arm: Add Cortex-A73 erratum 858921
> > workaround
> > >>>>
> > >>>> Resuming this old thread.
> > >>>>
> > >>>> On Fri, 13 Nov 2020, Wei Chen wrote:
> > >>>>>> Hi,
> > >>>>>>
> > >>>>>> On 09/11/2020 08:21, Penny Zheng wrote:
> > >>>>>>> CNTVCT_EL0 or CNTPCT_EL0 counter read in Cortex-A73 (all versions)
> > >>>>>>> might return a wrong value when the counter crosses a 32bit boundary.
> > >>>>>>>
> > >>>>>>> Until now, there is no case for Xen itself to access CNTVCT_EL0,
> > >>>>>>> and it also should be the Guest OS's responsibility to deal with
> > >>>>>>> this part.
> > >>>>>>>
> > >>>>>>> But for CNTPCT, there exists several cases in Xen involving reading
> > >>>>>>> CNTPCT, so a possible workaround is that performing the read twice,
> > >>>>>>> and to return one or the other depending on whether a transition has
> > >>>>>>> taken place.
> > >>>>>>>
> > >>>>>>> Signed-off-by: Penny Zheng <penny.zheng@arm.com>
> > >>>>>>
> > >>>>>> Acked-by: Julien Grall <jgrall@amazon.com>
> > >>>>>>
> > >>>>>> On a related topic, do we need a fix similar to Linux commit
> > >>>>>> 75a19a0202db "arm64: arch_timer: Ensure counter register reads occur
> > >>>>>> with seqlock held"?
> > >>>>>>
> > >>>>>
> > >>>>> I take a look at this Linux commit, it seems to prevent the seq-lock to be
> > >>>>> speculated.  Using an enforce ordering instead of ISB after the read
> > counter
> > >>>>> operation seems to be for performance reasons.
> > >>>>>
> > >>>>> I have found that you had placed an ISB before read counter in get_cycles
> > >>>>> to prevent counter value to be speculated. But you haven't placed the
> > >> second
> > >>>>> ISB after reading. Is it because we haven't used the get_cycles in seq lock
> > >>>>> critical context (Maybe I didn't find the right place)? So should we need to
> > >> fix it
> > >>>>> now, or you prefer to fix it now for future usage?
> > >>>>
> > >>>> Looking at the call sites, it doesn't look like we need any ISB after
> > >>>> get_cycles as it is not used in any critical context. There is also a
> > >>>> data dependency with the value returned by it.
> > >>
> > >> I am assuming you looked at all the users of NOW(). Is that right?
> > >>
> > >>>>
> > >>>> So I am thinking we don't need any fix. At most we need an in-code
> > comment?
> > >>>
> > >>> I agree with you to add an in-code comment. It will remind us in future
> > when
> > >> we
> > >>> use the get_cycles in critical context. Adding it now will probably only lead
> > to
> > >>> meaningless performance degradation.
> > >>
> > >> I read this as there would be no perfomance impact if we add the
> > >> ordering it now. Did you intend to say?
> > >
> > > Sorry about my English. I intended to say "Adding it now may introduce some
> > > performance cost. And this performance cost may be not worth. Because Xen
> > > may never use it in a similar scenario "
> > 
> > Don't worry! I think the performance should not be noticeable if we use
> > the same trick as Linux.
> > 
> > >> In addition, AFAICT, the x86 version of get_cycles() is already able to
> > >> provide that ordering. So there are chances that code may rely on it.
> > >>
> > >> While I don't necessarily agree to add barriers everywhere by default
> > >> (this may have big impact on the platform). I think it is better to have
> > >> an accurate number of cycles.
> > >>
> > >
> > > As x86 had done it, I think it’s ok to do it for Arm. This will keep a function
> > > behaves the same on different architectures.
> > 
> > Just to be clear, I am not 100% sure this is what Intel is doing.
> > Although this is my understanding of the comment in the code.
> > 
> > @Stefano, what do you think?
> > 
> > @Wei, assuming Stefano is happy with the proposal, would you be happy to
> > send a patch for that?
> > 
> 
> Of  course, I am willing to do that. It seems the enforce_ordering patch has been
> merged. And Vincenzo reported the enforce_ordering method will have ~4.5
> performance improvement[1] (Compare to ISB). So I will use enforce_ordering
> method directly instead of using ISB.
> 
> [1]https://lkml.org/lkml/2020/3/13/645

If we can enforce ordering without adding an ISB, I am all for it.
RE: [PATCH] xen/arm: Add Cortex-A73 erratum 858921 workaround
Posted by Wei Chen 3 years, 5 months ago
Hi Penny,

> -----Original Message-----
> From: Penny Zheng <penny.zheng@arm.com>
> Sent: 2020年11月9日 16:21
> To: xen-devel@lists.xenproject.org; sstabellini@kernel.org; julien@xen.org
> Cc: Andre Przywara <Andre.Przywara@arm.com>; Bertrand Marquis
> <Bertrand.Marquis@arm.com>; Wei Chen <Wei.Chen@arm.com>; Penny Zheng
> <Penny.Zheng@arm.com>; Kaly Xin <Kaly.Xin@arm.com>; nd <nd@arm.com>
> Subject: [PATCH] xen/arm: Add Cortex-A73 erratum 858921 workaround
> 
> CNTVCT_EL0 or CNTPCT_EL0 counter read in Cortex-A73 (all versions)
> might return a wrong value when the counter crosses a 32bit boundary.
> 
> Until now, there is no case for Xen itself to access CNTVCT_EL0,
> and it also should be the Guest OS's responsibility to deal with
> this part.
> 
> But for CNTPCT, there exists several cases in Xen involving reading
> CNTPCT, so a possible workaround is that performing the read twice,
> and to return one or the other depending on whether a transition has
> taken place.
> 
> Signed-off-by: Penny Zheng <penny.zheng@arm.com>

Reviewed-by: Wei Chen <Wei.Chen@arm.com>

Thanks,
Wei Chen

> ---
>  docs/misc/arm/silicon-errata.txt |  1 +
>  xen/arch/arm/Kconfig             | 18 ++++++++++++++++++
>  xen/arch/arm/cpuerrata.c         |  8 ++++++++
>  xen/arch/arm/vtimer.c            |  2 +-
>  xen/include/asm-arm/cpuerrata.h  |  2 ++
>  xen/include/asm-arm/cpufeature.h |  3 ++-
>  xen/include/asm-arm/time.h       | 22 +++++++++++++++++++++-
>  7 files changed, 53 insertions(+), 3 deletions(-)
> 
> diff --git a/docs/misc/arm/silicon-errata.txt b/docs/misc/arm/silicon-errata.txt
> index 1f18a9df58..552c4151d3 100644
> --- a/docs/misc/arm/silicon-errata.txt
> +++ b/docs/misc/arm/silicon-errata.txt
> @@ -51,6 +51,7 @@ stable hypervisors.
>  | ARM            | Cortex-A57      | #1319537        | N/A                     |
>  | ARM            | Cortex-A72      | #1319367        | N/A                     |
>  | ARM            | Cortex-A72      | #853709         | N/A                     |
> +| ARM            | Cortex-A73      | #858921         | ARM_ERRATUM_858921      |
>  | ARM            | Cortex-A76      | #1165522        | N/A                     |
>  | ARM            | Neoverse-N1     | #1165522        | N/A
>  | ARM            | MMU-500         | #842869         | N/A                     |
> diff --git a/xen/arch/arm/Kconfig b/xen/arch/arm/Kconfig
> index 2777388265..f938dd21bd 100644
> --- a/xen/arch/arm/Kconfig
> +++ b/xen/arch/arm/Kconfig
> @@ -226,6 +226,24 @@ config ARM64_ERRATUM_834220
> 
>  	  If unsure, say Y.
> 
> +config ARM_ERRATUM_858921
> +	bool "Cortex-A73: 858921: Possible wrong read value for CNTVCT or
> CNTPCT"
> +	default y
> +	help
> +	  This option adds an alternative code sequence to work around ARM
> +	  erratum 858921 on Cortex-A73 (all versions).
> +
> +	  Affected Cortex-A73 might return wrong read value for CNTVCT or
> CNTPCT
> +	  when the counter crosses a 32bit boundary.
> +
> +	  The workaround involves performing the read twice, and to return
> +	  one or the other value depending on whether a transition has taken
> place.
> +	  Please note that this does not necessarily enable the workaround,
> +	  as it depends on the alternative framework, which will only patch
> +	  the kernel if an affected CPU is detected.
> +
> +	  If unsure, say Y.
> +
>  endmenu
> 
>  config ARM64_HARDEN_BRANCH_PREDICTOR
> diff --git a/xen/arch/arm/cpuerrata.c b/xen/arch/arm/cpuerrata.c
> index 6731d873e8..567911d380 100644
> --- a/xen/arch/arm/cpuerrata.c
> +++ b/xen/arch/arm/cpuerrata.c
> @@ -469,6 +469,14 @@ static const struct arm_cpu_capabilities arm_errata[] =
> {
>          .capability = ARM_SSBD,
>          .matches = has_ssbd_mitigation,
>      },
> +#endif
> +#ifdef CONFIG_ARM_ERRATUM_858921
> +    {
> +        /* Cortex-A73 (all versions) */
> +        .desc = "ARM erratum 858921",
> +        .capability = ARM_WORKAROUND_858921,
> +        MIDR_ALL_VERSIONS(MIDR_CORTEX_A73),
> +    },
>  #endif
>      {
>          /* Neoverse r0p0 - r2p0 */
> diff --git a/xen/arch/arm/vtimer.c b/xen/arch/arm/vtimer.c
> index 6d39fc944f..c2b27915c6 100644
> --- a/xen/arch/arm/vtimer.c
> +++ b/xen/arch/arm/vtimer.c
> @@ -62,7 +62,7 @@ static void virt_timer_expired(void *data)
> 
>  int domain_vtimer_init(struct domain *d, struct xen_arch_domainconfig
> *config)
>  {
> -    d->arch.virt_timer_base.offset = READ_SYSREG64(CNTPCT_EL0);
> +    d->arch.virt_timer_base.offset = get_cycles();
>      d->time_offset.seconds = ticks_to_ns(d->arch.virt_timer_base.offset -
> boot_count);
>      do_div(d->time_offset.seconds, 1000000000);
> 
> diff --git a/xen/include/asm-arm/cpuerrata.h b/xen/include/asm-
> arm/cpuerrata.h
> index 88ef3ca934..8d7e7b9375 100644
> --- a/xen/include/asm-arm/cpuerrata.h
> +++ b/xen/include/asm-arm/cpuerrata.h
> @@ -28,6 +28,8 @@ static inline bool check_workaround_##erratum(void)
> \
>  CHECK_WORKAROUND_HELPER(766422, ARM32_WORKAROUND_766422,
> CONFIG_ARM_32)
>  CHECK_WORKAROUND_HELPER(834220, ARM64_WORKAROUND_834220,
> CONFIG_ARM_64)
>  CHECK_WORKAROUND_HELPER(ssbd, ARM_SSBD, CONFIG_ARM_SSBD)
> +CHECK_WORKAROUND_HELPER(858921, ARM_WORKAROUND_858921,
> +                        CONFIG_ARM_ERRATUM_858921)
> 
>  #undef CHECK_WORKAROUND_HELPER
> 
> diff --git a/xen/include/asm-arm/cpufeature.h b/xen/include/asm-
> arm/cpufeature.h
> index 10878ead8a..016a9fe203 100644
> --- a/xen/include/asm-arm/cpufeature.h
> +++ b/xen/include/asm-arm/cpufeature.h
> @@ -45,8 +45,9 @@
>  #define ARM_SSBD 7
>  #define ARM_SMCCC_1_1 8
>  #define ARM64_WORKAROUND_AT_SPECULATE 9
> +#define ARM_WORKAROUND_858921 10
> 
> -#define ARM_NCAPS           10
> +#define ARM_NCAPS           11
> 
>  #ifndef __ASSEMBLY__
> 
> diff --git a/xen/include/asm-arm/time.h b/xen/include/asm-arm/time.h
> index 9cb6f9b0b4..1b2c13614b 100644
> --- a/xen/include/asm-arm/time.h
> +++ b/xen/include/asm-arm/time.h
> @@ -3,6 +3,7 @@
> 
>  #include <asm/sysregs.h>
>  #include <asm/system.h>
> +#include <asm/cpuerrata.h>
> 
>  #define DT_MATCH_TIMER                      \
>      DT_MATCH_COMPATIBLE("arm,armv7-timer"), \
> @@ -13,7 +14,26 @@ typedef uint64_t cycles_t;
>  static inline cycles_t get_cycles (void)
>  {
>          isb();
> -        return READ_SYSREG64(CNTPCT_EL0);
> +        /*
> +         * ARM_WORKAROUND_858921: Cortex-A73 (all versions) counter read
> +         * can return a wrong value when the counter crosses a 32bit boundary.
> +         */
> +        if ( !check_workaround_858921() )
> +            return READ_SYSREG64(CNTPCT_EL0);
> +        else
> +        {
> +            /*
> +             * A recommended workaround for erratum 858921 is to:
> +             *  1- Read twice CNTPCT.
> +             *  2- Compare bit[32] of the two read values.
> +             *      - If bit[32] is different, keep the old value.
> +             *      - If bit[32] is the same, keep the new value.
> +             */
> +            cycles_t old, new;
> +            old = READ_SYSREG64(CNTPCT_EL0);
> +            new = READ_SYSREG64(CNTPCT_EL0);
> +            return (((old ^ new) >> 32) & 1) ? old : new;
> +        }
>  }
> 
>  /* List of timer's IRQ */
> --
> 2.25.1