docs/misc/arm/silicon-errata.txt | 1 + xen/arch/arm/Kconfig | 18 ++++++++++++++++++ xen/arch/arm/cpuerrata.c | 8 ++++++++ xen/arch/arm/vtimer.c | 2 +- xen/include/asm-arm/cpuerrata.h | 2 ++ xen/include/asm-arm/cpufeature.h | 3 ++- xen/include/asm-arm/time.h | 22 +++++++++++++++++++++- 7 files changed, 53 insertions(+), 3 deletions(-)
CNTVCT_EL0 or CNTPCT_EL0 counter read in Cortex-A73 (all versions)
might return a wrong value when the counter crosses a 32bit boundary.
Until now, there is no case for Xen itself to access CNTVCT_EL0,
and it also should be the Guest OS's responsibility to deal with
this part.
But for CNTPCT, there exists several cases in Xen involving reading
CNTPCT, so a possible workaround is that performing the read twice,
and to return one or the other depending on whether a transition has
taken place.
Signed-off-by: Penny Zheng <penny.zheng@arm.com>
---
docs/misc/arm/silicon-errata.txt | 1 +
xen/arch/arm/Kconfig | 18 ++++++++++++++++++
xen/arch/arm/cpuerrata.c | 8 ++++++++
xen/arch/arm/vtimer.c | 2 +-
xen/include/asm-arm/cpuerrata.h | 2 ++
xen/include/asm-arm/cpufeature.h | 3 ++-
xen/include/asm-arm/time.h | 22 +++++++++++++++++++++-
7 files changed, 53 insertions(+), 3 deletions(-)
diff --git a/docs/misc/arm/silicon-errata.txt b/docs/misc/arm/silicon-errata.txt
index 1f18a9df58..552c4151d3 100644
--- a/docs/misc/arm/silicon-errata.txt
+++ b/docs/misc/arm/silicon-errata.txt
@@ -51,6 +51,7 @@ stable hypervisors.
| ARM | Cortex-A57 | #1319537 | N/A |
| ARM | Cortex-A72 | #1319367 | N/A |
| ARM | Cortex-A72 | #853709 | N/A |
+| ARM | Cortex-A73 | #858921 | ARM_ERRATUM_858921 |
| ARM | Cortex-A76 | #1165522 | N/A |
| ARM | Neoverse-N1 | #1165522 | N/A
| ARM | MMU-500 | #842869 | N/A |
diff --git a/xen/arch/arm/Kconfig b/xen/arch/arm/Kconfig
index 2777388265..f938dd21bd 100644
--- a/xen/arch/arm/Kconfig
+++ b/xen/arch/arm/Kconfig
@@ -226,6 +226,24 @@ config ARM64_ERRATUM_834220
If unsure, say Y.
+config ARM_ERRATUM_858921
+ bool "Cortex-A73: 858921: Possible wrong read value for CNTVCT or CNTPCT"
+ default y
+ help
+ This option adds an alternative code sequence to work around ARM
+ erratum 858921 on Cortex-A73 (all versions).
+
+ Affected Cortex-A73 might return wrong read value for CNTVCT or CNTPCT
+ when the counter crosses a 32bit boundary.
+
+ The workaround involves performing the read twice, and to return
+ one or the other value depending on whether a transition has taken place.
+ Please note that this does not necessarily enable the workaround,
+ as it depends on the alternative framework, which will only patch
+ the kernel if an affected CPU is detected.
+
+ If unsure, say Y.
+
endmenu
config ARM64_HARDEN_BRANCH_PREDICTOR
diff --git a/xen/arch/arm/cpuerrata.c b/xen/arch/arm/cpuerrata.c
index 6731d873e8..567911d380 100644
--- a/xen/arch/arm/cpuerrata.c
+++ b/xen/arch/arm/cpuerrata.c
@@ -469,6 +469,14 @@ static const struct arm_cpu_capabilities arm_errata[] = {
.capability = ARM_SSBD,
.matches = has_ssbd_mitigation,
},
+#endif
+#ifdef CONFIG_ARM_ERRATUM_858921
+ {
+ /* Cortex-A73 (all versions) */
+ .desc = "ARM erratum 858921",
+ .capability = ARM_WORKAROUND_858921,
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A73),
+ },
#endif
{
/* Neoverse r0p0 - r2p0 */
diff --git a/xen/arch/arm/vtimer.c b/xen/arch/arm/vtimer.c
index 6d39fc944f..c2b27915c6 100644
--- a/xen/arch/arm/vtimer.c
+++ b/xen/arch/arm/vtimer.c
@@ -62,7 +62,7 @@ static void virt_timer_expired(void *data)
int domain_vtimer_init(struct domain *d, struct xen_arch_domainconfig *config)
{
- d->arch.virt_timer_base.offset = READ_SYSREG64(CNTPCT_EL0);
+ d->arch.virt_timer_base.offset = get_cycles();
d->time_offset.seconds = ticks_to_ns(d->arch.virt_timer_base.offset - boot_count);
do_div(d->time_offset.seconds, 1000000000);
diff --git a/xen/include/asm-arm/cpuerrata.h b/xen/include/asm-arm/cpuerrata.h
index 88ef3ca934..8d7e7b9375 100644
--- a/xen/include/asm-arm/cpuerrata.h
+++ b/xen/include/asm-arm/cpuerrata.h
@@ -28,6 +28,8 @@ static inline bool check_workaround_##erratum(void) \
CHECK_WORKAROUND_HELPER(766422, ARM32_WORKAROUND_766422, CONFIG_ARM_32)
CHECK_WORKAROUND_HELPER(834220, ARM64_WORKAROUND_834220, CONFIG_ARM_64)
CHECK_WORKAROUND_HELPER(ssbd, ARM_SSBD, CONFIG_ARM_SSBD)
+CHECK_WORKAROUND_HELPER(858921, ARM_WORKAROUND_858921,
+ CONFIG_ARM_ERRATUM_858921)
#undef CHECK_WORKAROUND_HELPER
diff --git a/xen/include/asm-arm/cpufeature.h b/xen/include/asm-arm/cpufeature.h
index 10878ead8a..016a9fe203 100644
--- a/xen/include/asm-arm/cpufeature.h
+++ b/xen/include/asm-arm/cpufeature.h
@@ -45,8 +45,9 @@
#define ARM_SSBD 7
#define ARM_SMCCC_1_1 8
#define ARM64_WORKAROUND_AT_SPECULATE 9
+#define ARM_WORKAROUND_858921 10
-#define ARM_NCAPS 10
+#define ARM_NCAPS 11
#ifndef __ASSEMBLY__
diff --git a/xen/include/asm-arm/time.h b/xen/include/asm-arm/time.h
index 9cb6f9b0b4..1b2c13614b 100644
--- a/xen/include/asm-arm/time.h
+++ b/xen/include/asm-arm/time.h
@@ -3,6 +3,7 @@
#include <asm/sysregs.h>
#include <asm/system.h>
+#include <asm/cpuerrata.h>
#define DT_MATCH_TIMER \
DT_MATCH_COMPATIBLE("arm,armv7-timer"), \
@@ -13,7 +14,26 @@ typedef uint64_t cycles_t;
static inline cycles_t get_cycles (void)
{
isb();
- return READ_SYSREG64(CNTPCT_EL0);
+ /*
+ * ARM_WORKAROUND_858921: Cortex-A73 (all versions) counter read
+ * can return a wrong value when the counter crosses a 32bit boundary.
+ */
+ if ( !check_workaround_858921() )
+ return READ_SYSREG64(CNTPCT_EL0);
+ else
+ {
+ /*
+ * A recommended workaround for erratum 858921 is to:
+ * 1- Read twice CNTPCT.
+ * 2- Compare bit[32] of the two read values.
+ * - If bit[32] is different, keep the old value.
+ * - If bit[32] is the same, keep the new value.
+ */
+ cycles_t old, new;
+ old = READ_SYSREG64(CNTPCT_EL0);
+ new = READ_SYSREG64(CNTPCT_EL0);
+ return (((old ^ new) >> 32) & 1) ? old : new;
+ }
}
/* List of timer's IRQ */
--
2.25.1
Hi,
> On 9 Nov 2020, at 08:21, Penny Zheng <Penny.Zheng@arm.com> wrote:
>
> CNTVCT_EL0 or CNTPCT_EL0 counter read in Cortex-A73 (all versions)
> might return a wrong value when the counter crosses a 32bit boundary.
>
> Until now, there is no case for Xen itself to access CNTVCT_EL0,
> and it also should be the Guest OS's responsibility to deal with
> this part.
>
> But for CNTPCT, there exists several cases in Xen involving reading
> CNTPCT, so a possible workaround is that performing the read twice,
> and to return one or the other depending on whether a transition has
> taken place.
>
> Signed-off-by: Penny Zheng <penny.zheng@arm.com>
Reviewed-by: Bertrand Marquis <bertrand.marquis@arm.com>
Regards
Bertrand
> ---
> docs/misc/arm/silicon-errata.txt | 1 +
> xen/arch/arm/Kconfig | 18 ++++++++++++++++++
> xen/arch/arm/cpuerrata.c | 8 ++++++++
> xen/arch/arm/vtimer.c | 2 +-
> xen/include/asm-arm/cpuerrata.h | 2 ++
> xen/include/asm-arm/cpufeature.h | 3 ++-
> xen/include/asm-arm/time.h | 22 +++++++++++++++++++++-
> 7 files changed, 53 insertions(+), 3 deletions(-)
>
> diff --git a/docs/misc/arm/silicon-errata.txt b/docs/misc/arm/silicon-errata.txt
> index 1f18a9df58..552c4151d3 100644
> --- a/docs/misc/arm/silicon-errata.txt
> +++ b/docs/misc/arm/silicon-errata.txt
> @@ -51,6 +51,7 @@ stable hypervisors.
> | ARM | Cortex-A57 | #1319537 | N/A |
> | ARM | Cortex-A72 | #1319367 | N/A |
> | ARM | Cortex-A72 | #853709 | N/A |
> +| ARM | Cortex-A73 | #858921 | ARM_ERRATUM_858921 |
> | ARM | Cortex-A76 | #1165522 | N/A |
> | ARM | Neoverse-N1 | #1165522 | N/A
> | ARM | MMU-500 | #842869 | N/A |
> diff --git a/xen/arch/arm/Kconfig b/xen/arch/arm/Kconfig
> index 2777388265..f938dd21bd 100644
> --- a/xen/arch/arm/Kconfig
> +++ b/xen/arch/arm/Kconfig
> @@ -226,6 +226,24 @@ config ARM64_ERRATUM_834220
>
> If unsure, say Y.
>
> +config ARM_ERRATUM_858921
> + bool "Cortex-A73: 858921: Possible wrong read value for CNTVCT or CNTPCT"
> + default y
> + help
> + This option adds an alternative code sequence to work around ARM
> + erratum 858921 on Cortex-A73 (all versions).
> +
> + Affected Cortex-A73 might return wrong read value for CNTVCT or CNTPCT
> + when the counter crosses a 32bit boundary.
> +
> + The workaround involves performing the read twice, and to return
> + one or the other value depending on whether a transition has taken place.
> + Please note that this does not necessarily enable the workaround,
> + as it depends on the alternative framework, which will only patch
> + the kernel if an affected CPU is detected.
> +
> + If unsure, say Y.
> +
> endmenu
>
> config ARM64_HARDEN_BRANCH_PREDICTOR
> diff --git a/xen/arch/arm/cpuerrata.c b/xen/arch/arm/cpuerrata.c
> index 6731d873e8..567911d380 100644
> --- a/xen/arch/arm/cpuerrata.c
> +++ b/xen/arch/arm/cpuerrata.c
> @@ -469,6 +469,14 @@ static const struct arm_cpu_capabilities arm_errata[] = {
> .capability = ARM_SSBD,
> .matches = has_ssbd_mitigation,
> },
> +#endif
> +#ifdef CONFIG_ARM_ERRATUM_858921
> + {
> + /* Cortex-A73 (all versions) */
> + .desc = "ARM erratum 858921",
> + .capability = ARM_WORKAROUND_858921,
> + MIDR_ALL_VERSIONS(MIDR_CORTEX_A73),
> + },
> #endif
> {
> /* Neoverse r0p0 - r2p0 */
> diff --git a/xen/arch/arm/vtimer.c b/xen/arch/arm/vtimer.c
> index 6d39fc944f..c2b27915c6 100644
> --- a/xen/arch/arm/vtimer.c
> +++ b/xen/arch/arm/vtimer.c
> @@ -62,7 +62,7 @@ static void virt_timer_expired(void *data)
>
> int domain_vtimer_init(struct domain *d, struct xen_arch_domainconfig *config)
> {
> - d->arch.virt_timer_base.offset = READ_SYSREG64(CNTPCT_EL0);
> + d->arch.virt_timer_base.offset = get_cycles();
> d->time_offset.seconds = ticks_to_ns(d->arch.virt_timer_base.offset - boot_count);
> do_div(d->time_offset.seconds, 1000000000);
>
> diff --git a/xen/include/asm-arm/cpuerrata.h b/xen/include/asm-arm/cpuerrata.h
> index 88ef3ca934..8d7e7b9375 100644
> --- a/xen/include/asm-arm/cpuerrata.h
> +++ b/xen/include/asm-arm/cpuerrata.h
> @@ -28,6 +28,8 @@ static inline bool check_workaround_##erratum(void) \
> CHECK_WORKAROUND_HELPER(766422, ARM32_WORKAROUND_766422, CONFIG_ARM_32)
> CHECK_WORKAROUND_HELPER(834220, ARM64_WORKAROUND_834220, CONFIG_ARM_64)
> CHECK_WORKAROUND_HELPER(ssbd, ARM_SSBD, CONFIG_ARM_SSBD)
> +CHECK_WORKAROUND_HELPER(858921, ARM_WORKAROUND_858921,
> + CONFIG_ARM_ERRATUM_858921)
>
> #undef CHECK_WORKAROUND_HELPER
>
> diff --git a/xen/include/asm-arm/cpufeature.h b/xen/include/asm-arm/cpufeature.h
> index 10878ead8a..016a9fe203 100644
> --- a/xen/include/asm-arm/cpufeature.h
> +++ b/xen/include/asm-arm/cpufeature.h
> @@ -45,8 +45,9 @@
> #define ARM_SSBD 7
> #define ARM_SMCCC_1_1 8
> #define ARM64_WORKAROUND_AT_SPECULATE 9
> +#define ARM_WORKAROUND_858921 10
>
> -#define ARM_NCAPS 10
> +#define ARM_NCAPS 11
>
> #ifndef __ASSEMBLY__
>
> diff --git a/xen/include/asm-arm/time.h b/xen/include/asm-arm/time.h
> index 9cb6f9b0b4..1b2c13614b 100644
> --- a/xen/include/asm-arm/time.h
> +++ b/xen/include/asm-arm/time.h
> @@ -3,6 +3,7 @@
>
> #include <asm/sysregs.h>
> #include <asm/system.h>
> +#include <asm/cpuerrata.h>
>
> #define DT_MATCH_TIMER \
> DT_MATCH_COMPATIBLE("arm,armv7-timer"), \
> @@ -13,7 +14,26 @@ typedef uint64_t cycles_t;
> static inline cycles_t get_cycles (void)
> {
> isb();
> - return READ_SYSREG64(CNTPCT_EL0);
> + /*
> + * ARM_WORKAROUND_858921: Cortex-A73 (all versions) counter read
> + * can return a wrong value when the counter crosses a 32bit boundary.
> + */
> + if ( !check_workaround_858921() )
> + return READ_SYSREG64(CNTPCT_EL0);
> + else
> + {
> + /*
> + * A recommended workaround for erratum 858921 is to:
> + * 1- Read twice CNTPCT.
> + * 2- Compare bit[32] of the two read values.
> + * - If bit[32] is different, keep the old value.
> + * - If bit[32] is the same, keep the new value.
> + */
> + cycles_t old, new;
> + old = READ_SYSREG64(CNTPCT_EL0);
> + new = READ_SYSREG64(CNTPCT_EL0);
> + return (((old ^ new) >> 32) & 1) ? old : new;
> + }
> }
>
> /* List of timer's IRQ */
> --
> 2.25.1
>
Hi, On 09/11/2020 08:21, Penny Zheng wrote: > CNTVCT_EL0 or CNTPCT_EL0 counter read in Cortex-A73 (all versions) > might return a wrong value when the counter crosses a 32bit boundary. > > Until now, there is no case for Xen itself to access CNTVCT_EL0, > and it also should be the Guest OS's responsibility to deal with > this part. > > But for CNTPCT, there exists several cases in Xen involving reading > CNTPCT, so a possible workaround is that performing the read twice, > and to return one or the other depending on whether a transition has > taken place. > > Signed-off-by: Penny Zheng <penny.zheng@arm.com> Acked-by: Julien Grall <jgrall@amazon.com> On a related topic, do we need a fix similar to Linux commit 75a19a0202db "arm64: arch_timer: Ensure counter register reads occur with seqlock held"? Cheers, -- Julien Grall
> -----Original Message----- > From: Julien Grall <julien@xen.org> > Sent: Monday, November 9, 2020 8:04 PM > To: Penny Zheng <Penny.Zheng@arm.com>; xen-devel@lists.xenproject.org; > sstabellini@kernel.org > Cc: Andre Przywara <Andre.Przywara@arm.com>; Bertrand Marquis > <Bertrand.Marquis@arm.com>; Wei Chen <Wei.Chen@arm.com>; Kaly Xin > <Kaly.Xin@arm.com>; nd <nd@arm.com> > Subject: Re: [PATCH] xen/arm: Add Cortex-A73 erratum 858921 workaround > > Hi, > > On 09/11/2020 08:21, Penny Zheng wrote: > > CNTVCT_EL0 or CNTPCT_EL0 counter read in Cortex-A73 (all versions) > > might return a wrong value when the counter crosses a 32bit boundary. > > > > Until now, there is no case for Xen itself to access CNTVCT_EL0, and > > it also should be the Guest OS's responsibility to deal with this > > part. > > > > But for CNTPCT, there exists several cases in Xen involving reading > > CNTPCT, so a possible workaround is that performing the read twice, > > and to return one or the other depending on whether a transition has > > taken place. > > > > Signed-off-by: Penny Zheng <penny.zheng@arm.com> > > Acked-by: Julien Grall <jgrall@amazon.com> > Thank you. 😉 > On a related topic, do we need a fix similar to Linux commit 75a19a0202db > "arm64: arch_timer: Ensure counter register reads occur with seqlock held"? > Sure, I'll check this commit and talk with my teams for further work. Cheers -- Penny Zheng > Cheers, > > -- > Julien Grall
HI Julien, > -----Original Message----- > From: Julien Grall <julien@xen.org> > Sent: 2020年11月9日 20:04 > To: Penny Zheng <Penny.Zheng@arm.com>; xen-devel@lists.xenproject.org; > sstabellini@kernel.org > Cc: Andre Przywara <Andre.Przywara@arm.com>; Bertrand Marquis > <Bertrand.Marquis@arm.com>; Wei Chen <Wei.Chen@arm.com>; Kaly Xin > <Kaly.Xin@arm.com>; nd <nd@arm.com> > Subject: Re: [PATCH] xen/arm: Add Cortex-A73 erratum 858921 workaround > > Hi, > > On 09/11/2020 08:21, Penny Zheng wrote: > > CNTVCT_EL0 or CNTPCT_EL0 counter read in Cortex-A73 (all versions) > > might return a wrong value when the counter crosses a 32bit boundary. > > > > Until now, there is no case for Xen itself to access CNTVCT_EL0, > > and it also should be the Guest OS's responsibility to deal with > > this part. > > > > But for CNTPCT, there exists several cases in Xen involving reading > > CNTPCT, so a possible workaround is that performing the read twice, > > and to return one or the other depending on whether a transition has > > taken place. > > > > Signed-off-by: Penny Zheng <penny.zheng@arm.com> > > Acked-by: Julien Grall <jgrall@amazon.com> > > On a related topic, do we need a fix similar to Linux commit > 75a19a0202db "arm64: arch_timer: Ensure counter register reads occur > with seqlock held"? > I take a look at this Linux commit, it seems to prevent the seq-lock to be speculated. Using an enforce ordering instead of ISB after the read counter operation seems to be for performance reasons. I have found that you had placed an ISB before read counter in get_cycles to prevent counter value to be speculated. But you haven't placed the second ISB after reading. Is it because we haven't used the get_cycles in seq lock critical context (Maybe I didn't find the right place)? So should we need to fix it now, or you prefer to fix it now for future usage? Regards, Wei Chen > Cheers, > > -- > Julien Grall
Resuming this old thread. On Fri, 13 Nov 2020, Wei Chen wrote: > > Hi, > > > > On 09/11/2020 08:21, Penny Zheng wrote: > > > CNTVCT_EL0 or CNTPCT_EL0 counter read in Cortex-A73 (all versions) > > > might return a wrong value when the counter crosses a 32bit boundary. > > > > > > Until now, there is no case for Xen itself to access CNTVCT_EL0, > > > and it also should be the Guest OS's responsibility to deal with > > > this part. > > > > > > But for CNTPCT, there exists several cases in Xen involving reading > > > CNTPCT, so a possible workaround is that performing the read twice, > > > and to return one or the other depending on whether a transition has > > > taken place. > > > > > > Signed-off-by: Penny Zheng <penny.zheng@arm.com> > > > > Acked-by: Julien Grall <jgrall@amazon.com> > > > > On a related topic, do we need a fix similar to Linux commit > > 75a19a0202db "arm64: arch_timer: Ensure counter register reads occur > > with seqlock held"? > > > > I take a look at this Linux commit, it seems to prevent the seq-lock to be > speculated. Using an enforce ordering instead of ISB after the read counter > operation seems to be for performance reasons. > > I have found that you had placed an ISB before read counter in get_cycles > to prevent counter value to be speculated. But you haven't placed the second > ISB after reading. Is it because we haven't used the get_cycles in seq lock > critical context (Maybe I didn't find the right place)? So should we need to fix it > now, or you prefer to fix it now for future usage? Looking at the call sites, it doesn't look like we need any ISB after get_cycles as it is not used in any critical context. There is also a data dependency with the value returned by it. So I am thinking we don't need any fix. At most we need an in-code comment?
Hi Stefano, > -----Original Message----- > From: Stefano Stabellini <sstabellini@kernel.org> > Sent: 2020年11月26日 8:00 > To: Wei Chen <Wei.Chen@arm.com> > Cc: Julien Grall <julien@xen.org>; Penny Zheng <Penny.Zheng@arm.com>; xen- > devel@lists.xenproject.org; sstabellini@kernel.org; Andre Przywara > <Andre.Przywara@arm.com>; Bertrand Marquis <Bertrand.Marquis@arm.com>; > Kaly Xin <Kaly.Xin@arm.com>; nd <nd@arm.com> > Subject: RE: [PATCH] xen/arm: Add Cortex-A73 erratum 858921 workaround > > Resuming this old thread. > > On Fri, 13 Nov 2020, Wei Chen wrote: > > > Hi, > > > > > > On 09/11/2020 08:21, Penny Zheng wrote: > > > > CNTVCT_EL0 or CNTPCT_EL0 counter read in Cortex-A73 (all versions) > > > > might return a wrong value when the counter crosses a 32bit boundary. > > > > > > > > Until now, there is no case for Xen itself to access CNTVCT_EL0, > > > > and it also should be the Guest OS's responsibility to deal with > > > > this part. > > > > > > > > But for CNTPCT, there exists several cases in Xen involving reading > > > > CNTPCT, so a possible workaround is that performing the read twice, > > > > and to return one or the other depending on whether a transition has > > > > taken place. > > > > > > > > Signed-off-by: Penny Zheng <penny.zheng@arm.com> > > > > > > Acked-by: Julien Grall <jgrall@amazon.com> > > > > > > On a related topic, do we need a fix similar to Linux commit > > > 75a19a0202db "arm64: arch_timer: Ensure counter register reads occur > > > with seqlock held"? > > > > > > > I take a look at this Linux commit, it seems to prevent the seq-lock to be > > speculated. Using an enforce ordering instead of ISB after the read counter > > operation seems to be for performance reasons. > > > > I have found that you had placed an ISB before read counter in get_cycles > > to prevent counter value to be speculated. But you haven't placed the second > > ISB after reading. Is it because we haven't used the get_cycles in seq lock > > critical context (Maybe I didn't find the right place)? So should we need to fix it > > now, or you prefer to fix it now for future usage? > > Looking at the call sites, it doesn't look like we need any ISB after > get_cycles as it is not used in any critical context. There is also a > data dependency with the value returned by it. > > So I am thinking we don't need any fix. At most we need an in-code comment? I agree with you to add an in-code comment. It will remind us in future when we use the get_cycles in critical context. Adding it now will probably only lead to meaningless performance degradation. Because Xen may never use it in a similar scenario. BTW: Can we send a patch that just contains a pure comment : ) Regards, Wei Chen
Hi Wei,
Your e-mail font seems to be different to the usual plain text one. Are
you sending the e-mail using HTML by any chance?
On 26/11/2020 02:07, Wei Chen wrote:
> Hi Stefano,
>
>> -----Original Message-----
>> From: Stefano Stabellini <sstabellini@kernel.org>
>> Sent: 2020年11月26日 8:00
>> To: Wei Chen <Wei.Chen@arm.com>
>> Cc: Julien Grall <julien@xen.org>; Penny Zheng <Penny.Zheng@arm.com>; xen-
>> devel@lists.xenproject.org; sstabellini@kernel.org; Andre Przywara
>> <Andre.Przywara@arm.com>; Bertrand Marquis <Bertrand.Marquis@arm.com>;
>> Kaly Xin <Kaly.Xin@arm.com>; nd <nd@arm.com>
>> Subject: RE: [PATCH] xen/arm: Add Cortex-A73 erratum 858921 workaround
>>
>> Resuming this old thread.
>>
>> On Fri, 13 Nov 2020, Wei Chen wrote:
>>>> Hi,
>>>>
>>>> On 09/11/2020 08:21, Penny Zheng wrote:
>>>>> CNTVCT_EL0 or CNTPCT_EL0 counter read in Cortex-A73 (all versions)
>>>>> might return a wrong value when the counter crosses a 32bit boundary.
>>>>>
>>>>> Until now, there is no case for Xen itself to access CNTVCT_EL0,
>>>>> and it also should be the Guest OS's responsibility to deal with
>>>>> this part.
>>>>>
>>>>> But for CNTPCT, there exists several cases in Xen involving reading
>>>>> CNTPCT, so a possible workaround is that performing the read twice,
>>>>> and to return one or the other depending on whether a transition has
>>>>> taken place.
>>>>>
>>>>> Signed-off-by: Penny Zheng <penny.zheng@arm.com>
>>>>
>>>> Acked-by: Julien Grall <jgrall@amazon.com>
>>>>
>>>> On a related topic, do we need a fix similar to Linux commit
>>>> 75a19a0202db "arm64: arch_timer: Ensure counter register reads occur
>>>> with seqlock held"?
>>>>
>>>
>>> I take a look at this Linux commit, it seems to prevent the seq-lock to be
>>> speculated. Using an enforce ordering instead of ISB after the read counter
>>> operation seems to be for performance reasons.
>>>
>>> I have found that you had placed an ISB before read counter in get_cycles
>>> to prevent counter value to be speculated. But you haven't placed the second
>>> ISB after reading. Is it because we haven't used the get_cycles in seq lock
>>> critical context (Maybe I didn't find the right place)? So should we need to fix it
>>> now, or you prefer to fix it now for future usage?
>>
>> Looking at the call sites, it doesn't look like we need any ISB after
>> get_cycles as it is not used in any critical context. There is also a
>> data dependency with the value returned by it.
I am assuming you looked at all the users of NOW(). Is that right?
>>
>> So I am thinking we don't need any fix. At most we need an in-code comment?
>
> I agree with you to add an in-code comment. It will remind us in future when we
> use the get_cycles in critical context. Adding it now will probably only lead to
> meaningless performance degradation.
I read this as there would be no perfomance impact if we add the
ordering it now. Did you intend to say?
> Because Xen may never use it in a similar
> scenario.
Right, there are two potentials approach here:
- Wait until there are a user
* Pros: Doesn't impact performance
* Cons: We rely on users/reviewers to catch any misuse
- Harden the code
* Pros: Less risk to introduce a bug inadvertently
* Cons: May impact the performance
In general, I prefer that the code is hardened by default if the
performance impact is limited. This may save us hours of
debugging/reproducing bug.
In addition, AFAICT, the x86 version of get_cycles() is already able to
provide that ordering. So there are chances that code may rely on it.
While I don't necessarily agree to add barriers everywhere by default
(this may have big impact on the platform). I think it is better to have
an accurate number of cycles.
Cheers,
--
Julien Grall
Hi Julien, > -----Original Message----- > From: Julien Grall <julien@xen.org> > Sent: 2020年11月26日 18:55 > To: Wei Chen <Wei.Chen@arm.com>; Stefano Stabellini <sstabellini@kernel.org> > Cc: Penny Zheng <Penny.Zheng@arm.com>; xen-devel@lists.xenproject.org; > Andre Przywara <Andre.Przywara@arm.com>; Bertrand Marquis > <Bertrand.Marquis@arm.com>; Kaly Xin <Kaly.Xin@arm.com>; nd > <nd@arm.com> > Subject: Re: [PATCH] xen/arm: Add Cortex-A73 erratum 858921 workaround > > Hi Wei, > > Your e-mail font seems to be different to the usual plain text one. Are > you sending the e-mail using HTML by any chance? > It's strange, I always use the plain text. > On 26/11/2020 02:07, Wei Chen wrote: > > Hi Stefano, > > > >> -----Original Message----- > >> From: Stefano Stabellini <sstabellini@kernel.org> > >> Sent: 2020��11��26�� 8:00 > >> To: Wei Chen <Wei.Chen@arm.com> > >> Cc: Julien Grall <julien@xen.org>; Penny Zheng <Penny.Zheng@arm.com>; > xen- > >> devel@lists.xenproject.org; sstabellini@kernel.org; Andre Przywara > >> <Andre.Przywara@arm.com>; Bertrand Marquis > <Bertrand.Marquis@arm.com>; > >> Kaly Xin <Kaly.Xin@arm.com>; nd <nd@arm.com> > >> Subject: RE: [PATCH] xen/arm: Add Cortex-A73 erratum 858921 workaround > >> > >> Resuming this old thread. > >> > >> On Fri, 13 Nov 2020, Wei Chen wrote: > >>>> Hi, > >>>> > >>>> On 09/11/2020 08:21, Penny Zheng wrote: > >>>>> CNTVCT_EL0 or CNTPCT_EL0 counter read in Cortex-A73 (all versions) > >>>>> might return a wrong value when the counter crosses a 32bit boundary. > >>>>> > >>>>> Until now, there is no case for Xen itself to access CNTVCT_EL0, > >>>>> and it also should be the Guest OS's responsibility to deal with > >>>>> this part. > >>>>> > >>>>> But for CNTPCT, there exists several cases in Xen involving reading > >>>>> CNTPCT, so a possible workaround is that performing the read twice, > >>>>> and to return one or the other depending on whether a transition has > >>>>> taken place. > >>>>> > >>>>> Signed-off-by: Penny Zheng <penny.zheng@arm.com> > >>>> > >>>> Acked-by: Julien Grall <jgrall@amazon.com> > >>>> > >>>> On a related topic, do we need a fix similar to Linux commit > >>>> 75a19a0202db "arm64: arch_timer: Ensure counter register reads occur > >>>> with seqlock held"? > >>>> > >>> > >>> I take a look at this Linux commit, it seems to prevent the seq-lock to be > >>> speculated. Using an enforce ordering instead of ISB after the read counter > >>> operation seems to be for performance reasons. > >>> > >>> I have found that you had placed an ISB before read counter in get_cycles > >>> to prevent counter value to be speculated. But you haven't placed the > second > >>> ISB after reading. Is it because we haven't used the get_cycles in seq lock > >>> critical context (Maybe I didn't find the right place)? So should we need to > fix it > >>> now, or you prefer to fix it now for future usage? > >> > >> Looking at the call sites, it doesn't look like we need any ISB after > >> get_cycles as it is not used in any critical context. There is also a > >> data dependency with the value returned by it. > > I am assuming you looked at all the users of NOW(). Is that right? > > >> > >> So I am thinking we don't need any fix. At most we need an in-code comment? > > > > I agree with you to add an in-code comment. It will remind us in future when > we > > use the get_cycles in critical context. Adding it now will probably only lead to > > meaningless performance degradation. > > I read this as there would be no perfomance impact if we add the > ordering it now. Did you intend to say? Sorry about my English. I intended to say "Adding it now may introduce some performance cost. And this performance cost may be not worth. Because Xen may never use it in a similar scenario " > > > Because Xen may never use it in a similar > > scenario. > > Right, there are two potentials approach here: > - Wait until there are a user > * Pros: Doesn't impact performance > * Cons: We rely on users/reviewers to catch any misuse > - Harden the code > * Pros: Less risk to introduce a bug inadvertently > * Cons: May impact the performance > > In general, I prefer that the code is hardened by default if the > performance impact is limited. This may save us hours of > debugging/reproducing bug. > From a preventive point of view, you're right. > In addition, AFAICT, the x86 version of get_cycles() is already able to > provide that ordering. So there are chances that code may rely on it. > > While I don't necessarily agree to add barriers everywhere by default > (this may have big impact on the platform). I think it is better to have > an accurate number of cycles. > As x86 had done it, I think it’s ok to do it for Arm. This will keep a function behaves the same on different architectures. Thanks, Wei Chen > Cheers, > > -- > Julien Grall
On 26/11/2020 11:27, Wei Chen wrote: > Hi Julien, Hi Wei, >> -----Original Message----- >> From: Julien Grall <julien@xen.org> >> Sent: 2020年11月26日 18:55 >> To: Wei Chen <Wei.Chen@arm.com>; Stefano Stabellini <sstabellini@kernel.org> >> Cc: Penny Zheng <Penny.Zheng@arm.com>; xen-devel@lists.xenproject.org; >> Andre Przywara <Andre.Przywara@arm.com>; Bertrand Marquis >> <Bertrand.Marquis@arm.com>; Kaly Xin <Kaly.Xin@arm.com>; nd >> <nd@arm.com> >> Subject: Re: [PATCH] xen/arm: Add Cortex-A73 erratum 858921 workaround >> >> Hi Wei, >> >> Your e-mail font seems to be different to the usual plain text one. Are >> you sending the e-mail using HTML by any chance? >> > > It's strange, I always use the plain text. Maybe exchange decided to mangle the e-mail :). Anyway, this new message looks fine. > >> On 26/11/2020 02:07, Wei Chen wrote: >>> Hi Stefano, >>> >>>> -----Original Message----- >>>> From: Stefano Stabellini <sstabellini@kernel.org> >>>> Sent: 2020��11��26�� 8:00 >>>> To: Wei Chen <Wei.Chen@arm.com> >>>> Cc: Julien Grall <julien@xen.org>; Penny Zheng <Penny.Zheng@arm.com>; >> xen- >>>> devel@lists.xenproject.org; sstabellini@kernel.org; Andre Przywara >>>> <Andre.Przywara@arm.com>; Bertrand Marquis >> <Bertrand.Marquis@arm.com>; >>>> Kaly Xin <Kaly.Xin@arm.com>; nd <nd@arm.com> >>>> Subject: RE: [PATCH] xen/arm: Add Cortex-A73 erratum 858921 workaround >>>> >>>> Resuming this old thread. >>>> >>>> On Fri, 13 Nov 2020, Wei Chen wrote: >>>>>> Hi, >>>>>> >>>>>> On 09/11/2020 08:21, Penny Zheng wrote: >>>>>>> CNTVCT_EL0 or CNTPCT_EL0 counter read in Cortex-A73 (all versions) >>>>>>> might return a wrong value when the counter crosses a 32bit boundary. >>>>>>> >>>>>>> Until now, there is no case for Xen itself to access CNTVCT_EL0, >>>>>>> and it also should be the Guest OS's responsibility to deal with >>>>>>> this part. >>>>>>> >>>>>>> But for CNTPCT, there exists several cases in Xen involving reading >>>>>>> CNTPCT, so a possible workaround is that performing the read twice, >>>>>>> and to return one or the other depending on whether a transition has >>>>>>> taken place. >>>>>>> >>>>>>> Signed-off-by: Penny Zheng <penny.zheng@arm.com> >>>>>> >>>>>> Acked-by: Julien Grall <jgrall@amazon.com> >>>>>> >>>>>> On a related topic, do we need a fix similar to Linux commit >>>>>> 75a19a0202db "arm64: arch_timer: Ensure counter register reads occur >>>>>> with seqlock held"? >>>>>> >>>>> >>>>> I take a look at this Linux commit, it seems to prevent the seq-lock to be >>>>> speculated. Using an enforce ordering instead of ISB after the read counter >>>>> operation seems to be for performance reasons. >>>>> >>>>> I have found that you had placed an ISB before read counter in get_cycles >>>>> to prevent counter value to be speculated. But you haven't placed the >> second >>>>> ISB after reading. Is it because we haven't used the get_cycles in seq lock >>>>> critical context (Maybe I didn't find the right place)? So should we need to >> fix it >>>>> now, or you prefer to fix it now for future usage? >>>> >>>> Looking at the call sites, it doesn't look like we need any ISB after >>>> get_cycles as it is not used in any critical context. There is also a >>>> data dependency with the value returned by it. >> >> I am assuming you looked at all the users of NOW(). Is that right? >> >>>> >>>> So I am thinking we don't need any fix. At most we need an in-code comment? >>> >>> I agree with you to add an in-code comment. It will remind us in future when >> we >>> use the get_cycles in critical context. Adding it now will probably only lead to >>> meaningless performance degradation. >> >> I read this as there would be no perfomance impact if we add the >> ordering it now. Did you intend to say? > > Sorry about my English. I intended to say "Adding it now may introduce some > performance cost. And this performance cost may be not worth. Because Xen > may never use it in a similar scenario " Don't worry! I think the performance should not be noticeable if we use the same trick as Linux. >> In addition, AFAICT, the x86 version of get_cycles() is already able to >> provide that ordering. So there are chances that code may rely on it. >> >> While I don't necessarily agree to add barriers everywhere by default >> (this may have big impact on the platform). I think it is better to have >> an accurate number of cycles. >> > > As x86 had done it, I think it’s ok to do it for Arm. This will keep a function > behaves the same on different architectures. Just to be clear, I am not 100% sure this is what Intel is doing. Although this is my understanding of the comment in the code. @Stefano, what do you think? @Wei, assuming Stefano is happy with the proposal, would you be happy to send a patch for that? Best regards, -- Julien Grall
Hi Julien, > -----Original Message----- > From: Julien Grall <julien@xen.org> > Sent: 2020年12月3日 2:11 > To: Wei Chen <Wei.Chen@arm.com>; Stefano Stabellini <sstabellini@kernel.org> > Cc: Penny Zheng <Penny.Zheng@arm.com>; xen-devel@lists.xenproject.org; > Andre Przywara <Andre.Przywara@arm.com>; Bertrand Marquis > <Bertrand.Marquis@arm.com>; Kaly Xin <Kaly.Xin@arm.com>; nd > <nd@arm.com> > Subject: Re: [PATCH] xen/arm: Add Cortex-A73 erratum 858921 workaround > > > > On 26/11/2020 11:27, Wei Chen wrote: > > Hi Julien, > > Hi Wei, > > >> -----Original Message----- > >> From: Julien Grall <julien@xen.org> > >> Sent: 2020年11月26日 18:55 > >> To: Wei Chen <Wei.Chen@arm.com>; Stefano Stabellini > <sstabellini@kernel.org> > >> Cc: Penny Zheng <Penny.Zheng@arm.com>; xen-devel@lists.xenproject.org; > >> Andre Przywara <Andre.Przywara@arm.com>; Bertrand Marquis > >> <Bertrand.Marquis@arm.com>; Kaly Xin <Kaly.Xin@arm.com>; nd > >> <nd@arm.com> > >> Subject: Re: [PATCH] xen/arm: Add Cortex-A73 erratum 858921 workaround > >> > >> Hi Wei, > >> > >> Your e-mail font seems to be different to the usual plain text one. Are > >> you sending the e-mail using HTML by any chance? > >> > > > > It's strange, I always use the plain text. > > Maybe exchange decided to mangle the e-mail :). Anyway, this new message > looks fine. > > > > >> On 26/11/2020 02:07, Wei Chen wrote: > >>> Hi Stefano, > >>> > >>>> -----Original Message----- > >>>> From: Stefano Stabellini <sstabellini@kernel.org> > >>>> Sent: 2020��11��26�� 8:00 > >>>> To: Wei Chen <Wei.Chen@arm.com> > >>>> Cc: Julien Grall <julien@xen.org>; Penny Zheng <Penny.Zheng@arm.com>; > >> xen- > >>>> devel@lists.xenproject.org; sstabellini@kernel.org; Andre Przywara > >>>> <Andre.Przywara@arm.com>; Bertrand Marquis > >> <Bertrand.Marquis@arm.com>; > >>>> Kaly Xin <Kaly.Xin@arm.com>; nd <nd@arm.com> > >>>> Subject: RE: [PATCH] xen/arm: Add Cortex-A73 erratum 858921 > workaround > >>>> > >>>> Resuming this old thread. > >>>> > >>>> On Fri, 13 Nov 2020, Wei Chen wrote: > >>>>>> Hi, > >>>>>> > >>>>>> On 09/11/2020 08:21, Penny Zheng wrote: > >>>>>>> CNTVCT_EL0 or CNTPCT_EL0 counter read in Cortex-A73 (all versions) > >>>>>>> might return a wrong value when the counter crosses a 32bit boundary. > >>>>>>> > >>>>>>> Until now, there is no case for Xen itself to access CNTVCT_EL0, > >>>>>>> and it also should be the Guest OS's responsibility to deal with > >>>>>>> this part. > >>>>>>> > >>>>>>> But for CNTPCT, there exists several cases in Xen involving reading > >>>>>>> CNTPCT, so a possible workaround is that performing the read twice, > >>>>>>> and to return one or the other depending on whether a transition has > >>>>>>> taken place. > >>>>>>> > >>>>>>> Signed-off-by: Penny Zheng <penny.zheng@arm.com> > >>>>>> > >>>>>> Acked-by: Julien Grall <jgrall@amazon.com> > >>>>>> > >>>>>> On a related topic, do we need a fix similar to Linux commit > >>>>>> 75a19a0202db "arm64: arch_timer: Ensure counter register reads occur > >>>>>> with seqlock held"? > >>>>>> > >>>>> > >>>>> I take a look at this Linux commit, it seems to prevent the seq-lock to be > >>>>> speculated. Using an enforce ordering instead of ISB after the read > counter > >>>>> operation seems to be for performance reasons. > >>>>> > >>>>> I have found that you had placed an ISB before read counter in get_cycles > >>>>> to prevent counter value to be speculated. But you haven't placed the > >> second > >>>>> ISB after reading. Is it because we haven't used the get_cycles in seq lock > >>>>> critical context (Maybe I didn't find the right place)? So should we need to > >> fix it > >>>>> now, or you prefer to fix it now for future usage? > >>>> > >>>> Looking at the call sites, it doesn't look like we need any ISB after > >>>> get_cycles as it is not used in any critical context. There is also a > >>>> data dependency with the value returned by it. > >> > >> I am assuming you looked at all the users of NOW(). Is that right? > >> > >>>> > >>>> So I am thinking we don't need any fix. At most we need an in-code > comment? > >>> > >>> I agree with you to add an in-code comment. It will remind us in future > when > >> we > >>> use the get_cycles in critical context. Adding it now will probably only lead > to > >>> meaningless performance degradation. > >> > >> I read this as there would be no perfomance impact if we add the > >> ordering it now. Did you intend to say? > > > > Sorry about my English. I intended to say "Adding it now may introduce some > > performance cost. And this performance cost may be not worth. Because Xen > > may never use it in a similar scenario " > > Don't worry! I think the performance should not be noticeable if we use > the same trick as Linux. > > >> In addition, AFAICT, the x86 version of get_cycles() is already able to > >> provide that ordering. So there are chances that code may rely on it. > >> > >> While I don't necessarily agree to add barriers everywhere by default > >> (this may have big impact on the platform). I think it is better to have > >> an accurate number of cycles. > >> > > > > As x86 had done it, I think it’s ok to do it for Arm. This will keep a function > > behaves the same on different architectures. > > Just to be clear, I am not 100% sure this is what Intel is doing. > Although this is my understanding of the comment in the code. > > @Stefano, what do you think? > > @Wei, assuming Stefano is happy with the proposal, would you be happy to > send a patch for that? > Of course, I am willing to do that. It seems the enforce_ordering patch has been merged. And Vincenzo reported the enforce_ordering method will have ~4.5 performance improvement[1] (Compare to ISB). So I will use enforce_ordering method directly instead of using ISB. [1]https://lkml.org/lkml/2020/3/13/645 > Best regards, > > -- > Julien Grall
On Thu, 3 Dec 2020, Wei Chen wrote: > Hi Julien, > > > -----Original Message----- > > From: Julien Grall <julien@xen.org> > > Sent: 2020年12月3日 2:11 > > To: Wei Chen <Wei.Chen@arm.com>; Stefano Stabellini <sstabellini@kernel.org> > > Cc: Penny Zheng <Penny.Zheng@arm.com>; xen-devel@lists.xenproject.org; > > Andre Przywara <Andre.Przywara@arm.com>; Bertrand Marquis > > <Bertrand.Marquis@arm.com>; Kaly Xin <Kaly.Xin@arm.com>; nd > > <nd@arm.com> > > Subject: Re: [PATCH] xen/arm: Add Cortex-A73 erratum 858921 workaround > > > > > > > > On 26/11/2020 11:27, Wei Chen wrote: > > > Hi Julien, > > > > Hi Wei, > > > > >> -----Original Message----- > > >> From: Julien Grall <julien@xen.org> > > >> Sent: 2020年11月26日 18:55 > > >> To: Wei Chen <Wei.Chen@arm.com>; Stefano Stabellini > > <sstabellini@kernel.org> > > >> Cc: Penny Zheng <Penny.Zheng@arm.com>; xen-devel@lists.xenproject.org; > > >> Andre Przywara <Andre.Przywara@arm.com>; Bertrand Marquis > > >> <Bertrand.Marquis@arm.com>; Kaly Xin <Kaly.Xin@arm.com>; nd > > >> <nd@arm.com> > > >> Subject: Re: [PATCH] xen/arm: Add Cortex-A73 erratum 858921 workaround > > >> > > >> Hi Wei, > > >> > > >> Your e-mail font seems to be different to the usual plain text one. Are > > >> you sending the e-mail using HTML by any chance? > > >> > > > > > > It's strange, I always use the plain text. > > > > Maybe exchange decided to mangle the e-mail :). Anyway, this new message > > looks fine. > > > > > > > >> On 26/11/2020 02:07, Wei Chen wrote: > > >>> Hi Stefano, > > >>> > > >>>> -----Original Message----- > > >>>> From: Stefano Stabellini <sstabellini@kernel.org> > > >>>> Sent: 2020??????11??????26?????? 8:00 > > >>>> To: Wei Chen <Wei.Chen@arm.com> > > >>>> Cc: Julien Grall <julien@xen.org>; Penny Zheng <Penny.Zheng@arm.com>; > > >> xen- > > >>>> devel@lists.xenproject.org; sstabellini@kernel.org; Andre Przywara > > >>>> <Andre.Przywara@arm.com>; Bertrand Marquis > > >> <Bertrand.Marquis@arm.com>; > > >>>> Kaly Xin <Kaly.Xin@arm.com>; nd <nd@arm.com> > > >>>> Subject: RE: [PATCH] xen/arm: Add Cortex-A73 erratum 858921 > > workaround > > >>>> > > >>>> Resuming this old thread. > > >>>> > > >>>> On Fri, 13 Nov 2020, Wei Chen wrote: > > >>>>>> Hi, > > >>>>>> > > >>>>>> On 09/11/2020 08:21, Penny Zheng wrote: > > >>>>>>> CNTVCT_EL0 or CNTPCT_EL0 counter read in Cortex-A73 (all versions) > > >>>>>>> might return a wrong value when the counter crosses a 32bit boundary. > > >>>>>>> > > >>>>>>> Until now, there is no case for Xen itself to access CNTVCT_EL0, > > >>>>>>> and it also should be the Guest OS's responsibility to deal with > > >>>>>>> this part. > > >>>>>>> > > >>>>>>> But for CNTPCT, there exists several cases in Xen involving reading > > >>>>>>> CNTPCT, so a possible workaround is that performing the read twice, > > >>>>>>> and to return one or the other depending on whether a transition has > > >>>>>>> taken place. > > >>>>>>> > > >>>>>>> Signed-off-by: Penny Zheng <penny.zheng@arm.com> > > >>>>>> > > >>>>>> Acked-by: Julien Grall <jgrall@amazon.com> > > >>>>>> > > >>>>>> On a related topic, do we need a fix similar to Linux commit > > >>>>>> 75a19a0202db "arm64: arch_timer: Ensure counter register reads occur > > >>>>>> with seqlock held"? > > >>>>>> > > >>>>> > > >>>>> I take a look at this Linux commit, it seems to prevent the seq-lock to be > > >>>>> speculated. Using an enforce ordering instead of ISB after the read > > counter > > >>>>> operation seems to be for performance reasons. > > >>>>> > > >>>>> I have found that you had placed an ISB before read counter in get_cycles > > >>>>> to prevent counter value to be speculated. But you haven't placed the > > >> second > > >>>>> ISB after reading. Is it because we haven't used the get_cycles in seq lock > > >>>>> critical context (Maybe I didn't find the right place)? So should we need to > > >> fix it > > >>>>> now, or you prefer to fix it now for future usage? > > >>>> > > >>>> Looking at the call sites, it doesn't look like we need any ISB after > > >>>> get_cycles as it is not used in any critical context. There is also a > > >>>> data dependency with the value returned by it. > > >> > > >> I am assuming you looked at all the users of NOW(). Is that right? > > >> > > >>>> > > >>>> So I am thinking we don't need any fix. At most we need an in-code > > comment? > > >>> > > >>> I agree with you to add an in-code comment. It will remind us in future > > when > > >> we > > >>> use the get_cycles in critical context. Adding it now will probably only lead > > to > > >>> meaningless performance degradation. > > >> > > >> I read this as there would be no perfomance impact if we add the > > >> ordering it now. Did you intend to say? > > > > > > Sorry about my English. I intended to say "Adding it now may introduce some > > > performance cost. And this performance cost may be not worth. Because Xen > > > may never use it in a similar scenario " > > > > Don't worry! I think the performance should not be noticeable if we use > > the same trick as Linux. > > > > >> In addition, AFAICT, the x86 version of get_cycles() is already able to > > >> provide that ordering. So there are chances that code may rely on it. > > >> > > >> While I don't necessarily agree to add barriers everywhere by default > > >> (this may have big impact on the platform). I think it is better to have > > >> an accurate number of cycles. > > >> > > > > > > As x86 had done it, I think it’s ok to do it for Arm. This will keep a function > > > behaves the same on different architectures. > > > > Just to be clear, I am not 100% sure this is what Intel is doing. > > Although this is my understanding of the comment in the code. > > > > @Stefano, what do you think? > > > > @Wei, assuming Stefano is happy with the proposal, would you be happy to > > send a patch for that? > > > > Of course, I am willing to do that. It seems the enforce_ordering patch has been > merged. And Vincenzo reported the enforce_ordering method will have ~4.5 > performance improvement[1] (Compare to ISB). So I will use enforce_ordering > method directly instead of using ISB. > > [1]https://lkml.org/lkml/2020/3/13/645 If we can enforce ordering without adding an ISB, I am all for it.
Hi Penny,
> -----Original Message-----
> From: Penny Zheng <penny.zheng@arm.com>
> Sent: 2020年11月9日 16:21
> To: xen-devel@lists.xenproject.org; sstabellini@kernel.org; julien@xen.org
> Cc: Andre Przywara <Andre.Przywara@arm.com>; Bertrand Marquis
> <Bertrand.Marquis@arm.com>; Wei Chen <Wei.Chen@arm.com>; Penny Zheng
> <Penny.Zheng@arm.com>; Kaly Xin <Kaly.Xin@arm.com>; nd <nd@arm.com>
> Subject: [PATCH] xen/arm: Add Cortex-A73 erratum 858921 workaround
>
> CNTVCT_EL0 or CNTPCT_EL0 counter read in Cortex-A73 (all versions)
> might return a wrong value when the counter crosses a 32bit boundary.
>
> Until now, there is no case for Xen itself to access CNTVCT_EL0,
> and it also should be the Guest OS's responsibility to deal with
> this part.
>
> But for CNTPCT, there exists several cases in Xen involving reading
> CNTPCT, so a possible workaround is that performing the read twice,
> and to return one or the other depending on whether a transition has
> taken place.
>
> Signed-off-by: Penny Zheng <penny.zheng@arm.com>
Reviewed-by: Wei Chen <Wei.Chen@arm.com>
Thanks,
Wei Chen
> ---
> docs/misc/arm/silicon-errata.txt | 1 +
> xen/arch/arm/Kconfig | 18 ++++++++++++++++++
> xen/arch/arm/cpuerrata.c | 8 ++++++++
> xen/arch/arm/vtimer.c | 2 +-
> xen/include/asm-arm/cpuerrata.h | 2 ++
> xen/include/asm-arm/cpufeature.h | 3 ++-
> xen/include/asm-arm/time.h | 22 +++++++++++++++++++++-
> 7 files changed, 53 insertions(+), 3 deletions(-)
>
> diff --git a/docs/misc/arm/silicon-errata.txt b/docs/misc/arm/silicon-errata.txt
> index 1f18a9df58..552c4151d3 100644
> --- a/docs/misc/arm/silicon-errata.txt
> +++ b/docs/misc/arm/silicon-errata.txt
> @@ -51,6 +51,7 @@ stable hypervisors.
> | ARM | Cortex-A57 | #1319537 | N/A |
> | ARM | Cortex-A72 | #1319367 | N/A |
> | ARM | Cortex-A72 | #853709 | N/A |
> +| ARM | Cortex-A73 | #858921 | ARM_ERRATUM_858921 |
> | ARM | Cortex-A76 | #1165522 | N/A |
> | ARM | Neoverse-N1 | #1165522 | N/A
> | ARM | MMU-500 | #842869 | N/A |
> diff --git a/xen/arch/arm/Kconfig b/xen/arch/arm/Kconfig
> index 2777388265..f938dd21bd 100644
> --- a/xen/arch/arm/Kconfig
> +++ b/xen/arch/arm/Kconfig
> @@ -226,6 +226,24 @@ config ARM64_ERRATUM_834220
>
> If unsure, say Y.
>
> +config ARM_ERRATUM_858921
> + bool "Cortex-A73: 858921: Possible wrong read value for CNTVCT or
> CNTPCT"
> + default y
> + help
> + This option adds an alternative code sequence to work around ARM
> + erratum 858921 on Cortex-A73 (all versions).
> +
> + Affected Cortex-A73 might return wrong read value for CNTVCT or
> CNTPCT
> + when the counter crosses a 32bit boundary.
> +
> + The workaround involves performing the read twice, and to return
> + one or the other value depending on whether a transition has taken
> place.
> + Please note that this does not necessarily enable the workaround,
> + as it depends on the alternative framework, which will only patch
> + the kernel if an affected CPU is detected.
> +
> + If unsure, say Y.
> +
> endmenu
>
> config ARM64_HARDEN_BRANCH_PREDICTOR
> diff --git a/xen/arch/arm/cpuerrata.c b/xen/arch/arm/cpuerrata.c
> index 6731d873e8..567911d380 100644
> --- a/xen/arch/arm/cpuerrata.c
> +++ b/xen/arch/arm/cpuerrata.c
> @@ -469,6 +469,14 @@ static const struct arm_cpu_capabilities arm_errata[] =
> {
> .capability = ARM_SSBD,
> .matches = has_ssbd_mitigation,
> },
> +#endif
> +#ifdef CONFIG_ARM_ERRATUM_858921
> + {
> + /* Cortex-A73 (all versions) */
> + .desc = "ARM erratum 858921",
> + .capability = ARM_WORKAROUND_858921,
> + MIDR_ALL_VERSIONS(MIDR_CORTEX_A73),
> + },
> #endif
> {
> /* Neoverse r0p0 - r2p0 */
> diff --git a/xen/arch/arm/vtimer.c b/xen/arch/arm/vtimer.c
> index 6d39fc944f..c2b27915c6 100644
> --- a/xen/arch/arm/vtimer.c
> +++ b/xen/arch/arm/vtimer.c
> @@ -62,7 +62,7 @@ static void virt_timer_expired(void *data)
>
> int domain_vtimer_init(struct domain *d, struct xen_arch_domainconfig
> *config)
> {
> - d->arch.virt_timer_base.offset = READ_SYSREG64(CNTPCT_EL0);
> + d->arch.virt_timer_base.offset = get_cycles();
> d->time_offset.seconds = ticks_to_ns(d->arch.virt_timer_base.offset -
> boot_count);
> do_div(d->time_offset.seconds, 1000000000);
>
> diff --git a/xen/include/asm-arm/cpuerrata.h b/xen/include/asm-
> arm/cpuerrata.h
> index 88ef3ca934..8d7e7b9375 100644
> --- a/xen/include/asm-arm/cpuerrata.h
> +++ b/xen/include/asm-arm/cpuerrata.h
> @@ -28,6 +28,8 @@ static inline bool check_workaround_##erratum(void)
> \
> CHECK_WORKAROUND_HELPER(766422, ARM32_WORKAROUND_766422,
> CONFIG_ARM_32)
> CHECK_WORKAROUND_HELPER(834220, ARM64_WORKAROUND_834220,
> CONFIG_ARM_64)
> CHECK_WORKAROUND_HELPER(ssbd, ARM_SSBD, CONFIG_ARM_SSBD)
> +CHECK_WORKAROUND_HELPER(858921, ARM_WORKAROUND_858921,
> + CONFIG_ARM_ERRATUM_858921)
>
> #undef CHECK_WORKAROUND_HELPER
>
> diff --git a/xen/include/asm-arm/cpufeature.h b/xen/include/asm-
> arm/cpufeature.h
> index 10878ead8a..016a9fe203 100644
> --- a/xen/include/asm-arm/cpufeature.h
> +++ b/xen/include/asm-arm/cpufeature.h
> @@ -45,8 +45,9 @@
> #define ARM_SSBD 7
> #define ARM_SMCCC_1_1 8
> #define ARM64_WORKAROUND_AT_SPECULATE 9
> +#define ARM_WORKAROUND_858921 10
>
> -#define ARM_NCAPS 10
> +#define ARM_NCAPS 11
>
> #ifndef __ASSEMBLY__
>
> diff --git a/xen/include/asm-arm/time.h b/xen/include/asm-arm/time.h
> index 9cb6f9b0b4..1b2c13614b 100644
> --- a/xen/include/asm-arm/time.h
> +++ b/xen/include/asm-arm/time.h
> @@ -3,6 +3,7 @@
>
> #include <asm/sysregs.h>
> #include <asm/system.h>
> +#include <asm/cpuerrata.h>
>
> #define DT_MATCH_TIMER \
> DT_MATCH_COMPATIBLE("arm,armv7-timer"), \
> @@ -13,7 +14,26 @@ typedef uint64_t cycles_t;
> static inline cycles_t get_cycles (void)
> {
> isb();
> - return READ_SYSREG64(CNTPCT_EL0);
> + /*
> + * ARM_WORKAROUND_858921: Cortex-A73 (all versions) counter read
> + * can return a wrong value when the counter crosses a 32bit boundary.
> + */
> + if ( !check_workaround_858921() )
> + return READ_SYSREG64(CNTPCT_EL0);
> + else
> + {
> + /*
> + * A recommended workaround for erratum 858921 is to:
> + * 1- Read twice CNTPCT.
> + * 2- Compare bit[32] of the two read values.
> + * - If bit[32] is different, keep the old value.
> + * - If bit[32] is the same, keep the new value.
> + */
> + cycles_t old, new;
> + old = READ_SYSREG64(CNTPCT_EL0);
> + new = READ_SYSREG64(CNTPCT_EL0);
> + return (((old ^ new) >> 32) & 1) ? old : new;
> + }
> }
>
> /* List of timer's IRQ */
> --
> 2.25.1
© 2016 - 2026 Red Hat, Inc.