docs/misc/arm/silicon-errata.txt | 1 + xen/arch/arm/Kconfig | 18 ++++++++++++++++++ xen/arch/arm/cpuerrata.c | 8 ++++++++ xen/arch/arm/vtimer.c | 2 +- xen/include/asm-arm/cpuerrata.h | 2 ++ xen/include/asm-arm/cpufeature.h | 3 ++- xen/include/asm-arm/time.h | 22 +++++++++++++++++++++- 7 files changed, 53 insertions(+), 3 deletions(-)
CNTVCT_EL0 or CNTPCT_EL0 counter read in Cortex-A73 (all versions)
might return a wrong value when the counter crosses a 32bit boundary.
Until now, there is no case for Xen itself to access CNTVCT_EL0,
and it also should be the Guest OS's responsibility to deal with
this part.
But for CNTPCT, there exists several cases in Xen involving reading
CNTPCT, so a possible workaround is that performing the read twice,
and to return one or the other depending on whether a transition has
taken place.
Signed-off-by: Penny Zheng <penny.zheng@arm.com>
---
docs/misc/arm/silicon-errata.txt | 1 +
xen/arch/arm/Kconfig | 18 ++++++++++++++++++
xen/arch/arm/cpuerrata.c | 8 ++++++++
xen/arch/arm/vtimer.c | 2 +-
xen/include/asm-arm/cpuerrata.h | 2 ++
xen/include/asm-arm/cpufeature.h | 3 ++-
xen/include/asm-arm/time.h | 22 +++++++++++++++++++++-
7 files changed, 53 insertions(+), 3 deletions(-)
diff --git a/docs/misc/arm/silicon-errata.txt b/docs/misc/arm/silicon-errata.txt
index 1f18a9df58..552c4151d3 100644
--- a/docs/misc/arm/silicon-errata.txt
+++ b/docs/misc/arm/silicon-errata.txt
@@ -51,6 +51,7 @@ stable hypervisors.
| ARM | Cortex-A57 | #1319537 | N/A |
| ARM | Cortex-A72 | #1319367 | N/A |
| ARM | Cortex-A72 | #853709 | N/A |
+| ARM | Cortex-A73 | #858921 | ARM_ERRATUM_858921 |
| ARM | Cortex-A76 | #1165522 | N/A |
| ARM | Neoverse-N1 | #1165522 | N/A
| ARM | MMU-500 | #842869 | N/A |
diff --git a/xen/arch/arm/Kconfig b/xen/arch/arm/Kconfig
index 2777388265..f938dd21bd 100644
--- a/xen/arch/arm/Kconfig
+++ b/xen/arch/arm/Kconfig
@@ -226,6 +226,24 @@ config ARM64_ERRATUM_834220
If unsure, say Y.
+config ARM_ERRATUM_858921
+ bool "Cortex-A73: 858921: Possible wrong read value for CNTVCT or CNTPCT"
+ default y
+ help
+ This option adds an alternative code sequence to work around ARM
+ erratum 858921 on Cortex-A73 (all versions).
+
+ Affected Cortex-A73 might return wrong read value for CNTVCT or CNTPCT
+ when the counter crosses a 32bit boundary.
+
+ The workaround involves performing the read twice, and to return
+ one or the other value depending on whether a transition has taken place.
+ Please note that this does not necessarily enable the workaround,
+ as it depends on the alternative framework, which will only patch
+ the kernel if an affected CPU is detected.
+
+ If unsure, say Y.
+
endmenu
config ARM64_HARDEN_BRANCH_PREDICTOR
diff --git a/xen/arch/arm/cpuerrata.c b/xen/arch/arm/cpuerrata.c
index 6731d873e8..567911d380 100644
--- a/xen/arch/arm/cpuerrata.c
+++ b/xen/arch/arm/cpuerrata.c
@@ -469,6 +469,14 @@ static const struct arm_cpu_capabilities arm_errata[] = {
.capability = ARM_SSBD,
.matches = has_ssbd_mitigation,
},
+#endif
+#ifdef CONFIG_ARM_ERRATUM_858921
+ {
+ /* Cortex-A73 (all versions) */
+ .desc = "ARM erratum 858921",
+ .capability = ARM_WORKAROUND_858921,
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A73),
+ },
#endif
{
/* Neoverse r0p0 - r2p0 */
diff --git a/xen/arch/arm/vtimer.c b/xen/arch/arm/vtimer.c
index 6d39fc944f..c2b27915c6 100644
--- a/xen/arch/arm/vtimer.c
+++ b/xen/arch/arm/vtimer.c
@@ -62,7 +62,7 @@ static void virt_timer_expired(void *data)
int domain_vtimer_init(struct domain *d, struct xen_arch_domainconfig *config)
{
- d->arch.virt_timer_base.offset = READ_SYSREG64(CNTPCT_EL0);
+ d->arch.virt_timer_base.offset = get_cycles();
d->time_offset.seconds = ticks_to_ns(d->arch.virt_timer_base.offset - boot_count);
do_div(d->time_offset.seconds, 1000000000);
diff --git a/xen/include/asm-arm/cpuerrata.h b/xen/include/asm-arm/cpuerrata.h
index 88ef3ca934..8d7e7b9375 100644
--- a/xen/include/asm-arm/cpuerrata.h
+++ b/xen/include/asm-arm/cpuerrata.h
@@ -28,6 +28,8 @@ static inline bool check_workaround_##erratum(void) \
CHECK_WORKAROUND_HELPER(766422, ARM32_WORKAROUND_766422, CONFIG_ARM_32)
CHECK_WORKAROUND_HELPER(834220, ARM64_WORKAROUND_834220, CONFIG_ARM_64)
CHECK_WORKAROUND_HELPER(ssbd, ARM_SSBD, CONFIG_ARM_SSBD)
+CHECK_WORKAROUND_HELPER(858921, ARM_WORKAROUND_858921,
+ CONFIG_ARM_ERRATUM_858921)
#undef CHECK_WORKAROUND_HELPER
diff --git a/xen/include/asm-arm/cpufeature.h b/xen/include/asm-arm/cpufeature.h
index 10878ead8a..016a9fe203 100644
--- a/xen/include/asm-arm/cpufeature.h
+++ b/xen/include/asm-arm/cpufeature.h
@@ -45,8 +45,9 @@
#define ARM_SSBD 7
#define ARM_SMCCC_1_1 8
#define ARM64_WORKAROUND_AT_SPECULATE 9
+#define ARM_WORKAROUND_858921 10
-#define ARM_NCAPS 10
+#define ARM_NCAPS 11
#ifndef __ASSEMBLY__
diff --git a/xen/include/asm-arm/time.h b/xen/include/asm-arm/time.h
index 9cb6f9b0b4..1b2c13614b 100644
--- a/xen/include/asm-arm/time.h
+++ b/xen/include/asm-arm/time.h
@@ -3,6 +3,7 @@
#include <asm/sysregs.h>
#include <asm/system.h>
+#include <asm/cpuerrata.h>
#define DT_MATCH_TIMER \
DT_MATCH_COMPATIBLE("arm,armv7-timer"), \
@@ -13,7 +14,26 @@ typedef uint64_t cycles_t;
static inline cycles_t get_cycles (void)
{
isb();
- return READ_SYSREG64(CNTPCT_EL0);
+ /*
+ * ARM_WORKAROUND_858921: Cortex-A73 (all versions) counter read
+ * can return a wrong value when the counter crosses a 32bit boundary.
+ */
+ if ( !check_workaround_858921() )
+ return READ_SYSREG64(CNTPCT_EL0);
+ else
+ {
+ /*
+ * A recommended workaround for erratum 858921 is to:
+ * 1- Read twice CNTPCT.
+ * 2- Compare bit[32] of the two read values.
+ * - If bit[32] is different, keep the old value.
+ * - If bit[32] is the same, keep the new value.
+ */
+ cycles_t old, new;
+ old = READ_SYSREG64(CNTPCT_EL0);
+ new = READ_SYSREG64(CNTPCT_EL0);
+ return (((old ^ new) >> 32) & 1) ? old : new;
+ }
}
/* List of timer's IRQ */
--
2.25.1
Hi, > On 9 Nov 2020, at 08:21, Penny Zheng <Penny.Zheng@arm.com> wrote: > > CNTVCT_EL0 or CNTPCT_EL0 counter read in Cortex-A73 (all versions) > might return a wrong value when the counter crosses a 32bit boundary. > > Until now, there is no case for Xen itself to access CNTVCT_EL0, > and it also should be the Guest OS's responsibility to deal with > this part. > > But for CNTPCT, there exists several cases in Xen involving reading > CNTPCT, so a possible workaround is that performing the read twice, > and to return one or the other depending on whether a transition has > taken place. > > Signed-off-by: Penny Zheng <penny.zheng@arm.com> Reviewed-by: Bertrand Marquis <bertrand.marquis@arm.com> Regards Bertrand > --- > docs/misc/arm/silicon-errata.txt | 1 + > xen/arch/arm/Kconfig | 18 ++++++++++++++++++ > xen/arch/arm/cpuerrata.c | 8 ++++++++ > xen/arch/arm/vtimer.c | 2 +- > xen/include/asm-arm/cpuerrata.h | 2 ++ > xen/include/asm-arm/cpufeature.h | 3 ++- > xen/include/asm-arm/time.h | 22 +++++++++++++++++++++- > 7 files changed, 53 insertions(+), 3 deletions(-) > > diff --git a/docs/misc/arm/silicon-errata.txt b/docs/misc/arm/silicon-errata.txt > index 1f18a9df58..552c4151d3 100644 > --- a/docs/misc/arm/silicon-errata.txt > +++ b/docs/misc/arm/silicon-errata.txt > @@ -51,6 +51,7 @@ stable hypervisors. > | ARM | Cortex-A57 | #1319537 | N/A | > | ARM | Cortex-A72 | #1319367 | N/A | > | ARM | Cortex-A72 | #853709 | N/A | > +| ARM | Cortex-A73 | #858921 | ARM_ERRATUM_858921 | > | ARM | Cortex-A76 | #1165522 | N/A | > | ARM | Neoverse-N1 | #1165522 | N/A > | ARM | MMU-500 | #842869 | N/A | > diff --git a/xen/arch/arm/Kconfig b/xen/arch/arm/Kconfig > index 2777388265..f938dd21bd 100644 > --- a/xen/arch/arm/Kconfig > +++ b/xen/arch/arm/Kconfig > @@ -226,6 +226,24 @@ config ARM64_ERRATUM_834220 > > If unsure, say Y. > > +config ARM_ERRATUM_858921 > + bool "Cortex-A73: 858921: Possible wrong read value for CNTVCT or CNTPCT" > + default y > + help > + This option adds an alternative code sequence to work around ARM > + erratum 858921 on Cortex-A73 (all versions). > + > + Affected Cortex-A73 might return wrong read value for CNTVCT or CNTPCT > + when the counter crosses a 32bit boundary. > + > + The workaround involves performing the read twice, and to return > + one or the other value depending on whether a transition has taken place. > + Please note that this does not necessarily enable the workaround, > + as it depends on the alternative framework, which will only patch > + the kernel if an affected CPU is detected. > + > + If unsure, say Y. > + > endmenu > > config ARM64_HARDEN_BRANCH_PREDICTOR > diff --git a/xen/arch/arm/cpuerrata.c b/xen/arch/arm/cpuerrata.c > index 6731d873e8..567911d380 100644 > --- a/xen/arch/arm/cpuerrata.c > +++ b/xen/arch/arm/cpuerrata.c > @@ -469,6 +469,14 @@ static const struct arm_cpu_capabilities arm_errata[] = { > .capability = ARM_SSBD, > .matches = has_ssbd_mitigation, > }, > +#endif > +#ifdef CONFIG_ARM_ERRATUM_858921 > + { > + /* Cortex-A73 (all versions) */ > + .desc = "ARM erratum 858921", > + .capability = ARM_WORKAROUND_858921, > + MIDR_ALL_VERSIONS(MIDR_CORTEX_A73), > + }, > #endif > { > /* Neoverse r0p0 - r2p0 */ > diff --git a/xen/arch/arm/vtimer.c b/xen/arch/arm/vtimer.c > index 6d39fc944f..c2b27915c6 100644 > --- a/xen/arch/arm/vtimer.c > +++ b/xen/arch/arm/vtimer.c > @@ -62,7 +62,7 @@ static void virt_timer_expired(void *data) > > int domain_vtimer_init(struct domain *d, struct xen_arch_domainconfig *config) > { > - d->arch.virt_timer_base.offset = READ_SYSREG64(CNTPCT_EL0); > + d->arch.virt_timer_base.offset = get_cycles(); > d->time_offset.seconds = ticks_to_ns(d->arch.virt_timer_base.offset - boot_count); > do_div(d->time_offset.seconds, 1000000000); > > diff --git a/xen/include/asm-arm/cpuerrata.h b/xen/include/asm-arm/cpuerrata.h > index 88ef3ca934..8d7e7b9375 100644 > --- a/xen/include/asm-arm/cpuerrata.h > +++ b/xen/include/asm-arm/cpuerrata.h > @@ -28,6 +28,8 @@ static inline bool check_workaround_##erratum(void) \ > CHECK_WORKAROUND_HELPER(766422, ARM32_WORKAROUND_766422, CONFIG_ARM_32) > CHECK_WORKAROUND_HELPER(834220, ARM64_WORKAROUND_834220, CONFIG_ARM_64) > CHECK_WORKAROUND_HELPER(ssbd, ARM_SSBD, CONFIG_ARM_SSBD) > +CHECK_WORKAROUND_HELPER(858921, ARM_WORKAROUND_858921, > + CONFIG_ARM_ERRATUM_858921) > > #undef CHECK_WORKAROUND_HELPER > > diff --git a/xen/include/asm-arm/cpufeature.h b/xen/include/asm-arm/cpufeature.h > index 10878ead8a..016a9fe203 100644 > --- a/xen/include/asm-arm/cpufeature.h > +++ b/xen/include/asm-arm/cpufeature.h > @@ -45,8 +45,9 @@ > #define ARM_SSBD 7 > #define ARM_SMCCC_1_1 8 > #define ARM64_WORKAROUND_AT_SPECULATE 9 > +#define ARM_WORKAROUND_858921 10 > > -#define ARM_NCAPS 10 > +#define ARM_NCAPS 11 > > #ifndef __ASSEMBLY__ > > diff --git a/xen/include/asm-arm/time.h b/xen/include/asm-arm/time.h > index 9cb6f9b0b4..1b2c13614b 100644 > --- a/xen/include/asm-arm/time.h > +++ b/xen/include/asm-arm/time.h > @@ -3,6 +3,7 @@ > > #include <asm/sysregs.h> > #include <asm/system.h> > +#include <asm/cpuerrata.h> > > #define DT_MATCH_TIMER \ > DT_MATCH_COMPATIBLE("arm,armv7-timer"), \ > @@ -13,7 +14,26 @@ typedef uint64_t cycles_t; > static inline cycles_t get_cycles (void) > { > isb(); > - return READ_SYSREG64(CNTPCT_EL0); > + /* > + * ARM_WORKAROUND_858921: Cortex-A73 (all versions) counter read > + * can return a wrong value when the counter crosses a 32bit boundary. > + */ > + if ( !check_workaround_858921() ) > + return READ_SYSREG64(CNTPCT_EL0); > + else > + { > + /* > + * A recommended workaround for erratum 858921 is to: > + * 1- Read twice CNTPCT. > + * 2- Compare bit[32] of the two read values. > + * - If bit[32] is different, keep the old value. > + * - If bit[32] is the same, keep the new value. > + */ > + cycles_t old, new; > + old = READ_SYSREG64(CNTPCT_EL0); > + new = READ_SYSREG64(CNTPCT_EL0); > + return (((old ^ new) >> 32) & 1) ? old : new; > + } > } > > /* List of timer's IRQ */ > -- > 2.25.1 >
Hi, On 09/11/2020 08:21, Penny Zheng wrote: > CNTVCT_EL0 or CNTPCT_EL0 counter read in Cortex-A73 (all versions) > might return a wrong value when the counter crosses a 32bit boundary. > > Until now, there is no case for Xen itself to access CNTVCT_EL0, > and it also should be the Guest OS's responsibility to deal with > this part. > > But for CNTPCT, there exists several cases in Xen involving reading > CNTPCT, so a possible workaround is that performing the read twice, > and to return one or the other depending on whether a transition has > taken place. > > Signed-off-by: Penny Zheng <penny.zheng@arm.com> Acked-by: Julien Grall <jgrall@amazon.com> On a related topic, do we need a fix similar to Linux commit 75a19a0202db "arm64: arch_timer: Ensure counter register reads occur with seqlock held"? Cheers, -- Julien Grall
> -----Original Message----- > From: Julien Grall <julien@xen.org> > Sent: Monday, November 9, 2020 8:04 PM > To: Penny Zheng <Penny.Zheng@arm.com>; xen-devel@lists.xenproject.org; > sstabellini@kernel.org > Cc: Andre Przywara <Andre.Przywara@arm.com>; Bertrand Marquis > <Bertrand.Marquis@arm.com>; Wei Chen <Wei.Chen@arm.com>; Kaly Xin > <Kaly.Xin@arm.com>; nd <nd@arm.com> > Subject: Re: [PATCH] xen/arm: Add Cortex-A73 erratum 858921 workaround > > Hi, > > On 09/11/2020 08:21, Penny Zheng wrote: > > CNTVCT_EL0 or CNTPCT_EL0 counter read in Cortex-A73 (all versions) > > might return a wrong value when the counter crosses a 32bit boundary. > > > > Until now, there is no case for Xen itself to access CNTVCT_EL0, and > > it also should be the Guest OS's responsibility to deal with this > > part. > > > > But for CNTPCT, there exists several cases in Xen involving reading > > CNTPCT, so a possible workaround is that performing the read twice, > > and to return one or the other depending on whether a transition has > > taken place. > > > > Signed-off-by: Penny Zheng <penny.zheng@arm.com> > > Acked-by: Julien Grall <jgrall@amazon.com> > Thank you. 😉 > On a related topic, do we need a fix similar to Linux commit 75a19a0202db > "arm64: arch_timer: Ensure counter register reads occur with seqlock held"? > Sure, I'll check this commit and talk with my teams for further work. Cheers -- Penny Zheng > Cheers, > > -- > Julien Grall
HI Julien, > -----Original Message----- > From: Julien Grall <julien@xen.org> > Sent: 2020年11月9日 20:04 > To: Penny Zheng <Penny.Zheng@arm.com>; xen-devel@lists.xenproject.org; > sstabellini@kernel.org > Cc: Andre Przywara <Andre.Przywara@arm.com>; Bertrand Marquis > <Bertrand.Marquis@arm.com>; Wei Chen <Wei.Chen@arm.com>; Kaly Xin > <Kaly.Xin@arm.com>; nd <nd@arm.com> > Subject: Re: [PATCH] xen/arm: Add Cortex-A73 erratum 858921 workaround > > Hi, > > On 09/11/2020 08:21, Penny Zheng wrote: > > CNTVCT_EL0 or CNTPCT_EL0 counter read in Cortex-A73 (all versions) > > might return a wrong value when the counter crosses a 32bit boundary. > > > > Until now, there is no case for Xen itself to access CNTVCT_EL0, > > and it also should be the Guest OS's responsibility to deal with > > this part. > > > > But for CNTPCT, there exists several cases in Xen involving reading > > CNTPCT, so a possible workaround is that performing the read twice, > > and to return one or the other depending on whether a transition has > > taken place. > > > > Signed-off-by: Penny Zheng <penny.zheng@arm.com> > > Acked-by: Julien Grall <jgrall@amazon.com> > > On a related topic, do we need a fix similar to Linux commit > 75a19a0202db "arm64: arch_timer: Ensure counter register reads occur > with seqlock held"? > I take a look at this Linux commit, it seems to prevent the seq-lock to be speculated. Using an enforce ordering instead of ISB after the read counter operation seems to be for performance reasons. I have found that you had placed an ISB before read counter in get_cycles to prevent counter value to be speculated. But you haven't placed the second ISB after reading. Is it because we haven't used the get_cycles in seq lock critical context (Maybe I didn't find the right place)? So should we need to fix it now, or you prefer to fix it now for future usage? Regards, Wei Chen > Cheers, > > -- > Julien Grall
Resuming this old thread. On Fri, 13 Nov 2020, Wei Chen wrote: > > Hi, > > > > On 09/11/2020 08:21, Penny Zheng wrote: > > > CNTVCT_EL0 or CNTPCT_EL0 counter read in Cortex-A73 (all versions) > > > might return a wrong value when the counter crosses a 32bit boundary. > > > > > > Until now, there is no case for Xen itself to access CNTVCT_EL0, > > > and it also should be the Guest OS's responsibility to deal with > > > this part. > > > > > > But for CNTPCT, there exists several cases in Xen involving reading > > > CNTPCT, so a possible workaround is that performing the read twice, > > > and to return one or the other depending on whether a transition has > > > taken place. > > > > > > Signed-off-by: Penny Zheng <penny.zheng@arm.com> > > > > Acked-by: Julien Grall <jgrall@amazon.com> > > > > On a related topic, do we need a fix similar to Linux commit > > 75a19a0202db "arm64: arch_timer: Ensure counter register reads occur > > with seqlock held"? > > > > I take a look at this Linux commit, it seems to prevent the seq-lock to be > speculated. Using an enforce ordering instead of ISB after the read counter > operation seems to be for performance reasons. > > I have found that you had placed an ISB before read counter in get_cycles > to prevent counter value to be speculated. But you haven't placed the second > ISB after reading. Is it because we haven't used the get_cycles in seq lock > critical context (Maybe I didn't find the right place)? So should we need to fix it > now, or you prefer to fix it now for future usage? Looking at the call sites, it doesn't look like we need any ISB after get_cycles as it is not used in any critical context. There is also a data dependency with the value returned by it. So I am thinking we don't need any fix. At most we need an in-code comment?
Hi Stefano, > -----Original Message----- > From: Stefano Stabellini <sstabellini@kernel.org> > Sent: 2020年11月26日 8:00 > To: Wei Chen <Wei.Chen@arm.com> > Cc: Julien Grall <julien@xen.org>; Penny Zheng <Penny.Zheng@arm.com>; xen- > devel@lists.xenproject.org; sstabellini@kernel.org; Andre Przywara > <Andre.Przywara@arm.com>; Bertrand Marquis <Bertrand.Marquis@arm.com>; > Kaly Xin <Kaly.Xin@arm.com>; nd <nd@arm.com> > Subject: RE: [PATCH] xen/arm: Add Cortex-A73 erratum 858921 workaround > > Resuming this old thread. > > On Fri, 13 Nov 2020, Wei Chen wrote: > > > Hi, > > > > > > On 09/11/2020 08:21, Penny Zheng wrote: > > > > CNTVCT_EL0 or CNTPCT_EL0 counter read in Cortex-A73 (all versions) > > > > might return a wrong value when the counter crosses a 32bit boundary. > > > > > > > > Until now, there is no case for Xen itself to access CNTVCT_EL0, > > > > and it also should be the Guest OS's responsibility to deal with > > > > this part. > > > > > > > > But for CNTPCT, there exists several cases in Xen involving reading > > > > CNTPCT, so a possible workaround is that performing the read twice, > > > > and to return one or the other depending on whether a transition has > > > > taken place. > > > > > > > > Signed-off-by: Penny Zheng <penny.zheng@arm.com> > > > > > > Acked-by: Julien Grall <jgrall@amazon.com> > > > > > > On a related topic, do we need a fix similar to Linux commit > > > 75a19a0202db "arm64: arch_timer: Ensure counter register reads occur > > > with seqlock held"? > > > > > > > I take a look at this Linux commit, it seems to prevent the seq-lock to be > > speculated. Using an enforce ordering instead of ISB after the read counter > > operation seems to be for performance reasons. > > > > I have found that you had placed an ISB before read counter in get_cycles > > to prevent counter value to be speculated. But you haven't placed the second > > ISB after reading. Is it because we haven't used the get_cycles in seq lock > > critical context (Maybe I didn't find the right place)? So should we need to fix it > > now, or you prefer to fix it now for future usage? > > Looking at the call sites, it doesn't look like we need any ISB after > get_cycles as it is not used in any critical context. There is also a > data dependency with the value returned by it. > > So I am thinking we don't need any fix. At most we need an in-code comment? I agree with you to add an in-code comment. It will remind us in future when we use the get_cycles in critical context. Adding it now will probably only lead to meaningless performance degradation. Because Xen may never use it in a similar scenario. BTW: Can we send a patch that just contains a pure comment : ) Regards, Wei Chen
Hi Wei, Your e-mail font seems to be different to the usual plain text one. Are you sending the e-mail using HTML by any chance? On 26/11/2020 02:07, Wei Chen wrote: > Hi Stefano, > >> -----Original Message----- >> From: Stefano Stabellini <sstabellini@kernel.org> >> Sent: 2020年11月26日 8:00 >> To: Wei Chen <Wei.Chen@arm.com> >> Cc: Julien Grall <julien@xen.org>; Penny Zheng <Penny.Zheng@arm.com>; xen- >> devel@lists.xenproject.org; sstabellini@kernel.org; Andre Przywara >> <Andre.Przywara@arm.com>; Bertrand Marquis <Bertrand.Marquis@arm.com>; >> Kaly Xin <Kaly.Xin@arm.com>; nd <nd@arm.com> >> Subject: RE: [PATCH] xen/arm: Add Cortex-A73 erratum 858921 workaround >> >> Resuming this old thread. >> >> On Fri, 13 Nov 2020, Wei Chen wrote: >>>> Hi, >>>> >>>> On 09/11/2020 08:21, Penny Zheng wrote: >>>>> CNTVCT_EL0 or CNTPCT_EL0 counter read in Cortex-A73 (all versions) >>>>> might return a wrong value when the counter crosses a 32bit boundary. >>>>> >>>>> Until now, there is no case for Xen itself to access CNTVCT_EL0, >>>>> and it also should be the Guest OS's responsibility to deal with >>>>> this part. >>>>> >>>>> But for CNTPCT, there exists several cases in Xen involving reading >>>>> CNTPCT, so a possible workaround is that performing the read twice, >>>>> and to return one or the other depending on whether a transition has >>>>> taken place. >>>>> >>>>> Signed-off-by: Penny Zheng <penny.zheng@arm.com> >>>> >>>> Acked-by: Julien Grall <jgrall@amazon.com> >>>> >>>> On a related topic, do we need a fix similar to Linux commit >>>> 75a19a0202db "arm64: arch_timer: Ensure counter register reads occur >>>> with seqlock held"? >>>> >>> >>> I take a look at this Linux commit, it seems to prevent the seq-lock to be >>> speculated. Using an enforce ordering instead of ISB after the read counter >>> operation seems to be for performance reasons. >>> >>> I have found that you had placed an ISB before read counter in get_cycles >>> to prevent counter value to be speculated. But you haven't placed the second >>> ISB after reading. Is it because we haven't used the get_cycles in seq lock >>> critical context (Maybe I didn't find the right place)? So should we need to fix it >>> now, or you prefer to fix it now for future usage? >> >> Looking at the call sites, it doesn't look like we need any ISB after >> get_cycles as it is not used in any critical context. There is also a >> data dependency with the value returned by it. I am assuming you looked at all the users of NOW(). Is that right? >> >> So I am thinking we don't need any fix. At most we need an in-code comment? > > I agree with you to add an in-code comment. It will remind us in future when we > use the get_cycles in critical context. Adding it now will probably only lead to > meaningless performance degradation. I read this as there would be no perfomance impact if we add the ordering it now. Did you intend to say? > Because Xen may never use it in a similar > scenario. Right, there are two potentials approach here: - Wait until there are a user * Pros: Doesn't impact performance * Cons: We rely on users/reviewers to catch any misuse - Harden the code * Pros: Less risk to introduce a bug inadvertently * Cons: May impact the performance In general, I prefer that the code is hardened by default if the performance impact is limited. This may save us hours of debugging/reproducing bug. In addition, AFAICT, the x86 version of get_cycles() is already able to provide that ordering. So there are chances that code may rely on it. While I don't necessarily agree to add barriers everywhere by default (this may have big impact on the platform). I think it is better to have an accurate number of cycles. Cheers, -- Julien Grall
Hi Julien, > -----Original Message----- > From: Julien Grall <julien@xen.org> > Sent: 2020年11月26日 18:55 > To: Wei Chen <Wei.Chen@arm.com>; Stefano Stabellini <sstabellini@kernel.org> > Cc: Penny Zheng <Penny.Zheng@arm.com>; xen-devel@lists.xenproject.org; > Andre Przywara <Andre.Przywara@arm.com>; Bertrand Marquis > <Bertrand.Marquis@arm.com>; Kaly Xin <Kaly.Xin@arm.com>; nd > <nd@arm.com> > Subject: Re: [PATCH] xen/arm: Add Cortex-A73 erratum 858921 workaround > > Hi Wei, > > Your e-mail font seems to be different to the usual plain text one. Are > you sending the e-mail using HTML by any chance? > It's strange, I always use the plain text. > On 26/11/2020 02:07, Wei Chen wrote: > > Hi Stefano, > > > >> -----Original Message----- > >> From: Stefano Stabellini <sstabellini@kernel.org> > >> Sent: 2020��11��26�� 8:00 > >> To: Wei Chen <Wei.Chen@arm.com> > >> Cc: Julien Grall <julien@xen.org>; Penny Zheng <Penny.Zheng@arm.com>; > xen- > >> devel@lists.xenproject.org; sstabellini@kernel.org; Andre Przywara > >> <Andre.Przywara@arm.com>; Bertrand Marquis > <Bertrand.Marquis@arm.com>; > >> Kaly Xin <Kaly.Xin@arm.com>; nd <nd@arm.com> > >> Subject: RE: [PATCH] xen/arm: Add Cortex-A73 erratum 858921 workaround > >> > >> Resuming this old thread. > >> > >> On Fri, 13 Nov 2020, Wei Chen wrote: > >>>> Hi, > >>>> > >>>> On 09/11/2020 08:21, Penny Zheng wrote: > >>>>> CNTVCT_EL0 or CNTPCT_EL0 counter read in Cortex-A73 (all versions) > >>>>> might return a wrong value when the counter crosses a 32bit boundary. > >>>>> > >>>>> Until now, there is no case for Xen itself to access CNTVCT_EL0, > >>>>> and it also should be the Guest OS's responsibility to deal with > >>>>> this part. > >>>>> > >>>>> But for CNTPCT, there exists several cases in Xen involving reading > >>>>> CNTPCT, so a possible workaround is that performing the read twice, > >>>>> and to return one or the other depending on whether a transition has > >>>>> taken place. > >>>>> > >>>>> Signed-off-by: Penny Zheng <penny.zheng@arm.com> > >>>> > >>>> Acked-by: Julien Grall <jgrall@amazon.com> > >>>> > >>>> On a related topic, do we need a fix similar to Linux commit > >>>> 75a19a0202db "arm64: arch_timer: Ensure counter register reads occur > >>>> with seqlock held"? > >>>> > >>> > >>> I take a look at this Linux commit, it seems to prevent the seq-lock to be > >>> speculated. Using an enforce ordering instead of ISB after the read counter > >>> operation seems to be for performance reasons. > >>> > >>> I have found that you had placed an ISB before read counter in get_cycles > >>> to prevent counter value to be speculated. But you haven't placed the > second > >>> ISB after reading. Is it because we haven't used the get_cycles in seq lock > >>> critical context (Maybe I didn't find the right place)? So should we need to > fix it > >>> now, or you prefer to fix it now for future usage? > >> > >> Looking at the call sites, it doesn't look like we need any ISB after > >> get_cycles as it is not used in any critical context. There is also a > >> data dependency with the value returned by it. > > I am assuming you looked at all the users of NOW(). Is that right? > > >> > >> So I am thinking we don't need any fix. At most we need an in-code comment? > > > > I agree with you to add an in-code comment. It will remind us in future when > we > > use the get_cycles in critical context. Adding it now will probably only lead to > > meaningless performance degradation. > > I read this as there would be no perfomance impact if we add the > ordering it now. Did you intend to say? Sorry about my English. I intended to say "Adding it now may introduce some performance cost. And this performance cost may be not worth. Because Xen may never use it in a similar scenario " > > > Because Xen may never use it in a similar > > scenario. > > Right, there are two potentials approach here: > - Wait until there are a user > * Pros: Doesn't impact performance > * Cons: We rely on users/reviewers to catch any misuse > - Harden the code > * Pros: Less risk to introduce a bug inadvertently > * Cons: May impact the performance > > In general, I prefer that the code is hardened by default if the > performance impact is limited. This may save us hours of > debugging/reproducing bug. > From a preventive point of view, you're right. > In addition, AFAICT, the x86 version of get_cycles() is already able to > provide that ordering. So there are chances that code may rely on it. > > While I don't necessarily agree to add barriers everywhere by default > (this may have big impact on the platform). I think it is better to have > an accurate number of cycles. > As x86 had done it, I think it’s ok to do it for Arm. This will keep a function behaves the same on different architectures. Thanks, Wei Chen > Cheers, > > -- > Julien Grall
On 26/11/2020 11:27, Wei Chen wrote: > Hi Julien, Hi Wei, >> -----Original Message----- >> From: Julien Grall <julien@xen.org> >> Sent: 2020年11月26日 18:55 >> To: Wei Chen <Wei.Chen@arm.com>; Stefano Stabellini <sstabellini@kernel.org> >> Cc: Penny Zheng <Penny.Zheng@arm.com>; xen-devel@lists.xenproject.org; >> Andre Przywara <Andre.Przywara@arm.com>; Bertrand Marquis >> <Bertrand.Marquis@arm.com>; Kaly Xin <Kaly.Xin@arm.com>; nd >> <nd@arm.com> >> Subject: Re: [PATCH] xen/arm: Add Cortex-A73 erratum 858921 workaround >> >> Hi Wei, >> >> Your e-mail font seems to be different to the usual plain text one. Are >> you sending the e-mail using HTML by any chance? >> > > It's strange, I always use the plain text. Maybe exchange decided to mangle the e-mail :). Anyway, this new message looks fine. > >> On 26/11/2020 02:07, Wei Chen wrote: >>> Hi Stefano, >>> >>>> -----Original Message----- >>>> From: Stefano Stabellini <sstabellini@kernel.org> >>>> Sent: 2020��11��26�� 8:00 >>>> To: Wei Chen <Wei.Chen@arm.com> >>>> Cc: Julien Grall <julien@xen.org>; Penny Zheng <Penny.Zheng@arm.com>; >> xen- >>>> devel@lists.xenproject.org; sstabellini@kernel.org; Andre Przywara >>>> <Andre.Przywara@arm.com>; Bertrand Marquis >> <Bertrand.Marquis@arm.com>; >>>> Kaly Xin <Kaly.Xin@arm.com>; nd <nd@arm.com> >>>> Subject: RE: [PATCH] xen/arm: Add Cortex-A73 erratum 858921 workaround >>>> >>>> Resuming this old thread. >>>> >>>> On Fri, 13 Nov 2020, Wei Chen wrote: >>>>>> Hi, >>>>>> >>>>>> On 09/11/2020 08:21, Penny Zheng wrote: >>>>>>> CNTVCT_EL0 or CNTPCT_EL0 counter read in Cortex-A73 (all versions) >>>>>>> might return a wrong value when the counter crosses a 32bit boundary. >>>>>>> >>>>>>> Until now, there is no case for Xen itself to access CNTVCT_EL0, >>>>>>> and it also should be the Guest OS's responsibility to deal with >>>>>>> this part. >>>>>>> >>>>>>> But for CNTPCT, there exists several cases in Xen involving reading >>>>>>> CNTPCT, so a possible workaround is that performing the read twice, >>>>>>> and to return one or the other depending on whether a transition has >>>>>>> taken place. >>>>>>> >>>>>>> Signed-off-by: Penny Zheng <penny.zheng@arm.com> >>>>>> >>>>>> Acked-by: Julien Grall <jgrall@amazon.com> >>>>>> >>>>>> On a related topic, do we need a fix similar to Linux commit >>>>>> 75a19a0202db "arm64: arch_timer: Ensure counter register reads occur >>>>>> with seqlock held"? >>>>>> >>>>> >>>>> I take a look at this Linux commit, it seems to prevent the seq-lock to be >>>>> speculated. Using an enforce ordering instead of ISB after the read counter >>>>> operation seems to be for performance reasons. >>>>> >>>>> I have found that you had placed an ISB before read counter in get_cycles >>>>> to prevent counter value to be speculated. But you haven't placed the >> second >>>>> ISB after reading. Is it because we haven't used the get_cycles in seq lock >>>>> critical context (Maybe I didn't find the right place)? So should we need to >> fix it >>>>> now, or you prefer to fix it now for future usage? >>>> >>>> Looking at the call sites, it doesn't look like we need any ISB after >>>> get_cycles as it is not used in any critical context. There is also a >>>> data dependency with the value returned by it. >> >> I am assuming you looked at all the users of NOW(). Is that right? >> >>>> >>>> So I am thinking we don't need any fix. At most we need an in-code comment? >>> >>> I agree with you to add an in-code comment. It will remind us in future when >> we >>> use the get_cycles in critical context. Adding it now will probably only lead to >>> meaningless performance degradation. >> >> I read this as there would be no perfomance impact if we add the >> ordering it now. Did you intend to say? > > Sorry about my English. I intended to say "Adding it now may introduce some > performance cost. And this performance cost may be not worth. Because Xen > may never use it in a similar scenario " Don't worry! I think the performance should not be noticeable if we use the same trick as Linux. >> In addition, AFAICT, the x86 version of get_cycles() is already able to >> provide that ordering. So there are chances that code may rely on it. >> >> While I don't necessarily agree to add barriers everywhere by default >> (this may have big impact on the platform). I think it is better to have >> an accurate number of cycles. >> > > As x86 had done it, I think it’s ok to do it for Arm. This will keep a function > behaves the same on different architectures. Just to be clear, I am not 100% sure this is what Intel is doing. Although this is my understanding of the comment in the code. @Stefano, what do you think? @Wei, assuming Stefano is happy with the proposal, would you be happy to send a patch for that? Best regards, -- Julien Grall
Hi Julien, > -----Original Message----- > From: Julien Grall <julien@xen.org> > Sent: 2020年12月3日 2:11 > To: Wei Chen <Wei.Chen@arm.com>; Stefano Stabellini <sstabellini@kernel.org> > Cc: Penny Zheng <Penny.Zheng@arm.com>; xen-devel@lists.xenproject.org; > Andre Przywara <Andre.Przywara@arm.com>; Bertrand Marquis > <Bertrand.Marquis@arm.com>; Kaly Xin <Kaly.Xin@arm.com>; nd > <nd@arm.com> > Subject: Re: [PATCH] xen/arm: Add Cortex-A73 erratum 858921 workaround > > > > On 26/11/2020 11:27, Wei Chen wrote: > > Hi Julien, > > Hi Wei, > > >> -----Original Message----- > >> From: Julien Grall <julien@xen.org> > >> Sent: 2020年11月26日 18:55 > >> To: Wei Chen <Wei.Chen@arm.com>; Stefano Stabellini > <sstabellini@kernel.org> > >> Cc: Penny Zheng <Penny.Zheng@arm.com>; xen-devel@lists.xenproject.org; > >> Andre Przywara <Andre.Przywara@arm.com>; Bertrand Marquis > >> <Bertrand.Marquis@arm.com>; Kaly Xin <Kaly.Xin@arm.com>; nd > >> <nd@arm.com> > >> Subject: Re: [PATCH] xen/arm: Add Cortex-A73 erratum 858921 workaround > >> > >> Hi Wei, > >> > >> Your e-mail font seems to be different to the usual plain text one. Are > >> you sending the e-mail using HTML by any chance? > >> > > > > It's strange, I always use the plain text. > > Maybe exchange decided to mangle the e-mail :). Anyway, this new message > looks fine. > > > > >> On 26/11/2020 02:07, Wei Chen wrote: > >>> Hi Stefano, > >>> > >>>> -----Original Message----- > >>>> From: Stefano Stabellini <sstabellini@kernel.org> > >>>> Sent: 2020��11��26�� 8:00 > >>>> To: Wei Chen <Wei.Chen@arm.com> > >>>> Cc: Julien Grall <julien@xen.org>; Penny Zheng <Penny.Zheng@arm.com>; > >> xen- > >>>> devel@lists.xenproject.org; sstabellini@kernel.org; Andre Przywara > >>>> <Andre.Przywara@arm.com>; Bertrand Marquis > >> <Bertrand.Marquis@arm.com>; > >>>> Kaly Xin <Kaly.Xin@arm.com>; nd <nd@arm.com> > >>>> Subject: RE: [PATCH] xen/arm: Add Cortex-A73 erratum 858921 > workaround > >>>> > >>>> Resuming this old thread. > >>>> > >>>> On Fri, 13 Nov 2020, Wei Chen wrote: > >>>>>> Hi, > >>>>>> > >>>>>> On 09/11/2020 08:21, Penny Zheng wrote: > >>>>>>> CNTVCT_EL0 or CNTPCT_EL0 counter read in Cortex-A73 (all versions) > >>>>>>> might return a wrong value when the counter crosses a 32bit boundary. > >>>>>>> > >>>>>>> Until now, there is no case for Xen itself to access CNTVCT_EL0, > >>>>>>> and it also should be the Guest OS's responsibility to deal with > >>>>>>> this part. > >>>>>>> > >>>>>>> But for CNTPCT, there exists several cases in Xen involving reading > >>>>>>> CNTPCT, so a possible workaround is that performing the read twice, > >>>>>>> and to return one or the other depending on whether a transition has > >>>>>>> taken place. > >>>>>>> > >>>>>>> Signed-off-by: Penny Zheng <penny.zheng@arm.com> > >>>>>> > >>>>>> Acked-by: Julien Grall <jgrall@amazon.com> > >>>>>> > >>>>>> On a related topic, do we need a fix similar to Linux commit > >>>>>> 75a19a0202db "arm64: arch_timer: Ensure counter register reads occur > >>>>>> with seqlock held"? > >>>>>> > >>>>> > >>>>> I take a look at this Linux commit, it seems to prevent the seq-lock to be > >>>>> speculated. Using an enforce ordering instead of ISB after the read > counter > >>>>> operation seems to be for performance reasons. > >>>>> > >>>>> I have found that you had placed an ISB before read counter in get_cycles > >>>>> to prevent counter value to be speculated. But you haven't placed the > >> second > >>>>> ISB after reading. Is it because we haven't used the get_cycles in seq lock > >>>>> critical context (Maybe I didn't find the right place)? So should we need to > >> fix it > >>>>> now, or you prefer to fix it now for future usage? > >>>> > >>>> Looking at the call sites, it doesn't look like we need any ISB after > >>>> get_cycles as it is not used in any critical context. There is also a > >>>> data dependency with the value returned by it. > >> > >> I am assuming you looked at all the users of NOW(). Is that right? > >> > >>>> > >>>> So I am thinking we don't need any fix. At most we need an in-code > comment? > >>> > >>> I agree with you to add an in-code comment. It will remind us in future > when > >> we > >>> use the get_cycles in critical context. Adding it now will probably only lead > to > >>> meaningless performance degradation. > >> > >> I read this as there would be no perfomance impact if we add the > >> ordering it now. Did you intend to say? > > > > Sorry about my English. I intended to say "Adding it now may introduce some > > performance cost. And this performance cost may be not worth. Because Xen > > may never use it in a similar scenario " > > Don't worry! I think the performance should not be noticeable if we use > the same trick as Linux. > > >> In addition, AFAICT, the x86 version of get_cycles() is already able to > >> provide that ordering. So there are chances that code may rely on it. > >> > >> While I don't necessarily agree to add barriers everywhere by default > >> (this may have big impact on the platform). I think it is better to have > >> an accurate number of cycles. > >> > > > > As x86 had done it, I think it’s ok to do it for Arm. This will keep a function > > behaves the same on different architectures. > > Just to be clear, I am not 100% sure this is what Intel is doing. > Although this is my understanding of the comment in the code. > > @Stefano, what do you think? > > @Wei, assuming Stefano is happy with the proposal, would you be happy to > send a patch for that? > Of course, I am willing to do that. It seems the enforce_ordering patch has been merged. And Vincenzo reported the enforce_ordering method will have ~4.5 performance improvement[1] (Compare to ISB). So I will use enforce_ordering method directly instead of using ISB. [1]https://lkml.org/lkml/2020/3/13/645 > Best regards, > > -- > Julien Grall
On Thu, 3 Dec 2020, Wei Chen wrote: > Hi Julien, > > > -----Original Message----- > > From: Julien Grall <julien@xen.org> > > Sent: 2020年12月3日 2:11 > > To: Wei Chen <Wei.Chen@arm.com>; Stefano Stabellini <sstabellini@kernel.org> > > Cc: Penny Zheng <Penny.Zheng@arm.com>; xen-devel@lists.xenproject.org; > > Andre Przywara <Andre.Przywara@arm.com>; Bertrand Marquis > > <Bertrand.Marquis@arm.com>; Kaly Xin <Kaly.Xin@arm.com>; nd > > <nd@arm.com> > > Subject: Re: [PATCH] xen/arm: Add Cortex-A73 erratum 858921 workaround > > > > > > > > On 26/11/2020 11:27, Wei Chen wrote: > > > Hi Julien, > > > > Hi Wei, > > > > >> -----Original Message----- > > >> From: Julien Grall <julien@xen.org> > > >> Sent: 2020年11月26日 18:55 > > >> To: Wei Chen <Wei.Chen@arm.com>; Stefano Stabellini > > <sstabellini@kernel.org> > > >> Cc: Penny Zheng <Penny.Zheng@arm.com>; xen-devel@lists.xenproject.org; > > >> Andre Przywara <Andre.Przywara@arm.com>; Bertrand Marquis > > >> <Bertrand.Marquis@arm.com>; Kaly Xin <Kaly.Xin@arm.com>; nd > > >> <nd@arm.com> > > >> Subject: Re: [PATCH] xen/arm: Add Cortex-A73 erratum 858921 workaround > > >> > > >> Hi Wei, > > >> > > >> Your e-mail font seems to be different to the usual plain text one. Are > > >> you sending the e-mail using HTML by any chance? > > >> > > > > > > It's strange, I always use the plain text. > > > > Maybe exchange decided to mangle the e-mail :). Anyway, this new message > > looks fine. > > > > > > > >> On 26/11/2020 02:07, Wei Chen wrote: > > >>> Hi Stefano, > > >>> > > >>>> -----Original Message----- > > >>>> From: Stefano Stabellini <sstabellini@kernel.org> > > >>>> Sent: 2020??????11??????26?????? 8:00 > > >>>> To: Wei Chen <Wei.Chen@arm.com> > > >>>> Cc: Julien Grall <julien@xen.org>; Penny Zheng <Penny.Zheng@arm.com>; > > >> xen- > > >>>> devel@lists.xenproject.org; sstabellini@kernel.org; Andre Przywara > > >>>> <Andre.Przywara@arm.com>; Bertrand Marquis > > >> <Bertrand.Marquis@arm.com>; > > >>>> Kaly Xin <Kaly.Xin@arm.com>; nd <nd@arm.com> > > >>>> Subject: RE: [PATCH] xen/arm: Add Cortex-A73 erratum 858921 > > workaround > > >>>> > > >>>> Resuming this old thread. > > >>>> > > >>>> On Fri, 13 Nov 2020, Wei Chen wrote: > > >>>>>> Hi, > > >>>>>> > > >>>>>> On 09/11/2020 08:21, Penny Zheng wrote: > > >>>>>>> CNTVCT_EL0 or CNTPCT_EL0 counter read in Cortex-A73 (all versions) > > >>>>>>> might return a wrong value when the counter crosses a 32bit boundary. > > >>>>>>> > > >>>>>>> Until now, there is no case for Xen itself to access CNTVCT_EL0, > > >>>>>>> and it also should be the Guest OS's responsibility to deal with > > >>>>>>> this part. > > >>>>>>> > > >>>>>>> But for CNTPCT, there exists several cases in Xen involving reading > > >>>>>>> CNTPCT, so a possible workaround is that performing the read twice, > > >>>>>>> and to return one or the other depending on whether a transition has > > >>>>>>> taken place. > > >>>>>>> > > >>>>>>> Signed-off-by: Penny Zheng <penny.zheng@arm.com> > > >>>>>> > > >>>>>> Acked-by: Julien Grall <jgrall@amazon.com> > > >>>>>> > > >>>>>> On a related topic, do we need a fix similar to Linux commit > > >>>>>> 75a19a0202db "arm64: arch_timer: Ensure counter register reads occur > > >>>>>> with seqlock held"? > > >>>>>> > > >>>>> > > >>>>> I take a look at this Linux commit, it seems to prevent the seq-lock to be > > >>>>> speculated. Using an enforce ordering instead of ISB after the read > > counter > > >>>>> operation seems to be for performance reasons. > > >>>>> > > >>>>> I have found that you had placed an ISB before read counter in get_cycles > > >>>>> to prevent counter value to be speculated. But you haven't placed the > > >> second > > >>>>> ISB after reading. Is it because we haven't used the get_cycles in seq lock > > >>>>> critical context (Maybe I didn't find the right place)? So should we need to > > >> fix it > > >>>>> now, or you prefer to fix it now for future usage? > > >>>> > > >>>> Looking at the call sites, it doesn't look like we need any ISB after > > >>>> get_cycles as it is not used in any critical context. There is also a > > >>>> data dependency with the value returned by it. > > >> > > >> I am assuming you looked at all the users of NOW(). Is that right? > > >> > > >>>> > > >>>> So I am thinking we don't need any fix. At most we need an in-code > > comment? > > >>> > > >>> I agree with you to add an in-code comment. It will remind us in future > > when > > >> we > > >>> use the get_cycles in critical context. Adding it now will probably only lead > > to > > >>> meaningless performance degradation. > > >> > > >> I read this as there would be no perfomance impact if we add the > > >> ordering it now. Did you intend to say? > > > > > > Sorry about my English. I intended to say "Adding it now may introduce some > > > performance cost. And this performance cost may be not worth. Because Xen > > > may never use it in a similar scenario " > > > > Don't worry! I think the performance should not be noticeable if we use > > the same trick as Linux. > > > > >> In addition, AFAICT, the x86 version of get_cycles() is already able to > > >> provide that ordering. So there are chances that code may rely on it. > > >> > > >> While I don't necessarily agree to add barriers everywhere by default > > >> (this may have big impact on the platform). I think it is better to have > > >> an accurate number of cycles. > > >> > > > > > > As x86 had done it, I think it’s ok to do it for Arm. This will keep a function > > > behaves the same on different architectures. > > > > Just to be clear, I am not 100% sure this is what Intel is doing. > > Although this is my understanding of the comment in the code. > > > > @Stefano, what do you think? > > > > @Wei, assuming Stefano is happy with the proposal, would you be happy to > > send a patch for that? > > > > Of course, I am willing to do that. It seems the enforce_ordering patch has been > merged. And Vincenzo reported the enforce_ordering method will have ~4.5 > performance improvement[1] (Compare to ISB). So I will use enforce_ordering > method directly instead of using ISB. > > [1]https://lkml.org/lkml/2020/3/13/645 If we can enforce ordering without adding an ISB, I am all for it.
Hi Penny, > -----Original Message----- > From: Penny Zheng <penny.zheng@arm.com> > Sent: 2020年11月9日 16:21 > To: xen-devel@lists.xenproject.org; sstabellini@kernel.org; julien@xen.org > Cc: Andre Przywara <Andre.Przywara@arm.com>; Bertrand Marquis > <Bertrand.Marquis@arm.com>; Wei Chen <Wei.Chen@arm.com>; Penny Zheng > <Penny.Zheng@arm.com>; Kaly Xin <Kaly.Xin@arm.com>; nd <nd@arm.com> > Subject: [PATCH] xen/arm: Add Cortex-A73 erratum 858921 workaround > > CNTVCT_EL0 or CNTPCT_EL0 counter read in Cortex-A73 (all versions) > might return a wrong value when the counter crosses a 32bit boundary. > > Until now, there is no case for Xen itself to access CNTVCT_EL0, > and it also should be the Guest OS's responsibility to deal with > this part. > > But for CNTPCT, there exists several cases in Xen involving reading > CNTPCT, so a possible workaround is that performing the read twice, > and to return one or the other depending on whether a transition has > taken place. > > Signed-off-by: Penny Zheng <penny.zheng@arm.com> Reviewed-by: Wei Chen <Wei.Chen@arm.com> Thanks, Wei Chen > --- > docs/misc/arm/silicon-errata.txt | 1 + > xen/arch/arm/Kconfig | 18 ++++++++++++++++++ > xen/arch/arm/cpuerrata.c | 8 ++++++++ > xen/arch/arm/vtimer.c | 2 +- > xen/include/asm-arm/cpuerrata.h | 2 ++ > xen/include/asm-arm/cpufeature.h | 3 ++- > xen/include/asm-arm/time.h | 22 +++++++++++++++++++++- > 7 files changed, 53 insertions(+), 3 deletions(-) > > diff --git a/docs/misc/arm/silicon-errata.txt b/docs/misc/arm/silicon-errata.txt > index 1f18a9df58..552c4151d3 100644 > --- a/docs/misc/arm/silicon-errata.txt > +++ b/docs/misc/arm/silicon-errata.txt > @@ -51,6 +51,7 @@ stable hypervisors. > | ARM | Cortex-A57 | #1319537 | N/A | > | ARM | Cortex-A72 | #1319367 | N/A | > | ARM | Cortex-A72 | #853709 | N/A | > +| ARM | Cortex-A73 | #858921 | ARM_ERRATUM_858921 | > | ARM | Cortex-A76 | #1165522 | N/A | > | ARM | Neoverse-N1 | #1165522 | N/A > | ARM | MMU-500 | #842869 | N/A | > diff --git a/xen/arch/arm/Kconfig b/xen/arch/arm/Kconfig > index 2777388265..f938dd21bd 100644 > --- a/xen/arch/arm/Kconfig > +++ b/xen/arch/arm/Kconfig > @@ -226,6 +226,24 @@ config ARM64_ERRATUM_834220 > > If unsure, say Y. > > +config ARM_ERRATUM_858921 > + bool "Cortex-A73: 858921: Possible wrong read value for CNTVCT or > CNTPCT" > + default y > + help > + This option adds an alternative code sequence to work around ARM > + erratum 858921 on Cortex-A73 (all versions). > + > + Affected Cortex-A73 might return wrong read value for CNTVCT or > CNTPCT > + when the counter crosses a 32bit boundary. > + > + The workaround involves performing the read twice, and to return > + one or the other value depending on whether a transition has taken > place. > + Please note that this does not necessarily enable the workaround, > + as it depends on the alternative framework, which will only patch > + the kernel if an affected CPU is detected. > + > + If unsure, say Y. > + > endmenu > > config ARM64_HARDEN_BRANCH_PREDICTOR > diff --git a/xen/arch/arm/cpuerrata.c b/xen/arch/arm/cpuerrata.c > index 6731d873e8..567911d380 100644 > --- a/xen/arch/arm/cpuerrata.c > +++ b/xen/arch/arm/cpuerrata.c > @@ -469,6 +469,14 @@ static const struct arm_cpu_capabilities arm_errata[] = > { > .capability = ARM_SSBD, > .matches = has_ssbd_mitigation, > }, > +#endif > +#ifdef CONFIG_ARM_ERRATUM_858921 > + { > + /* Cortex-A73 (all versions) */ > + .desc = "ARM erratum 858921", > + .capability = ARM_WORKAROUND_858921, > + MIDR_ALL_VERSIONS(MIDR_CORTEX_A73), > + }, > #endif > { > /* Neoverse r0p0 - r2p0 */ > diff --git a/xen/arch/arm/vtimer.c b/xen/arch/arm/vtimer.c > index 6d39fc944f..c2b27915c6 100644 > --- a/xen/arch/arm/vtimer.c > +++ b/xen/arch/arm/vtimer.c > @@ -62,7 +62,7 @@ static void virt_timer_expired(void *data) > > int domain_vtimer_init(struct domain *d, struct xen_arch_domainconfig > *config) > { > - d->arch.virt_timer_base.offset = READ_SYSREG64(CNTPCT_EL0); > + d->arch.virt_timer_base.offset = get_cycles(); > d->time_offset.seconds = ticks_to_ns(d->arch.virt_timer_base.offset - > boot_count); > do_div(d->time_offset.seconds, 1000000000); > > diff --git a/xen/include/asm-arm/cpuerrata.h b/xen/include/asm- > arm/cpuerrata.h > index 88ef3ca934..8d7e7b9375 100644 > --- a/xen/include/asm-arm/cpuerrata.h > +++ b/xen/include/asm-arm/cpuerrata.h > @@ -28,6 +28,8 @@ static inline bool check_workaround_##erratum(void) > \ > CHECK_WORKAROUND_HELPER(766422, ARM32_WORKAROUND_766422, > CONFIG_ARM_32) > CHECK_WORKAROUND_HELPER(834220, ARM64_WORKAROUND_834220, > CONFIG_ARM_64) > CHECK_WORKAROUND_HELPER(ssbd, ARM_SSBD, CONFIG_ARM_SSBD) > +CHECK_WORKAROUND_HELPER(858921, ARM_WORKAROUND_858921, > + CONFIG_ARM_ERRATUM_858921) > > #undef CHECK_WORKAROUND_HELPER > > diff --git a/xen/include/asm-arm/cpufeature.h b/xen/include/asm- > arm/cpufeature.h > index 10878ead8a..016a9fe203 100644 > --- a/xen/include/asm-arm/cpufeature.h > +++ b/xen/include/asm-arm/cpufeature.h > @@ -45,8 +45,9 @@ > #define ARM_SSBD 7 > #define ARM_SMCCC_1_1 8 > #define ARM64_WORKAROUND_AT_SPECULATE 9 > +#define ARM_WORKAROUND_858921 10 > > -#define ARM_NCAPS 10 > +#define ARM_NCAPS 11 > > #ifndef __ASSEMBLY__ > > diff --git a/xen/include/asm-arm/time.h b/xen/include/asm-arm/time.h > index 9cb6f9b0b4..1b2c13614b 100644 > --- a/xen/include/asm-arm/time.h > +++ b/xen/include/asm-arm/time.h > @@ -3,6 +3,7 @@ > > #include <asm/sysregs.h> > #include <asm/system.h> > +#include <asm/cpuerrata.h> > > #define DT_MATCH_TIMER \ > DT_MATCH_COMPATIBLE("arm,armv7-timer"), \ > @@ -13,7 +14,26 @@ typedef uint64_t cycles_t; > static inline cycles_t get_cycles (void) > { > isb(); > - return READ_SYSREG64(CNTPCT_EL0); > + /* > + * ARM_WORKAROUND_858921: Cortex-A73 (all versions) counter read > + * can return a wrong value when the counter crosses a 32bit boundary. > + */ > + if ( !check_workaround_858921() ) > + return READ_SYSREG64(CNTPCT_EL0); > + else > + { > + /* > + * A recommended workaround for erratum 858921 is to: > + * 1- Read twice CNTPCT. > + * 2- Compare bit[32] of the two read values. > + * - If bit[32] is different, keep the old value. > + * - If bit[32] is the same, keep the new value. > + */ > + cycles_t old, new; > + old = READ_SYSREG64(CNTPCT_EL0); > + new = READ_SYSREG64(CNTPCT_EL0); > + return (((old ^ new) >> 32) & 1) ? old : new; > + } > } > > /* List of timer's IRQ */ > -- > 2.25.1
© 2016 - 2024 Red Hat, Inc.