arch/arm64/include/asm/vdso/gettimeofday.h | 22 ++-------------------- 1 file changed, 2 insertions(+), 20 deletions(-)
While reading how `cntvct_el0` was read in the kernel, I found that
__arch_get_hw_counter() is doing something very similar to what
__arch_counter_get_cntvct() is already doing.
Use the existing __arch_counter_get_cntvct() function instead of
duplicating similar inline assembly code in __arch_get_hw_counter().
Both functions were performing nearly identical operations to read the
cntvct_el0 register. The only difference was that
__arch_get_hw_counter() included a memory clobber in its inline
assembly, which appears unnecessary in this context.
This change simplifies the code by eliminating duplicate functionality
and improves maintainability by centralizing the counter access logic in
a single implementation.
Signed-off-by: Breno Leitao <leitao@debian.org>
---
I'm sharing this code as an RFC since I'm not intimately familiar with
different arm platforms, and I want to double-check that I haven't
missed anything subtle.
---
arch/arm64/include/asm/vdso/gettimeofday.h | 22 ++--------------------
1 file changed, 2 insertions(+), 20 deletions(-)
diff --git a/arch/arm64/include/asm/vdso/gettimeofday.h b/arch/arm64/include/asm/vdso/gettimeofday.h
index 92a2b59a9f3df..417b5b41b877d 100644
--- a/arch/arm64/include/asm/vdso/gettimeofday.h
+++ b/arch/arm64/include/asm/vdso/gettimeofday.h
@@ -11,6 +11,7 @@
#include <asm/barrier.h>
#include <asm/unistd.h>
#include <asm/sysreg.h>
+#include <asm/arch_timer.h>
#define VDSO_HAS_CLOCK_GETRES 1
@@ -69,8 +70,6 @@ int clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
static __always_inline u64 __arch_get_hw_counter(s32 clock_mode,
const struct vdso_time_data *vd)
{
- u64 res;
-
/*
* Core checks for mode already, so this raced against a concurrent
* update. Return something. Core will do another round and then
@@ -79,24 +78,7 @@ static __always_inline u64 __arch_get_hw_counter(s32 clock_mode,
if (clock_mode == VDSO_CLOCKMODE_NONE)
return 0;
- /*
- * If FEAT_ECV is available, use the self-synchronizing counter.
- * Otherwise the isb is required to prevent that the counter value
- * is speculated.
- */
- asm volatile(
- ALTERNATIVE("isb\n"
- "mrs %0, cntvct_el0",
- "nop\n"
- __mrs_s("%0", SYS_CNTVCTSS_EL0),
- ARM64_HAS_ECV)
- : "=r" (res)
- :
- : "memory");
-
- arch_counter_enforce_ordering(res);
-
- return res;
+ return __arch_counter_get_cntvct();
}
#endif /* !__ASSEMBLY__ */
---
base-commit: acc4d5ff0b61eb1715c498b6536c38c1feb7f3c1
change-id: 20250402-arm-vdso-c4b0c9f35300
Best regards,
--
Breno Leitao <leitao@debian.org>
On Wed, 02 Apr 2025 20:22:47 +0100, Breno Leitao <leitao@debian.org> wrote: > > While reading how `cntvct_el0` was read in the kernel, I found that > __arch_get_hw_counter() is doing something very similar to what > __arch_counter_get_cntvct() is already doing. > > Use the existing __arch_counter_get_cntvct() function instead of > duplicating similar inline assembly code in __arch_get_hw_counter(). > > Both functions were performing nearly identical operations to read the > cntvct_el0 register. The only difference was that > __arch_get_hw_counter() included a memory clobber in its inline > assembly, which appears unnecessary in this context. > > This change simplifies the code by eliminating duplicate functionality > and improves maintainability by centralizing the counter access logic in > a single implementation. > > Signed-off-by: Breno Leitao <leitao@debian.org> > --- > I'm sharing this code as an RFC since I'm not intimately familiar with > different arm platforms, and I want to double-check that I haven't > missed anything subtle. > --- > arch/arm64/include/asm/vdso/gettimeofday.h | 22 ++-------------------- > 1 file changed, 2 insertions(+), 20 deletions(-) > > diff --git a/arch/arm64/include/asm/vdso/gettimeofday.h b/arch/arm64/include/asm/vdso/gettimeofday.h > index 92a2b59a9f3df..417b5b41b877d 100644 > --- a/arch/arm64/include/asm/vdso/gettimeofday.h > +++ b/arch/arm64/include/asm/vdso/gettimeofday.h > @@ -11,6 +11,7 @@ > #include <asm/barrier.h> > #include <asm/unistd.h> > #include <asm/sysreg.h> > +#include <asm/arch_timer.h> > > #define VDSO_HAS_CLOCK_GETRES 1 > > @@ -69,8 +70,6 @@ int clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) > static __always_inline u64 __arch_get_hw_counter(s32 clock_mode, > const struct vdso_time_data *vd) > { > - u64 res; > - > /* > * Core checks for mode already, so this raced against a concurrent > * update. Return something. 
Core will do another round and then > @@ -79,24 +78,7 @@ static __always_inline u64 __arch_get_hw_counter(s32 clock_mode, > if (clock_mode == VDSO_CLOCKMODE_NONE) > return 0; > > - /* > - * If FEAT_ECV is available, use the self-synchronizing counter. > - * Otherwise the isb is required to prevent that the counter value > - * is speculated. > - */ > - asm volatile( > - ALTERNATIVE("isb\n" > - "mrs %0, cntvct_el0", > - "nop\n" > - __mrs_s("%0", SYS_CNTVCTSS_EL0), > - ARM64_HAS_ECV) > - : "=r" (res) > - : > - : "memory"); > - > - arch_counter_enforce_ordering(res); > - > - return res; > + return __arch_counter_get_cntvct(); I won't pretend I understand it all, but you really want to have a look at the link just above the arch_counter_enforce_ordering() definition, pasted below for your convenience: https://lore.kernel.org/r/alpine.DEB.2.21.1902081950260.1662@nanos.tec.linutronix.de/ Dropping this ordering enforcement seems pretty adventurous unless you have very strong guarantees about the context this executes in. M. -- Jazz isn't dead. It just smells funny.
On Wed, 02 Apr 2025 23:04:41 +0100, Marc Zyngier <maz@kernel.org> wrote: > > On Wed, 02 Apr 2025 20:22:47 +0100, > Breno Leitao <leitao@debian.org> wrote: > > > > While reading how `cntvct_el0` was read in the kernel, I found that > > __arch_get_hw_counter() is doing something very similar to what > > __arch_counter_get_cntvct() is already doing. > > > > Use the existing __arch_counter_get_cntvct() function instead of > > duplicating similar inline assembly code in __arch_get_hw_counter(). > > > > Both functions were performing nearly identical operations to read the > > cntvct_el0 register. The only difference was that > > __arch_get_hw_counter() included a memory clobber in its inline > > assembly, which appears unnecessary in this context. > > > > This change simplifies the code by eliminating duplicate functionality > > and improves maintainability by centralizing the counter access logic in > > a single implementation. > > > > Signed-off-by: Breno Leitao <leitao@debian.org> > > --- > > I'm sharing this code as an RFC since I'm not intimately familiar with > > different arm platforms, and I want to double-check that I haven't > > missed anything subtle. 
> > --- > > arch/arm64/include/asm/vdso/gettimeofday.h | 22 ++-------------------- > > 1 file changed, 2 insertions(+), 20 deletions(-) > > > > diff --git a/arch/arm64/include/asm/vdso/gettimeofday.h b/arch/arm64/include/asm/vdso/gettimeofday.h > > index 92a2b59a9f3df..417b5b41b877d 100644 > > --- a/arch/arm64/include/asm/vdso/gettimeofday.h > > +++ b/arch/arm64/include/asm/vdso/gettimeofday.h > > @@ -11,6 +11,7 @@ > > #include <asm/barrier.h> > > #include <asm/unistd.h> > > #include <asm/sysreg.h> > > +#include <asm/arch_timer.h> > > > > #define VDSO_HAS_CLOCK_GETRES 1 > > > > @@ -69,8 +70,6 @@ int clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) > > static __always_inline u64 __arch_get_hw_counter(s32 clock_mode, > > const struct vdso_time_data *vd) > > { > > - u64 res; > > - > > /* > > * Core checks for mode already, so this raced against a concurrent > > * update. Return something. Core will do another round and then > > @@ -79,24 +78,7 @@ static __always_inline u64 __arch_get_hw_counter(s32 clock_mode, > > if (clock_mode == VDSO_CLOCKMODE_NONE) > > return 0; > > > > - /* > > - * If FEAT_ECV is available, use the self-synchronizing counter. > > - * Otherwise the isb is required to prevent that the counter value > > - * is speculated. 
> > - */ > > - asm volatile( > > - ALTERNATIVE("isb\n" > > - "mrs %0, cntvct_el0", > > - "nop\n" > > - __mrs_s("%0", SYS_CNTVCTSS_EL0), > > - ARM64_HAS_ECV) > > - : "=r" (res) > > - : > > - : "memory"); > > - > > - arch_counter_enforce_ordering(res); > > - > > - return res; > > + return __arch_counter_get_cntvct(); > > I won't pretend I understand it all, but you really want to have a > look at the link just above the arch_counter_enforce_ordering() > definition, pasted below for your convenience: > > https://lore.kernel.org/r/alpine.DEB.2.21.1902081950260.1662@nanos.tec.linutronix.de/ > > Dropping this ordering enforcement seems pretty adventurous unless you > have very strong guarantees about the context this executes in. Ah, I appear to have misread this patch, and __arch_counter_get_cntvct() does have the same ordering requirements. Apologies for the noise. M. -- Jazz isn't dead. It just smells funny.
Hello Marc, On Wed, Apr 02, 2025 at 11:22:51PM +0100, Marc Zyngier wrote: > > > - arch_counter_enforce_ordering(res); > > > - > > > - return res; > > > + return __arch_counter_get_cntvct(); > > > > I won't pretend I understand it all, but you really want to have a > > look at the link just above the arch_counter_enforce_ordering() > > definition, pasted below for your convenience: > > > > https://lore.kernel.org/r/alpine.DEB.2.21.1902081950260.1662@nanos.tec.linutronix.de/ > > > > Dropping this ordering enforcement seems pretty adventurous unless you > > have very strong guarantees about the context this executes in. > > Ah, I appear to have misread this patch, and > __arch_counter_get_cntvct() does have the same ordering requirements. Right, I originally made sure that this part remained unchanged, with one notable exception. The __arch_counter_get_cntvct() function does not mark memory as clobbered, whereas the original code did. The original code, which is being removed, used the following assembly construction: asm volatile( ALTERNATIVE("isb\n mrs %0, cntvct_el0", "nop\n" __mrs_s("%0", SYS_CNTVCTSS_EL0), ARM64_HAS_ECV) : "=r" (res) : : "memory"); This code explicitly marked memory as clobbered using the "memory" clobber specifier. In contrast, __arch_counter_get_cntvct() uses a similar assembly instruction, but without the memory clobber specifier: asm volatile( ALTERNATIVE("isb\n mrs %0, cntvct_el0", "nop\n" __mrs_s("%0", SYS_CNTVCTSS_EL0), ARM64_HAS_ECV) : "=r" (cnt)); From my analysis, I understand that memory clobbering is not necessary in this case. The assembly instruction only accesses registers and does not modify memory. The use of an explicit output variable (res/cnt) in the assembly code ensures that memory is safe. Other than that, nothing else changes. > Apologies for the noise. Since you created *all* this noise regarding instruction ordering, can I pick your brain on the same topic?
:-P If my machine has Speculation Barrier (sb)[1] support, is it a good replacement for `isb` ? Do you happen to know? [1] https://developer.arm.com/documentation/ddi0602/2022-06/Base-Instructions/SB--Speculation-Barrier- Thanks for your review! --breno
On Thu, 03 Apr 2025 13:14:49 +0100, Breno Leitao <leitao@debian.org> wrote: > > Since you created *all* this noise regarding instruction ordering, can > I pick your brain in the same topic? :-P > > If my machine has Speculation Barrier (sb)[1] support, is it a good > replacement for `isb` ? Do you happen to know? Probably not. SB prevents speculation past it, while ISB is here to enforce ordering. We're pretty happy to let the CPU speculate the counter, as long as it does it in the order we have defined. On some implementations, this can have a similar effect (drain the fetch queue, restart). But the intent clearly isn't the same, and some implementations may do things differently. In any case, what you want is CNTVCTSS_EL0 (part of FEAT_ECV), which does away with all barriers. M. -- Jazz isn't dead. It just smells funny.
© 2016 - 2025 Red Hat, Inc.