[PATCH v8 20/28] KVM: arm64: Add clock support for the pKVM hyp
Posted by Vincent Donnefort 1 month, 1 week ago
By default, the arm64 host kernel uses the arch timer as the source
for sched_clock. Conveniently, EL2 has access to that same counter,
which allows it to generate clock values synchronized with the host.

The clock nonetheless needs to be set up with the same slope values
(mult/shift) as the kernel. Also introduce trace_clock(), which is
expected to be configured later by the hypervisor tracing support.
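
For illustration, the host side is expected to feed those values with
something roughly like the sketch below. Nothing here is part of this
patch: the pkvm_update_clock() helper is hypothetical (the actual
plumbing comes with later patches in the series) and the
sched_clock_read_begin()/_retry() pair is only used to show where the
slope and epoch come from.

#include <linux/sched_clock.h>

static void pkvm_sync_trace_clock(void)
{
	struct clock_read_data *rd;
	unsigned int seq;
	u32 mult, shift;
	u64 epoch_ns, epoch_cyc;

	/* Snapshot the kernel's sched_clock slope and epoch. */
	do {
		rd = sched_clock_read_begin(&seq);
		mult = rd->mult;
		shift = rd->shift;
		epoch_ns = rd->epoch_ns;
		epoch_cyc = rd->epoch_cyc;
	} while (sched_clock_read_retry(seq));

	/* Hand the values over to EL2, where trace_clock_update() runs. */
	pkvm_update_clock(mult, shift, epoch_ns, epoch_cyc);
}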

Signed-off-by: Vincent Donnefort <vdonnefort@google.com>

diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
index e6be1f5d0967..d46621d936e3 100644
--- a/arch/arm64/include/asm/kvm_hyp.h
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -146,5 +146,4 @@ extern u64 kvm_nvhe_sym(id_aa64smfr0_el1_sys_val);
 extern unsigned long kvm_nvhe_sym(__icache_flags);
 extern unsigned int kvm_nvhe_sym(kvm_arm_vmid_bits);
 extern unsigned int kvm_nvhe_sym(kvm_host_sve_max_vl);
-
 #endif /* __ARM64_KVM_HYP_H__ */
diff --git a/arch/arm64/kvm/hyp/include/nvhe/clock.h b/arch/arm64/kvm/hyp/include/nvhe/clock.h
new file mode 100644
index 000000000000..9e152521f345
--- /dev/null
+++ b/arch/arm64/kvm/hyp/include/nvhe/clock.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ARM64_KVM_HYP_NVHE_CLOCK_H
+#define __ARM64_KVM_HYP_NVHE_CLOCK_H
+#include <linux/types.h>
+
+#include <asm/kvm_hyp.h>
+
+#ifdef CONFIG_PKVM_TRACING
+void trace_clock_update(u32 mult, u32 shift, u64 epoch_ns, u64 epoch_cyc);
+u64 trace_clock(void);
+#else
+static inline void
+trace_clock_update(u32 mult, u32 shift, u64 epoch_ns, u64 epoch_cyc) { }
+static inline u64 trace_clock(void) { return 0; }
+#endif
+#endif
diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile
index a244ec25f8c5..f55a9a17d38f 100644
--- a/arch/arm64/kvm/hyp/nvhe/Makefile
+++ b/arch/arm64/kvm/hyp/nvhe/Makefile
@@ -17,7 +17,7 @@ ccflags-y += -fno-stack-protector	\
 hostprogs := gen-hyprel
 HOST_EXTRACFLAGS += -I$(objtree)/include
 
-lib-objs := clear_page.o copy_page.o memcpy.o memset.o
+lib-objs := clear_page.o copy_page.o memcpy.o memset.o tishift.o
 lib-objs := $(addprefix ../../../lib/, $(lib-objs))
 
 CFLAGS_switch.nvhe.o += -Wno-override-init
@@ -29,6 +29,7 @@ hyp-obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \
 	 ../fpsimd.o ../hyp-entry.o ../exception.o ../pgtable.o
 hyp-obj-y += ../../../kernel/smccc-call.o
 hyp-obj-$(CONFIG_LIST_HARDENED) += list_debug.o
+hyp-obj-$(CONFIG_PKVM_TRACING) += clock.o
 hyp-obj-y += $(lib-objs)
 
 ##
diff --git a/arch/arm64/kvm/hyp/nvhe/clock.c b/arch/arm64/kvm/hyp/nvhe/clock.c
new file mode 100644
index 000000000000..600a300bece7
--- /dev/null
+++ b/arch/arm64/kvm/hyp/nvhe/clock.c
@@ -0,0 +1,65 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2025 Google LLC
+ * Author: Vincent Donnefort <vdonnefort@google.com>
+ */
+
+#include <nvhe/clock.h>
+
+#include <asm/arch_timer.h>
+#include <asm/div64.h>
+
+static struct clock_data {
+	struct {
+		u32 mult;
+		u32 shift;
+		u64 epoch_ns;
+		u64 epoch_cyc;
+		u64 cyc_overflow64;
+	} data[2];
+	u64 cur;
+} trace_clock_data;
+
+static u64 __clock_mult_uint128(u64 cyc, u32 mult, u32 shift)
+{
+	__uint128_t ns = (__uint128_t)cyc * mult;
+
+	ns >>= shift;
+
+	return (u64)ns;
+}
+
+/* Does not guarantee no reader on the modified bank. */
+void trace_clock_update(u32 mult, u32 shift, u64 epoch_ns, u64 epoch_cyc)
+{
+	struct clock_data *clock = &trace_clock_data;
+	u64 bank = clock->cur ^ 1;
+
+	clock->data[bank].mult			= mult;
+	clock->data[bank].shift			= shift;
+	clock->data[bank].epoch_ns		= epoch_ns;
+	clock->data[bank].epoch_cyc		= epoch_cyc;
+	clock->data[bank].cyc_overflow64	= ULONG_MAX / mult;
+
+	smp_store_release(&clock->cur, bank);
+}
+
+/* Using host provided data. Do not use for anything else than debugging. */
+u64 trace_clock(void)
+{
+	struct clock_data *clock = &trace_clock_data;
+	u64 bank = smp_load_acquire(&clock->cur);
+	u64 cyc, ns;
+
+	cyc = __arch_counter_get_cntpct() - clock->data[bank].epoch_cyc;
+
+	if (likely(cyc < clock->data[bank].cyc_overflow64)) {
+		ns = cyc * clock->data[bank].mult;
+		ns >>= clock->data[bank].shift;
+	} else {
+		ns = __clock_mult_uint128(cyc, clock->data[bank].mult,
+					  clock->data[bank].shift);
+	}
+
+	return (u64)ns + clock->data[bank].epoch_ns;
+}
-- 
2.51.2.1041.gc1ab5b90ca-goog
Re: [PATCH v8 20/28] KVM: arm64: Add clock support for the pKVM hyp
Posted by Marc Zyngier 1 month ago
On Fri, 07 Nov 2025 09:38:32 +0000,
Vincent Donnefort <vdonnefort@google.com> wrote:
> 
> By default, the arm64 host kernel uses the arch timer as the source
> for sched_clock. Conveniently, EL2 has access to that same counter,
> which allows it to generate clock values synchronized with the host.
> 
> The clock nonetheless needs to be set up with the same slope values
> (mult/shift) as the kernel. Also introduce trace_clock(), which is
> expected to be configured later by the hypervisor tracing support.
> 
> Signed-off-by: Vincent Donnefort <vdonnefort@google.com>
> 
> diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
> index e6be1f5d0967..d46621d936e3 100644
> --- a/arch/arm64/include/asm/kvm_hyp.h
> +++ b/arch/arm64/include/asm/kvm_hyp.h
> @@ -146,5 +146,4 @@ extern u64 kvm_nvhe_sym(id_aa64smfr0_el1_sys_val);
>  extern unsigned long kvm_nvhe_sym(__icache_flags);
>  extern unsigned int kvm_nvhe_sym(kvm_arm_vmid_bits);
>  extern unsigned int kvm_nvhe_sym(kvm_host_sve_max_vl);
> -
>  #endif /* __ARM64_KVM_HYP_H__ */
> diff --git a/arch/arm64/kvm/hyp/include/nvhe/clock.h b/arch/arm64/kvm/hyp/include/nvhe/clock.h
> new file mode 100644
> index 000000000000..9e152521f345
> --- /dev/null
> +++ b/arch/arm64/kvm/hyp/include/nvhe/clock.h
> @@ -0,0 +1,16 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#ifndef __ARM64_KVM_HYP_NVHE_CLOCK_H
> +#define __ARM64_KVM_HYP_NVHE_CLOCK_H
> +#include <linux/types.h>
> +
> +#include <asm/kvm_hyp.h>
> +
> +#ifdef CONFIG_PKVM_TRACING
> +void trace_clock_update(u32 mult, u32 shift, u64 epoch_ns, u64 epoch_cyc);
> +u64 trace_clock(void);
> +#else
> +static inline void
> +trace_clock_update(u32 mult, u32 shift, u64 epoch_ns, u64 epoch_cyc) { }
> +static inline u64 trace_clock(void) { return 0; }
> +#endif
> +#endif
> diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile
> index a244ec25f8c5..f55a9a17d38f 100644
> --- a/arch/arm64/kvm/hyp/nvhe/Makefile
> +++ b/arch/arm64/kvm/hyp/nvhe/Makefile
> @@ -17,7 +17,7 @@ ccflags-y += -fno-stack-protector	\
>  hostprogs := gen-hyprel
>  HOST_EXTRACFLAGS += -I$(objtree)/include
>  
> -lib-objs := clear_page.o copy_page.o memcpy.o memset.o
> +lib-objs := clear_page.o copy_page.o memcpy.o memset.o tishift.o
>  lib-objs := $(addprefix ../../../lib/, $(lib-objs))
>  
>  CFLAGS_switch.nvhe.o += -Wno-override-init
> @@ -29,6 +29,7 @@ hyp-obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \
>  	 ../fpsimd.o ../hyp-entry.o ../exception.o ../pgtable.o
>  hyp-obj-y += ../../../kernel/smccc-call.o
>  hyp-obj-$(CONFIG_LIST_HARDENED) += list_debug.o
> +hyp-obj-$(CONFIG_PKVM_TRACING) += clock.o
>  hyp-obj-y += $(lib-objs)
>  
>  ##
> diff --git a/arch/arm64/kvm/hyp/nvhe/clock.c b/arch/arm64/kvm/hyp/nvhe/clock.c
> new file mode 100644
> index 000000000000..600a300bece7
> --- /dev/null
> +++ b/arch/arm64/kvm/hyp/nvhe/clock.c
> @@ -0,0 +1,65 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (C) 2025 Google LLC
> + * Author: Vincent Donnefort <vdonnefort@google.com>
> + */
> +
> +#include <nvhe/clock.h>
> +
> +#include <asm/arch_timer.h>
> +#include <asm/div64.h>
> +
> +static struct clock_data {
> +	struct {
> +		u32 mult;
> +		u32 shift;
> +		u64 epoch_ns;
> +		u64 epoch_cyc;
> +		u64 cyc_overflow64;
> +	} data[2];
> +	u64 cur;
> +} trace_clock_data;
> +
> +static u64 __clock_mult_uint128(u64 cyc, u32 mult, u32 shift)
> +{
> +	__uint128_t ns = (__uint128_t)cyc * mult;
> +
> +	ns >>= shift;
> +
> +	return (u64)ns;
> +}
> +
> +/* Does not guarantee no reader on the modified bank. */
> +void trace_clock_update(u32 mult, u32 shift, u64 epoch_ns, u64 epoch_cyc)
> +{
> +	struct clock_data *clock = &trace_clock_data;
> +	u64 bank = clock->cur ^ 1;
> +
> +	clock->data[bank].mult			= mult;
> +	clock->data[bank].shift			= shift;
> +	clock->data[bank].epoch_ns		= epoch_ns;
> +	clock->data[bank].epoch_cyc		= epoch_cyc;
> +	clock->data[bank].cyc_overflow64	= ULONG_MAX / mult;
> +
> +	smp_store_release(&clock->cur, bank);
> +}
> +
> +/* Using host provided data. Do not use for anything else than debugging. */
> +u64 trace_clock(void)
> +{
> +	struct clock_data *clock = &trace_clock_data;
> +	u64 bank = smp_load_acquire(&clock->cur);
> +	u64 cyc, ns;
> +
> +	cyc = __arch_counter_get_cntpct() - clock->data[bank].epoch_cyc;

I can only imagine that you don't care about the broken systems that
do not have a monotonic counter, and that will happily have their
counter swing back and forth?

Also, you may want to document why you are using the physical counter
instead of the virtual one. I guess that you don't want CNTVOFF_EL2 to
apply when HCR_EL2.E2H==0, but given that you never allow anything
else for pKVM, this is slightly odd.

Thanks,

	M.

-- 
Without deviation from the norm, progress is not possible.
Re: [PATCH v8 20/28] KVM: arm64: Add clock support for the pKVM hyp
Posted by Vincent Donnefort 4 weeks, 1 day ago
[...]

> > +/* Using host provided data. Do not use for anything else than debugging. */
> > +u64 trace_clock(void)
> > +{
> > +	struct clock_data *clock = &trace_clock_data;
> > +	u64 bank = smp_load_acquire(&clock->cur);
> > +	u64 cyc, ns;
> > +
> > +	cyc = __arch_counter_get_cntpct() - clock->data[bank].epoch_cyc;
> 
> I can only imagine that you don't care about the broken systems that
> do not have a monotonic counter, and that will happily have their
> counter swing back and forth?

I haven't used the _stable() variant for the affected devices... Hmm, this will
not be trivial from the hypervisor. I'll see what I can do.

> 
> Also, you may want to document why you are using the physical counter
> instead of the virtual one. I guess that you don't want CNTVOFF_EL2 to
> apply when HCR_EL2.E2H==0, but given that you never allow anything
> else for pKVM, this is slightly odd.

You mean I might as well use __arch_counter_get_cntvct() as CNTVOFF_EL2 will
always be ignored in the pKVM case?

> 
> Thanks,
> 
> 	M.
> 
> -- 
> Without deviation from the norm, progress is not possible.
Re: [PATCH v8 20/28] KVM: arm64: Add clock support for the pKVM hyp
Posted by Marc Zyngier 2 weeks, 5 days ago
On Thu, 20 Nov 2025 11:36:16 +0000,
Vincent Donnefort <vdonnefort@google.com> wrote:
> 
> [...]
> 
> > > +/* Using host provided data. Do not use for anything else than debugging. */
> > > +u64 trace_clock(void)
> > > +{
> > > +	struct clock_data *clock = &trace_clock_data;
> > > +	u64 bank = smp_load_acquire(&clock->cur);
> > > +	u64 cyc, ns;
> > > +
> > > +	cyc = __arch_counter_get_cntpct() - clock->data[bank].epoch_cyc;
> > 
> > I can only imagine that you don't care about the broken systems that
> > do not have a monotonic counter, and that will happily have their
> > counter swing back and forth?
> 
> I haven't used the _stable() variant for the affected devices... Hum this will
> not be trivial from the hypervisor. I'll see what I can do.

In all honesty, simply disabling tracing when you don't have a stable
counter should be enough. I don't think there is much to be gained
with supporting that on sub-standard HW (the bar is low enough).
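
Something along those lines on the host side would probably be enough
(purely a sketch: it assumes arch_timer_counter_has_wa() is reachable
from the KVM/pKVM init code, and the function name is made up):

#include <clocksource/arm_arch_timer.h>

static bool pkvm_trace_clock_supported(void)
{
	/*
	 * The counter erratum workarounds cannot be applied at EL2, so
	 * don't bother enabling hyp tracing on such systems.
	 */
	return !arch_timer_counter_has_wa();
}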

> > Also, you may want to document why you are using the physical counter
> > instead of the virtual one. I guess that you don't want CNTVOFF_EL2 to
> > apply when HCR_EL2.E2H==0, but given that you never allow anything
> > else for pKVM, this is slightly odd.
> 
> You mean I might as well use __arch_counter_get_cntvct() as CNTVOFF_EL2 will
> always be ignored in the pKVM case?

That, plus the fact that CNTVOFF_EL2 is only evaluated at EL2 when
E2H=0 (and that case is going to disappear over time).

	M.

-- 
Jazz isn't dead. It just smells funny.