[PATCH v8 22/28] KVM: arm64: Add trace remote for the pKVM hyp

Vincent Donnefort posted 28 patches 1 month, 1 week ago
There is a newer version of this series
[PATCH v8 22/28] KVM: arm64: Add trace remote for the pKVM hyp
Posted by Vincent Donnefort 1 month, 1 week ago
When running with KVM protected mode, the hypervisor is able to generate
events into tracefs compatible ring-buffers. Create a trace remote so
the kernel can read those buffers.

This currently doesn't provide any event support which will come later.

Signed-off-by: Vincent Donnefort <vdonnefort@google.com>

diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index 580426cdbe77..64db254f0448 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -87,6 +87,7 @@ config PKVM_TRACING
 	bool
 	depends on KVM
 	depends on TRACING
+	select TRACE_REMOTE
 	select SIMPLE_RING_BUFFER
 	default y
 
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index 3ebc0570345c..2c184e3abd8e 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -30,6 +30,8 @@ kvm-$(CONFIG_HW_PERF_EVENTS)  += pmu-emul.o pmu.o
 kvm-$(CONFIG_ARM64_PTR_AUTH)  += pauth.o
 kvm-$(CONFIG_PTDUMP_STAGE2_DEBUGFS) += ptdump.o
 
+kvm-$(CONFIG_PKVM_TRACING) += hyp_trace.o
+
 always-y := hyp_constants.h hyp-constants.s
 
 define rule_gen_hyp_constants
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 870953b4a8a7..c485e54417e2 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -24,6 +24,7 @@
 
 #define CREATE_TRACE_POINTS
 #include "trace_arm.h"
+#include "hyp_trace.h"
 
 #include <linux/uaccess.h>
 #include <asm/ptrace.h>
@@ -2345,6 +2346,9 @@ static int __init init_subsystems(void)
 
 	kvm_register_perf_callbacks(NULL);
 
+	err = hyp_trace_init();
+	if (err)
+		kvm_err("Failed to initialize Hyp tracing\n");
 out:
 	if (err)
 		hyp_cpu_pm_exit();
diff --git a/arch/arm64/kvm/hyp_trace.c b/arch/arm64/kvm/hyp_trace.c
new file mode 100644
index 000000000000..98051c3fb0c2
--- /dev/null
+++ b/arch/arm64/kvm/hyp_trace.c
@@ -0,0 +1,210 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2025 Google LLC
+ * Author: Vincent Donnefort <vdonnefort@google.com>
+ */
+
+#include <linux/trace_remote.h>
+#include <linux/simple_ring_buffer.h>
+
+#include <asm/kvm_host.h>
+#include <asm/kvm_hyptrace.h>
+
+#include "hyp_trace.h"
+
+/* Access to this struct within the trace_remote_callbacks are protected by the trace_remote lock */
+static struct hyp_trace_buffer {
+	struct hyp_trace_desc	*desc;		/* Descriptor shared with the hypervisor; NULL while unloaded */
+	size_t			desc_size;	/* Page-aligned size of the @desc allocation, for free_pages_exact() */
+} trace_buffer;
+
+/*
+ * Allocate the backing store for the hypervisor's struct simple_buffer_page
+ * descriptors: one per buffer data page for each possible CPU, plus one extra
+ * per CPU (presumably the reader page -- TODO confirm against the hyp side).
+ *
+ * The range is recorded in the hyp_trace_desc so the hypervisor can find it
+ * and hyp_trace_buffer_free_bpages_backing() can release it later.
+ *
+ * Returns 0 on success, -ENOMEM if the backing pages cannot be allocated.
+ */
+static int hyp_trace_buffer_alloc_bpages_backing(struct hyp_trace_buffer *trace_buffer, size_t size)
+{
+	/* Number of data pages needed to hold @size bytes, plus one spare */
+	int nr_bpages = (PAGE_ALIGN(size) / PAGE_SIZE) + 1;
+	size_t backing_size;
+	void *start;
+
+	backing_size = PAGE_ALIGN(sizeof(struct simple_buffer_page) * nr_bpages *
+				  num_possible_cpus());
+
+	start = alloc_pages_exact(backing_size, GFP_KERNEL_ACCOUNT);
+	if (!start)
+		return -ENOMEM;
+
+	trace_buffer->desc->bpages_backing_start = (unsigned long)start;
+	trace_buffer->desc->bpages_backing_size = backing_size;
+
+	return 0;
+}
+
+/* Release the simple_buffer_page backing store recorded in the descriptor */
+static void hyp_trace_buffer_free_bpages_backing(struct hyp_trace_buffer *trace_buffer)
+{
+	struct hyp_trace_desc *desc = trace_buffer->desc;
+
+	free_pages_exact((void *)desc->bpages_backing_start, desc->bpages_backing_size);
+}
+
+/* Share a single host page (identified by its kernel VA) with the hypervisor */
+static int __load_page(unsigned long va)
+{
+	return kvm_call_hyp_nvhe(__pkvm_host_share_hyp, virt_to_pfn((void *)va), 1);
+}
+
+/*
+ * Give a previously shared page back to the host. Unsharing can only fail if
+ * the page was never shared (or hyp still uses it), hence the WARN.
+ */
+static void __unload_page(unsigned long va)
+{
+	WARN_ON(kvm_call_hyp_nvhe(__pkvm_host_unshare_hyp, virt_to_pfn((void *)va), 1));
+}
+
+/*
+ * Unshare the meta page and all data pages of every per-CPU ring-buffer up to
+ * and including @last_cpu. Callers pass INT_MAX to unload all CPUs, or a
+ * smaller bound to roll back a partial load.
+ */
+static void hyp_trace_buffer_unload_pages(struct hyp_trace_buffer *trace_buffer, int last_cpu)
+{
+	struct ring_buffer_desc *rb_desc;
+	int cpu, p;
+
+	for_each_ring_buffer_desc(rb_desc, cpu, &trace_buffer->desc->trace_buffer_desc) {
+		if (cpu > last_cpu)
+			break;
+
+		__unload_page(rb_desc->meta_va);
+		for (p = 0; p < rb_desc->nr_page_va; p++)
+			__unload_page(rb_desc->page_va[p]);
+	}
+}
+
+/*
+ * Share with the hypervisor the meta page and all data pages of each per-CPU
+ * ring-buffer. On failure, every page shared so far is unshared again so the
+ * host regains exclusive ownership of all of them.
+ *
+ * Returns 0 on success or the first sharing error.
+ */
+static int hyp_trace_buffer_load_pages(struct hyp_trace_buffer *trace_buffer)
+{
+	struct ring_buffer_desc *rb_desc;
+	int cpu, p, ret = 0;
+
+	for_each_ring_buffer_desc(rb_desc, cpu, &trace_buffer->desc->trace_buffer_desc) {
+		ret = __load_page(rb_desc->meta_va);
+		if (ret)
+			break;
+
+		for (p = 0; p < rb_desc->nr_page_va; p++) {
+			ret = __load_page(rb_desc->page_va[p]);
+			if (ret)
+				break;
+		}
+
+		if (ret) {
+			/* Fully roll back the failing CPU, meta page included */
+			for (p--; p >= 0; p--)
+				__unload_page(rb_desc->page_va[p]);
+			__unload_page(rb_desc->meta_va);
+			break;
+		}
+	}
+
+	/*
+	 * The failing CPU holds no shared pages anymore (either rolled back
+	 * above, or its meta page was never shared), so only the CPUs before
+	 * it still need unloading. Passing the failing CPU itself would
+	 * unshare its pages a second time and trip the WARN in
+	 * __unload_page().
+	 */
+	if (ret)
+		hyp_trace_buffer_unload_pages(trace_buffer, cpu - 1);
+
+	return ret;
+}
+
+/*
+ * trace_remote "load" callback: allocate the hypervisor trace buffer of
+ * @size bytes per CPU, share its pages with the hypervisor and hand the
+ * descriptor over with the __pkvm_load_tracing hypercall.
+ *
+ * Returns the trace_buffer_desc embedded in the hyp_trace_desc on success,
+ * or an ERR_PTR. Serialization is provided by the trace_remote lock (see the
+ * comment on struct hyp_trace_buffer).
+ */
+static struct trace_buffer_desc *hyp_trace_load(unsigned long size, void *priv)
+{
+	struct hyp_trace_buffer *trace_buffer = priv;
+	struct hyp_trace_desc *desc;
+	size_t desc_size;
+	int ret;
+
+	/* A buffer is already loaded: trace_remote should never double-load */
+	if (WARN_ON(trace_buffer->desc))
+		return ERR_PTR(-EINVAL);
+
+	desc_size = trace_buffer_desc_size(size, num_possible_cpus());
+	if (desc_size == SIZE_MAX)
+		return ERR_PTR(-E2BIG);
+
+	/*
+	 * The hypervisor will unmap the descriptor from the host to protect the reading. Page
+	 * granularity for the allocation ensures no other useful data will be unmapped.
+	 */
+	desc_size = PAGE_ALIGN(desc_size);
+	desc = (struct hyp_trace_desc *)alloc_pages_exact(desc_size, GFP_KERNEL);
+	if (!desc)
+		return ERR_PTR(-ENOMEM);
+
+	trace_buffer->desc = desc;
+	/*
+	 * Record the allocation size: hyp_trace_unload() frees the descriptor
+	 * with free_pages_exact(desc, desc_size), which would leak it if left
+	 * at zero.
+	 */
+	trace_buffer->desc_size = desc_size;
+
+	ret = hyp_trace_buffer_alloc_bpages_backing(trace_buffer, size);
+	if (ret)
+		goto err_free_desc;
+
+	ret = trace_remote_alloc_buffer(&desc->trace_buffer_desc, desc_size, size,
+					cpu_possible_mask);
+	if (ret)
+		goto err_free_backing;
+
+	ret = hyp_trace_buffer_load_pages(trace_buffer);
+	if (ret)
+		goto err_free_buffer;
+
+	ret = kvm_call_hyp_nvhe(__pkvm_load_tracing, (unsigned long)desc, desc_size);
+	if (ret)
+		goto err_unload_pages;
+
+	return &desc->trace_buffer_desc;
+
+err_unload_pages:
+	hyp_trace_buffer_unload_pages(trace_buffer, INT_MAX);
+
+err_free_buffer:
+	trace_remote_free_buffer(&desc->trace_buffer_desc);
+
+err_free_backing:
+	hyp_trace_buffer_free_bpages_backing(trace_buffer);
+
+err_free_desc:
+	free_pages_exact(desc, desc_size);
+	trace_buffer->desc = NULL;
+	trace_buffer->desc_size = 0;
+
+	return ERR_PTR(ret);
+}
+
+/*
+ * trace_remote "unload" callback: mirror of hyp_trace_load(). Tell the
+ * hypervisor to stop using the buffer, reclaim every shared page and free all
+ * host-side allocations.
+ */
+static void hyp_trace_unload(struct trace_buffer_desc *desc, void *priv)
+{
+	struct hyp_trace_buffer *trace_buffer = priv;
+
+	/* trace_remote must hand back the descriptor we gave it in the load callback */
+	if (WARN_ON(desc != &trace_buffer->desc->trace_buffer_desc))
+		return;
+
+	kvm_call_hyp_nvhe(__pkvm_unload_tracing);
+	hyp_trace_buffer_unload_pages(trace_buffer, INT_MAX);
+	trace_remote_free_buffer(desc);
+	hyp_trace_buffer_free_bpages_backing(trace_buffer);
+	/*
+	 * NOTE(review): trace_buffer->desc_size does not appear to be assigned
+	 * anywhere in this file; if so, this call frees with a size of 0 and
+	 * leaks the descriptor pages. Confirm the load path records the size.
+	 */
+	free_pages_exact(trace_buffer->desc, trace_buffer->desc_size);
+	trace_buffer->desc = NULL;
+}
+
+/* Start (@enable == true) or stop event recording at EL2 */
+static int hyp_trace_enable_tracing(bool enable, void *priv)
+{
+	return kvm_call_hyp_nvhe(__pkvm_enable_tracing, enable);
+}
+
+/* Ask the hypervisor to swap the reader page of @cpu's ring-buffer */
+static int hyp_trace_swap_reader_page(unsigned int cpu, void *priv)
+{
+	return kvm_call_hyp_nvhe(__pkvm_swap_reader_tracing, cpu);
+}
+
+/* Per-CPU reset: nothing to do on the host side, report success */
+static int hyp_trace_reset(unsigned int cpu, void *priv)
+{
+	return 0;
+}
+
+/* No hypervisor events are wired up yet; accept the request and ignore it */
+static int hyp_trace_enable_event(unsigned short id, bool enable, void *priv)
+{
+	return 0;
+}
+
+/* Hooks invoked by the trace_remote core, serialized by the trace_remote lock */
+static struct trace_remote_callbacks trace_remote_callbacks = {
+	.load_trace_buffer	= hyp_trace_load,
+	.unload_trace_buffer	= hyp_trace_unload,
+	.enable_tracing		= hyp_trace_enable_tracing,
+	.swap_reader_page	= hyp_trace_swap_reader_page,
+	.reset			= hyp_trace_reset,
+	.enable_event		= hyp_trace_enable_event,
+};
+
+/*
+ * Register the pKVM hypervisor as a trace remote under the name "hypervisor".
+ *
+ * NOTE(review): only protected mode registers a remote; a plain nVHE host
+ * returns early and gets no hyp tracing -- confirm whether that is intended
+ * (raised during review).
+ */
+int hyp_trace_init(void)
+{
+	if (!is_protected_kvm_enabled())
+		return 0;
+
+	return trace_remote_register("hypervisor", &trace_remote_callbacks, &trace_buffer, NULL, 0);
+}
diff --git a/arch/arm64/kvm/hyp_trace.h b/arch/arm64/kvm/hyp_trace.h
new file mode 100644
index 000000000000..54d8b1f44ca5
--- /dev/null
+++ b/arch/arm64/kvm/hyp_trace.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __ARM64_KVM_HYP_TRACE_H__
+#define __ARM64_KVM_HYP_TRACE_H__
+
+#ifdef CONFIG_PKVM_TRACING
+/* Register the pKVM hypervisor trace remote; 0 on success or if not in protected mode */
+int hyp_trace_init(void);
+#else
+/* No-op stub when pKVM tracing is compiled out */
+static inline int hyp_trace_init(void) { return 0; }
+#endif
+#endif
-- 
2.51.2.1041.gc1ab5b90ca-goog
Re: [PATCH v8 22/28] KVM: arm64: Add trace remote for the pKVM hyp
Posted by Marc Zyngier 1 month ago
On Fri, 07 Nov 2025 09:38:34 +0000,
Vincent Donnefort <vdonnefort@google.com> wrote:
> 
> When running with KVM protected mode, the hypervisor is able to generate
> events into tracefs compatible ring-buffers. Create a trace remote so
> the kernel can read those buffers.
> 
> This currently doesn't provide any event support which will come later.
> 
> Signed-off-by: Vincent Donnefort <vdonnefort@google.com>
> 
> diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
> index 580426cdbe77..64db254f0448 100644
> --- a/arch/arm64/kvm/Kconfig
> +++ b/arch/arm64/kvm/Kconfig
> @@ -87,6 +87,7 @@ config PKVM_TRACING
>  	bool
>  	depends on KVM
>  	depends on TRACING
> +	select TRACE_REMOTE
>  	select SIMPLE_RING_BUFFER
>  	default y
>  
> diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
> index 3ebc0570345c..2c184e3abd8e 100644
> --- a/arch/arm64/kvm/Makefile
> +++ b/arch/arm64/kvm/Makefile
> @@ -30,6 +30,8 @@ kvm-$(CONFIG_HW_PERF_EVENTS)  += pmu-emul.o pmu.o
>  kvm-$(CONFIG_ARM64_PTR_AUTH)  += pauth.o
>  kvm-$(CONFIG_PTDUMP_STAGE2_DEBUGFS) += ptdump.o
>  
> +kvm-$(CONFIG_PKVM_TRACING) += hyp_trace.o
> +
>  always-y := hyp_constants.h hyp-constants.s
>  
>  define rule_gen_hyp_constants
> diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
> index 870953b4a8a7..c485e54417e2 100644
> --- a/arch/arm64/kvm/arm.c
> +++ b/arch/arm64/kvm/arm.c
> @@ -24,6 +24,7 @@
>  
>  #define CREATE_TRACE_POINTS
>  #include "trace_arm.h"
> +#include "hyp_trace.h"
>  
>  #include <linux/uaccess.h>
>  #include <asm/ptrace.h>
> @@ -2345,6 +2346,9 @@ static int __init init_subsystems(void)
>  
>  	kvm_register_perf_callbacks(NULL);
>  
> +	err = hyp_trace_init();
> +	if (err)
> +		kvm_err("Failed to initialize Hyp tracing\n");
>  out:
>  	if (err)
>  		hyp_cpu_pm_exit();
> diff --git a/arch/arm64/kvm/hyp_trace.c b/arch/arm64/kvm/hyp_trace.c
> new file mode 100644
> index 000000000000..98051c3fb0c2
> --- /dev/null
> +++ b/arch/arm64/kvm/hyp_trace.c
> @@ -0,0 +1,210 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Copyright (C) 2025 Google LLC
> + * Author: Vincent Donnefort <vdonnefort@google.com>
> + */
> +
> +#include <linux/trace_remote.h>
> +#include <linux/simple_ring_buffer.h>
> +
> +#include <asm/kvm_host.h>
> +#include <asm/kvm_hyptrace.h>
> +
> +#include "hyp_trace.h"
> +
> +/* Access to this struct within the trace_remote_callbacks are protected by the trace_remote lock */
> +static struct hyp_trace_buffer {
> +	struct hyp_trace_desc	*desc;
> +	size_t			desc_size;
> +} trace_buffer;
> +
> +static int hyp_trace_buffer_alloc_bpages_backing(struct hyp_trace_buffer *trace_buffer, size_t size)
> +{
> +	int nr_bpages = (PAGE_ALIGN(size) / PAGE_SIZE) + 1;
> +	size_t backing_size;
> +	void *start;
> +
> +	backing_size = PAGE_ALIGN(sizeof(struct simple_buffer_page) * nr_bpages *
> +				  num_possible_cpus());
> +
> +	start = alloc_pages_exact(backing_size, GFP_KERNEL_ACCOUNT);
> +	if (!start)
> +		return -ENOMEM;
> +
> +	trace_buffer->desc->bpages_backing_start = (unsigned long)start;
> +	trace_buffer->desc->bpages_backing_size = backing_size;
> +
> +	return 0;
> +}
> +
> +static void hyp_trace_buffer_free_bpages_backing(struct hyp_trace_buffer *trace_buffer)
> +{
> +	free_pages_exact((void *)trace_buffer->desc->bpages_backing_start,
> +			 trace_buffer->desc->bpages_backing_size);
> +}
> +
> +static int __load_page(unsigned long va)
> +{
> +	return kvm_call_hyp_nvhe(__pkvm_host_share_hyp, virt_to_pfn((void *)va), 1);
> +}

I struggle a bit with the nomenclature here. Why is that called
"load"? Surely this is a "map" operation, right? Is that because this
is called at "vcpu load" time? Something else?

Also, how is this working without pKVM, in a normal nVHE environment?
Being able to trace in nVHE is a basic requirement, and I don't see
how this works here.

Thanks,

	M.

-- 
Without deviation from the norm, progress is not possible.
Re: [PATCH v8 22/28] KVM: arm64: Add trace remote for the pKVM hyp
Posted by Vincent Donnefort 4 weeks, 1 day ago
On Wed, Nov 19, 2025 at 05:31:30PM +0000, Marc Zyngier wrote:
> On Fri, 07 Nov 2025 09:38:34 +0000,
> Vincent Donnefort <vdonnefort@google.com> wrote:
> > 
> > When running with KVM protected mode, the hypervisor is able to generate
> > events into tracefs compatible ring-buffers. Create a trace remote so
> > the kernel can read those buffers.
> > 
> > This currently doesn't provide any event support which will come later.
> > 
> > Signed-off-by: Vincent Donnefort <vdonnefort@google.com>
> > 
> > diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
> > index 580426cdbe77..64db254f0448 100644
> > --- a/arch/arm64/kvm/Kconfig
> > +++ b/arch/arm64/kvm/Kconfig
> > @@ -87,6 +87,7 @@ config PKVM_TRACING
> >  	bool
> >  	depends on KVM
> >  	depends on TRACING
> > +	select TRACE_REMOTE
> >  	select SIMPLE_RING_BUFFER
> >  	default y
> >

[...]

> > +static void hyp_trace_buffer_free_bpages_backing(struct hyp_trace_buffer *trace_buffer)
> > +{
> > +	free_pages_exact((void *)trace_buffer->desc->bpages_backing_start,
> > +			 trace_buffer->desc->bpages_backing_size);
> > +}
> > +
> > +static int __load_page(unsigned long va)
> > +{
> > +	return kvm_call_hyp_nvhe(__pkvm_host_share_hyp, virt_to_pfn((void *)va), 1);
> > +}
> 
> I struggle a bit with the nomenclature here. Why is that called
> "load"? Surely this is a "map" operation, right? Is that because this
> is called at "vcpu load" time? Something else?

I called "load" the operation of getting the tracing buffer ready. But for this
implementation-specific part, I can use map/unmap here. (same for
hyp_trace_buffer_map_pages/hyp_trace_buffer_unmap_pages)

> 
> Also, how is this working without pKVM, in a normal nVHE environment?
> Being able to trace in nVHE is a basic requirement, and I don't see
> how this works here.

I can probably make it work with nVHE as well.

> 
> Thanks,
> 
> 	M.
> 
> -- 
> Without deviation from the norm, progress is not possible.