[PATCH v5 3/3] KVM: x86: add new nested vmexit tracepoints

Maxim Levitsky posted 3 patches 1 year, 5 months ago
[PATCH v5 3/3] KVM: x86: add new nested vmexit tracepoints
Posted by Maxim Levitsky 1 year, 5 months ago
Add 3 new tracepoints for nested VM exits which are intended
to capture extra information to gain insights about the nested guest
behavior.

The new tracepoints are:

- kvm_nested_msr
- kvm_nested_hypercall

These tracepoints capture extra register state to be able to know
which MSR or which hypercall was done.

- kvm_nested_page_fault

This tracepoint allows capturing extra info about which host page-fault
error code caused the nested page fault.

Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
---
 arch/x86/kvm/svm/nested.c | 22 +++++++++++
 arch/x86/kvm/trace.h      | 82 +++++++++++++++++++++++++++++++++++++--
 arch/x86/kvm/vmx/nested.c | 27 +++++++++++++
 arch/x86/kvm/x86.c        |  3 ++
 4 files changed, 131 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 6f704c1037e51..2020307481553 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -38,6 +38,8 @@ static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 	struct vmcb *vmcb = svm->vmcb;
+	u64 host_error_code = vmcb->control.exit_info_1;
+
 
 	if (vmcb->control.exit_code != SVM_EXIT_NPF) {
 		/*
@@ -48,11 +50,15 @@ static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
 		vmcb->control.exit_code_hi = 0;
 		vmcb->control.exit_info_1 = (1ULL << 32);
 		vmcb->control.exit_info_2 = fault->address;
+		host_error_code = 0;
 	}
 
 	vmcb->control.exit_info_1 &= ~0xffffffffULL;
 	vmcb->control.exit_info_1 |= fault->error_code;
 
+	trace_kvm_nested_page_fault(fault->address, host_error_code,
+				    fault->error_code);
+
 	nested_svm_vmexit(svm);
 }
 
@@ -1126,6 +1132,22 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
 				       vmcb12->control.exit_int_info_err,
 				       KVM_ISA_SVM);
 
+	/* Collect some info about nested VM exits */
+	switch (vmcb12->control.exit_code) {
+	case SVM_EXIT_MSR:
+		trace_kvm_nested_msr(vmcb12->control.exit_info_1 == 1,
+				     kvm_rcx_read(vcpu),
+				     (vmcb12->save.rax & 0xFFFFFFFFull) |
+				     (((u64)kvm_rdx_read(vcpu) << 32)));
+		break;
+	case SVM_EXIT_VMMCALL:
+		trace_kvm_nested_hypercall(vmcb12->save.rax,
+					   kvm_rbx_read(vcpu),
+					   kvm_rcx_read(vcpu),
+					   kvm_rdx_read(vcpu));
+		break;
+	}
+
 	kvm_vcpu_unmap(vcpu, &map, true);
 
 	nested_svm_transition_tlb_flush(vcpu);
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 5a5b7757e8456..6074b4f85d5e2 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -613,7 +613,7 @@ TRACE_EVENT(kvm_pv_eoi,
 );
 
 /*
- * Tracepoint for nested VMRUN
+ * Tracepoint for nested VMRUN/VMENTER
  */
 TRACE_EVENT(kvm_nested_vmenter,
 	    TP_PROTO(__u64 rip, __u64 vmcb, __u64 nested_rip, __u32 int_ctl,
@@ -746,8 +746,84 @@ TRACE_EVENT(kvm_nested_intr_vmexit,
 	TP_printk("rip: 0x%016llx", __entry->rip)
 );
 
+
 /*
- * Tracepoint for nested #vmexit because of interrupt pending
+ * Tracepoint for nested guest MSR access.
+ */
+TRACE_EVENT(kvm_nested_msr,
+	TP_PROTO(bool write, u32 ecx, u64 data),
+	TP_ARGS(write, ecx, data),
+
+	TP_STRUCT__entry(
+		__field(	bool,		write		)
+		__field(	u32,		ecx		)
+		__field(	u64,		data		)
+	),
+
+	TP_fast_assign(
+		__entry->write		= write;
+		__entry->ecx		= ecx;
+		__entry->data		= data;
+	),
+
+	TP_printk("msr_%s %x = 0x%llx",
+		  __entry->write ? "write" : "read",
+		  __entry->ecx, __entry->data)
+);
+
+/*
+ * Tracepoint for nested hypercalls, capturing generic info about the
+ * hypercall
+ */
+
+TRACE_EVENT(kvm_nested_hypercall,
+	TP_PROTO(u64 rax, u64 rbx, u64 rcx, u64 rdx),
+	TP_ARGS(rax, rbx, rcx, rdx),
+
+	TP_STRUCT__entry(
+		__field(	u64, 	rax	)
+		__field(	u64,	rbx	)
+		__field(	u64,	rcx	)
+		__field(	u64,	rdx	)
+	),
+
+	TP_fast_assign(
+		__entry->rax		= rax;
+		__entry->rbx		= rbx;
+		__entry->rcx		= rcx;
+		__entry->rdx		= rdx;
+	),
+
+	TP_printk("rax 0x%llx rbx 0x%llx rcx 0x%llx rdx 0x%llx",
+		 __entry->rax, __entry->rbx, __entry->rcx,  __entry->rdx)
+);
+
+
+TRACE_EVENT(kvm_nested_page_fault,
+	TP_PROTO(u64 gpa, u64 host_error_code, u64 guest_error_code),
+	TP_ARGS(gpa, host_error_code, guest_error_code),
+
+	TP_STRUCT__entry(
+		__field(	u64,		gpa			)
+		__field(	u64,		host_error_code		)
+		__field(	u64,		guest_error_code	)
+	),
+
+	TP_fast_assign(
+		__entry->gpa			= gpa;
+		__entry->host_error_code	= host_error_code;
+		__entry->guest_error_code	= guest_error_code;
+	),
+
+	TP_printk("gpa 0x%llx host err 0x%llx guest err 0x%llx",
+		  __entry->gpa,
+		  __entry->host_error_code,
+		  __entry->guest_error_code)
+);
+
+
+/*
+ * Tracepoint for invlpga
  */
 TRACE_EVENT(kvm_invlpga,
 	    TP_PROTO(__u64 rip, unsigned int asid, u64 address),
@@ -770,7 +846,7 @@ TRACE_EVENT(kvm_invlpga,
 );
 
 /*
- * Tracepoint for nested #vmexit because of interrupt pending
+ * Tracepoint for skinit
  */
 TRACE_EVENT(kvm_skinit,
 	    TP_PROTO(__u64 rip, __u32 slb),
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 2392a7ef254df..3881a02694fc2 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -454,6 +454,16 @@ static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu,
 		 */
 		nested_ept_invalidate_addr(vcpu, vmcs12->ept_pointer,
 					   fault->address);
+
+		/*
+		 * vmx_get_exit_qual() returns the original exit qualification,
+		 * before it was overridden with exit qualification that
+		 * is about to be injected to the guest.
+		 */
+
+		trace_kvm_nested_page_fault(fault->address,
+				vmx_get_exit_qual(vcpu),
+				exit_qualification);
 	}
 
 	nested_vmx_vmexit(vcpu, vm_exit_reason, 0, exit_qualification);
@@ -4985,6 +4995,23 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
 						       vmcs12->vm_exit_intr_error_code,
 						       KVM_ISA_VMX);
 
+		switch ((u16)vmcs12->vm_exit_reason) {
+		case EXIT_REASON_MSR_READ:
+		case EXIT_REASON_MSR_WRITE:
+			trace_kvm_nested_msr(vmcs12->vm_exit_reason == EXIT_REASON_MSR_WRITE,
+					     kvm_rcx_read(vcpu),
+					     (kvm_rax_read(vcpu) & 0xFFFFFFFFull) |
+					     (((u64)kvm_rdx_read(vcpu)) << 32));
+			break;
+		case EXIT_REASON_VMCALL:
+			trace_kvm_nested_hypercall(kvm_rax_read(vcpu),
+						   kvm_rbx_read(vcpu),
+						   kvm_rcx_read(vcpu),
+						   kvm_rdx_read(vcpu));
+			break;
+
+		}
+
 		load_vmcs12_host_state(vcpu, vmcs12);
 
 		return;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index f72e5d89e942d..cb01cf2ad6ac9 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -14032,6 +14032,9 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmenter);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit_inject);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_hypercall);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_page_fault);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_msr);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmenter_failed);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit);
-- 
2.26.3
Re: [PATCH v5 3/3] KVM: x86: add new nested vmexit tracepoints
Posted by Paolo Bonzini 1 year, 1 month ago
On 9/10/24 22:03, Maxim Levitsky wrote:
> Add 3 new tracepoints for nested VM exits which are intended
> to capture extra information to gain insights about the nested guest
> behavior.
> 
> The new tracepoints are:
> 
> - kvm_nested_msr
> - kvm_nested_hypercall
> 
> These tracepoints capture extra register state to be able to know
> which MSR or which hypercall was done.
> 
> - kvm_nested_page_fault
> 
> This tracepoint allows to capture extra info about which host pagefault
> error code caused the nested page fault.
> 
> Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
> ---
>   arch/x86/kvm/svm/nested.c | 22 +++++++++++
>   arch/x86/kvm/trace.h      | 82 +++++++++++++++++++++++++++++++++++++--
>   arch/x86/kvm/vmx/nested.c | 27 +++++++++++++
>   arch/x86/kvm/x86.c        |  3 ++
>   4 files changed, 131 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
> index 6f704c1037e51..2020307481553 100644
> --- a/arch/x86/kvm/svm/nested.c
> +++ b/arch/x86/kvm/svm/nested.c
> @@ -38,6 +38,8 @@ static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
>   {
>   	struct vcpu_svm *svm = to_svm(vcpu);
>   	struct vmcb *vmcb = svm->vmcb;
> +	u64 host_error_code = vmcb->control.exit_info_1;
> +
>   
>   	if (vmcb->control.exit_code != SVM_EXIT_NPF) {
>   		/*
> @@ -48,11 +50,15 @@ static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
>   		vmcb->control.exit_code_hi = 0;
>   		vmcb->control.exit_info_1 = (1ULL << 32);
>   		vmcb->control.exit_info_2 = fault->address;
> +		host_error_code = 0;
>   	}
>   
>   	vmcb->control.exit_info_1 &= ~0xffffffffULL;
>   	vmcb->control.exit_info_1 |= fault->error_code;
>   
> +	trace_kvm_nested_page_fault(fault->address, host_error_code,
> +				    fault->error_code);
> +

I disagree with Sean about trace_kvm_nested_page_fault.  It's a useful 
addition and it is easier to understand what's happening with a 
dedicated tracepoint (especially on VMX).

Tracepoint are not an exact science and they aren't entirely kernel API. 
  At least they can just go away at any time (changing them is a lot 
more tricky, but their presence is not guaranteed).  The one below has 
the slight ugliness of having to do some computation in 
nested_svm_vmexit(), this one should go in.

>   	nested_svm_vmexit(svm);
>   }
>   
> @@ -1126,6 +1132,22 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
>   				       vmcb12->control.exit_int_info_err,
>   				       KVM_ISA_SVM);
>   
> +	/* Collect some info about nested VM exits */
> +	switch (vmcb12->control.exit_code) {
> +	case SVM_EXIT_MSR:
> +		trace_kvm_nested_msr(vmcb12->control.exit_info_1 == 1,
> +				     kvm_rcx_read(vcpu),
> +				     (vmcb12->save.rax & 0xFFFFFFFFull) |
> +				     (((u64)kvm_rdx_read(vcpu) << 32)));
> +		break;
> +	case SVM_EXIT_VMMCALL:
> +		trace_kvm_nested_hypercall(vmcb12->save.rax,
> +					   kvm_rbx_read(vcpu),
> +					   kvm_rcx_read(vcpu),
> +					   kvm_rdx_read(vcpu));
> +		break;

Here I probably would have preferred an unconditional tracepoint giving 
RAX/RBX/RCX/RDX after a nested vmexit.  This is not exactly what Sean 
wanted but perhaps it strikes a middle ground?  I know you wrote this 
for a debugging tool, do you really need to have everything in a single 
tracepoint, or can you correlate the existing exit tracepoint with this 
hypothetical trace_kvm_nested_exit_regs, to pick RDMSR vs. WRMSR?

Paolo
Re: [PATCH v5 3/3] KVM: x86: add new nested vmexit tracepoints
Posted by Maxim Levitsky 1 year, 1 month ago
On Thu, 2024-12-19 at 18:33 +0100, Paolo Bonzini wrote:
> On 9/10/24 22:03, Maxim Levitsky wrote:
> > Add 3 new tracepoints for nested VM exits which are intended
> > to capture extra information to gain insights about the nested guest
> > behavior.
> > 
> > The new tracepoints are:
> > 
> > - kvm_nested_msr
> > - kvm_nested_hypercall
> > 
> > These tracepoints capture extra register state to be able to know
> > which MSR or which hypercall was done.
> > 
> > - kvm_nested_page_fault
> > 
> > This tracepoint allows to capture extra info about which host pagefault
> > error code caused the nested page fault.
> > 
> > Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
> > ---
> >   arch/x86/kvm/svm/nested.c | 22 +++++++++++
> >   arch/x86/kvm/trace.h      | 82 +++++++++++++++++++++++++++++++++++++--
> >   arch/x86/kvm/vmx/nested.c | 27 +++++++++++++
> >   arch/x86/kvm/x86.c        |  3 ++
> >   4 files changed, 131 insertions(+), 3 deletions(-)
> > 
> > diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
> > index 6f704c1037e51..2020307481553 100644
> > --- a/arch/x86/kvm/svm/nested.c
> > +++ b/arch/x86/kvm/svm/nested.c
> > @@ -38,6 +38,8 @@ static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
> >   {
> >   	struct vcpu_svm *svm = to_svm(vcpu);
> >   	struct vmcb *vmcb = svm->vmcb;
> > +	u64 host_error_code = vmcb->control.exit_info_1;
> > +
> >   
> >   	if (vmcb->control.exit_code != SVM_EXIT_NPF) {
> >   		/*
> > @@ -48,11 +50,15 @@ static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
> >   		vmcb->control.exit_code_hi = 0;
> >   		vmcb->control.exit_info_1 = (1ULL << 32);
> >   		vmcb->control.exit_info_2 = fault->address;
> > +		host_error_code = 0;
> >   	}
> >   
> >   	vmcb->control.exit_info_1 &= ~0xffffffffULL;
> >   	vmcb->control.exit_info_1 |= fault->error_code;
> >   
> > +	trace_kvm_nested_page_fault(fault->address, host_error_code,
> > +				    fault->error_code);
> > +
> 
> I disagree with Sean about trace_kvm_nested_page_fault.  It's a useful 
> addition and it is easier to understand what's happening with a 
> dedicated tracepoint (especially on VMX).
> 
> Tracepoint are not an exact science and they aren't entirely kernel API. 
>   At least they can just go away at any time (changing them is a lot 
> more tricky, but their presence is not guaranteed).  The one below has 
> the slight ugliness of having to do some computation in 
> nested_svm_vmexit(), this one should go in.
> 
> >   	nested_svm_vmexit(svm);
> >   }
> >   
> > @@ -1126,6 +1132,22 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
> >   				       vmcb12->control.exit_int_info_err,
> >   				       KVM_ISA_SVM);
> >   
> > +	/* Collect some info about nested VM exits */
> > +	switch (vmcb12->control.exit_code) {
> > +	case SVM_EXIT_MSR:
> > +		trace_kvm_nested_msr(vmcb12->control.exit_info_1 == 1,
> > +				     kvm_rcx_read(vcpu),
> > +				     (vmcb12->save.rax & 0xFFFFFFFFull) |
> > +				     (((u64)kvm_rdx_read(vcpu) << 32)));
> > +		break;
> > +	case SVM_EXIT_VMMCALL:
> > +		trace_kvm_nested_hypercall(vmcb12->save.rax,
> > +					   kvm_rbx_read(vcpu),
> > +					   kvm_rcx_read(vcpu),
> > +					   kvm_rdx_read(vcpu));
> > +		break;
> 
> Here I probably would have preferred an unconditional tracepoint giving 
> RAX/RBX/RCX/RDX after a nested vmexit.  This is not exactly what Sean 
> wanted but perhaps it strikes a middle ground?  I know you wrote this 
> for a debugging tool, do you really need to have everything in a single 
> tracepoint, or can you correlate the existing exit tracepoint with this 
> hypothetical trace_kvm_nested_exit_regs, to pick RDMSR vs. WRMSR?


Hi!

If the new trace_kvm_nested_exit_regs tracepoint has a VM exit number argument, then
I can enable this new tracepoint twice with a different filter (vm_exit_num == msr and vm_exit_num == vmcall),
and each instance will count the events that I need.

So this can work.

Thanks!
Best regards,
	Maxim Levitsky

> 
> Paolo
>
Re: [PATCH v5 3/3] KVM: x86: add new nested vmexit tracepoints
Posted by Paolo Bonzini 1 year, 1 month ago
On 12/19/24 18:49, Maxim Levitsky wrote:
>> Here I probably would have preferred an unconditional tracepoint giving
>> RAX/RBX/RCX/RDX after a nested vmexit.  This is not exactly what Sean
>> wanted but perhaps it strikes a middle ground?  I know you wrote this
>> for a debugging tool, do you really need to have everything in a single
>> tracepoint, or can you correlate the existing exit tracepoint with this
>> hypothetical trace_kvm_nested_exit_regs, to pick RDMSR vs. WRMSR?
> 
> Hi!
> 
> If the new trace_kvm_nested_exit_regs tracepoint has a VM exit number argument, then
> I can enable this new tracepoint twice with a different filter (vm_exit_num == msr and vm_exit_num == vmcall),
> and each instance will count the events that I need.
> 
> So this can work.
Ok, thanks.  On one hand it may make sense to have trace_kvm_exit_regs 
and trace_kvm_nested_exit_regs (you can even extend the 
TRACE_EVENT_KVM_EXIT macro to generate both the exit and the exit_regs 
tracepoint).  On the other hand it seems to me that this new tracepoint 
is kinda reinventing the wheel; your patch adding nested equivalents of 
trace_kvm_hypercall and trace_kvm_msr seems more obvious to me.

I see Sean's point in not wanting one-off tracepoints, on the other hand 
there is value in having similar tracepoints for the L1->L0 and L2->L0 
cases.  I'll let him choose between the two possibilities (a new 
*_exit_regs pair, or just apply this patch) but I think there should be 
one of these two.

Paolo
Re: [PATCH v5 3/3] KVM: x86: add new nested vmexit tracepoints
Posted by Sean Christopherson 1 year ago
On Thu, Dec 19, 2024, Paolo Bonzini wrote:
> On 12/19/24 18:49, Maxim Levitsky wrote:
> > > Here I probably would have preferred an unconditional tracepoint giving
> > > RAX/RBX/RCX/RDX after a nested vmexit.  This is not exactly what Sean
> > > wanted but perhaps it strikes a middle ground?  I know you wrote this
> > > for a debugging tool, do you really need to have everything in a single
> > > tracepoint, or can you correlate the existing exit tracepoint with this
> > > hypothetical trace_kvm_nested_exit_regs, to pick RDMSR vs. WRMSR?
> > 
> > Hi!
> > 
> > If the new trace_kvm_nested_exit_regs tracepoint has a VM exit number
> > argument, then I can enable this new tracepoint twice with a different
> > filter (vm_exit_num number == msr and vm_exit_num == vmcall), and each
> > instance will count the events that I need.
> > 
> > So this can work.
> Ok, thanks.  On one hand it may make sense to have trace_kvm_exit_regs and
> trace_kvm_nested_exit_regs (you can even extend the TRACE_EVENT_KVM_EXIT
> macro to generate both the exit and the exit_regs tracepoint).  On the other
> hand it seems to me that this new tracepoint is kinda reinventing the wheel;
> your patch adding nested equivalents of trace_kvm_hypercall and
> trace_kvm_msr seems more obvious to me.
> 
> I see Sean's point in not wanting one-off tracepoints, on the other hand
> there is value in having similar tracepoints for the L1->L0 and L2->L0
> cases.

I don't understand why we want two (or three, or five) tracepoints for the same
thing.  I want to go the opposite direction and (a) delete kvm_nested_vmexit
and then (b) rename kvm_nested_vmexit_inject => kvm_nested_vmexit so that it
pairs with kvm_nested_vmenter.

Similary, having kvm_nested_intr_vmexit is asinine when kvm_nested_vmexit_inject
captures *more* information about the IRQ itself.

I don't see the point of trace_kvm_nested_exit_regs.  Except for L1 vs. L2, it's
redundant.   kvm_nested_vmexit_inject and kvm_nested_vmenter are useful because
they capture novel information.

> I'll let him choose between the two possibilities (a new *_exit_regs
> pair, or just apply this patch) but I think there should be one of these
> two.

Anything but a pair.  Why can't we capture L1 vs. L2 in the tracepoints and call
it a day?
Re: [PATCH v5 3/3] KVM: x86: add new nested vmexit tracepoints
Posted by Sean Christopherson 1 year, 1 month ago
On Tue, Sep 10, 2024, Maxim Levitsky wrote:
> Add 3 new tracepoints for nested VM exits which are intended
> to capture extra information to gain insights about the nested guest
> behavior.
> 
> The new tracepoints are:
> 
> - kvm_nested_msr
> - kvm_nested_hypercall

I 100% agree that not having register state in the exit tracepoints is obnoxious,
but I don't think we should add one-off tracepoints for the most annoying cases.
I would much prefer to figure out a way to capture register state in kvm_entry
and kvm_exit.  E.g. I've lost track of the number of times I've observed an MSR
exit without having trace_kvm_msr enabled.

One idea would be to capture E{A,B,C,D}X, which would cover MSRs, CPUID, and
most hypercalls.  And then we might even be able to drop the dedicated MSR and
CPUID tracepoints (not sure if that's a good idea).

Side topic, arch/s390/kvm/trace.h has the concept of COMMON information that is
captured for multiple tracepoints.  I haven't looked closely, but I gotta imagine
we can/should use a similar approach for x86.

> These tracepoints capture extra register state to be able to know
> which MSR or which hypercall was done.
> 
> - kvm_nested_page_fault
> 
> This tracepoint allows to capture extra info about which host pagefault
> error code caused the nested page fault.

The host error code, a.k.a. qualification info, is readily available in the
kvm_exit (or nested variant) tracepoint.  I don't think letting userspace skip a
tracepoint that's probably already enabled is worth the extra code to support
this tracepoint.  The nested_svm_inject_npf_exit() code in particular is wonky,
and I think it's a good example of why userspace "needs" trace_kvm_exit, e.g. to
observe that a nested stage-2 page fault didn't originate from a hardware stage-2
fault.