From: Michal Leszczynski <michal.leszczynski@cert.pl>
Use Intel Processor Trace feature to provide vmtrace_pt_*
interface for HVM/VMX.
Signed-off-by: Michal Leszczynski <michal.leszczynski@cert.pl>
---
xen/arch/x86/hvm/vmx/vmx.c | 110 +++++++++++++++++++++++++++++
xen/include/asm-x86/hvm/vmx/vmcs.h | 3 +
xen/include/asm-x86/hvm/vmx/vmx.h | 14 ++++
3 files changed, 127 insertions(+)
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index cc6d4ece22..63a5a76e16 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -428,6 +428,56 @@ static void vmx_domain_relinquish_resources(struct domain *d)
vmx_free_vlapic_mapping(d);
}
+static int vmx_init_pt(struct vcpu *v)
+{
+ int rc;
+ uint64_t size = v->domain->processor_trace_buf_kb * KB(1);
+
+ if ( !v->vmtrace.pt_buf || !size )
+ return -EINVAL;
+
+ /*
+ * We don't accept trace buffer size smaller than single page
+ * and the upper bound is defined as 4GB in the specification.
+ * The buffer size must be also a power of 2.
+ */
+ if ( size < PAGE_SIZE || size > GB(4) || (size & (size - 1)) )
+ return -EINVAL;
+
+ v->arch.hvm.vmx.ipt_state = xzalloc(struct ipt_state);
+
+ if ( !v->arch.hvm.vmx.ipt_state )
+ return -ENOMEM;
+
+ v->arch.hvm.vmx.ipt_state->output_base =
+ page_to_maddr(v->vmtrace.pt_buf);
+ v->arch.hvm.vmx.ipt_state->output_mask.raw = size - 1;
+
+ rc = vmx_add_host_load_msr(v, MSR_RTIT_CTL, 0);
+
+ if ( rc )
+ return rc;
+
+ rc = vmx_add_guest_msr(v, MSR_RTIT_CTL,
+ RTIT_CTL_TRACE_EN | RTIT_CTL_OS |
+ RTIT_CTL_USR | RTIT_CTL_BRANCH_EN);
+
+ if ( rc )
+ return rc;
+
+ return 0;
+}
+
+static int vmx_destroy_pt(struct vcpu* v)
+{
+ if ( v->arch.hvm.vmx.ipt_state )
+ xfree(v->arch.hvm.vmx.ipt_state);
+
+ v->arch.hvm.vmx.ipt_state = NULL;
+ return 0;
+}
+
+
static int vmx_vcpu_initialise(struct vcpu *v)
{
int rc;
@@ -471,6 +521,14 @@ static int vmx_vcpu_initialise(struct vcpu *v)
vmx_install_vlapic_mapping(v);
+ if ( v->domain->processor_trace_buf_kb )
+ {
+ rc = vmx_init_pt(v);
+
+ if ( rc )
+ return rc;
+ }
+
return 0;
}
@@ -483,6 +541,7 @@ static void vmx_vcpu_destroy(struct vcpu *v)
* prior to vmx_domain_destroy so we need to disable PML for each vcpu
* separately here.
*/
+ vmx_destroy_pt(v);
vmx_vcpu_disable_pml(v);
vmx_destroy_vmcs(v);
passive_domain_destroy(v);
@@ -513,6 +572,18 @@ static void vmx_save_guest_msrs(struct vcpu *v)
* be updated at any time via SWAPGS, which we cannot trap.
*/
v->arch.hvm.vmx.shadow_gs = rdgsshadow();
+
+ if ( unlikely(v->arch.hvm.vmx.ipt_state &&
+ v->arch.hvm.vmx.ipt_state->active) )
+ {
+ uint64_t rtit_ctl;
+ rdmsrl(MSR_RTIT_CTL, rtit_ctl);
+ BUG_ON(rtit_ctl & RTIT_CTL_TRACE_EN);
+
+ rdmsrl(MSR_RTIT_STATUS, v->arch.hvm.vmx.ipt_state->status);
+ rdmsrl(MSR_RTIT_OUTPUT_MASK,
+ v->arch.hvm.vmx.ipt_state->output_mask.raw);
+ }
}
static void vmx_restore_guest_msrs(struct vcpu *v)
@@ -524,6 +595,17 @@ static void vmx_restore_guest_msrs(struct vcpu *v)
if ( cpu_has_msr_tsc_aux )
wrmsr_tsc_aux(v->arch.msrs->tsc_aux);
+
+ if ( unlikely(v->arch.hvm.vmx.ipt_state &&
+ v->arch.hvm.vmx.ipt_state->active) )
+ {
+ wrmsrl(MSR_RTIT_OUTPUT_BASE,
+ v->arch.hvm.vmx.ipt_state->output_base);
+ wrmsrl(MSR_RTIT_OUTPUT_MASK,
+ v->arch.hvm.vmx.ipt_state->output_mask.raw);
+ wrmsrl(MSR_RTIT_STATUS,
+ v->arch.hvm.vmx.ipt_state->status);
+ }
}
void vmx_update_cpu_exec_control(struct vcpu *v)
@@ -2240,6 +2322,25 @@ static bool vmx_get_pending_event(struct vcpu *v, struct x86_event *info)
return true;
}
+static int vmx_control_pt(struct vcpu *v, bool enable)
+{
+ if ( !v->arch.hvm.vmx.ipt_state )
+ return -EINVAL;
+
+ v->arch.hvm.vmx.ipt_state->active = enable;
+ return 0;
+}
+
+static int vmx_get_pt_offset(struct vcpu *v, uint64_t *offset, uint64_t *size)
+{
+ if ( !v->arch.hvm.vmx.ipt_state )
+ return -EINVAL;
+
+ *offset = v->arch.hvm.vmx.ipt_state->output_mask.offset;
+ *size = v->arch.hvm.vmx.ipt_state->output_mask.size + 1;
+ return 0;
+}
+
static struct hvm_function_table __initdata vmx_function_table = {
.name = "VMX",
.cpu_up_prepare = vmx_cpu_up_prepare,
@@ -2295,6 +2396,8 @@ static struct hvm_function_table __initdata vmx_function_table = {
.altp2m_vcpu_update_vmfunc_ve = vmx_vcpu_update_vmfunc_ve,
.altp2m_vcpu_emulate_ve = vmx_vcpu_emulate_ve,
.altp2m_vcpu_emulate_vmfunc = vmx_vcpu_emulate_vmfunc,
+ .vmtrace_control_pt = vmx_control_pt,
+ .vmtrace_get_pt_offset = vmx_get_pt_offset,
.tsc_scaling = {
.max_ratio = VMX_TSC_MULTIPLIER_MAX,
},
@@ -3674,6 +3777,13 @@ void vmx_vmexit_handler(struct cpu_user_regs *regs)
hvm_invalidate_regs_fields(regs);
+ if ( unlikely(v->arch.hvm.vmx.ipt_state &&
+ v->arch.hvm.vmx.ipt_state->active) )
+ {
+ rdmsrl(MSR_RTIT_OUTPUT_MASK,
+ v->arch.hvm.vmx.ipt_state->output_mask.raw);
+ }
+
if ( paging_mode_hap(v->domain) )
{
/*
diff --git a/xen/include/asm-x86/hvm/vmx/vmcs.h b/xen/include/asm-x86/hvm/vmx/vmcs.h
index 6153ba6769..65971fa6ad 100644
--- a/xen/include/asm-x86/hvm/vmx/vmcs.h
+++ b/xen/include/asm-x86/hvm/vmx/vmcs.h
@@ -186,6 +186,9 @@ struct vmx_vcpu {
* pCPU and wakeup the related vCPU.
*/
struct pi_blocking_vcpu pi_blocking;
+
+ /* State of processor trace feature */
+ struct ipt_state *ipt_state;
};
int vmx_create_vmcs(struct vcpu *v);
diff --git a/xen/include/asm-x86/hvm/vmx/vmx.h b/xen/include/asm-x86/hvm/vmx/vmx.h
index 111ccd7e61..8d7c67e43d 100644
--- a/xen/include/asm-x86/hvm/vmx/vmx.h
+++ b/xen/include/asm-x86/hvm/vmx/vmx.h
@@ -689,4 +689,18 @@ typedef union ldt_or_tr_instr_info {
};
} ldt_or_tr_instr_info_t;
+/* Processor Trace state per vCPU */
+struct ipt_state {
+ bool active;
+ uint64_t status;
+ uint64_t output_base;
+ union {
+ uint64_t raw;
+ struct {
+ uint32_t size;
+ uint32_t offset;
+ };
+ } output_mask;
+};
+
#endif /* __ASM_X86_HVM_VMX_VMX_H__ */
--
2.17.1

On Tue, Jul 07, 2020 at 09:39:46PM +0200, Michał Leszczyński wrote:
> From: Michal Leszczynski <michal.leszczynski@cert.pl>
>
> Use Intel Processor Trace feature to provide vmtrace_pt_*
> interface for HVM/VMX.
>
> Signed-off-by: Michal Leszczynski <michal.leszczynski@cert.pl>
> ---
> xen/arch/x86/hvm/vmx/vmx.c | 110 +++++++++++++++++++++++++++++
> xen/include/asm-x86/hvm/vmx/vmcs.h | 3 +
> xen/include/asm-x86/hvm/vmx/vmx.h | 14 ++++
> 3 files changed, 127 insertions(+)
>
> diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
> index cc6d4ece22..63a5a76e16 100644
> --- a/xen/arch/x86/hvm/vmx/vmx.c
> +++ b/xen/arch/x86/hvm/vmx/vmx.c
> @@ -428,6 +428,56 @@ static void vmx_domain_relinquish_resources(struct domain *d)
> vmx_free_vlapic_mapping(d);
> }
>
> +static int vmx_init_pt(struct vcpu *v)
> +{
> + int rc;
> + uint64_t size = v->domain->processor_trace_buf_kb * KB(1);
As I commented in other patches, I don't think there's a need to have the
size in bytes, and hence you could just convert to a number of pages? You
might have to check that the value is rounded to a page boundary.
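For illustration, a rough sketch of what a page-based check might look
like (hypothetical; processor_trace_buf_kb is in KiB, so it first has to
be a whole number of pages):

    uint64_t kb = v->domain->processor_trace_buf_kb;
    uint64_t npages;

    /* Must be a non-zero, whole number of pages. */
    if ( !kb || (kb & ((PAGE_SIZE >> 10) - 1)) )
        return -EINVAL;

    npages = kb >> (PAGE_SHIFT - 10);

    /* Power-of-2 number of pages, at most 4GB worth. */
    if ( npages > (GB(4) >> PAGE_SHIFT) || (npages & (npages - 1)) )
        return -EINVAL;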
> +
> + if ( !v->vmtrace.pt_buf || !size )
> + return -EINVAL;
> +
> + /*
> + * We don't accept trace buffer size smaller than single page
> + * and the upper bound is defined as 4GB in the specification.
> + * The buffer size must be also a power of 2.
> + */
> + if ( size < PAGE_SIZE || size > GB(4) || (size & (size - 1)) )
> + return -EINVAL;
IMO there should be a hook to sanitize the buffer size before you go and
allocate it. It makes no sense to allocate a buffer only to come here and
realize it's not suitable.
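For illustration only, such a check could be factored into a helper that
the common allocation code calls before committing any memory; the hook
name below is made up:

    /* Hypothetical hook, called before the trace buffer is allocated. */
    static int vmx_vmtrace_check_size(uint64_t size)
    {
        /* Same constraints as the check below: [PAGE_SIZE, 4GB], power of 2. */
        if ( size < PAGE_SIZE || size > GB(4) || (size & (size - 1)) )
            return -EINVAL;

        return 0;
    }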
> +
> + v->arch.hvm.vmx.ipt_state = xzalloc(struct ipt_state);
> +
Extra newline.
> + if ( !v->arch.hvm.vmx.ipt_state )
> + return -ENOMEM;
> +
> + v->arch.hvm.vmx.ipt_state->output_base =
> + page_to_maddr(v->vmtrace.pt_buf);
The above fits on a single line now. You could also avoid having an
output_base field and just do the conversion in vmx_restore_guest_msrs;
I'm not sure there's much value in caching it here.
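In vmx_restore_guest_msrs() that would then presumably become something
along the lines of (sketch only):

    wrmsrl(MSR_RTIT_OUTPUT_BASE, page_to_maddr(v->vmtrace.pt_buf));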
> + v->arch.hvm.vmx.ipt_state->output_mask.raw = size - 1;
> +
> + rc = vmx_add_host_load_msr(v, MSR_RTIT_CTL, 0);
> +
> + if ( rc )
> + return rc;
> +
> + rc = vmx_add_guest_msr(v, MSR_RTIT_CTL,
> + RTIT_CTL_TRACE_EN | RTIT_CTL_OS |
> + RTIT_CTL_USR | RTIT_CTL_BRANCH_EN);
> +
> + if ( rc )
> + return rc;
We don't usually leave an empty line between setting and testing rc.
> +
> + return 0;
> +}
> +
> +static int vmx_destroy_pt(struct vcpu* v)
> +{
> + if ( v->arch.hvm.vmx.ipt_state )
> + xfree(v->arch.hvm.vmx.ipt_state);
> +
> + v->arch.hvm.vmx.ipt_state = NULL;
> + return 0;
> +}
> +
> +
Double newline, just one newline please between functions.
> static int vmx_vcpu_initialise(struct vcpu *v)
> {
> int rc;
> @@ -471,6 +521,14 @@ static int vmx_vcpu_initialise(struct vcpu *v)
>
> vmx_install_vlapic_mapping(v);
>
> + if ( v->domain->processor_trace_buf_kb )
Can you move this check inside of vmx_init_pt, so that here you just
do:
return vmx_init_pt(v);
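A rough sketch of how that could look inside vmx_init_pt(), treating an
unset buffer size as "nothing to do" rather than an error:

    /* At the top of vmx_init_pt(): */
    if ( !v->domain->processor_trace_buf_kb )
        return 0; /* tracing not configured for this domain */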
> + {
> + rc = vmx_init_pt(v);
> +
> + if ( rc )
> + return rc;
> + }
> +
> return 0;
> }
>
> @@ -483,6 +541,7 @@ static void vmx_vcpu_destroy(struct vcpu *v)
> * prior to vmx_domain_destroy so we need to disable PML for each vcpu
> * separately here.
> */
> + vmx_destroy_pt(v);
> vmx_vcpu_disable_pml(v);
> vmx_destroy_vmcs(v);
> passive_domain_destroy(v);
> @@ -513,6 +572,18 @@ static void vmx_save_guest_msrs(struct vcpu *v)
> * be updated at any time via SWAPGS, which we cannot trap.
> */
> v->arch.hvm.vmx.shadow_gs = rdgsshadow();
> +
> + if ( unlikely(v->arch.hvm.vmx.ipt_state &&
> + v->arch.hvm.vmx.ipt_state->active) )
> + {
> + uint64_t rtit_ctl;
Missing newline.
> + rdmsrl(MSR_RTIT_CTL, rtit_ctl);
> + BUG_ON(rtit_ctl & RTIT_CTL_TRACE_EN);
> +
> + rdmsrl(MSR_RTIT_STATUS, v->arch.hvm.vmx.ipt_state->status);
> + rdmsrl(MSR_RTIT_OUTPUT_MASK,
> + v->arch.hvm.vmx.ipt_state->output_mask.raw);
> + }
> }
>
> static void vmx_restore_guest_msrs(struct vcpu *v)
> @@ -524,6 +595,17 @@ static void vmx_restore_guest_msrs(struct vcpu *v)
>
> if ( cpu_has_msr_tsc_aux )
> wrmsr_tsc_aux(v->arch.msrs->tsc_aux);
> +
> + if ( unlikely(v->arch.hvm.vmx.ipt_state &&
> + v->arch.hvm.vmx.ipt_state->active) )
> + {
> + wrmsrl(MSR_RTIT_OUTPUT_BASE,
> + v->arch.hvm.vmx.ipt_state->output_base);
> + wrmsrl(MSR_RTIT_OUTPUT_MASK,
> + v->arch.hvm.vmx.ipt_state->output_mask.raw);
> + wrmsrl(MSR_RTIT_STATUS,
> + v->arch.hvm.vmx.ipt_state->status);
> + }
> }
>
> void vmx_update_cpu_exec_control(struct vcpu *v)
> @@ -2240,6 +2322,25 @@ static bool vmx_get_pending_event(struct vcpu *v, struct x86_event *info)
> return true;
> }
>
> +static int vmx_control_pt(struct vcpu *v, bool enable)
> +{
> + if ( !v->arch.hvm.vmx.ipt_state )
> + return -EINVAL;
> +
> + v->arch.hvm.vmx.ipt_state->active = enable;
I think you should assert that the vCPU is paused, as doing this on a
non-paused vCPU is not going to work reliably?
> + return 0;
> +}
> +
> +static int vmx_get_pt_offset(struct vcpu *v, uint64_t *offset, uint64_t *size)
> +{
> + if ( !v->arch.hvm.vmx.ipt_state )
> + return -EINVAL;
> +
> + *offset = v->arch.hvm.vmx.ipt_state->output_mask.offset;
> + *size = v->arch.hvm.vmx.ipt_state->output_mask.size + 1;
> + return 0;
> +}
> +
> static struct hvm_function_table __initdata vmx_function_table = {
> .name = "VMX",
> .cpu_up_prepare = vmx_cpu_up_prepare,
> @@ -2295,6 +2396,8 @@ static struct hvm_function_table __initdata vmx_function_table = {
> .altp2m_vcpu_update_vmfunc_ve = vmx_vcpu_update_vmfunc_ve,
> .altp2m_vcpu_emulate_ve = vmx_vcpu_emulate_ve,
> .altp2m_vcpu_emulate_vmfunc = vmx_vcpu_emulate_vmfunc,
> + .vmtrace_control_pt = vmx_control_pt,
> + .vmtrace_get_pt_offset = vmx_get_pt_offset,
> .tsc_scaling = {
> .max_ratio = VMX_TSC_MULTIPLIER_MAX,
> },
> @@ -3674,6 +3777,13 @@ void vmx_vmexit_handler(struct cpu_user_regs *regs)
>
> hvm_invalidate_regs_fields(regs);
>
> + if ( unlikely(v->arch.hvm.vmx.ipt_state &&
> + v->arch.hvm.vmx.ipt_state->active) )
> + {
> + rdmsrl(MSR_RTIT_OUTPUT_MASK,
> + v->arch.hvm.vmx.ipt_state->output_mask.raw);
> + }
Unneeded braces.
Thanks.

On 07.07.2020 21:39, Michał Leszczyński wrote:
> --- a/xen/arch/x86/hvm/vmx/vmx.c
> +++ b/xen/arch/x86/hvm/vmx/vmx.c
> @@ -428,6 +428,56 @@ static void vmx_domain_relinquish_resources(struct domain *d)
> vmx_free_vlapic_mapping(d);
> }
>
> +static int vmx_init_pt(struct vcpu *v)
> +{
> + int rc;
> + uint64_t size = v->domain->processor_trace_buf_kb * KB(1);
> +
> + if ( !v->vmtrace.pt_buf || !size )
> + return -EINVAL;
> +
> + /*
> + * We don't accept trace buffer size smaller than single page
> + * and the upper bound is defined as 4GB in the specification.
> + * The buffer size must be also a power of 2.
> + */
> + if ( size < PAGE_SIZE || size > GB(4) || (size & (size - 1)) )
> + return -EINVAL;
> +
> + v->arch.hvm.vmx.ipt_state = xzalloc(struct ipt_state);
> +
> + if ( !v->arch.hvm.vmx.ipt_state )
> + return -ENOMEM;
> +
> + v->arch.hvm.vmx.ipt_state->output_base =
> + page_to_maddr(v->vmtrace.pt_buf);
> + v->arch.hvm.vmx.ipt_state->output_mask.raw = size - 1;
> +
> + rc = vmx_add_host_load_msr(v, MSR_RTIT_CTL, 0);
> +
> + if ( rc )
> + return rc;
> +
> + rc = vmx_add_guest_msr(v, MSR_RTIT_CTL,
> + RTIT_CTL_TRACE_EN | RTIT_CTL_OS |
> + RTIT_CTL_USR | RTIT_CTL_BRANCH_EN);
Indentation is off by three here, and ...
> + if ( rc )
> + return rc;
> +
> + return 0;
> +}
... this whole thing would be shorter (and hence easier to read) as
return vmx_add_guest_msr(v, MSR_RTIT_CTL,
RTIT_CTL_TRACE_EN | RTIT_CTL_OS |
RTIT_CTL_USR | RTIT_CTL_BRANCH_EN);
> +static int vmx_destroy_pt(struct vcpu* v)
> +{
> + if ( v->arch.hvm.vmx.ipt_state )
> + xfree(v->arch.hvm.vmx.ipt_state);
No need for the if().
> + v->arch.hvm.vmx.ipt_state = NULL;
And everything together is actually simply
"XFREE(v->arch.hvm.vmx.ipt_state);".
> @@ -471,6 +521,14 @@ static int vmx_vcpu_initialise(struct vcpu *v)
>
> vmx_install_vlapic_mapping(v);
>
> + if ( v->domain->processor_trace_buf_kb )
> + {
> + rc = vmx_init_pt(v);
> +
> + if ( rc )
> + return rc;
> + }
> +
> return 0;
> }
Is there no cleanup you need to do in case of failure? The caller
will invoke vmx_vcpu_destroy() only for failures subsequent to one
coming from here.
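One possible shape, mirroring the PML error handling earlier in this
function (whether vmx_destroy_vmcs() alone is the right cleanup here is
for the author to confirm):

    if ( v->domain->processor_trace_buf_kb )
    {
        rc = vmx_init_pt(v);
        if ( rc )
        {
            vmx_destroy_vmcs(v);
            return rc;
        }
    }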
> @@ -513,6 +572,18 @@ static void vmx_save_guest_msrs(struct vcpu *v)
> * be updated at any time via SWAPGS, which we cannot trap.
> */
> v->arch.hvm.vmx.shadow_gs = rdgsshadow();
> +
> + if ( unlikely(v->arch.hvm.vmx.ipt_state &&
> + v->arch.hvm.vmx.ipt_state->active) )
For likely()/unlikely() to be efficient they shouldn't have && or || in
their expressions. Please limit the hint to just the left side of the &&.
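i.e. something along the lines of:

    if ( unlikely(v->arch.hvm.vmx.ipt_state) &&
         v->arch.hvm.vmx.ipt_state->active )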
> @@ -2240,6 +2322,25 @@ static bool vmx_get_pending_event(struct vcpu *v, struct x86_event *info)
> return true;
> }
>
> +static int vmx_control_pt(struct vcpu *v, bool enable)
> +{
> + if ( !v->arch.hvm.vmx.ipt_state )
> + return -EINVAL;
> +
> + v->arch.hvm.vmx.ipt_state->active = enable;
Peeking ahead into patch 9, the vCPU is paused at this point.
Please add a respective ASSERT(), if only for documentation
purposes.
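For example (assuming the vCPU's pause count is the right thing to check;
the exact condition is for the author to pick):

    /* Callers (see patch 9) are expected to pause the vCPU first. */
    ASSERT(atomic_read(&v->pause_count));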
> +static int vmx_get_pt_offset(struct vcpu *v, uint64_t *offset, uint64_t *size)
> +{
> + if ( !v->arch.hvm.vmx.ipt_state )
> + return -EINVAL;
> +
> + *offset = v->arch.hvm.vmx.ipt_state->output_mask.offset;
> + *size = v->arch.hvm.vmx.ipt_state->output_mask.size + 1;
Either the function parameter or the struct field is misnamed,
or else there shouldn't be an addition of 1 here.
> @@ -2295,6 +2396,8 @@ static struct hvm_function_table __initdata vmx_function_table = {
> .altp2m_vcpu_update_vmfunc_ve = vmx_vcpu_update_vmfunc_ve,
> .altp2m_vcpu_emulate_ve = vmx_vcpu_emulate_ve,
> .altp2m_vcpu_emulate_vmfunc = vmx_vcpu_emulate_vmfunc,
> + .vmtrace_control_pt = vmx_control_pt,
> + .vmtrace_get_pt_offset = vmx_get_pt_offset,
Better install these hooks only if the underlying feature is
available (like we do for several other hooks)?
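For instance the hooks could be cleared in start_vmx() when the hardware
lacks the feature; the predicate name below is purely illustrative:

    /* In start_vmx(), next to the other conditional hook setup: */
    if ( !cpu_has_proc_trace /* hypothetical feature check */ )
    {
        vmx_function_table.vmtrace_control_pt = NULL;
        vmx_function_table.vmtrace_get_pt_offset = NULL;
    }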
> @@ -3674,6 +3777,13 @@ void vmx_vmexit_handler(struct cpu_user_regs *regs)
>
> hvm_invalidate_regs_fields(regs);
>
> + if ( unlikely(v->arch.hvm.vmx.ipt_state &&
> + v->arch.hvm.vmx.ipt_state->active) )
> + {
> + rdmsrl(MSR_RTIT_OUTPUT_MASK,
> + v->arch.hvm.vmx.ipt_state->output_mask.raw);
Don't you also need to latch RTIT_STATUS?
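If so, that would presumably be one more read next to the existing one,
mirroring what vmx_save_guest_msrs() already does:

    rdmsrl(MSR_RTIT_STATUS, v->arch.hvm.vmx.ipt_state->status);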
> --- a/xen/include/asm-x86/hvm/vmx/vmx.h
> +++ b/xen/include/asm-x86/hvm/vmx/vmx.h
> @@ -689,4 +689,18 @@ typedef union ldt_or_tr_instr_info {
> };
> } ldt_or_tr_instr_info_t;
>
> +/* Processor Trace state per vCPU */
> +struct ipt_state {
> + bool active;
> + uint64_t status;
> + uint64_t output_base;
maddr_t according to its use.
Jan