[v6] Implement support for external IPT monitoring

[PATCH v6 07/11] x86/vmx: implement IPT in VMX

Posted by Michał Leszczyński 5 years, 7 months ago

From: Michal Leszczynski <michal.leszczynski@cert.pl>

Use Intel Processor Trace feature to provide vmtrace_pt_*
interface for HVM/VMX.

Signed-off-by: Michal Leszczynski <michal.leszczynski@cert.pl>
---
 xen/arch/x86/hvm/vmx/vmx.c         | 110 +++++++++++++++++++++++++++++
 xen/include/asm-x86/hvm/vmx/vmcs.h |   3 +
 xen/include/asm-x86/hvm/vmx/vmx.h  |  14 ++++
 3 files changed, 127 insertions(+)

diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index cc6d4ece22..63a5a76e16 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -428,6 +428,56 @@ static void vmx_domain_relinquish_resources(struct domain *d)
     vmx_free_vlapic_mapping(d);
 }
 
+static int vmx_init_pt(struct vcpu *v)
+{
+    int rc;
+    uint64_t size = v->domain->processor_trace_buf_kb * KB(1);
+
+    if ( !v->vmtrace.pt_buf || !size )
+        return -EINVAL;
+
+    /*
+     * We don't accept trace buffer size smaller than single page
+     * and the upper bound is defined as 4GB in the specification.
+     * The buffer size must be also a power of 2.
+     */
+    if ( size < PAGE_SIZE || size > GB(4) || (size & (size - 1)) )
+        return -EINVAL;
+
+    v->arch.hvm.vmx.ipt_state = xzalloc(struct ipt_state);
+
+    if ( !v->arch.hvm.vmx.ipt_state )
+        return -ENOMEM;
+
+    v->arch.hvm.vmx.ipt_state->output_base =
+        page_to_maddr(v->vmtrace.pt_buf);
+    v->arch.hvm.vmx.ipt_state->output_mask.raw = size - 1;
+
+    rc = vmx_add_host_load_msr(v, MSR_RTIT_CTL, 0);
+
+    if ( rc )
+        return rc;
+
+    rc = vmx_add_guest_msr(v, MSR_RTIT_CTL,
+                              RTIT_CTL_TRACE_EN | RTIT_CTL_OS |
+                              RTIT_CTL_USR | RTIT_CTL_BRANCH_EN);
+
+    if ( rc )
+        return rc;
+
+    return 0;
+}
+
+static int vmx_destroy_pt(struct vcpu* v)
+{
+    if ( v->arch.hvm.vmx.ipt_state )
+        xfree(v->arch.hvm.vmx.ipt_state);
+
+    v->arch.hvm.vmx.ipt_state = NULL;
+    return 0;
+}
+
+
 static int vmx_vcpu_initialise(struct vcpu *v)
 {
     int rc;
@@ -471,6 +521,14 @@ static int vmx_vcpu_initialise(struct vcpu *v)
 
     vmx_install_vlapic_mapping(v);
 
+    if ( v->domain->processor_trace_buf_kb )
+    {
+        rc = vmx_init_pt(v);
+
+        if ( rc )
+            return rc;
+    }
+
     return 0;
 }
 
@@ -483,6 +541,7 @@ static void vmx_vcpu_destroy(struct vcpu *v)
      * prior to vmx_domain_destroy so we need to disable PML for each vcpu
      * separately here.
      */
+    vmx_destroy_pt(v);
     vmx_vcpu_disable_pml(v);
     vmx_destroy_vmcs(v);
     passive_domain_destroy(v);
@@ -513,6 +572,18 @@ static void vmx_save_guest_msrs(struct vcpu *v)
      * be updated at any time via SWAPGS, which we cannot trap.
      */
     v->arch.hvm.vmx.shadow_gs = rdgsshadow();
+
+    if ( unlikely(v->arch.hvm.vmx.ipt_state &&
+                  v->arch.hvm.vmx.ipt_state->active) )
+    {
+        uint64_t rtit_ctl;
+        rdmsrl(MSR_RTIT_CTL, rtit_ctl);
+        BUG_ON(rtit_ctl & RTIT_CTL_TRACE_EN);
+
+        rdmsrl(MSR_RTIT_STATUS, v->arch.hvm.vmx.ipt_state->status);
+        rdmsrl(MSR_RTIT_OUTPUT_MASK,
+               v->arch.hvm.vmx.ipt_state->output_mask.raw);
+    }
 }
 
 static void vmx_restore_guest_msrs(struct vcpu *v)
@@ -524,6 +595,17 @@ static void vmx_restore_guest_msrs(struct vcpu *v)
 
     if ( cpu_has_msr_tsc_aux )
         wrmsr_tsc_aux(v->arch.msrs->tsc_aux);
+
+    if ( unlikely(v->arch.hvm.vmx.ipt_state &&
+                  v->arch.hvm.vmx.ipt_state->active) )
+    {
+        wrmsrl(MSR_RTIT_OUTPUT_BASE,
+               v->arch.hvm.vmx.ipt_state->output_base);
+        wrmsrl(MSR_RTIT_OUTPUT_MASK,
+               v->arch.hvm.vmx.ipt_state->output_mask.raw);
+        wrmsrl(MSR_RTIT_STATUS,
+               v->arch.hvm.vmx.ipt_state->status);
+    }
 }
 
 void vmx_update_cpu_exec_control(struct vcpu *v)
@@ -2240,6 +2322,25 @@ static bool vmx_get_pending_event(struct vcpu *v, struct x86_event *info)
     return true;
 }
 
+static int vmx_control_pt(struct vcpu *v, bool enable)
+{
+    if ( !v->arch.hvm.vmx.ipt_state )
+        return -EINVAL;
+
+    v->arch.hvm.vmx.ipt_state->active = enable;
+    return 0;
+}
+
+static int vmx_get_pt_offset(struct vcpu *v, uint64_t *offset, uint64_t *size)
+{
+    if ( !v->arch.hvm.vmx.ipt_state )
+        return -EINVAL;
+
+    *offset = v->arch.hvm.vmx.ipt_state->output_mask.offset;
+    *size = v->arch.hvm.vmx.ipt_state->output_mask.size + 1;
+    return 0;
+}
+
 static struct hvm_function_table __initdata vmx_function_table = {
     .name                 = "VMX",
     .cpu_up_prepare       = vmx_cpu_up_prepare,
@@ -2295,6 +2396,8 @@ static struct hvm_function_table __initdata vmx_function_table = {
     .altp2m_vcpu_update_vmfunc_ve = vmx_vcpu_update_vmfunc_ve,
     .altp2m_vcpu_emulate_ve = vmx_vcpu_emulate_ve,
     .altp2m_vcpu_emulate_vmfunc = vmx_vcpu_emulate_vmfunc,
+    .vmtrace_control_pt = vmx_control_pt,
+    .vmtrace_get_pt_offset = vmx_get_pt_offset,
     .tsc_scaling = {
         .max_ratio = VMX_TSC_MULTIPLIER_MAX,
     },
@@ -3674,6 +3777,13 @@ void vmx_vmexit_handler(struct cpu_user_regs *regs)
 
     hvm_invalidate_regs_fields(regs);
 
+    if ( unlikely(v->arch.hvm.vmx.ipt_state &&
+                  v->arch.hvm.vmx.ipt_state->active) )
+    {
+        rdmsrl(MSR_RTIT_OUTPUT_MASK,
+               v->arch.hvm.vmx.ipt_state->output_mask.raw);
+    }
+
     if ( paging_mode_hap(v->domain) )
     {
         /*
diff --git a/xen/include/asm-x86/hvm/vmx/vmcs.h b/xen/include/asm-x86/hvm/vmx/vmcs.h
index 6153ba6769..65971fa6ad 100644
--- a/xen/include/asm-x86/hvm/vmx/vmcs.h
+++ b/xen/include/asm-x86/hvm/vmx/vmcs.h
@@ -186,6 +186,9 @@ struct vmx_vcpu {
      * pCPU and wakeup the related vCPU.
      */
     struct pi_blocking_vcpu pi_blocking;
+
+    /* State of processor trace feature */
+    struct ipt_state      *ipt_state;
 };
 
 int vmx_create_vmcs(struct vcpu *v);
diff --git a/xen/include/asm-x86/hvm/vmx/vmx.h b/xen/include/asm-x86/hvm/vmx/vmx.h
index 111ccd7e61..8d7c67e43d 100644
--- a/xen/include/asm-x86/hvm/vmx/vmx.h
+++ b/xen/include/asm-x86/hvm/vmx/vmx.h
@@ -689,4 +689,18 @@ typedef union ldt_or_tr_instr_info {
     };
 } ldt_or_tr_instr_info_t;
 
+/* Processor Trace state per vCPU */
+struct ipt_state {
+    bool active;
+    uint64_t status;
+    uint64_t output_base;
+    union {
+        uint64_t raw;
+        struct {
+            uint32_t size;
+            uint32_t offset;
+        };
+    } output_mask;
+};
+
 #endif /* __ASM_X86_HVM_VMX_VMX_H__ */
-- 
2.17.1

Re: [PATCH v6 07/11] x86/vmx: implement IPT in VMX

Posted by Roger Pau Monné 5 years, 6 months ago

On Tue, Jul 07, 2020 at 09:39:46PM +0200, Michał Leszczyński wrote:
> From: Michal Leszczynski <michal.leszczynski@cert.pl>
> 
> Use Intel Processor Trace feature to provide vmtrace_pt_*
> interface for HVM/VMX.
> 
> Signed-off-by: Michal Leszczynski <michal.leszczynski@cert.pl>
> ---
>  xen/arch/x86/hvm/vmx/vmx.c         | 110 +++++++++++++++++++++++++++++
>  xen/include/asm-x86/hvm/vmx/vmcs.h |   3 +
>  xen/include/asm-x86/hvm/vmx/vmx.h  |  14 ++++
>  3 files changed, 127 insertions(+)
> 
> diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
> index cc6d4ece22..63a5a76e16 100644
> --- a/xen/arch/x86/hvm/vmx/vmx.c
> +++ b/xen/arch/x86/hvm/vmx/vmx.c
> @@ -428,6 +428,56 @@ static void vmx_domain_relinquish_resources(struct domain *d)
>      vmx_free_vlapic_mapping(d);
>  }
>  
> +static int vmx_init_pt(struct vcpu *v)
> +{
> +    int rc;
> +    uint64_t size = v->domain->processor_trace_buf_kb * KB(1);

As I commented in other patches, I don't think there's a need to have
the size in bytes, and hence you could just convert to number of pages?

You might have to check that the value is rounded to a page boundary.

> +
> +    if ( !v->vmtrace.pt_buf || !size )
> +        return -EINVAL;
> +
> +    /*
> +     * We don't accept trace buffer size smaller than single page
> +     * and the upper bound is defined as 4GB in the specification.
> +     * The buffer size must be also a power of 2.
> +     */
> +    if ( size < PAGE_SIZE || size > GB(4) || (size & (size - 1)) )
> +        return -EINVAL;

IMO there should be a hook to sanitize the buffer size before you go
and allocate it. It makes no sense to allocate a buffer to come here
and realize it's not suitable.

> +
> +    v->arch.hvm.vmx.ipt_state = xzalloc(struct ipt_state);
> +

Extra newline.

> +    if ( !v->arch.hvm.vmx.ipt_state )
> +        return -ENOMEM;
> +
> +    v->arch.hvm.vmx.ipt_state->output_base =
> +        page_to_maddr(v->vmtrace.pt_buf);

The above fits on a single line now. You could also avoid having an
output_base field and just do the conversion in
vmx_restore_guest_msrs, I'm not sure there's much value in having this
cached here.

> +    v->arch.hvm.vmx.ipt_state->output_mask.raw = size - 1;
> +
> +    rc = vmx_add_host_load_msr(v, MSR_RTIT_CTL, 0);
> +
> +    if ( rc )
> +        return rc;
> +
> +    rc = vmx_add_guest_msr(v, MSR_RTIT_CTL,
> +                              RTIT_CTL_TRACE_EN | RTIT_CTL_OS |
> +                              RTIT_CTL_USR | RTIT_CTL_BRANCH_EN);
> +
> +    if ( rc )
> +        return rc;

We don't usually leave an empty line between setting and testing rc.

> +
> +    return 0;
> +}
> +
> +static int vmx_destroy_pt(struct vcpu* v)
> +{
> +    if ( v->arch.hvm.vmx.ipt_state )
> +        xfree(v->arch.hvm.vmx.ipt_state);
> +
> +    v->arch.hvm.vmx.ipt_state = NULL;
> +    return 0;
> +}
> +
> +

Double newline, just one newline please between functions.

>  static int vmx_vcpu_initialise(struct vcpu *v)
>  {
>      int rc;
> @@ -471,6 +521,14 @@ static int vmx_vcpu_initialise(struct vcpu *v)
>  
>      vmx_install_vlapic_mapping(v);
>  
> +    if ( v->domain->processor_trace_buf_kb )

Can you move this check inside of vmx_init_pt, so that here you just
do:

return vmx_init_pt(v);

> +    {
> +        rc = vmx_init_pt(v);
> +
> +        if ( rc )
> +            return rc;
> +    }
> +
>      return 0;
>  }
>  
> @@ -483,6 +541,7 @@ static void vmx_vcpu_destroy(struct vcpu *v)
>       * prior to vmx_domain_destroy so we need to disable PML for each vcpu
>       * separately here.
>       */
> +    vmx_destroy_pt(v);
>      vmx_vcpu_disable_pml(v);
>      vmx_destroy_vmcs(v);
>      passive_domain_destroy(v);
> @@ -513,6 +572,18 @@ static void vmx_save_guest_msrs(struct vcpu *v)
>       * be updated at any time via SWAPGS, which we cannot trap.
>       */
>      v->arch.hvm.vmx.shadow_gs = rdgsshadow();
> +
> +    if ( unlikely(v->arch.hvm.vmx.ipt_state &&
> +                  v->arch.hvm.vmx.ipt_state->active) )
> +    {
> +        uint64_t rtit_ctl;

Missing newline.

> +        rdmsrl(MSR_RTIT_CTL, rtit_ctl);
> +        BUG_ON(rtit_ctl & RTIT_CTL_TRACE_EN);
> +
> +        rdmsrl(MSR_RTIT_STATUS, v->arch.hvm.vmx.ipt_state->status);
> +        rdmsrl(MSR_RTIT_OUTPUT_MASK,
> +               v->arch.hvm.vmx.ipt_state->output_mask.raw);
> +    }
>  }
>  
>  static void vmx_restore_guest_msrs(struct vcpu *v)
> @@ -524,6 +595,17 @@ static void vmx_restore_guest_msrs(struct vcpu *v)
>  
>      if ( cpu_has_msr_tsc_aux )
>          wrmsr_tsc_aux(v->arch.msrs->tsc_aux);
> +
> +    if ( unlikely(v->arch.hvm.vmx.ipt_state &&
> +                  v->arch.hvm.vmx.ipt_state->active) )
> +    {
> +        wrmsrl(MSR_RTIT_OUTPUT_BASE,
> +               v->arch.hvm.vmx.ipt_state->output_base);
> +        wrmsrl(MSR_RTIT_OUTPUT_MASK,
> +               v->arch.hvm.vmx.ipt_state->output_mask.raw);
> +        wrmsrl(MSR_RTIT_STATUS,
> +               v->arch.hvm.vmx.ipt_state->status);
> +    }
>  }
>  
>  void vmx_update_cpu_exec_control(struct vcpu *v)
> @@ -2240,6 +2322,25 @@ static bool vmx_get_pending_event(struct vcpu *v, struct x86_event *info)
>      return true;
>  }
>  
> +static int vmx_control_pt(struct vcpu *v, bool enable)
> +{
> +    if ( !v->arch.hvm.vmx.ipt_state )
> +        return -EINVAL;
> +
> +    v->arch.hvm.vmx.ipt_state->active = enable;

I think you should assert that the vCPU is paused? As doing this on a
non-paused vCPU is not going to work reliably?

> +    return 0;
> +}
> +
> +static int vmx_get_pt_offset(struct vcpu *v, uint64_t *offset, uint64_t *size)
> +{
> +    if ( !v->arch.hvm.vmx.ipt_state )
> +        return -EINVAL;
> +
> +    *offset = v->arch.hvm.vmx.ipt_state->output_mask.offset;
> +    *size = v->arch.hvm.vmx.ipt_state->output_mask.size + 1;
> +    return 0;
> +}
> +
>  static struct hvm_function_table __initdata vmx_function_table = {
>      .name                 = "VMX",
>      .cpu_up_prepare       = vmx_cpu_up_prepare,
> @@ -2295,6 +2396,8 @@ static struct hvm_function_table __initdata vmx_function_table = {
>      .altp2m_vcpu_update_vmfunc_ve = vmx_vcpu_update_vmfunc_ve,
>      .altp2m_vcpu_emulate_ve = vmx_vcpu_emulate_ve,
>      .altp2m_vcpu_emulate_vmfunc = vmx_vcpu_emulate_vmfunc,
> +    .vmtrace_control_pt = vmx_control_pt,
> +    .vmtrace_get_pt_offset = vmx_get_pt_offset,
>      .tsc_scaling = {
>          .max_ratio = VMX_TSC_MULTIPLIER_MAX,
>      },
> @@ -3674,6 +3777,13 @@ void vmx_vmexit_handler(struct cpu_user_regs *regs)
>  
>      hvm_invalidate_regs_fields(regs);
>  
> +    if ( unlikely(v->arch.hvm.vmx.ipt_state &&
> +                  v->arch.hvm.vmx.ipt_state->active) )
> +    {
> +        rdmsrl(MSR_RTIT_OUTPUT_MASK,
> +               v->arch.hvm.vmx.ipt_state->output_mask.raw);
> +    }

Unneeded braces.

Thanks.

Re: [PATCH v6 07/11] x86/vmx: implement IPT in VMX

Posted by Jan Beulich 5 years, 6 months ago

On 07.07.2020 21:39, Michał Leszczyński wrote:
> --- a/xen/arch/x86/hvm/vmx/vmx.c
> +++ b/xen/arch/x86/hvm/vmx/vmx.c
> @@ -428,6 +428,56 @@ static void vmx_domain_relinquish_resources(struct domain *d)
>      vmx_free_vlapic_mapping(d);
>  }
>  
> +static int vmx_init_pt(struct vcpu *v)
> +{
> +    int rc;
> +    uint64_t size = v->domain->processor_trace_buf_kb * KB(1);
> +
> +    if ( !v->vmtrace.pt_buf || !size )
> +        return -EINVAL;
> +
> +    /*
> +     * We don't accept trace buffer size smaller than single page
> +     * and the upper bound is defined as 4GB in the specification.
> +     * The buffer size must be also a power of 2.
> +     */
> +    if ( size < PAGE_SIZE || size > GB(4) || (size & (size - 1)) )
> +        return -EINVAL;
> +
> +    v->arch.hvm.vmx.ipt_state = xzalloc(struct ipt_state);
> +
> +    if ( !v->arch.hvm.vmx.ipt_state )
> +        return -ENOMEM;
> +
> +    v->arch.hvm.vmx.ipt_state->output_base =
> +        page_to_maddr(v->vmtrace.pt_buf);
> +    v->arch.hvm.vmx.ipt_state->output_mask.raw = size - 1;
> +
> +    rc = vmx_add_host_load_msr(v, MSR_RTIT_CTL, 0);
> +
> +    if ( rc )
> +        return rc;
> +
> +    rc = vmx_add_guest_msr(v, MSR_RTIT_CTL,
> +                              RTIT_CTL_TRACE_EN | RTIT_CTL_OS |
> +                              RTIT_CTL_USR | RTIT_CTL_BRANCH_EN);

Indentation is off by three here, and ...

> +    if ( rc )
> +        return rc;
> +
> +    return 0;
> +}

... this whole thing would be shorter (and hence easier to read) as

    return vmx_add_guest_msr(v, MSR_RTIT_CTL,
                             RTIT_CTL_TRACE_EN | RTIT_CTL_OS |
                             RTIT_CTL_USR | RTIT_CTL_BRANCH_EN);

> +static int vmx_destroy_pt(struct vcpu* v)
> +{
> +    if ( v->arch.hvm.vmx.ipt_state )
> +        xfree(v->arch.hvm.vmx.ipt_state);

No need for the if(). 

> +    v->arch.hvm.vmx.ipt_state = NULL;

And everything together is actually simply
"XFREE(v->arch.hvm.vmx.ipt_state);".

> @@ -471,6 +521,14 @@ static int vmx_vcpu_initialise(struct vcpu *v)
>  
>      vmx_install_vlapic_mapping(v);
>  
> +    if ( v->domain->processor_trace_buf_kb )
> +    {
> +        rc = vmx_init_pt(v);
> +
> +        if ( rc )
> +            return rc;
> +    }
> +
>      return 0;
>  }

Is there no cleanup you need to do in case of failure? The caller
will invoke vmx_vcpu_destroy() only for failures subsequent to one
coming from here.

> @@ -513,6 +572,18 @@ static void vmx_save_guest_msrs(struct vcpu *v)
>       * be updated at any time via SWAPGS, which we cannot trap.
>       */
>      v->arch.hvm.vmx.shadow_gs = rdgsshadow();
> +
> +    if ( unlikely(v->arch.hvm.vmx.ipt_state &&
> +                  v->arch.hvm.vmx.ipt_state->active) )

likely() / unlikely(), for being efficient, don't want && or ||
in their expressions. Please limit to just the left side of &&.

> @@ -2240,6 +2322,25 @@ static bool vmx_get_pending_event(struct vcpu *v, struct x86_event *info)
>      return true;
>  }
>  
> +static int vmx_control_pt(struct vcpu *v, bool enable)
> +{
> +    if ( !v->arch.hvm.vmx.ipt_state )
> +        return -EINVAL;
> +
> +    v->arch.hvm.vmx.ipt_state->active = enable;

Peeking ahead into patch 9, the vCPU is paused at this point.
Please add a respective ASSERT(), if only for documentation
purposes.

> +static int vmx_get_pt_offset(struct vcpu *v, uint64_t *offset, uint64_t *size)
> +{
> +    if ( !v->arch.hvm.vmx.ipt_state )
> +        return -EINVAL;
> +
> +    *offset = v->arch.hvm.vmx.ipt_state->output_mask.offset;
> +    *size = v->arch.hvm.vmx.ipt_state->output_mask.size + 1;

Either the function parameter or the struct field is misnamed,
or else there shouldn't be an addition of 1 here.

> @@ -2295,6 +2396,8 @@ static struct hvm_function_table __initdata vmx_function_table = {
>      .altp2m_vcpu_update_vmfunc_ve = vmx_vcpu_update_vmfunc_ve,
>      .altp2m_vcpu_emulate_ve = vmx_vcpu_emulate_ve,
>      .altp2m_vcpu_emulate_vmfunc = vmx_vcpu_emulate_vmfunc,
> +    .vmtrace_control_pt = vmx_control_pt,
> +    .vmtrace_get_pt_offset = vmx_get_pt_offset,

Better install these hooks only if the underlying feature is
available (like we do for several other hooks)?

> @@ -3674,6 +3777,13 @@ void vmx_vmexit_handler(struct cpu_user_regs *regs)
>  
>      hvm_invalidate_regs_fields(regs);
>  
> +    if ( unlikely(v->arch.hvm.vmx.ipt_state &&
> +                  v->arch.hvm.vmx.ipt_state->active) )
> +    {
> +        rdmsrl(MSR_RTIT_OUTPUT_MASK,
> +               v->arch.hvm.vmx.ipt_state->output_mask.raw);

Don't you also need to latch RTIT_STATUS?

> --- a/xen/include/asm-x86/hvm/vmx/vmx.h
> +++ b/xen/include/asm-x86/hvm/vmx/vmx.h
> @@ -689,4 +689,18 @@ typedef union ldt_or_tr_instr_info {
>      };
>  } ldt_or_tr_instr_info_t;
>  
> +/* Processor Trace state per vCPU */
> +struct ipt_state {
> +    bool active;
> +    uint64_t status;
> +    uint64_t output_base;

maddr_t according to its use.

Jan