[PATCH] x86/hvm: Include interruptibility state in hvm_hw_cpu

Tamas K Lengyel posted 1 patch 2 years, 1 month ago
[PATCH] x86/hvm: Include interruptibility state in hvm_hw_cpu
Posted by Tamas K Lengyel 2 years, 1 month ago
During VM fork reset, a failed vmentry has been observed when the reset
is performed immediately after a STI instruction was executed. This is due to
the guest interruptibility state in the VMCS being modified by STI while the
subsequent reset removes the IF bit from FLAGS, causing the failed vmentry.

Include the interruptibility state information in the public hvm_hw_cpu struct
so that the CPU can be safely saved/restored.

Signed-off-by: Tamas K Lengyel <tamas.lengyel@intel.com>
---
 xen/arch/x86/hvm/hvm.c                 |  9 +++++----
 xen/arch/x86/hvm/vmx/vmx.c             |  4 ++++
 xen/arch/x86/include/asm/hvm/hvm.h     | 26 ++++++++++++++++++++++++++
 xen/include/public/arch-x86/hvm/save.h |  3 ++-
 4 files changed, 37 insertions(+), 5 deletions(-)

diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 709a4191ef..b239c72215 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -897,6 +897,8 @@ static int cf_check hvm_save_cpu_ctxt(struct vcpu *v, hvm_domain_context_t *h)
         ctxt.flags = XEN_X86_FPU_INITIALISED;
     }
 
+    ctxt.interruptibility_info = hvm_get_interrupt_shadow(v);
+
     return hvm_save_entry(CPU, v->vcpu_id, h, &ctxt);
 }
 
@@ -990,9 +992,6 @@ static int cf_check hvm_load_cpu_ctxt(struct domain *d, hvm_domain_context_t *h)
     if ( hvm_load_entry_zeroextend(CPU, h, &ctxt) != 0 )
         return -EINVAL;
 
-    if ( ctxt.pad0 != 0 )
-        return -EINVAL;
-
     /* Sanity check some control registers. */
     if ( (ctxt.cr0 & HVM_CR0_GUEST_RESERVED_BITS) ||
          !(ctxt.cr0 & X86_CR0_ET) ||
@@ -1155,6 +1154,8 @@ static int cf_check hvm_load_cpu_ctxt(struct domain *d, hvm_domain_context_t *h)
     v->arch.dr6   = ctxt.dr6;
     v->arch.dr7   = ctxt.dr7;
 
+    hvm_set_interrupt_shadow(v, ctxt.interruptibility_info);
+
     hvmemul_cancel(v);
 
     /* Auxiliary processors should be woken immediately. */
@@ -3888,7 +3889,7 @@ enum hvm_intblk hvm_interrupt_blocked(struct vcpu *v, struct hvm_intack intack)
          !(guest_cpu_user_regs()->eflags & X86_EFLAGS_IF) )
         return hvm_intblk_rflags_ie;
 
-    intr_shadow = alternative_call(hvm_funcs.get_interrupt_shadow, v);
+    intr_shadow = hvm_get_interrupt_shadow(v);
 
     if ( intr_shadow & (HVM_INTR_SHADOW_STI|HVM_INTR_SHADOW_MOV_SS) )
         return hvm_intblk_shadow;
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index c075370f64..e13817431a 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -1323,7 +1323,9 @@ static unsigned int cf_check vmx_get_interrupt_shadow(struct vcpu *v)
 {
     unsigned long intr_shadow;
 
+    vmx_vmcs_enter(v);
     __vmread(GUEST_INTERRUPTIBILITY_INFO, &intr_shadow);
+    vmx_vmcs_exit(v);
 
     return intr_shadow;
 }
@@ -1331,7 +1333,9 @@ static unsigned int cf_check vmx_get_interrupt_shadow(struct vcpu *v)
 static void cf_check vmx_set_interrupt_shadow(
     struct vcpu *v, unsigned int intr_shadow)
 {
+    vmx_vmcs_enter(v);
     __vmwrite(GUEST_INTERRUPTIBILITY_INFO, intr_shadow);
+    vmx_vmcs_exit(v);
 }
 
 static void vmx_load_pdptrs(struct vcpu *v)
diff --git a/xen/arch/x86/include/asm/hvm/hvm.h b/xen/arch/x86/include/asm/hvm/hvm.h
index 5b7ec0cf69..2fb7865a05 100644
--- a/xen/arch/x86/include/asm/hvm/hvm.h
+++ b/xen/arch/x86/include/asm/hvm/hvm.h
@@ -720,6 +720,22 @@ static inline int hvm_vmtrace_reset(struct vcpu *v)
     return -EOPNOTSUPP;
 }
 
+static inline unsigned long hvm_get_interrupt_shadow(struct vcpu *v)
+{
+    if ( hvm_funcs.get_interrupt_shadow )
+        return alternative_call(hvm_funcs.get_interrupt_shadow, v);
+
+    return -EOPNOTSUPP;
+}
+
+static inline void
+hvm_set_interrupt_shadow(struct vcpu *v, unsigned long val)
+{
+    if ( hvm_funcs.set_interrupt_shadow )
+        alternative_vcall(hvm_funcs.set_interrupt_shadow, v, val);
+}
+
+
 /*
  * Accessors for registers which have per-guest-type or per-vendor locations
  * (e.g. VMCS, msr load/save lists, VMCB, VMLOAD lazy, etc).
@@ -863,6 +879,16 @@ static inline void hvm_set_reg(struct vcpu *v, unsigned int reg, uint64_t val)
     ASSERT_UNREACHABLE();
 }
 
+static inline unsigned long hvm_get_interrupt_shadow(struct vcpu *v)
+{
+    ASSERT_UNREACHABLE();
+    return 0;
+}
+static inline void hvm_set_interrupt_shadow(struct vcpu *v, unsigned long val)
+{
+    ASSERT_UNREACHABLE();
+}
+
 #define is_viridian_domain(d) ((void)(d), false)
 #define is_viridian_vcpu(v) ((void)(v), false)
 #define has_viridian_time_ref_count(d) ((void)(d), false)
diff --git a/xen/include/public/arch-x86/hvm/save.h b/xen/include/public/arch-x86/hvm/save.h
index 773a380bc2..e944b1756a 100644
--- a/xen/include/public/arch-x86/hvm/save.h
+++ b/xen/include/public/arch-x86/hvm/save.h
@@ -165,7 +165,8 @@ struct hvm_hw_cpu {
 #define _XEN_X86_FPU_INITIALISED        0
 #define XEN_X86_FPU_INITIALISED         (1U<<_XEN_X86_FPU_INITIALISED)
     uint32_t flags;
-    uint32_t pad0;
+
+    uint32_t interruptibility_info;
 };
 
 struct hvm_hw_cpu_compat {
-- 
2.25.1
Re: [PATCH] x86/hvm: Include interruptibility state in hvm_hw_cpu
Posted by Jan Beulich 2 years, 1 month ago
On 10.03.2022 19:44, Tamas K Lengyel wrote:
> During VM fork reset, a failed vmentry has been observed when the reset
> is performed immediately after a STI instruction was executed. This is due to
> the guest interruptibility state in the VMCS being modified by STI while the
> subsequent reset removes the IF bit from FLAGS, causing the failed vmentry.

I first thought this was backwards, but after re-reading a couple of
times I think the issue is merely with you stating this as if this
was what always happens, while it really depends on the state that
the VM is being reset to. I think it would further be helpful if you
made clear that other interruptibility state could also cause issues
when not properly restored. One way to express this would be to
simply insert "e.g." ahead of "a STI instruction".

> @@ -1155,6 +1154,8 @@ static int cf_check hvm_load_cpu_ctxt(struct domain *d, hvm_domain_context_t *h)
>      v->arch.dr6   = ctxt.dr6;
>      v->arch.dr7   = ctxt.dr7;
>  
> +    hvm_set_interrupt_shadow(v, ctxt.interruptibility_info);

Setting reserved bits as well as certain combinations of bits will
cause VM entry to fail. I think it would be nice to report this as
an error here rather than waiting for the VM entry failure.

> --- a/xen/arch/x86/include/asm/hvm/hvm.h
> +++ b/xen/arch/x86/include/asm/hvm/hvm.h
> @@ -720,6 +720,22 @@ static inline int hvm_vmtrace_reset(struct vcpu *v)
>      return -EOPNOTSUPP;
>  }
>  
> +static inline unsigned long hvm_get_interrupt_shadow(struct vcpu *v)

unsigned long here and ...

> +{
> +    if ( hvm_funcs.get_interrupt_shadow )
> +        return alternative_call(hvm_funcs.get_interrupt_shadow, v);
> +
> +    return -EOPNOTSUPP;
> +}
> +
> +static inline void
> +hvm_set_interrupt_shadow(struct vcpu *v, unsigned long val)

... here are not in line with the hooks' types. Same for the stubs
further down then.

> +{
> +    if ( hvm_funcs.set_interrupt_shadow )
> +        alternative_vcall(hvm_funcs.set_interrupt_shadow, v, val);
> +}
> +
> +
>  /*

Please don't insert multiple successive blank lines.

Jan
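
For reference, a minimal sketch of the restore-time check suggested above,
assuming it sat next to the other sanity checks in hvm_load_cpu_ctxt(). The
HVM_INTR_SHADOW_* constants exist in the tree, but the accepted-bit mask and
the error message wording are illustrative; this is not part of the posted
patch and does not cover the full set of architectural VM-entry checks:

    /* Illustrative only -- reject interruptibility bits Xen does not know. */
    if ( ctxt.interruptibility_info & ~(HVM_INTR_SHADOW_STI |
                                        HVM_INTR_SHADOW_MOV_SS |
                                        HVM_INTR_SHADOW_SMI |
                                        HVM_INTR_SHADOW_NMI) )
    {
        printk(XENLOG_G_ERR "HVM%d restore: bad interruptibility state %#x\n",
               d->domain_id, ctxt.interruptibility_info);
        return -EINVAL;
    }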
Re: [PATCH] x86/hvm: Include interruptibility state in hvm_hw_cpu
Posted by Tamas K Lengyel 2 years, 1 month ago
On Thu, Mar 17, 2022 at 9:56 AM Jan Beulich <jbeulich@suse.com> wrote:
>
> On 10.03.2022 19:44, Tamas K Lengyel wrote:
> > During VM fork reset, a failed vmentry has been observed when the reset
> > is performed immediately after a STI instruction was executed. This is due to
> > the guest interruptibility state in the VMCS being modified by STI while the
> > subsequent reset removes the IF bit from FLAGS, causing the failed vmentry.
>
> I first thought this was backwards, but after re-reading a couple of
> times I think the issue is merely with you stating this as if this
> was what always happens, while it really depends on the state that
> the VM is being reset to. I think it would further be helpful if you
> made clear that other interruptibility state could also cause issues
> when not properly restored. One way to express this would be to
> simply insert "e.g." ahead of "a STI instruction".

Correct, there could be other instances where the interruptibility
state could go out of sync with RFLAGS; executing STI and then
resetting only the register state to the pre-STI parent is just one I
stumbled into.

>
> > @@ -1155,6 +1154,8 @@ static int cf_check hvm_load_cpu_ctxt(struct domain *d, hvm_domain_context_t *h)
> >      v->arch.dr6   = ctxt.dr6;
> >      v->arch.dr7   = ctxt.dr7;
> >
> > +    hvm_set_interrupt_shadow(v, ctxt.interruptibility_info);
>
> Setting reserved bits as well as certain combinations of bits will
> cause VM entry to fail. I think it would be nice to report this as
> an error here rather than waiting for the VM entry failure.

Not sure if this would be the right spot to do that since that's all
VMX specific and this is the common hvm code.

>
> > --- a/xen/arch/x86/include/asm/hvm/hvm.h
> > +++ b/xen/arch/x86/include/asm/hvm/hvm.h
> > @@ -720,6 +720,22 @@ static inline int hvm_vmtrace_reset(struct vcpu *v)
> >      return -EOPNOTSUPP;
> >  }
> >
> > +static inline unsigned long hvm_get_interrupt_shadow(struct vcpu *v)
>
> unsigned long here and ...
>
> > +{
> > +    if ( hvm_funcs.get_interrupt_shadow )
> > +        return alternative_call(hvm_funcs.get_interrupt_shadow, v);
> > +
> > +    return -EOPNOTSUPP;
> > +}
> > +
> > +static inline void
> > +hvm_set_interrupt_shadow(struct vcpu *v, unsigned long val)
>
> ... here are not in line with the hooks' types. Same for the stubs
> further down then.
>
> > +{
> > +    if ( hvm_funcs.set_interrupt_shadow )
> > +        alternative_vcall(hvm_funcs.set_interrupt_shadow, v, val);
> > +}
> > +
> > +
> >  /*
>
> Please don't insert multiple successive blank lines.

Ack.

Tamas
Re: [PATCH] x86/hvm: Include interruptibility state in hvm_hw_cpu
Posted by Jan Beulich 2 years, 1 month ago
On 17.03.2022 15:43, Tamas K Lengyel wrote:
> On Thu, Mar 17, 2022 at 9:56 AM Jan Beulich <jbeulich@suse.com> wrote:
>> On 10.03.2022 19:44, Tamas K Lengyel wrote:
>>> @@ -1155,6 +1154,8 @@ static int cf_check hvm_load_cpu_ctxt(struct domain *d, hvm_domain_context_t *h)
>>>      v->arch.dr6   = ctxt.dr6;
>>>      v->arch.dr7   = ctxt.dr7;
>>>
>>> +    hvm_set_interrupt_shadow(v, ctxt.interruptibility_info);
>>
>> Setting reserved bits as well as certain combinations of bits will
>> cause VM entry to fail. I think it would be nice to report this as
>> an error here rather than waiting for the VM entry failure.
> 
> Not sure if this would be the right spot to do that since that's all
> VMX specific and this is the common hvm code.

Well, it would be the VMX hook to do the checking, with an error
propagated back here.

Jan
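
A rough sketch of that shape, for illustration only: the int-returning hook
and the error propagation are hypothetical (the posted patch keeps the hook
void), and VMX_INTR_SHADOW_VALID_MASK is a made-up name standing in for
whichever set of bits such a check would accept:

    /* Hypothetical VMX hook doing the validation (signature and mask assumed). */
    static int cf_check vmx_set_interrupt_shadow(struct vcpu *v, unsigned int val)
    {
        if ( val & ~VMX_INTR_SHADOW_VALID_MASK )
            return -EINVAL;

        vmx_vmcs_enter(v);
        __vmwrite(GUEST_INTERRUPTIBILITY_INFO, val);
        vmx_vmcs_exit(v);

        return 0;
    }

    /* ... with hvm_load_cpu_ctxt() then able to fail cleanly: */
    if ( hvm_set_interrupt_shadow(v, ctxt.interruptibility_info) )
        return -EINVAL;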
Re: [PATCH] x86/hvm: Include interruptibility state in hvm_hw_cpu
Posted by Tamas K Lengyel 2 years, 1 month ago
On Thu, Mar 17, 2022 at 11:06 AM Jan Beulich <jbeulich@suse.com> wrote:
>
> On 17.03.2022 15:43, Tamas K Lengyel wrote:
> > On Thu, Mar 17, 2022 at 9:56 AM Jan Beulich <jbeulich@suse.com> wrote:
> >> On 10.03.2022 19:44, Tamas K Lengyel wrote:
> >>> @@ -1155,6 +1154,8 @@ static int cf_check hvm_load_cpu_ctxt(struct domain *d, hvm_domain_context_t *h)
> >>>      v->arch.dr6   = ctxt.dr6;
> >>>      v->arch.dr7   = ctxt.dr7;
> >>>
> >>> +    hvm_set_interrupt_shadow(v, ctxt.interruptibility_info);
> >>
> >> Setting reserved bits as well as certain combinations of bits will
> >> cause VM entry to fail. I think it would be nice to report this as
> >> an error here rather than waiting for the VM entry failure.
> >
> > Not sure if this would be the right spot to do that since that's all
> > VMX specific and this is the common hvm code.
>
> Well, it would be the VMX hook to do the checking, with an error
> propagated back here.

I'm actually against it because the overhead of that error-checking
during vm forking would be significant with really no benefit. We are
copying the state from the parent where it was running fine before, so
doing that sanity checking thousands of times per second when we
already know it's fine is bad.

Tamas
Re: [PATCH] x86/hvm: Include interruptibility state in hvm_hw_cpu
Posted by Jan Beulich 2 years, 1 month ago
On 17.03.2022 16:59, Tamas K Lengyel wrote:
> On Thu, Mar 17, 2022 at 11:06 AM Jan Beulich <jbeulich@suse.com> wrote:
>>
>> On 17.03.2022 15:43, Tamas K Lengyel wrote:
>>> On Thu, Mar 17, 2022 at 9:56 AM Jan Beulich <jbeulich@suse.com> wrote:
>>>> On 10.03.2022 19:44, Tamas K Lengyel wrote:
>>>>> @@ -1155,6 +1154,8 @@ static int cf_check hvm_load_cpu_ctxt(struct domain *d, hvm_domain_context_t *h)
>>>>>      v->arch.dr6   = ctxt.dr6;
>>>>>      v->arch.dr7   = ctxt.dr7;
>>>>>
>>>>> +    hvm_set_interrupt_shadow(v, ctxt.interruptibility_info);
>>>>
>>>> Setting reserved bits as well as certain combinations of bits will
>>>> cause VM entry to fail. I think it would be nice to report this as
>>>> an error here rather than waiting for the VM entry failure.
>>>
>>> Not sure if this would be the right spot to do that since that's all
>>> VMX specific and this is the common hvm code.
>>
>> Well, it would be the VMX hook to do the checking, with an error
>> propagated back here.
> 
> I'm actually against it because the overhead of that error-checking
> during vm forking would be significant with really no benefit. We are
> copying the state from the parent where it was running fine before, so
> doing that sanity checking thousands of times per second when we
> already know it's fine is bad.

I can see your point, but my concern is not forking but normal migration
or restoring of guests, where the incoming data is of effectively
unknown origin.

Jan
Re: [PATCH] x86/hvm: Include interruptibility state in hvm_hw_cpu
Posted by Tamas K Lengyel 2 years, 1 month ago
On Thu, Mar 17, 2022 at 12:07 PM Jan Beulich <jbeulich@suse.com> wrote:
>
> On 17.03.2022 16:59, Tamas K Lengyel wrote:
> > On Thu, Mar 17, 2022 at 11:06 AM Jan Beulich <jbeulich@suse.com> wrote:
> >>
> >> On 17.03.2022 15:43, Tamas K Lengyel wrote:
> >>> On Thu, Mar 17, 2022 at 9:56 AM Jan Beulich <jbeulich@suse.com> wrote:
> >>>> On 10.03.2022 19:44, Tamas K Lengyel wrote:
> >>>>> @@ -1155,6 +1154,8 @@ static int cf_check hvm_load_cpu_ctxt(struct domain *d, hvm_domain_context_t *h)
> >>>>>      v->arch.dr6   = ctxt.dr6;
> >>>>>      v->arch.dr7   = ctxt.dr7;
> >>>>>
> >>>>> +    hvm_set_interrupt_shadow(v, ctxt.interruptibility_info);
> >>>>
> >>>> Setting reserved bits as well as certain combinations of bits will
> >>>> cause VM entry to fail. I think it would be nice to report this as
> >>>> an error here rather than waiting for the VM entry failure.
> >>>
> >>> Not sure if this would be the right spot to do that since that's all
> >>> VMX specific and this is the common hvm code.
> >>
> >> Well, it would be the VMX hook to do the checking, with an error
> >> propagated back here.
> >
> > I'm actually against it because the overhead of that error-checking
> > during vm forking would be significant with really no benefit. We are
> > copying the state from the parent where it was running fine before, so
> > doing that sanity checking thousands of times per second when we
> > already know it's fine is bad.
>
> I can see your point, but my concern is not forking but normal migration
> or restoring of guests, where the incoming data is of effectively
> unknown origin.

IMHO for that route the error checking is better performed by the
toolstack that sends the data to Xen.

Tamas