[PATCH] x86/msr: Unify the real {rd, wr}msr() paths in guest_{rd, wr}msr()

Andrew Cooper posted 1 patch 2 weeks ago
Patches applied successfully (tree, apply log)
git fetch https://github.com/patchew-project/xen tags/patchew/20200722105529.12177-1-andrew.cooper3@citrix.com
xen/arch/x86/msr.c | 54 ++++++++++++++++++++++++++++++++++++++++++------------
1 file changed, 42 insertions(+), 12 deletions(-)

[PATCH] x86/msr: Unify the real {rd, wr}msr() paths in guest_{rd, wr}msr()

Posted by Andrew Cooper 2 weeks ago
Both the read and write side have commonalities which can be abstracted away.
This also allows for additional safety in release builds, and slightly more
helpful diagnostics in debug builds.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: Jan Beulich <JBeulich@suse.com>
CC: Wei Liu <wl@xen.org>
CC: Roger Pau Monné <roger.pau@citrix.com>

I'm not a massive fan of the global scope want_rdmsr_safe boolean, but I can't
think of a reasonable way to fix it without starting to use other
flexibilities offered to us by C99.  (And to preempt the other question, an
extra set of braces makes extremely confusing to read logic.)
---
 xen/arch/x86/msr.c | 54 ++++++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 42 insertions(+), 12 deletions(-)

diff --git a/xen/arch/x86/msr.c b/xen/arch/x86/msr.c
index 22f921cc71..68f3aadeab 100644
--- a/xen/arch/x86/msr.c
+++ b/xen/arch/x86/msr.c
@@ -154,6 +154,7 @@ int guest_rdmsr(struct vcpu *v, uint32_t msr, uint64_t *val)
     const struct cpuid_policy *cp = d->arch.cpuid;
     const struct msr_policy *mp = d->arch.msr;
     const struct vcpu_msrs *msrs = v->arch.msrs;
+    bool want_rdmsr_safe = false;
     int ret = X86EMUL_OKAY;
 
     switch ( msr )
@@ -204,10 +205,9 @@ int guest_rdmsr(struct vcpu *v, uint32_t msr, uint64_t *val)
          */
         if ( !(cp->x86_vendor & (X86_VENDOR_INTEL | X86_VENDOR_AMD)) ||
              !(boot_cpu_data.x86_vendor &
-               (X86_VENDOR_INTEL | X86_VENDOR_AMD)) ||
-             rdmsr_safe(MSR_AMD_PATCHLEVEL, *val) )
+               (X86_VENDOR_INTEL | X86_VENDOR_AMD)) )
             goto gp_fault;
-        break;
+        goto read_from_hw_safe;
 
     case MSR_SPEC_CTRL:
         if ( !cp->feat.ibrsb )
@@ -278,7 +278,7 @@ int guest_rdmsr(struct vcpu *v, uint32_t msr, uint64_t *val)
          */
 #ifdef CONFIG_HVM
         if ( v == current && is_hvm_domain(d) && v->arch.hvm.flag_dr_dirty )
-            rdmsrl(msr, *val);
+            goto read_from_hw;
         else
 #endif
             *val = msrs->dr_mask[
@@ -303,6 +303,23 @@ int guest_rdmsr(struct vcpu *v, uint32_t msr, uint64_t *val)
 
     return ret;
 
+ read_from_hw_safe:
+    want_rdmsr_safe = true;
+ read_from_hw:
+    if ( !rdmsr_safe(msr, *val) )
+        return X86EMUL_OKAY;
+
+    /*
+     * Paths which didn't want rdmsr_safe() and get here took a #GP fault.
+     * Something is broken with the above logic, so make it obvious in debug
+     * builds, and fail safe by handing #GP back to guests in release builds.
+     */
+    if ( !want_rdmsr_safe )
+    {
+        gprintk(XENLOG_ERR, "Bad rdmsr %#x\n", msr);
+        ASSERT_UNREACHABLE();
+    }
+
  gp_fault:
     return X86EMUL_EXCEPTION;
 }
@@ -402,9 +419,7 @@ int guest_wrmsr(struct vcpu *v, uint32_t msr, uint64_t val)
         if ( val & ~PRED_CMD_IBPB )
             goto gp_fault; /* Rsvd bit set? */
 
-        if ( v == curr )
-            wrmsrl(MSR_PRED_CMD, val);
-        break;
+        goto maybe_write_to_hw;
 
     case MSR_FLUSH_CMD:
         if ( !cp->feat.l1d_flush )
@@ -413,9 +428,7 @@ int guest_wrmsr(struct vcpu *v, uint32_t msr, uint64_t val)
         if ( val & ~FLUSH_CMD_L1D )
             goto gp_fault; /* Rsvd bit set? */
 
-        if ( v == curr )
-            wrmsrl(MSR_FLUSH_CMD, val);
-        break;
+        goto maybe_write_to_hw;
 
     case MSR_INTEL_MISC_FEATURES_ENABLES:
     {
@@ -493,8 +506,8 @@ int guest_wrmsr(struct vcpu *v, uint32_t msr, uint64_t val)
                                ? 0 : (msr - MSR_AMD64_DR1_ADDRESS_MASK + 1),
                                ARRAY_SIZE(msrs->dr_mask))] = val;
 
-        if ( v == curr && (curr->arch.dr7 & DR7_ACTIVE_MASK) )
-            wrmsrl(msr, val);
+        if ( curr->arch.dr7 & DR7_ACTIVE_MASK )
+            goto maybe_write_to_hw;
         break;
 
     default:
@@ -509,6 +522,23 @@ int guest_wrmsr(struct vcpu *v, uint32_t msr, uint64_t val)
 
     return ret;
 
+ maybe_write_to_hw:
+    /*
+     * All paths potentially updating a value in hardware need to check
+     * whether the call is in current context or not, so the logic is
+     * implemented here.  Remote context need do nothing more.
+     */
+    if ( v != curr || !wrmsr_safe(msr, val) )
+        return X86EMUL_OKAY;
+
+    /*
+     * Paths which end up here took a #GP fault in wrmsr_safe().  Something is
+     * broken with the logic above, so make it obvious in debug builds, and
+     * fail safe by handing #GP back to the guests in release builds.
+     */
+    gprintk(XENLOG_ERR, "Bad wrmsr %#x val %016"PRIx64"\n", msr, val);
+    ASSERT_UNREACHABLE();
+
  gp_fault:
     return X86EMUL_EXCEPTION;
 }
-- 
2.11.0


Re: [PATCH] x86/msr: Unify the real {rd,wr}msr() paths in guest_{rd,wr}msr()

Posted by Roger Pau Monné 2 weeks ago
On Wed, Jul 22, 2020 at 11:55:29AM +0100, Andrew Cooper wrote:
> Both the read and write side have commonalities which can be abstracted away.
> This also allows for additional safety in release builds, and slightly more
> helpful diagnostics in debug builds.
> 
> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>

Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>

> ---
> CC: Jan Beulich <JBeulich@suse.com>
> CC: Wei Liu <wl@xen.org>
> CC: Roger Pau Monné <roger.pau@citrix.com>
> 
> I'm not a massive fan of the global scope want_rdmsr_safe boolean, but I can't
> think of a reasonable way to fix it without starting to use other
> flexibilities offered to us by C99.  (And to preempt the other question, an
> extra set of braces makes extremely confusing to read logic.)

The logic could be moved to a helper that takes a expected_safe or
some such parameter, but I think I prefer this approach.

Thanks, Roger.

Re: [PATCH] x86/msr: Unify the real {rd,wr}msr() paths in guest_{rd,wr}msr()

Posted by Jan Beulich 2 weeks ago
On 22.07.2020 12:55, Andrew Cooper wrote:
> Both the read and write side have commonalities which can be abstracted away.
> This also allows for additional safety in release builds, and slightly more
> helpful diagnostics in debug builds.
> 
> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
> ---
> CC: Jan Beulich <JBeulich@suse.com>
> CC: Wei Liu <wl@xen.org>
> CC: Roger Pau Monné <roger.pau@citrix.com>
> 
> I'm not a massive fan of the global scope want_rdmsr_safe boolean, but I can't
> think of a reasonable way to fix it without starting to use other
> flexibilities offered to us by C99.

I can't seem to be able to guess what C99 feature(s) you mean.
If there are any that would help, why not use them?

> @@ -204,10 +205,9 @@ int guest_rdmsr(struct vcpu *v, uint32_t msr, uint64_t *val)
>           */
>          if ( !(cp->x86_vendor & (X86_VENDOR_INTEL | X86_VENDOR_AMD)) ||
>               !(boot_cpu_data.x86_vendor &
> -               (X86_VENDOR_INTEL | X86_VENDOR_AMD)) ||
> -             rdmsr_safe(MSR_AMD_PATCHLEVEL, *val) )
> +               (X86_VENDOR_INTEL | X86_VENDOR_AMD)) )
>              goto gp_fault;
> -        break;
> +        goto read_from_hw_safe;

Above from here is a read from MSR_IA32_PLATFORM_ID - any reason
it doesn't also get folded?

> @@ -278,7 +278,7 @@ int guest_rdmsr(struct vcpu *v, uint32_t msr, uint64_t *val)
>           */
>  #ifdef CONFIG_HVM
>          if ( v == current && is_hvm_domain(d) && v->arch.hvm.flag_dr_dirty )
> -            rdmsrl(msr, *val);
> +            goto read_from_hw;

In the write path you also abstract out the check for v being current.
Wouldn't this better be abstracted out here, too, as reading an actual
MSR when not current isn't generally very helpful?

> @@ -493,8 +506,8 @@ int guest_wrmsr(struct vcpu *v, uint32_t msr, uint64_t val)
>                                 ? 0 : (msr - MSR_AMD64_DR1_ADDRESS_MASK + 1),
>                                 ARRAY_SIZE(msrs->dr_mask))] = val;
>  
> -        if ( v == curr && (curr->arch.dr7 & DR7_ACTIVE_MASK) )
> -            wrmsrl(msr, val);
> +        if ( curr->arch.dr7 & DR7_ACTIVE_MASK )
> +            goto maybe_write_to_hw;
>          break;

I have to admit that I'd find it more logical if v was now used
here instead of curr.

> @@ -509,6 +522,23 @@ int guest_wrmsr(struct vcpu *v, uint32_t msr, uint64_t val)
>  
>      return ret;
>  
> + maybe_write_to_hw:
> +    /*
> +     * All paths potentially updating a value in hardware need to check
> +     * whether the call is in current context or not, so the logic is
> +     * implemented here.  Remote context need do nothing more.
> +     */
> +    if ( v != curr || !wrmsr_safe(msr, val) )
> +        return X86EMUL_OKAY;
> +
> +    /*
> +     * Paths which end up here took a #GP fault in wrmsr_safe().  Something is
> +     * broken with the logic above, so make it obvious in debug builds, and
> +     * fail safe by handing #GP back to the guests in release builds.
> +     */
> +    gprintk(XENLOG_ERR, "Bad wrmsr %#x val %016"PRIx64"\n", msr, val);

Didn't you indicate more than once that you dislike mixing 0x-
prefixed and non-prefixed hex values in a single message?
(Personally I'd simply drop the #, but I expect you to prefer it
the other way around.)

Also both here and in the read path I'm unconvinced of the
"by handing #GP back" wording: When v != curr, no #GP fault can
typically be handed anywhere. And even when v == curr it's still
up to the caller to decide what to do. IOW how about "by
suggesting to hand back #GP" or some such?

Jan

Re: [PATCH] x86/msr: Unify the real {rd,wr}msr() paths in guest_{rd,wr}msr()

Posted by Andrew Cooper 2 weeks ago
On 23/07/2020 12:15, Jan Beulich wrote:
> On 22.07.2020 12:55, Andrew Cooper wrote:
>> Both the read and write side have commonalities which can be abstracted away.
>> This also allows for additional safety in release builds, and slightly more
>> helpful diagnostics in debug builds.
>>
>> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
>> ---
>> CC: Jan Beulich <JBeulich@suse.com>
>> CC: Wei Liu <wl@xen.org>
>> CC: Roger Pau Monné <roger.pau@citrix.com>
>>
>> I'm not a massive fan of the global scope want_rdmsr_safe boolean, but I can't
>> think of a reasonable way to fix it without starting to use other
>> flexibilities offered to us by C99.
> I can't seem to be able to guess what C99 feature(s) you mean.
> If there are any that would help, why not use them?

This hunk:

@@ -154,7 +154,6 @@ int guest_rdmsr(struct vcpu *v, uint32_t msr,
uint64_t *val)
     const struct cpuid_policy *cp = d->arch.cpuid;
     const struct msr_policy *mp = d->arch.msr;
     const struct vcpu_msrs *msrs = v->arch.msrs;
-    bool want_rdmsr_safe = false;
     int ret = X86EMUL_OKAY;
 
     switch ( msr )
@@ -303,6 +302,8 @@ int guest_rdmsr(struct vcpu *v, uint32_t msr,
uint64_t *val)
 
     return ret;
 
+    bool want_rdmsr_safe = false;
+
  read_from_hw_safe:
     want_rdmsr_safe = true;
  read_from_hw:


Except that in our root Config.mk, we pass $(call
cc-option-add,CFLAGS,CC,-Wdeclaration-after-statement)  (and then
various bits of tools/ override to -Wno-declaration-after-statement).

Perhaps this is something we want to generally permit across our
codebase, seeing as some pieces already depend on it.

>> @@ -204,10 +205,9 @@ int guest_rdmsr(struct vcpu *v, uint32_t msr, uint64_t *val)
>>           */
>>          if ( !(cp->x86_vendor & (X86_VENDOR_INTEL | X86_VENDOR_AMD)) ||
>>               !(boot_cpu_data.x86_vendor &
>> -               (X86_VENDOR_INTEL | X86_VENDOR_AMD)) ||
>> -             rdmsr_safe(MSR_AMD_PATCHLEVEL, *val) )
>> +               (X86_VENDOR_INTEL | X86_VENDOR_AMD)) )
>>              goto gp_fault;
>> -        break;
>> +        goto read_from_hw_safe;
> Above from here is a read from MSR_IA32_PLATFORM_ID - any reason
> it doesn't also get folded?

Oh - looks to be a rebasing error.  This patch is actually more than a
year old at this point.

>> @@ -278,7 +278,7 @@ int guest_rdmsr(struct vcpu *v, uint32_t msr, uint64_t *val)
>>           */
>>  #ifdef CONFIG_HVM
>>          if ( v == current && is_hvm_domain(d) && v->arch.hvm.flag_dr_dirty )
>> -            rdmsrl(msr, *val);
>> +            goto read_from_hw;
> In the write path you also abstract out the check for v being current.
> Wouldn't this better be abstracted out here, too, as reading an actual
> MSR when not current isn't generally very helpful?

This is rather complicated to answer.

In the example of PLATFORM_ID above, which is consistent across the
entire system, and therefore it doesn't matter if we read it in
non-current context.

More generally however, the read and write paths truly are asymmetric
when it comes to their use in remote context.  Read is "I need this
value now", so always has to be of the form "if current do one thing,
else read from struct vcpu", whereas write is "always update struct
vcpu/etc, and let context switch handle getting it into hardware".


Then again, the more I think about this, the more I'm unsure if either
of the approaches here is ideal.

I think what this is going to need to morph into is a
get_reg()/set_reg() pair of helpers, which are first split between PV
and HVM, and then has further vmx/svm logic.  We're gaining an
increasing number of registers which might be RAM only (things emulated
for PV), or might be in the VMCB/VMCS (some even depending on hardware
generation), or might be in the MSR load lists (Intel Only) or might be
actually in hardware, or stale in hardware (VMLOAD/VMSAVE), and these
positions might vary on a per-VM or per context basis, and when we
finally get on to nested virt, might vary based on the settings of the
L1 hypervisor.

I'm wondering whether I should in fact withdraw this patch, and wait
until we've implemented guest_{rd,wr}msr() for some of the more
interesting MSRs, and see how the logic looks at that point.

>> @@ -493,8 +506,8 @@ int guest_wrmsr(struct vcpu *v, uint32_t msr, uint64_t val)
>>                                 ? 0 : (msr - MSR_AMD64_DR1_ADDRESS_MASK + 1),
>>                                 ARRAY_SIZE(msrs->dr_mask))] = val;
>>  
>> -        if ( v == curr && (curr->arch.dr7 & DR7_ACTIVE_MASK) )
>> -            wrmsrl(msr, val);
>> +        if ( curr->arch.dr7 & DR7_ACTIVE_MASK )
>> +            goto maybe_write_to_hw;
>>          break;
> I have to admit that I'd find it more logical if v was now used
> here instead of curr.

Hmm true.

>
>> @@ -509,6 +522,23 @@ int guest_wrmsr(struct vcpu *v, uint32_t msr, uint64_t val)
>>  
>>      return ret;
>>  
>> + maybe_write_to_hw:
>> +    /*
>> +     * All paths potentially updating a value in hardware need to check
>> +     * whether the call is in current context or not, so the logic is
>> +     * implemented here.  Remote context need do nothing more.
>> +     */
>> +    if ( v != curr || !wrmsr_safe(msr, val) )
>> +        return X86EMUL_OKAY;
>> +
>> +    /*
>> +     * Paths which end up here took a #GP fault in wrmsr_safe().  Something is
>> +     * broken with the logic above, so make it obvious in debug builds, and
>> +     * fail safe by handing #GP back to the guests in release builds.
>> +     */
>> +    gprintk(XENLOG_ERR, "Bad wrmsr %#x val %016"PRIx64"\n", msr, val);
> Didn't you indicate more than once that you dislike mixing 0x-
> prefixed and non-prefixed hex values in a single message?

Yes - my mistake.

> (Personally I'd simply drop the #, but I expect you to prefer it
> the other way around.)

In this case, I'm not overly fussed about the 0x.  It is clear from
context (WRMSR in the message, and the two numbers of exact width) that
we're using only hex.

> Also both here and in the read path I'm unconvinced of the
> "by handing #GP back" wording: When v != curr, no #GP fault can
> typically be handed anywhere. And even when v == curr it's still
> up to the caller to decide what to do. IOW how about "by
> suggesting to hand back #GP" or some such?

The overwhelming majority usecase is in current context, so I suppose it
is mostly true.

For the remote usecase, if this were to go wrong, something on the
context switch path would explode.

~Andrew