x86/svm: Fold nsvm_{wr, rd}msr() into svm_msr_{read, write}_intercept()

[PATCH] x86/svm: Fold nsvm_{wr, rd}msr() into svm_msr_{read, write}_intercept()

Posted by Andrew Cooper 3 years, 9 months ago

... to simplify the default cases.

There are multiple errors with the handling of these three MSRs, but they are
deliberately not addressed this point.

This removes the dance converting -1/0/1 into X86EMUL_*, allowing for the
removal of the 'ret' variable.

While cleaning this up, drop the gdprintk()'s for #GP conditions, and the
'result' variable from svm_msr_write_intercept() is it is never modified.

No functional change.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: Jan Beulich <JBeulich@suse.com>
CC: Wei Liu <wl@xen.org>
CC: Roger Pau Monné <roger.pau@citrix.com>
---
 xen/arch/x86/hvm/svm/nestedsvm.c        | 77 ---------------------------------
 xen/arch/x86/hvm/svm/svm.c              | 46 +++++++++++++-------
 xen/include/asm-x86/hvm/svm/nestedsvm.h |  4 --
 3 files changed, 31 insertions(+), 96 deletions(-)

diff --git a/xen/arch/x86/hvm/svm/nestedsvm.c b/xen/arch/x86/hvm/svm/nestedsvm.c
index 11dc9c089c..a193d9de45 100644
--- a/xen/arch/x86/hvm/svm/nestedsvm.c
+++ b/xen/arch/x86/hvm/svm/nestedsvm.c
@@ -47,22 +47,6 @@ nestedsvm_vcpu_stgi(struct vcpu *v)
     local_event_delivery_enable(); /* unmask events for PV drivers */
 }
 
-static int
-nestedsvm_vmcb_isvalid(struct vcpu *v, uint64_t vmcxaddr)
-{
-    /* Address must be 4k aligned */
-    if ( (vmcxaddr & ~PAGE_MASK) != 0 )
-        return 0;
-
-    /* Maximum valid physical address.
-     * See AMD BKDG for HSAVE_PA MSR.
-     */
-    if ( vmcxaddr > 0xfd00000000ULL )
-        return 0;
-
-    return 1;
-}
-
 int nestedsvm_vmcb_map(struct vcpu *v, uint64_t vmcbaddr)
 {
     struct nestedvcpu *nv = &vcpu_nestedhvm(v);
@@ -1263,67 +1247,6 @@ enum hvm_intblk nsvm_intr_blocked(struct vcpu *v)
     return hvm_intblk_none;
 }
 
-/* MSR handling */
-int nsvm_rdmsr(struct vcpu *v, unsigned int msr, uint64_t *msr_content)
-{
-    struct nestedsvm *svm = &vcpu_nestedsvm(v);
-    int ret = 1;
-
-    *msr_content = 0;
-
-    switch (msr) {
-    case MSR_K8_VM_CR:
-        break;
-    case MSR_K8_VM_HSAVE_PA:
-        *msr_content = svm->ns_msr_hsavepa;
-        break;
-    case MSR_AMD64_TSC_RATIO:
-        *msr_content = svm->ns_tscratio;
-        break;
-    default:
-        ret = 0;
-        break;
-    }
-
-    return ret;
-}
-
-int nsvm_wrmsr(struct vcpu *v, unsigned int msr, uint64_t msr_content)
-{
-    int ret = 1;
-    struct nestedsvm *svm = &vcpu_nestedsvm(v);
-
-    switch (msr) {
-    case MSR_K8_VM_CR:
-        /* ignore write. handle all bits as read-only. */
-        break;
-    case MSR_K8_VM_HSAVE_PA:
-        if (!nestedsvm_vmcb_isvalid(v, msr_content)) {
-            gdprintk(XENLOG_ERR,
-                "MSR_K8_VM_HSAVE_PA value invalid %#"PRIx64"\n", msr_content);
-            ret = -1; /* inject #GP */
-            break;
-        }
-        svm->ns_msr_hsavepa = msr_content;
-        break;
-    case MSR_AMD64_TSC_RATIO:
-        if ((msr_content & ~TSC_RATIO_RSVD_BITS) != msr_content) {
-            gdprintk(XENLOG_ERR,
-                "reserved bits set in MSR_AMD64_TSC_RATIO %#"PRIx64"\n",
-                msr_content);
-            ret = -1; /* inject #GP */
-            break;
-        }
-        svm->ns_tscratio = msr_content;
-        break;
-    default:
-        ret = 0;
-        break;
-    }
-
-    return ret;
-}
-
 /* VMEXIT emulation */
 void
 nestedsvm_vmexit_defer(struct vcpu *v,
diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c
index 4eb41792e2..bbe73744b8 100644
--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -1788,10 +1788,10 @@ static void svm_dr_access(struct vcpu *v, struct cpu_user_regs *regs)
 
 static int svm_msr_read_intercept(unsigned int msr, uint64_t *msr_content)
 {
-    int ret;
     struct vcpu *v = current;
     const struct domain *d = v->domain;
     struct vmcb_struct *vmcb = v->arch.hvm.svm.vmcb;
+    const struct nestedsvm *nsvm = &vcpu_nestedsvm(v);
 
     switch ( msr )
     {
@@ -1914,6 +1914,18 @@ static int svm_msr_read_intercept(unsigned int msr, uint64_t *msr_content)
             goto gpf;
         break;
 
+    case MSR_K8_VM_CR:
+        *msr_content = 0;
+        break;
+
+    case MSR_K8_VM_HSAVE_PA:
+        *msr_content = nsvm->ns_msr_hsavepa;
+        break;
+
+    case MSR_AMD64_TSC_RATIO:
+        *msr_content = nsvm->ns_tscratio;
+        break;
+
     case MSR_AMD_OSVW_ID_LENGTH:
     case MSR_AMD_OSVW_STATUS:
         if ( !d->arch.cpuid->extd.osvw )
@@ -1922,12 +1934,6 @@ static int svm_msr_read_intercept(unsigned int msr, uint64_t *msr_content)
         break;
 
     default:
-        ret = nsvm_rdmsr(v, msr, msr_content);
-        if ( ret < 0 )
-            goto gpf;
-        else if ( ret )
-            break;
-
         if ( rdmsr_safe(msr, *msr_content) == 0 )
             break;
 
@@ -1956,10 +1962,10 @@ static int svm_msr_read_intercept(unsigned int msr, uint64_t *msr_content)
 
 static int svm_msr_write_intercept(unsigned int msr, uint64_t msr_content)
 {
-    int ret, result = X86EMUL_OKAY;
     struct vcpu *v = current;
     struct domain *d = v->domain;
     struct vmcb_struct *vmcb = v->arch.hvm.svm.vmcb;
+    struct nestedsvm *nsvm = &vcpu_nestedsvm(v);
 
     switch ( msr )
     {
@@ -2085,6 +2091,22 @@ static int svm_msr_write_intercept(unsigned int msr, uint64_t msr_content)
             goto gpf;
         break;
 
+    case MSR_K8_VM_CR:
+        /* ignore write. handle all bits as read-only. */
+        break;
+
+    case MSR_K8_VM_HSAVE_PA:
+        if ( (msr_content & ~PAGE_MASK) || msr_content > 0xfd00000000ULL )
+            goto gpf;
+        nsvm->ns_msr_hsavepa = msr_content;
+        break;
+
+    case MSR_AMD64_TSC_RATIO:
+        if ( msr_content & TSC_RATIO_RSVD_BITS )
+            goto gpf;
+        nsvm->ns_tscratio = msr_content;
+        break;
+
     case MSR_IA32_MCx_MISC(4): /* Threshold register */
     case MSR_F10_MC4_MISC1 ... MSR_F10_MC4_MISC3:
         /*
@@ -2102,12 +2124,6 @@ static int svm_msr_write_intercept(unsigned int msr, uint64_t msr_content)
         break;
 
     default:
-        ret = nsvm_wrmsr(v, msr, msr_content);
-        if ( ret < 0 )
-            goto gpf;
-        else if ( ret )
-            break;
-
         /* Match up with the RDMSR side; ultimately this should go away. */
         if ( rdmsr_safe(msr, msr_content) == 0 )
             break;
@@ -2115,7 +2131,7 @@ static int svm_msr_write_intercept(unsigned int msr, uint64_t msr_content)
         goto gpf;
     }
 
-    return result;
+    return X86EMUL_OKAY;
 
  gpf:
     return X86EMUL_EXCEPTION;
diff --git a/xen/include/asm-x86/hvm/svm/nestedsvm.h b/xen/include/asm-x86/hvm/svm/nestedsvm.h
index 31fb4bfeb4..0873698457 100644
--- a/xen/include/asm-x86/hvm/svm/nestedsvm.h
+++ b/xen/include/asm-x86/hvm/svm/nestedsvm.h
@@ -118,10 +118,6 @@ bool_t nsvm_vmcb_guest_intercepts_event(
 bool_t nsvm_vmcb_hap_enabled(struct vcpu *v);
 enum hvm_intblk nsvm_intr_blocked(struct vcpu *v);
 
-/* MSRs */
-int nsvm_rdmsr(struct vcpu *v, unsigned int msr, uint64_t *msr_content);
-int nsvm_wrmsr(struct vcpu *v, unsigned int msr, uint64_t msr_content);
-
 /* Interrupts, vGIF */
 void svm_vmexit_do_clgi(struct cpu_user_regs *regs, struct vcpu *v);
 void svm_vmexit_do_stgi(struct cpu_user_regs *regs, struct vcpu *v);
-- 
2.11.0

Re: [PATCH] x86/svm: Fold nsvm_{wr,rd}msr() into svm_msr_{read,write}_intercept()

Posted by Roger Pau Monné 3 years, 9 months ago

On Tue, Jul 21, 2020 at 06:22:08PM +0100, Andrew Cooper wrote:
> ... to simplify the default cases.
> 
> There are multiple errors with the handling of these three MSRs, but they are
> deliberately not addressed this point.
                            ^ at
> 
> This removes the dance converting -1/0/1 into X86EMUL_*, allowing for the
> removal of the 'ret' variable.
> 
> While cleaning this up, drop the gdprintk()'s for #GP conditions, and the
> 'result' variable from svm_msr_write_intercept() is it is never modified.
                                                   ^ extra is
> 
> No functional change.
> 
> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>

Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>

I've got one question (not really patch related).

> @@ -1956,10 +1962,10 @@ static int svm_msr_read_intercept(unsigned int msr, uint64_t *msr_content)
>  
>  static int svm_msr_write_intercept(unsigned int msr, uint64_t msr_content)
>  {
> -    int ret, result = X86EMUL_OKAY;
>      struct vcpu *v = current;
>      struct domain *d = v->domain;
>      struct vmcb_struct *vmcb = v->arch.hvm.svm.vmcb;
> +    struct nestedsvm *nsvm = &vcpu_nestedsvm(v);
>  
>      switch ( msr )
>      {
> @@ -2085,6 +2091,22 @@ static int svm_msr_write_intercept(unsigned int msr, uint64_t msr_content)
>              goto gpf;
>          break;
>  
> +    case MSR_K8_VM_CR:
> +        /* ignore write. handle all bits as read-only. */
> +        break;
> +
> +    case MSR_K8_VM_HSAVE_PA:
> +        if ( (msr_content & ~PAGE_MASK) || msr_content > 0xfd00000000ULL )

Regarding the address check, the PM states "the maximum supported
physical address for this implementation", but I don't seem to be able
to find where is this actually announced.

Roger.

Re: [PATCH] x86/svm: Fold nsvm_{wr,rd}msr() into svm_msr_{read,write}_intercept()

Posted by Jan Beulich 3 years, 9 months ago

On 22.07.2020 11:26, Roger Pau Monné wrote:
> On Tue, Jul 21, 2020 at 06:22:08PM +0100, Andrew Cooper wrote:
>> @@ -2085,6 +2091,22 @@ static int svm_msr_write_intercept(unsigned int msr, uint64_t msr_content)
>>              goto gpf;
>>          break;
>>  
>> +    case MSR_K8_VM_CR:
>> +        /* ignore write. handle all bits as read-only. */
>> +        break;
>> +
>> +    case MSR_K8_VM_HSAVE_PA:
>> +        if ( (msr_content & ~PAGE_MASK) || msr_content > 0xfd00000000ULL )
> 
> Regarding the address check, the PM states "the maximum supported
> physical address for this implementation", but I don't seem to be able
> to find where is this actually announced.

I think you'd typically find this information in the BKDG or PPR only.
The PM is generic, while the named two are specific to particular
families or even just models.

Jan

Re: [PATCH] x86/svm: Fold nsvm_{wr,rd}msr() into svm_msr_{read,write}_intercept()

Posted by Andrew Cooper 3 years, 9 months ago

On 22/07/2020 10:34, Jan Beulich wrote:
> On 22.07.2020 11:26, Roger Pau Monné wrote:
>> On Tue, Jul 21, 2020 at 06:22:08PM +0100, Andrew Cooper wrote:
>>> @@ -2085,6 +2091,22 @@ static int svm_msr_write_intercept(unsigned int msr, uint64_t msr_content)
>>>              goto gpf;
>>>          break;
>>>  
>>> +    case MSR_K8_VM_CR:
>>> +        /* ignore write. handle all bits as read-only. */
>>> +        break;
>>> +
>>> +    case MSR_K8_VM_HSAVE_PA:
>>> +        if ( (msr_content & ~PAGE_MASK) || msr_content > 0xfd00000000ULL )
>> Regarding the address check, the PM states "the maximum supported
>> physical address for this implementation", but I don't seem to be able
>> to find where is this actually announced.
> I think you'd typically find this information in the BKDG or PPR only.
> The PM is generic, while the named two are specific to particular
> families or even just models.

Furthermore, the BKDG/PPR's are misleading/wrong.

For pre Fam17h, it is MAXPHYSADDR - 12G, which gives a limit lower than
0xfd00000000 on various SoC and embedded platforms.

On Fam17h, it is also lowered dynamically by how much memory encryption
is turned on (and therefore steals bits from the upper end of MAXPHYSADDR).

However, neither of these points are relevant in the slightest to
nested-svm because we don't ever map the HyperTransport range into
guests to start with - we'd get #PF[Rsvd] if we ever tried to use these
mappings.

Last time I presented this patch (nearly 2 years ago, and in the middle
of a series), it got very bogged down in a swamp of nested virt work,
which is why this time I've gone for no functional change, and punting
all the nested virt work to some future point where I've got time to
deal with it, and its not blocking the improvement wanted here.

~Andrew

Re: [PATCH] x86/svm: Fold nsvm_{wr,rd}msr() into svm_msr_{read,write}_intercept()

Posted by Jan Beulich 3 years, 9 months ago

On 21.07.2020 19:22, Andrew Cooper wrote:
> ... to simplify the default cases.
> 
> There are multiple errors with the handling of these three MSRs, but they are
> deliberately not addressed this point.
> 
> This removes the dance converting -1/0/1 into X86EMUL_*, allowing for the
> removal of the 'ret' variable.
> 
> While cleaning this up, drop the gdprintk()'s for #GP conditions, and the
> 'result' variable from svm_msr_write_intercept() is it is never modified.
> 
> No functional change.
> 
> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>

Reviewed-by: Jan Beulich <jbeulich@suse.com>

However, ...

> @@ -2085,6 +2091,22 @@ static int svm_msr_write_intercept(unsigned int msr, uint64_t msr_content)
>              goto gpf;
>          break;
>  
> +    case MSR_K8_VM_CR:
> +        /* ignore write. handle all bits as read-only. */
> +        break;
> +
> +    case MSR_K8_VM_HSAVE_PA:
> +        if ( (msr_content & ~PAGE_MASK) || msr_content > 0xfd00000000ULL )

... while you're moving this code here, wouldn't it be worthwhile
to at least fix the > to be >= ?

Jan

Re: [PATCH] x86/svm: Fold nsvm_{wr,rd}msr() into svm_msr_{read,write}_intercept()

Posted by Andrew Cooper 3 years, 9 months ago

On 22/07/2020 10:16, Jan Beulich wrote:
> On 21.07.2020 19:22, Andrew Cooper wrote:
>> ... to simplify the default cases.
>>
>> There are multiple errors with the handling of these three MSRs, but they are
>> deliberately not addressed this point.
>>
>> This removes the dance converting -1/0/1 into X86EMUL_*, allowing for the
>> removal of the 'ret' variable.
>>
>> While cleaning this up, drop the gdprintk()'s for #GP conditions, and the
>> 'result' variable from svm_msr_write_intercept() is it is never modified.
>>
>> No functional change.
>>
>> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
> Reviewed-by: Jan Beulich <jbeulich@suse.com>
>
> However, ...
>
>> @@ -2085,6 +2091,22 @@ static int svm_msr_write_intercept(unsigned int msr, uint64_t msr_content)
>>              goto gpf;
>>          break;
>>  
>> +    case MSR_K8_VM_CR:
>> +        /* ignore write. handle all bits as read-only. */
>> +        break;
>> +
>> +    case MSR_K8_VM_HSAVE_PA:
>> +        if ( (msr_content & ~PAGE_MASK) || msr_content > 0xfd00000000ULL )
> ... while you're moving this code here, wouldn't it be worthwhile
> to at least fix the > to be >= ?

I'd prefer not to, to avoid breaking the "No Functional Change" aspect.

In reality, this needs to be a path which takes an extra ref on the
nominated frame and globally maps it, seeing as we memcpy to/from it on
every virtual vmentry/exit.  The check against the HT range is quite bogus.

~Andrew

Re: [PATCH] x86/svm: Fold nsvm_{wr,rd}msr() into svm_msr_{read,write}_intercept()

Posted by Jan Beulich 3 years, 9 months ago

On 22.07.2020 12:10, Andrew Cooper wrote:
> On 22/07/2020 10:16, Jan Beulich wrote:
>> On 21.07.2020 19:22, Andrew Cooper wrote:
>>> ... to simplify the default cases.
>>>
>>> There are multiple errors with the handling of these three MSRs, but they are
>>> deliberately not addressed this point.
>>>
>>> This removes the dance converting -1/0/1 into X86EMUL_*, allowing for the
>>> removal of the 'ret' variable.
>>>
>>> While cleaning this up, drop the gdprintk()'s for #GP conditions, and the
>>> 'result' variable from svm_msr_write_intercept() is it is never modified.
>>>
>>> No functional change.
>>>
>>> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
>> Reviewed-by: Jan Beulich <jbeulich@suse.com>
>>
>> However, ...
>>
>>> @@ -2085,6 +2091,22 @@ static int svm_msr_write_intercept(unsigned int msr, uint64_t msr_content)
>>>              goto gpf;
>>>          break;
>>>  
>>> +    case MSR_K8_VM_CR:
>>> +        /* ignore write. handle all bits as read-only. */
>>> +        break;
>>> +
>>> +    case MSR_K8_VM_HSAVE_PA:
>>> +        if ( (msr_content & ~PAGE_MASK) || msr_content > 0xfd00000000ULL )
>> ... while you're moving this code here, wouldn't it be worthwhile
>> to at least fix the > to be >= ?
> 
> I'd prefer not to, to avoid breaking the "No Functional Change" aspect.

Well, so be it then.

> In reality, this needs to be a path which takes an extra ref on the
> nominated frame and globally maps it, seeing as we memcpy to/from it on
> every virtual vmentry/exit.  The check against the HT range is quite bogus.

I agree; I merely found the > so very obviously off-by-one that I
thought I'd at least inquire.

Jan