[v2] libs/guest: new CPUID/MSR interface

[PATCH v2 15/21] libs/guest: obtain a compatible cpu policy from two input ones

Posted by Roger Pau Monne 4 years, 10 months ago

Introduce a helper to obtain a compatible cpu policy based on two
input cpu policies. Currently this is done by and'ing all CPUID leaves
and MSR entries, except for MSR_ARCH_CAPABILITIES which has the RSBA
bit or'ed.

The _AC macro is pulled from libxl_internal.h into xen-tools/libs.h
since it's required in order to use the msr-index.h header.

Note there's no need to place this helper in libx86, since the
calculation of a compatible policy shouldn't be done from the
hypervisor.

No callers of the interface introduced.

Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
---
Changes since v1:
 - Only AND the feature parts of cpuid.
 - Use a binary search to find the matching leaves and msr entries.
 - Remove default case from MSR level function.
---
 tools/include/xen-tools/libs.h    |   5 ++
 tools/include/xenctrl.h           |   4 +
 tools/libs/guest/xg_cpuid_x86.c   | 128 ++++++++++++++++++++++++++++++
 tools/libs/light/libxl_internal.h |   2 -
 4 files changed, 137 insertions(+), 2 deletions(-)

diff --git a/tools/include/xen-tools/libs.h b/tools/include/xen-tools/libs.h
index a16e0c38070..b9e89f9a711 100644
--- a/tools/include/xen-tools/libs.h
+++ b/tools/include/xen-tools/libs.h
@@ -63,4 +63,9 @@
 #define ROUNDUP(_x,_w) (((unsigned long)(_x)+(1UL<<(_w))-1) & ~((1UL<<(_w))-1))
 #endif
 
+#ifndef _AC
+#define __AC(X,Y)   (X##Y)
+#define _AC(X,Y)    __AC(X,Y)
+#endif
+
 #endif	/* __XEN_TOOLS_LIBS__ */
diff --git a/tools/include/xenctrl.h b/tools/include/xenctrl.h
index 5f699c09509..c41d794683c 100644
--- a/tools/include/xenctrl.h
+++ b/tools/include/xenctrl.h
@@ -2622,6 +2622,10 @@ int xc_cpu_policy_update_msrs(xc_interface *xch, xc_cpu_policy_t policy,
 /* Compatibility calculations. */
 bool xc_cpu_policy_is_compatible(xc_interface *xch, const xc_cpu_policy_t host,
                                  const xc_cpu_policy_t guest);
+int xc_cpu_policy_calc_compatible(xc_interface *xch,
+                                  const xc_cpu_policy_t p1,
+                                  const xc_cpu_policy_t p2,
+                                  xc_cpu_policy_t out);
 
 int xc_get_cpu_levelling_caps(xc_interface *xch, uint32_t *caps);
 int xc_get_cpu_featureset(xc_interface *xch, uint32_t index,
diff --git a/tools/libs/guest/xg_cpuid_x86.c b/tools/libs/guest/xg_cpuid_x86.c
index bd2f31dd87f..6cfa4cb39d1 100644
--- a/tools/libs/guest/xg_cpuid_x86.c
+++ b/tools/libs/guest/xg_cpuid_x86.c
@@ -32,6 +32,7 @@ enum {
 #include <xen/arch-x86/cpufeatureset.h>
 };
 
+#include <xen/asm/msr-index.h>
 #include <xen/asm/x86-vendors.h>
 
 #include <xen/lib/x86/cpu-policy.h>
@@ -944,3 +945,130 @@ bool xc_cpu_policy_is_compatible(xc_interface *xch, const xc_cpu_policy_t host,
 
     return false;
 }
+
+static uint64_t level_msr(unsigned int index, uint64_t val1, uint64_t val2)
+{
+    uint64_t val = val1 & val2;;
+
+    switch ( index )
+    {
+    case MSR_ARCH_CAPABILITIES:
+        /*
+         * Set RSBA if present on any of the input values to notice the guest
+         * might run on vulnerable hardware at some point.
+         */
+        val |= (val1 | val2) & ARCH_CAPS_RSBA;
+        break;
+    }
+
+    return val;
+}
+
+static bool level_leaf(xen_cpuid_leaf_t *l1, xen_cpuid_leaf_t *l2,
+                       xen_cpuid_leaf_t *out)
+{
+    *out = (xen_cpuid_leaf_t){ };
+
+    switch ( l1->leaf )
+    {
+    case 0x1:
+    case 0x80000001:
+        out->c = l1->c & l2->c;
+        out->d = l1->d & l2->d;
+        return true;
+
+    case 0xd:
+        if ( l1->subleaf != 1 )
+            break;
+        out->a = l1->a & l2->a;
+        return true;
+
+    case 0x7:
+        switch ( l1->subleaf )
+        {
+        case 0:
+            out->b = l1->b & l2->b;
+            out->c = l1->c & l2->c;
+            out->d = l1->d & l2->d;
+            return true;
+
+        case 1:
+            out->a = l1->a & l2->a;
+            return true;
+        }
+        break;
+
+    case 0x80000007:
+        out->d = l1->d & l2->d;
+        return true;
+
+    case 0x80000008:
+        out->b = l1->b & l2->b;
+        return true;
+    }
+
+    return false;
+}
+
+int xc_cpu_policy_calc_compatible(xc_interface *xch,
+                                  const xc_cpu_policy_t p1,
+                                  const xc_cpu_policy_t p2,
+                                  xc_cpu_policy_t out)
+{
+    unsigned int nr_leaves, nr_msrs, i, index;
+    unsigned int p1_nr_leaves, p2_nr_leaves;
+    unsigned int p1_nr_entries, p2_nr_entries;
+    int rc;
+
+    p1_nr_leaves = p2_nr_leaves = ARRAY_SIZE(p1->leaves);
+    p1_nr_entries = p2_nr_entries = ARRAY_SIZE(p1->entries);
+
+    rc = xc_cpu_policy_serialise(xch, p1, p1->leaves, &p1_nr_leaves,
+                                 p1->entries, &p1_nr_entries);
+    if ( rc )
+        return rc;
+    rc = xc_cpu_policy_serialise(xch, p2, p2->leaves, &p2_nr_leaves,
+                                 p2->entries, &p2_nr_entries);
+    if ( rc )
+        return rc;
+
+    index = 0;
+    for ( i = 0; i < p1_nr_leaves; i++ )
+    {
+        xen_cpuid_leaf_t *l1 = &p1->leaves[i];
+        xen_cpuid_leaf_t *l2 = find_leaf(p2->leaves, p2_nr_leaves,
+                                         l1->leaf, l1->subleaf);
+
+        if ( l2 && level_leaf(&out->leaves[index], l1, l2) )
+        {
+            out->leaves[index].leaf = l1->leaf;
+            out->leaves[index].subleaf = l1->subleaf;
+            index++;
+        }
+    }
+    nr_leaves = index;
+
+    index = 0;
+    for ( i = 0; i < p1_nr_entries; i++ )
+    {
+        xen_msr_entry_t *l1 = &p1->entries[i];
+        xen_msr_entry_t *l2 = find_entry(p2->entries, p2_nr_entries, l1->idx);
+
+        if ( !l2 )
+            continue;
+
+        out->entries[index].idx = l1->idx;
+        out->entries[index].val = level_msr(l1->idx, l1->val, l2->val);
+        index++;
+    }
+    nr_msrs = index;
+
+    rc = deserialize_policy(xch, out, nr_leaves, nr_msrs);
+    if ( rc )
+    {
+        errno = -rc;
+        rc = -1;
+    }
+
+    return rc;
+}
diff --git a/tools/libs/light/libxl_internal.h b/tools/libs/light/libxl_internal.h
index 44a2f3c8fe3..5709bcb93fa 100644
--- a/tools/libs/light/libxl_internal.h
+++ b/tools/libs/light/libxl_internal.h
@@ -126,8 +126,6 @@
 #define PVSHIM_CMDLINE "pv-shim console=xen,pv"
 
 /* Size macros. */
-#define __AC(X,Y)   (X##Y)
-#define _AC(X,Y)    __AC(X,Y)
 #define MB(_mb)     (_AC(_mb, ULL) << 20)
 #define GB(_gb)     (_AC(_gb, ULL) << 30)
 
-- 
2.30.1

Re: [PATCH v2 15/21] libs/guest: obtain a compatible cpu policy from two input ones

Posted by Jan Beulich 4 years, 10 months ago

On 13.04.2021 16:01, Roger Pau Monne wrote:
> @@ -944,3 +945,130 @@ bool xc_cpu_policy_is_compatible(xc_interface *xch, const xc_cpu_policy_t host,
>  
>      return false;
>  }
> +
> +static uint64_t level_msr(unsigned int index, uint64_t val1, uint64_t val2)
> +{
> +    uint64_t val = val1 & val2;;

For arbitrary MSRs this isn't going to do any good. If only very
specific MSRs are assumed to make it here, I think this wants
commenting on.

Also, nit: stray semicolon.

> +    switch ( index )
> +    {
> +    case MSR_ARCH_CAPABILITIES:
> +        /*
> +         * Set RSBA if present on any of the input values to notice the guest
> +         * might run on vulnerable hardware at some point.
> +         */
> +        val |= (val1 | val2) & ARCH_CAPS_RSBA;
> +        break;
> +    }
> +
> +    return val;
> +}
> +
> +static bool level_leaf(xen_cpuid_leaf_t *l1, xen_cpuid_leaf_t *l2,

const (twice)?

> +                       xen_cpuid_leaf_t *out)
> +{
> +    *out = (xen_cpuid_leaf_t){ };
> +
> +    switch ( l1->leaf )
> +    {
> +    case 0x1:
> +    case 0x80000001:
> +        out->c = l1->c & l2->c;
> +        out->d = l1->d & l2->d;
> +        return true;
> +
> +    case 0xd:
> +        if ( l1->subleaf != 1 )
> +            break;
> +        out->a = l1->a & l2->a;
> +        return true;

Could you explain your thinking behind this (a code comment would
likely help)? You effectively discard everything except subleaf 1
by returning false in that case, don't you?

> +    case 0x7:
> +        switch ( l1->subleaf )
> +        {
> +        case 0:
> +            out->b = l1->b & l2->b;
> +            out->c = l1->c & l2->c;
> +            out->d = l1->d & l2->d;
> +            return true;
> +
> +        case 1:
> +            out->a = l1->a & l2->a;
> +            return true;
> +        }
> +        break;

Can we perhaps assume all subleaves here are going to hold flags,
and hence and both sides together without regard to what subleaf
we're actually dealing with (subleaf 1 remaining special as to
EAX of course)? This would avoid having to remember to make yet
another mechanical change when enabling a new subleaf.

> +    case 0x80000007:
> +        out->d = l1->d & l2->d;
> +        return true;
> +
> +    case 0x80000008:
> +        out->b = l1->b & l2->b;
> +        return true;
> +    }
> +
> +    return false;
> +}

Considering your LFENCE-always-serializing patch, I assume
whichever ends up going in last will take care of adding handling
of that leaf here?

Jan

Re: [PATCH v2 15/21] libs/guest: obtain a compatible cpu policy from two input ones

Posted by Roger Pau Monné 4 years, 9 months ago

On Wed, Apr 14, 2021 at 03:49:02PM +0200, Jan Beulich wrote:
> On 13.04.2021 16:01, Roger Pau Monne wrote:
> > @@ -944,3 +945,130 @@ bool xc_cpu_policy_is_compatible(xc_interface *xch, const xc_cpu_policy_t host,
> >  
> >      return false;
> >  }
> > +
> > +static uint64_t level_msr(unsigned int index, uint64_t val1, uint64_t val2)
> > +{
> > +    uint64_t val = val1 & val2;;
> 
> For arbitrary MSRs this isn't going to do any good. If only very
> specific MSRs are assumed to make it here, I think this wants
> commenting on.

I've added: "MSRs passed to level_msr are expected to be bitmaps of
features"

> > +                       xen_cpuid_leaf_t *out)
> > +{
> > +    *out = (xen_cpuid_leaf_t){ };
> > +
> > +    switch ( l1->leaf )
> > +    {
> > +    case 0x1:
> > +    case 0x80000001:
> > +        out->c = l1->c & l2->c;
> > +        out->d = l1->d & l2->d;
> > +        return true;
> > +
> > +    case 0xd:
> > +        if ( l1->subleaf != 1 )
> > +            break;
> > +        out->a = l1->a & l2->a;
> > +        return true;
> 
> Could you explain your thinking behind this (a code comment would
> likely help)? You effectively discard everything except subleaf 1
> by returning false in that case, don't you?

Yes, the intent is to only level the features bitfield found in
subleaf 1.

I was planning for level_leaf so far in this series to deal with the
feature leaves part of the featureset only. I guess you would also
like to leverage other parts of the xstate leaf, like the max_size or
the supported bits in xss_{low,high}?

> > +    case 0x7:
> > +        switch ( l1->subleaf )
> > +        {
> > +        case 0:
> > +            out->b = l1->b & l2->b;
> > +            out->c = l1->c & l2->c;
> > +            out->d = l1->d & l2->d;
> > +            return true;
> > +
> > +        case 1:
> > +            out->a = l1->a & l2->a;
> > +            return true;
> > +        }
> > +        break;
> 
> Can we perhaps assume all subleaves here are going to hold flags,
> and hence and both sides together without regard to what subleaf
> we're actually dealing with (subleaf 1 remaining special as to

I think you meant subleaf 0 EAX (the max subleaf value)?

subleaf 1 EAX is just a normal bitfield AFAIK.

> EAX of course)? This would avoid having to remember to make yet
> another mechanical change when enabling a new subleaf.

Sure, seems like a fine approach since leaf 7 will only contain
feature bitmaps.

> > +    case 0x80000007:
> > +        out->d = l1->d & l2->d;
> > +        return true;
> > +
> > +    case 0x80000008:
> > +        out->b = l1->b & l2->b;
> > +        return true;
> > +    }
> > +
> > +    return false;
> > +}
> 
> Considering your LFENCE-always-serializing patch, I assume
> whichever ends up going in last will take care of adding handling
> of that leaf here?

Indeed.

Thanks, Roger.

Re: [PATCH v2 15/21] libs/guest: obtain a compatible cpu policy from two input ones

Posted by Jan Beulich 4 years, 9 months ago

On 22.04.2021 11:42, Roger Pau Monné wrote:
> On Wed, Apr 14, 2021 at 03:49:02PM +0200, Jan Beulich wrote:
>> On 13.04.2021 16:01, Roger Pau Monne wrote:
>>> @@ -944,3 +945,130 @@ bool xc_cpu_policy_is_compatible(xc_interface *xch, const xc_cpu_policy_t host,
>>>  
>>>      return false;
>>>  }
>>> +
>>> +static uint64_t level_msr(unsigned int index, uint64_t val1, uint64_t val2)
>>> +{
>>> +    uint64_t val = val1 & val2;;
>>
>> For arbitrary MSRs this isn't going to do any good. If only very
>> specific MSRs are assumed to make it here, I think this wants
>> commenting on.
> 
> I've added: "MSRs passed to level_msr are expected to be bitmaps of
> features"

How does such a comment help? I.e. how does the caller tell which MSRs
to pass here and which to deal with anyother way?

>>> +                       xen_cpuid_leaf_t *out)
>>> +{
>>> +    *out = (xen_cpuid_leaf_t){ };
>>> +
>>> +    switch ( l1->leaf )
>>> +    {
>>> +    case 0x1:
>>> +    case 0x80000001:
>>> +        out->c = l1->c & l2->c;
>>> +        out->d = l1->d & l2->d;
>>> +        return true;
>>> +
>>> +    case 0xd:
>>> +        if ( l1->subleaf != 1 )
>>> +            break;
>>> +        out->a = l1->a & l2->a;
>>> +        return true;
>>
>> Could you explain your thinking behind this (a code comment would
>> likely help)? You effectively discard everything except subleaf 1
>> by returning false in that case, don't you?
> 
> Yes, the intent is to only level the features bitfield found in
> subleaf 1.
> 
> I was planning for level_leaf so far in this series to deal with the
> feature leaves part of the featureset only. I guess you would also
> like to leverage other parts of the xstate leaf, like the max_size or
> the supported bits in xss_{low,high}?

The latter is clearly one of the things to consider, yes (alongside
the respective bits in sub-leaf 0 for XCR0). Sub-leaves > 1 may also
need dealing with ECX. Yet then again some or all of this may need
handling elsewhere, not the least because of the unusual handling of
leaf 0xd in the hypervisor. What gets checked and/or adjusted where
needs to be settled upon, and then the different parts of code would
imo better cross-reference each other.

>>> +    case 0x7:
>>> +        switch ( l1->subleaf )
>>> +        {
>>> +        case 0:
>>> +            out->b = l1->b & l2->b;
>>> +            out->c = l1->c & l2->c;
>>> +            out->d = l1->d & l2->d;
>>> +            return true;
>>> +
>>> +        case 1:
>>> +            out->a = l1->a & l2->a;
>>> +            return true;
>>> +        }
>>> +        break;
>>
>> Can we perhaps assume all subleaves here are going to hold flags,
>> and hence and both sides together without regard to what subleaf
>> we're actually dealing with (subleaf 1 remaining special as to
> 
> I think you meant subleaf 0 EAX (the max subleaf value)?
> 
> subleaf 1 EAX is just a normal bitfield AFAIK.

Indeed I did.

Jan

>> EAX of course)? This would avoid having to remember to make yet
>> another mechanical change when enabling a new subleaf.
> 
> Sure, seems like a fine approach since leaf 7 will only contain
> feature bitmaps.

Re: [PATCH v2 15/21] libs/guest: obtain a compatible cpu policy from two input ones

Posted by Wei Liu 4 years, 9 months ago

On Tue, Apr 13, 2021 at 04:01:33PM +0200, Roger Pau Monne wrote:
> Introduce a helper to obtain a compatible cpu policy based on two
> input cpu policies. Currently this is done by and'ing all CPUID leaves
> and MSR entries, except for MSR_ARCH_CAPABILITIES which has the RSBA
> bit or'ed.
> 

I thought canonical source for compatibility was to be put into the
hypervisor, thus all this functionality would be in the hypervisor. Am I
misremembering?

Wei.

Re: [PATCH v2 15/21] libs/guest: obtain a compatible cpu policy from two input ones

Posted by Roger Pau Monné 4 years, 9 months ago

On Wed, Apr 21, 2021 at 10:22:39AM +0000, Wei Liu wrote:
> On Tue, Apr 13, 2021 at 04:01:33PM +0200, Roger Pau Monne wrote:
> > Introduce a helper to obtain a compatible cpu policy based on two
> > input cpu policies. Currently this is done by and'ing all CPUID leaves
> > and MSR entries, except for MSR_ARCH_CAPABILITIES which has the RSBA
> > bit or'ed.
> > 
> 
> I thought canonical source for compatibility was to be put into the
> hypervisor, thus all this functionality would be in the hypervisor. Am I
> misremembering?

Andrew said something similar on v1, but I'm not able to figure how
this would be used by the hypervisor.

It's my understating that the toolstack will attempt to generate a CPU
policy and forward it to the hypervisor, which will either accept or
reject it based on the capabilities of the system. I'm not sure I see
why we would need to give the hypervisor two policies in order to
generate a resulting compatible one - it should all be done by the
toolstack AFAICT.

If there's a use case for this being in the hypervisor I'm happy to
add it there, but so far I haven't been able to come up with one
myself, and hence I don't see the need to make the code available.

Thanks, Roger.

Re: [PATCH v2 15/21] libs/guest: obtain a compatible cpu policy from two input ones

Posted by Wei Liu 4 years, 9 months ago

On Wed, Apr 21, 2021 at 01:26:49PM +0200, Roger Pau Monné wrote:
> On Wed, Apr 21, 2021 at 10:22:39AM +0000, Wei Liu wrote:
> > On Tue, Apr 13, 2021 at 04:01:33PM +0200, Roger Pau Monne wrote:
> > > Introduce a helper to obtain a compatible cpu policy based on two
> > > input cpu policies. Currently this is done by and'ing all CPUID leaves
> > > and MSR entries, except for MSR_ARCH_CAPABILITIES which has the RSBA
> > > bit or'ed.
> > > 
> > 
> > I thought canonical source for compatibility was to be put into the
> > hypervisor, thus all this functionality would be in the hypervisor. Am I
> > misremembering?
> 
> Andrew said something similar on v1, but I'm not able to figure how
> this would be used by the hypervisor.
> 
> It's my understating that the toolstack will attempt to generate a CPU
> policy and forward it to the hypervisor, which will either accept or
> reject it based on the capabilities of the system. I'm not sure I see
> why we would need to give the hypervisor two policies in order to
> generate a resulting compatible one - it should all be done by the
> toolstack AFAICT.
> 
> If there's a use case for this being in the hypervisor I'm happy to
> add it there, but so far I haven't been able to come up with one
> myself, and hence I don't see the need to make the code available.

I have no opinion here. Just wanted to get on the same page.

Thanks for the explanation.

Wei.

> 
> Thanks, Roger.