[PATCH v3 05/22] x86/xstate: drop xstate_offsets[] and xstate_sizes[]

Jan Beulich posted 22 patches 4 years, 9 months ago
[PATCH v3 05/22] x86/xstate: drop xstate_offsets[] and xstate_sizes[]
Posted by Jan Beulich 4 years, 9 months ago
They're redundant with respective fields from the raw CPUID policy; no
need to keep two copies of the same data. This also breaks
recalculate_xstate()'s dependency on xstate_init(), allowing host CPUID
policy calculation to be moved together with that of the raw one (which
a subsequent change will require anyway).

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/arch/x86/cpu/common.c
+++ b/xen/arch/x86/cpu/common.c
@@ -498,6 +498,8 @@ void identify_cpu(struct cpuinfo_x86 *c)
 	}
 
 	/* Now the feature flags better reflect actual CPU features! */
+	if (c == &boot_cpu_data)
+		init_host_cpuid();
 
 	xstate_init(c);
 
--- a/xen/arch/x86/cpuid.c
+++ b/xen/arch/x86/cpuid.c
@@ -170,32 +170,32 @@ static void recalculate_xstate(struct cp
     {
         xstates |= X86_XCR0_YMM;
         xstate_size = max(xstate_size,
-                          xstate_offsets[X86_XCR0_YMM_POS] +
-                          xstate_sizes[X86_XCR0_YMM_POS]);
+                          xstate_offset(X86_XCR0_YMM_POS) +
+                          xstate_size(X86_XCR0_YMM_POS));
     }
 
     if ( p->feat.mpx )
     {
         xstates |= X86_XCR0_BNDREGS | X86_XCR0_BNDCSR;
         xstate_size = max(xstate_size,
-                          xstate_offsets[X86_XCR0_BNDCSR_POS] +
-                          xstate_sizes[X86_XCR0_BNDCSR_POS]);
+                          xstate_offset(X86_XCR0_BNDCSR_POS) +
+                          xstate_size(X86_XCR0_BNDCSR_POS));
     }
 
     if ( p->feat.avx512f )
     {
         xstates |= X86_XCR0_OPMASK | X86_XCR0_ZMM | X86_XCR0_HI_ZMM;
         xstate_size = max(xstate_size,
-                          xstate_offsets[X86_XCR0_HI_ZMM_POS] +
-                          xstate_sizes[X86_XCR0_HI_ZMM_POS]);
+                          xstate_offset(X86_XCR0_HI_ZMM_POS) +
+                          xstate_size(X86_XCR0_HI_ZMM_POS));
     }
 
     if ( p->feat.pku )
     {
         xstates |= X86_XCR0_PKRU;
         xstate_size = max(xstate_size,
-                          xstate_offsets[X86_XCR0_PKRU_POS] +
-                          xstate_sizes[X86_XCR0_PKRU_POS]);
+                          xstate_offset(X86_XCR0_PKRU_POS) +
+                          xstate_size(X86_XCR0_PKRU_POS));
     }
 
     p->xstate.max_size  =  xstate_size;
@@ -218,8 +218,8 @@ static void recalculate_xstate(struct cp
         if ( !(xstates & curr_xstate) )
             continue;
 
-        p->xstate.comp[i].size   = xstate_sizes[i];
-        p->xstate.comp[i].offset = xstate_offsets[i];
+        p->xstate.comp[i].size   = xstate_size(i);
+        p->xstate.comp[i].offset = xstate_offset(i);
         p->xstate.comp[i].xss    = curr_xstate & XSTATE_XSAVES_ONLY;
         p->xstate.comp[i].align  = curr_xstate & xstate_align;
     }
@@ -531,10 +531,16 @@ static void __init calculate_hvm_def_pol
     x86_cpuid_policy_shrink_max_leaves(p);
 }
 
-void __init init_guest_cpuid(void)
+void __init init_host_cpuid(void)
 {
     calculate_raw_policy();
     calculate_host_policy();
+}
+
+void __init init_guest_cpuid(void)
+{
+    /* Do this a 2nd time to account for setup_{clear,force}_cpu_cap() uses. */
+    calculate_host_policy();
 
     if ( IS_ENABLED(CONFIG_PV) )
     {
--- a/xen/arch/x86/xstate.c
+++ b/xen/arch/x86/xstate.c
@@ -9,6 +9,7 @@
 #include <xen/percpu.h>
 #include <xen/sched.h>
 #include <xen/xvmalloc.h>
+#include <asm/cpuid.h>
 #include <asm/current.h>
 #include <asm/processor.h>
 #include <asm/hvm/support.h>
@@ -26,8 +27,6 @@ static u32 __read_mostly xsave_cntxt_siz
 /* A 64-bit bitmask of the XSAVE/XRSTOR features supported by processor. */
 u64 __read_mostly xfeature_mask;
 
-unsigned int *__read_mostly xstate_offsets;
-unsigned int *__read_mostly xstate_sizes;
 u64 __read_mostly xstate_align;
 static unsigned int __read_mostly xstate_features;
 
@@ -93,34 +92,19 @@ static int setup_xstate_features(bool bs
     unsigned int leaf, eax, ebx, ecx, edx;
 
     if ( bsp )
-    {
         xstate_features = flsl(xfeature_mask);
-        xstate_offsets = xzalloc_array(unsigned int, xstate_features);
-        if ( !xstate_offsets )
-            return -ENOMEM;
-
-        xstate_sizes = xzalloc_array(unsigned int, xstate_features);
-        if ( !xstate_sizes )
-            return -ENOMEM;
-    }
 
     for ( leaf = 2; leaf < xstate_features; leaf++ )
     {
-        if ( bsp )
-        {
-            cpuid_count(XSTATE_CPUID, leaf, &xstate_sizes[leaf],
-                        &xstate_offsets[leaf], &ecx, &edx);
-            if ( ecx & XSTATE_ALIGN64 )
-                __set_bit(leaf, &xstate_align);
-        }
+        cpuid_count(XSTATE_CPUID, leaf, &eax,
+                    &ebx, &ecx, &edx);
+        BUG_ON(eax != xstate_size(leaf));
+        BUG_ON(ebx != xstate_offset(leaf));
+
+        if ( bsp && (ecx & XSTATE_ALIGN64) )
+            __set_bit(leaf, &xstate_align);
         else
-        {
-            cpuid_count(XSTATE_CPUID, leaf, &eax,
-                        &ebx, &ecx, &edx);
-            BUG_ON(eax != xstate_sizes[leaf]);
-            BUG_ON(ebx != xstate_offsets[leaf]);
             BUG_ON(!(ecx & XSTATE_ALIGN64) != !test_bit(leaf, &xstate_align));
-        }
     }
 
     return 0;
@@ -150,7 +134,7 @@ static void setup_xstate_comp(uint16_t *
             if ( test_bit(i, &xstate_align) )
                 offset = ROUNDUP(offset, 64);
             comp_offsets[i] = offset;
-            offset += xstate_sizes[i];
+            offset += xstate_size(i);
         }
     }
     ASSERT(offset <= xsave_cntxt_size);
@@ -213,10 +197,10 @@ void expand_xsave_states(struct vcpu *v,
          * comp_offsets[] information, something is very broken.
          */
         BUG_ON(!comp_offsets[index]);
-        BUG_ON((xstate_offsets[index] + xstate_sizes[index]) > size);
+        BUG_ON((xstate_offset(index) + xstate_size(index)) > size);
 
-        memcpy(dest + xstate_offsets[index], src + comp_offsets[index],
-               xstate_sizes[index]);
+        memcpy(dest + xstate_offset(index), src + comp_offsets[index],
+               xstate_size(index));
 
         valid &= ~feature;
     }
@@ -279,10 +263,10 @@ void compress_xsave_states(struct vcpu *
          * comp_offset[] information, something is very broken.
          */
         BUG_ON(!comp_offsets[index]);
-        BUG_ON((xstate_offsets[index] + xstate_sizes[index]) > size);
+        BUG_ON((xstate_offset(index) + xstate_size(index)) > size);
 
-        memcpy(dest + comp_offsets[index], src + xstate_offsets[index],
-               xstate_sizes[index]);
+        memcpy(dest + comp_offsets[index], src + xstate_offset(index),
+               xstate_size(index));
 
         valid &= ~feature;
     }
@@ -516,8 +500,8 @@ int xstate_alloc_save_area(struct vcpu *
         unsigned int i;
 
         for ( size = 0, i = 2; i < xstate_features; ++i )
-            if ( size < xstate_sizes[i] )
-                size = xstate_sizes[i];
+            if ( size < xstate_size(i) )
+                size = xstate_size(i);
         size += XSTATE_AREA_MIN_SIZE;
     }
 
@@ -559,9 +543,9 @@ int xstate_update_save_area(struct vcpu
     for ( size = old = XSTATE_AREA_MIN_SIZE, i = 2; i < xstate_features; ++i )
     {
         if ( xcr0_max & (1ul << i) )
-            size = max(size, xstate_offsets[i] + xstate_sizes[i]);
+            size = max(size, xstate_offset(i) + xstate_size(i));
         if ( v->arch.xcr0_accum & (1ul << i) )
-            old = max(old, xstate_offsets[i] + xstate_sizes[i]);
+            old = max(old, xstate_offset(i) + xstate_size(i));
     }
 
     save_area = _xvrealloc(v->arch.xsave_area, size, __alignof(*save_area));
@@ -819,7 +803,7 @@ uint64_t read_bndcfgu(void)
               : "=m" (*xstate)
               : "a" (X86_XCR0_BNDCSR), "d" (0), "D" (xstate) );
 
-        bndcsr = (void *)xstate + xstate_offsets[X86_XCR0_BNDCSR_POS];
+        bndcsr = (void *)xstate + xstate_offset(X86_XCR0_BNDCSR_POS);
     }
 
     if ( cr0 & X86_CR0_TS )
--- a/xen/include/asm-x86/cpuid.h
+++ b/xen/include/asm-x86/cpuid.h
@@ -16,6 +16,7 @@
 extern const uint32_t known_features[FSCAPINTS];
 extern const uint32_t special_features[FSCAPINTS];
 
+void init_host_cpuid(void);
 void init_guest_cpuid(void);
 
 /*
--- a/xen/include/asm-x86/xstate.h
+++ b/xen/include/asm-x86/xstate.h
@@ -44,8 +44,9 @@ extern uint32_t mxcsr_mask;
 
 extern u64 xfeature_mask;
 extern u64 xstate_align;
-extern unsigned int *xstate_offsets;
-extern unsigned int *xstate_sizes;
+
+#define xstate_offset(n) (raw_cpuid_policy.xstate.comp[n].offset)
+#define xstate_size(n)   (raw_cpuid_policy.xstate.comp[n].size)
 
 /* extended state save area */
 struct __attribute__((aligned (64))) xsave_struct


Re: [PATCH v3 05/22] x86/xstate: drop xstate_offsets[] and xstate_sizes[]
Posted by Andrew Cooper 4 years, 9 months ago
On 22/04/2021 15:45, Jan Beulich wrote:
> They're redundant with respective fields from the raw CPUID policy; no
> need to keep two copies of the same data.

So before I read this patch of yours, I had a separate cleanup patch
turning the two arrays into static const.

> This also breaks
> recalculate_xstate()'s dependency on xstate_init(),

It doesn't, because you've retained the reference to xstate_align, which
is calculated in xstate_init().  I've posted "[PATCH 4/5] x86/cpuid:
Simplify recalculate_xstate()" which goes rather further.

xstate_align, and xstate_xfd as you've got later in the series, don't
need to be variables.  They're constants, just like the offset/size
information, because they're all a description of the XSAVE ISA
instruction behaviour.

We never turn on states we don't understand, which means we don't
actually need to refer to any component subleaf, other than to cross-check.

I'm still on the fence as to whether it is better to compile in the
constants, or to just use the raw policy.  Absolutely nothing good will
come of the constants changing, and one of my backup plans for dealing
with the size of cpuid_policy if it becomes a problem was to not store
these leaves, and generate them dynamically on request.


> allowing host CPUID
> policy calculation to be moved together with that of the raw one (which
> a subsequent change will require anyway).

While breaking up the host/raw calculations from the rest, we really
need to group the MSR policy calculations with their CPUID counterparts.

~Andrew


Re: [PATCH v3 05/22] x86/xstate: drop xstate_offsets[] and xstate_sizes[]
Posted by Jan Beulich 4 years, 9 months ago
On 03.05.2021 18:10, Andrew Cooper wrote:
> On 22/04/2021 15:45, Jan Beulich wrote:
>> They're redundant with respective fields from the raw CPUID policy; no
>> need to keep two copies of the same data.
> 
> So before I read this patch of yours, I had a separate cleanup patch
> turning the two arrays into static const.
> 
>> This also breaks
>> recalculate_xstate()'s dependency on xstate_init(),
> 
> It doesn't, because you've retained the reference to xstate_align, which
> is calculated in xstate_init().

Good point - s/breaks/eliminates some of/.

>  I've posted "[PATCH 4/5] x86/cpuid:
> Simplify recalculate_xstate()" which goes rather further.

I'll try to take a look soonish.

> xstate_align, and xstate_xfd as you've got later in the series, don't
> need to be variables.  They're constants, just like the offset/size
> information, because they're all a description of the XSAVE ISA
> instruction behaviour.

Hmm, I think there are multiple views possible - for xfd_mask even more
than for xstate_align: XFD is, according to my understanding of the
spec, not a prerequisite feature for AMX. IOW AMX would function fine
without XFD; it's just that lazy allocation of the state saving space
then wouldn't be possible. And I also can't, in principle, see any reason
why largish components like the AVX512 ones couldn't become XFD-sensitive
(in hardware; we of course can't mimic this in software).

(I could take as proof the fact that sde reports AMX but not XFD with
-spr, but I rather suspect this to be an oversight in their CPUID data.
I've posted a corresponding question in their forum.)

If there really were a strict static relationship, I'd have trouble
seeing why the information would need to be expressed in CPUID at all.
It would at least feel like over-engineering then.

> We never turn on states we don't understand, which means we don't
> actually need to refer to any component subleaf, other than to cross-check.
> 
> I'm still on the fence as to whether it is better to compile in the
> constants, or to just use the raw policy.  Absolutely nothing good will
> come of the constants changing, and one of my backup plans for dealing
> with the size of cpuid_policy if it becomes a problem was to not store
> these leaves, and generate them dynamically on request.

Actually it is my understanding that the reason the offsets are
expressed via CPUID is that originally they were meant to be able to
vary between implementations (see in particular the placement of the
LWP component, which has resulted in a curious 128-byte gap ahead of
the MPX components) - that is, until it was realized what implications
this would have on migration.

>> allowing host CPUID
>> policy calculation to be moved together with that of the raw one (which
>> a subsequent change will require anyway).
> 
> While breaking up the host/raw calculations from the rest, we really
> need to group the MSR policy calculations with their CPUID counterparts.

But that's orthogonal to the change here, i.e. if it belongs in this
series at all, it would be the subject of a separate patch. Plus I have
to admit I'm not sure I see what your plan here would be - cpuid.c and
msr.c so far don't cross-reference one another. And I thought this
separation was intentional.

Jan