[PATCH] target/i386/hvf: fix handling of XSAVE-related CPUID bits

Paolo Bonzini posted 1 patch 3 weeks, 2 days ago
host/include/i386/host/cpuinfo.h |  1 +
target/i386/hvf/x86_cpuid.c      | 56 +++++++++++++++++++-------------
util/cpuinfo-i386.c              |  1 +
3 files changed, 35 insertions(+), 23 deletions(-)
[PATCH] target/i386/hvf: fix handling of XSAVE-related CPUID bits
Posted by Paolo Bonzini 3 weeks, 2 days ago
The call to xgetbv() is passing the ecx value for cpuid function 0xD,
index 0. The xgetbv call thus returns false (OSXSAVE is bit 27, which is
well out of the range of CPUID[0xD,0].ECX) and eax is not modified. While
fixing it, cache the whole computation of supported XCR0 bits since it
will be used for more than just CPUID leaf 0xD.

Furthermore, unsupported subleafs of CPUID 0xD (including all those
corresponding to zero bits in host's XCR0) must be hidden; if OSXSAVE
is not set at all, the whole of CPUID leaf 0xD plus the XSAVE bit must
be hidden.

Finally, unconditionally drop XSTATE_BNDREGS_MASK and XSTATE_BNDCSR_MASK;
real hardware will only show them if the MPX bit is set in CPUID;
this is never the case for hvf_get_supported_cpuid() because QEMU's
Hypervisor.framework support does not handle the VMX fields related to
MPX (even in the unlikely possibility that the host has MPX enabled).
So hide those bits in the new cache_host_xcr0().

Supersedes: <20241029130401.525297-1-pbonzini@redhat.com>
Cc: Phil Dennis-Jordan <lists@philjordan.eu>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 host/include/i386/host/cpuinfo.h |  1 +
 target/i386/hvf/x86_cpuid.c      | 56 +++++++++++++++++++-------------
 util/cpuinfo-i386.c              |  1 +
 3 files changed, 35 insertions(+), 23 deletions(-)

diff --git a/host/include/i386/host/cpuinfo.h b/host/include/i386/host/cpuinfo.h
index 81771733eaa..9541a64da61 100644
--- a/host/include/i386/host/cpuinfo.h
+++ b/host/include/i386/host/cpuinfo.h
@@ -9,6 +9,7 @@
 /* Digested version of <cpuid.h> */
 
 #define CPUINFO_ALWAYS          (1u << 0)  /* so cpuinfo is nonzero */
+#define CPUINFO_OSXSAVE         (1u << 1)
 #define CPUINFO_MOVBE           (1u << 2)
 #define CPUINFO_LZCNT           (1u << 3)
 #define CPUINFO_POPCNT          (1u << 4)
diff --git a/target/i386/hvf/x86_cpuid.c b/target/i386/hvf/x86_cpuid.c
index e56cd8411ba..3a116548a3d 100644
--- a/target/i386/hvf/x86_cpuid.c
+++ b/target/i386/hvf/x86_cpuid.c
@@ -21,28 +21,38 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/cpuid.h"
+#include "host/cpuinfo.h"
 #include "cpu.h"
 #include "x86.h"
 #include "vmx.h"
 #include "sysemu/hvf.h"
 #include "hvf-i386.h"
 
-static bool xgetbv(uint32_t cpuid_ecx, uint32_t idx, uint64_t *xcr)
+static bool cached_xcr0;
+static uint64_t supported_xcr0;
+
+static void cache_host_xcr0()
 {
-    uint32_t xcrl, xcrh;
-
-    if (cpuid_ecx & CPUID_EXT_OSXSAVE) {
-        /*
-         * The xgetbv instruction is not available to older versions of
-         * the assembler, so we encode the instruction manually.
-         */
-        asm(".byte 0x0f, 0x01, 0xd0" : "=a" (xcrl), "=d" (xcrh) : "c" (idx));
-
-        *xcr = (((uint64_t)xcrh) << 32) | xcrl;
-        return true;
+    if (cached_xcr0) {
+        return;
     }
 
-    return false;
+    if (cpuinfo & CPUINFO_OSXSAVE) {
+        uint64_t host_xcr0 = xgetbv_low(0);
+
+        /* Only show xcr0 bits corresponding to usable features.  */
+        supported_xcr0 = host_xcr0 & (XSTATE_FP_MASK |
+                                      XSTATE_SSE_MASK | XSTATE_YMM_MASK |
+                                      XSTATE_OPMASK_MASK | XSTATE_ZMM_Hi256_MASK |
+                                      XSTATE_Hi16_ZMM_MASK);
+        if ((supported_xcr0 & (XSTATE_FP_MASK | XSTATE_SSE_MASK)) !=
+            (XSTATE_FP_MASK | XSTATE_SSE_MASK)) {
+            supported_xcr0 = 0;
+        }
+    }
+
+    cached_xcr0 = true;
 }
 
 uint32_t hvf_get_supported_cpuid(uint32_t func, uint32_t idx,
@@ -51,6 +61,7 @@ uint32_t hvf_get_supported_cpuid(uint32_t func, uint32_t idx,
     uint64_t cap;
     uint32_t eax, ebx, ecx, edx;
 
+    cache_host_xcr0();
     host_cpuid(func, idx, &eax, &ebx, &ecx, &edx);
 
     switch (func) {
@@ -66,7 +77,8 @@ uint32_t hvf_get_supported_cpuid(uint32_t func, uint32_t idx,
         ecx &= CPUID_EXT_SSE3 | CPUID_EXT_PCLMULQDQ | CPUID_EXT_SSSE3 |
              CPUID_EXT_FMA | CPUID_EXT_CX16 | CPUID_EXT_PCID |
              CPUID_EXT_SSE41 | CPUID_EXT_SSE42 | CPUID_EXT_MOVBE |
-             CPUID_EXT_POPCNT | CPUID_EXT_AES | CPUID_EXT_XSAVE |
+             CPUID_EXT_POPCNT | CPUID_EXT_AES |
+             (supported_xcr0 ? CPUID_EXT_XSAVE : 0) |
              CPUID_EXT_AVX | CPUID_EXT_F16C | CPUID_EXT_RDRAND;
         ecx |= CPUID_EXT_HYPERVISOR;
         break;
@@ -107,16 +119,14 @@ uint32_t hvf_get_supported_cpuid(uint32_t func, uint32_t idx,
         eax = 0;
         break;
     case 0xD:
+        if (!supported_xcr0 ||
+            (idx > 1 && !(supported_xcr0 & (1 << idx)))) {
+            eax = ebx = ecx = edx = 0;
+            break;
+        }
+
         if (idx == 0) {
-            uint64_t host_xcr0;
-            if (xgetbv(ecx, 0, &host_xcr0)) {
-                uint64_t supp_xcr0 = host_xcr0 & (XSTATE_FP_MASK |
-                                  XSTATE_SSE_MASK | XSTATE_YMM_MASK |
-                                  XSTATE_BNDREGS_MASK | XSTATE_BNDCSR_MASK |
-                                  XSTATE_OPMASK_MASK | XSTATE_ZMM_Hi256_MASK |
-                                  XSTATE_Hi16_ZMM_MASK);
-                eax &= supp_xcr0;
-            }
+            eax = supported_xcr0;
         } else if (idx == 1) {
             hv_vmx_read_capability(HV_VMX_CAP_PROCBASED2, &cap);
             eax &= CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XGETBV1;
diff --git a/util/cpuinfo-i386.c b/util/cpuinfo-i386.c
index 90f92a42dc8..c8c8a1b3705 100644
--- a/util/cpuinfo-i386.c
+++ b/util/cpuinfo-i386.c
@@ -35,6 +35,7 @@ unsigned __attribute__((constructor)) cpuinfo_init(void)
         __cpuid(1, a, b, c, d);
 
         info |= (d & bit_SSE2 ? CPUINFO_SSE2 : 0);
+        info |= (c & bit_OSXSAVE ? CPUINFO_OSXSAVE : 0);
         info |= (c & bit_MOVBE ? CPUINFO_MOVBE : 0);
         info |= (c & bit_POPCNT ? CPUINFO_POPCNT : 0);
         info |= (c & bit_PCLMUL ? CPUINFO_PCLMUL : 0);
-- 
2.47.0
Re: [PATCH] target/i386/hvf: fix handling of XSAVE-related CPUID bits
Posted by Phil Dennis-Jordan 3 weeks, 1 day ago
Paolo,

I've just manually tested this, also including some debug output, and
that looks good to me. I've got some concerns about edge cases though,
see comments inline.

On Thu, 31 Oct 2024 at 11:57, Paolo Bonzini <pbonzini@redhat.com> wrote:
>
> The call to xgetbv() is passing the ecx value for cpuid function 0xD,
> index 0. The xgetbv call thus returns false (OSXSAVE is bit 27, which is
> well out of the range of CPUID[0xD,0].ECX) and eax is not modified. While
> fixing it, cache the whole computation of supported XCR0 bits since it
> will be used for more than just CPUID leaf 0xD.
>
> Furthermore, unsupported subleafs of CPUID 0xD (including all those
> corresponding to zero bits in host's XCR0) must be hidden; if OSXSAVE
> is not set at all, the whole of CPUID leaf 0xD plus the XSAVE bit must
> be hidden.
>
> Finally, unconditionally drop XSTATE_BNDREGS_MASK and XSTATE_BNDCSR_MASK;
> real hardware will only show them if the MPX bit is set in CPUID;
> this is never the case for hvf_get_supported_cpuid() because QEMU's
> Hypervisor.framework support does not handle the VMX fields related to
> MPX (even in the unlikely possibility that the host has MPX enabled).
> So hide those bits in the new cache_host_xcr0().

The commit message is much more helpful now, thank you.

> Supersedes: <20241029130401.525297-1-pbonzini@redhat.com>
> Cc: Phil Dennis-Jordan <lists@philjordan.eu>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>  host/include/i386/host/cpuinfo.h |  1 +
>  target/i386/hvf/x86_cpuid.c      | 56 +++++++++++++++++++-------------
>  util/cpuinfo-i386.c              |  1 +
>  3 files changed, 35 insertions(+), 23 deletions(-)
>
> diff --git a/host/include/i386/host/cpuinfo.h b/host/include/i386/host/cpuinfo.h
> index 81771733eaa..9541a64da61 100644
> --- a/host/include/i386/host/cpuinfo.h
> +++ b/host/include/i386/host/cpuinfo.h
> @@ -9,6 +9,7 @@
>  /* Digested version of <cpuid.h> */
>
>  #define CPUINFO_ALWAYS          (1u << 0)  /* so cpuinfo is nonzero */
> +#define CPUINFO_OSXSAVE         (1u << 1)
>  #define CPUINFO_MOVBE           (1u << 2)
>  #define CPUINFO_LZCNT           (1u << 3)
>  #define CPUINFO_POPCNT          (1u << 4)
> diff --git a/target/i386/hvf/x86_cpuid.c b/target/i386/hvf/x86_cpuid.c
> index e56cd8411ba..3a116548a3d 100644
> --- a/target/i386/hvf/x86_cpuid.c
> +++ b/target/i386/hvf/x86_cpuid.c
> @@ -21,28 +21,38 @@
>   */
>
>  #include "qemu/osdep.h"
> +#include "qemu/cpuid.h"
> +#include "host/cpuinfo.h"
>  #include "cpu.h"
>  #include "x86.h"
>  #include "vmx.h"
>  #include "sysemu/hvf.h"
>  #include "hvf-i386.h"
>
> -static bool xgetbv(uint32_t cpuid_ecx, uint32_t idx, uint64_t *xcr)
> +static bool cached_xcr0;
> +static uint64_t supported_xcr0;
> +
> +static void cache_host_xcr0()

Nit: this should have a (void) parameter list rather than an empty one;
in C, an empty list leaves the parameters unspecified.

>  {
> -    uint32_t xcrl, xcrh;
> -
> -    if (cpuid_ecx & CPUID_EXT_OSXSAVE) {
> -        /*
> -         * The xgetbv instruction is not available to older versions of
> -         * the assembler, so we encode the instruction manually.
> -         */
> -        asm(".byte 0x0f, 0x01, 0xd0" : "=a" (xcrl), "=d" (xcrh) : "c" (idx));
> -
> -        *xcr = (((uint64_t)xcrh) << 32) | xcrl;
> -        return true;
> +    if (cached_xcr0) {
> +        return;
>      }
>
> -    return false;
> +    if (cpuinfo & CPUINFO_OSXSAVE) {
> +        uint64_t host_xcr0 = xgetbv_low(0);
> +
> +        /* Only show xcr0 bits corresponding to usable features.  */
> +        supported_xcr0 = host_xcr0 & (XSTATE_FP_MASK |
> +                                      XSTATE_SSE_MASK | XSTATE_YMM_MASK |
> +                                      XSTATE_OPMASK_MASK | XSTATE_ZMM_Hi256_MASK |
> +                                      XSTATE_Hi16_ZMM_MASK);
> +        if ((supported_xcr0 & (XSTATE_FP_MASK | XSTATE_SSE_MASK)) !=
> +            (XSTATE_FP_MASK | XSTATE_SSE_MASK)) {
> +            supported_xcr0 = 0;
> +        }
> +    }
> +
> +    cached_xcr0 = true;
>  }
>
>  uint32_t hvf_get_supported_cpuid(uint32_t func, uint32_t idx,
> @@ -51,6 +61,7 @@ uint32_t hvf_get_supported_cpuid(uint32_t func, uint32_t idx,
>      uint64_t cap;
>      uint32_t eax, ebx, ecx, edx;
>
> +    cache_host_xcr0();
>      host_cpuid(func, idx, &eax, &ebx, &ecx, &edx);
>
>      switch (func) {
> @@ -66,7 +77,8 @@ uint32_t hvf_get_supported_cpuid(uint32_t func, uint32_t idx,
>          ecx &= CPUID_EXT_SSE3 | CPUID_EXT_PCLMULQDQ | CPUID_EXT_SSSE3 |
>               CPUID_EXT_FMA | CPUID_EXT_CX16 | CPUID_EXT_PCID |
>               CPUID_EXT_SSE41 | CPUID_EXT_SSE42 | CPUID_EXT_MOVBE |
> -             CPUID_EXT_POPCNT | CPUID_EXT_AES | CPUID_EXT_XSAVE |
> +             CPUID_EXT_POPCNT | CPUID_EXT_AES |
> +             (supported_xcr0 ? CPUID_EXT_XSAVE : 0) |
>               CPUID_EXT_AVX | CPUID_EXT_F16C | CPUID_EXT_RDRAND;
>          ecx |= CPUID_EXT_HYPERVISOR;
>          break;
> @@ -107,16 +119,14 @@ uint32_t hvf_get_supported_cpuid(uint32_t func, uint32_t idx,
>          eax = 0;
>          break;
>      case 0xD:
> +        if (!supported_xcr0 ||
> +            (idx > 1 && !(supported_xcr0 & (1 << idx)))) {

If I'm not mistaken, we have undefined behaviour here for idx > 30?
(The literal 1 is an int, so left-shifting it by 31 or more is
undefined.)

Based on a quick reading of Intel's SDM section 13.2, only idx < 63 is
specified. So perhaps the condition should be:

if (!supported_xcr0 || idx >= 63 ||
    (idx > 1 && !(supported_xcr0 & (UINT64_C(1) << idx)))) {

> +            eax = ebx = ecx = edx = 0;
> +            break;
> +        }
> +
>          if (idx == 0) {
> -            uint64_t host_xcr0;
> -            if (xgetbv(ecx, 0, &host_xcr0)) {
> -                uint64_t supp_xcr0 = host_xcr0 & (XSTATE_FP_MASK |
> -                                  XSTATE_SSE_MASK | XSTATE_YMM_MASK |
> -                                  XSTATE_BNDREGS_MASK | XSTATE_BNDCSR_MASK |
> -                                  XSTATE_OPMASK_MASK | XSTATE_ZMM_Hi256_MASK |
> -                                  XSTATE_Hi16_ZMM_MASK);
> -                eax &= supp_xcr0;
> -            }
> +            eax = supported_xcr0;
>          } else if (idx == 1) {
>              hv_vmx_read_capability(HV_VMX_CAP_PROCBASED2, &cap);
>              eax &= CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XGETBV1;

Not actually part of your patch, but I've just been reading around
this code to make sense of the patch, and it seems like the existing
idx == 1 case is also buggy. After the last line of the above snippet
(eax &= …), eax can contain at most the XSAVEOPT and XGETBV1 bits, so
the XSAVES bit is already clear. The two subsequent lines,

            if (!(cap & CPU_BASED2_XSAVES_XRSTORS)) {
                eax &= ~CPUID_XSAVE_XSAVES;

are therefore effectively a no-op, I believe? Should the line,

eax &= CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XGETBV1;

therefore perhaps be,

eax &= CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XGETBV1 | CPUID_XSAVE_XSAVES;

?

(I have to admit I'm no expert on the XSAVE mechanism, so I'm mostly
going by how this code would make the most sense, not by Intel's
spec.)

> diff --git a/util/cpuinfo-i386.c b/util/cpuinfo-i386.c
> index 90f92a42dc8..c8c8a1b3705 100644
> --- a/util/cpuinfo-i386.c
> +++ b/util/cpuinfo-i386.c
> @@ -35,6 +35,7 @@ unsigned __attribute__((constructor)) cpuinfo_init(void)
>          __cpuid(1, a, b, c, d);
>
>          info |= (d & bit_SSE2 ? CPUINFO_SSE2 : 0);
> +        info |= (c & bit_OSXSAVE ? CPUINFO_OSXSAVE : 0);
>          info |= (c & bit_MOVBE ? CPUINFO_MOVBE : 0);
>          info |= (c & bit_POPCNT ? CPUINFO_POPCNT : 0);
>          info |= (c & bit_PCLMUL ? CPUINFO_PCLMUL : 0);
> --
> 2.47.0
>

All the best,
phil