[Xen-devel] [PATCH v3 0/8] x86emul: further work

Jan Beulich posted 8 patches 4 years, 6 months ago
Only 0 patches received!
There is a newer version of this series
[Xen-devel] [PATCH v3 0/8] x86emul: further work
Posted by Jan Beulich 4 years, 6 months ago
1: x86emul: support WBNOINVD
2: x86/HVM: ignore guest INVD uses
3: x86emul: generalize invlpg() hook
4: x86emul: support INVPCID
5: x86emul: support MOVDIR{I,64B} insns
6: x86/HVM: scale MPERF values reported to guests (on AMD)
7: x86emul: support RDPRU
8: x86/HVM: don't needlessly intercept APERF/MPERF/TSC MSR reads

Jan

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel
[Xen-devel] [PATCH v3 1/8] x86emul: support WBNOINVD
Posted by Jan Beulich 4 years, 6 months ago
Rev 037 of Intel's ISA extensions document does not state intercept
behavior for the insn (I've been unofficially told that the distinction
is going to be by exit qualification, as I would have assumed
considering that this way it's sufficiently transparent to unaware
software, as using WBINVD in place of WBNOINVD is always correct, just
less efficient). Similarly AMD's PM volume 2 version 3.31 only states
that both use the same VMEXIT, but not how to distinugish them (other
than by decoding the insn). Therefore in the HVM case for now it'll be
backed by the same ->wbinvd_intercept() handlers.

Use this occasion and also add the two missing table entries for
CLDEMOTE, which doesn't require any further changes to make work.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Paul Durrant <paul.durrant@citrix.com>
---
v3: Drop A attribute from public header addition. Comment out
    cpu_has_wbnoinvd part of conditional. Extend description.
v2: Re-base. Convert wbnoinvd() inline function.

--- a/tools/libxl/libxl_cpuid.c
+++ b/tools/libxl/libxl_cpuid.c
@@ -208,6 +208,7 @@ int libxl_cpuid_parse_config(libxl_cpuid
         {"avx512-bitalg",0x00000007,  0, CPUID_REG_ECX, 12,  1},
         {"avx512-vpopcntdq",0x00000007,0,CPUID_REG_ECX, 14,  1},
         {"rdpid",        0x00000007,  0, CPUID_REG_ECX, 22,  1},
+        {"cldemote",     0x00000007,  0, CPUID_REG_ECX, 25,  1},
 
         {"avx512-4vnniw",0x00000007,  0, CPUID_REG_EDX,  2,  1},
         {"avx512-4fmaps",0x00000007,  0, CPUID_REG_EDX,  3,  1},
@@ -256,6 +257,7 @@ int libxl_cpuid_parse_config(libxl_cpuid
 
         {"invtsc",       0x80000007, NA, CPUID_REG_EDX,  8,  1},
 
+        {"wbnoinvd",     0x80000008, NA, CPUID_REG_EBX,  9,  1},
         {"ibpb",         0x80000008, NA, CPUID_REG_EBX, 12,  1},
         {"nc",           0x80000008, NA, CPUID_REG_ECX,  0,  8},
         {"apicidsize",   0x80000008, NA, CPUID_REG_ECX, 12,  4},
--- a/tools/misc/xen-cpuid.c
+++ b/tools/misc/xen-cpuid.c
@@ -146,6 +146,8 @@ static const char *const str_e8b[32] =
 {
     [ 0] = "clzero",
 
+    /* [ 8] */            [ 9] = "wbnoinvd",
+
     [12] = "ibpb",
 };
 
--- a/xen/arch/x86/hvm/emulate.c
+++ b/xen/arch/x86/hvm/emulate.c
@@ -2212,6 +2212,7 @@ static int hvmemul_cache_op(
         /* fall through */
     case x86emul_invd:
     case x86emul_wbinvd:
+    case x86emul_wbnoinvd:
         alternative_vcall(hvm_funcs.wbinvd_intercept);
         break;
     }
--- a/xen/arch/x86/pv/emul-priv-op.c
+++ b/xen/arch/x86/pv/emul-priv-op.c
@@ -1120,7 +1120,7 @@ static int write_msr(unsigned int reg, u
 static int cache_op(enum x86emul_cache_op op, enum x86_segment seg,
                     unsigned long offset, struct x86_emulate_ctxt *ctxt)
 {
-    ASSERT(op == x86emul_wbinvd);
+    ASSERT(op == x86emul_wbinvd || op == x86emul_wbnoinvd);
 
     /* Ignore the instruction if unprivileged. */
     if ( !cache_flush_permitted(current->domain) )
@@ -1129,6 +1129,8 @@ static int cache_op(enum x86emul_cache_o
          * newer linux uses this in some start-of-day timing loops.
          */
         ;
+    else if ( op == x86emul_wbnoinvd /* && cpu_has_wbnoinvd */ )
+        wbnoinvd();
     else
         wbinvd();
 
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -1869,6 +1869,7 @@ in_protmode(
 #define vcpu_has_fma4()        (ctxt->cpuid->extd.fma4)
 #define vcpu_has_tbm()         (ctxt->cpuid->extd.tbm)
 #define vcpu_has_clzero()      (ctxt->cpuid->extd.clzero)
+#define vcpu_has_wbnoinvd()    (ctxt->cpuid->extd.wbnoinvd)
 
 #define vcpu_has_bmi1()        (ctxt->cpuid->feat.bmi1)
 #define vcpu_has_hle()         (ctxt->cpuid->feat.hle)
@@ -5931,10 +5932,13 @@ x86_emulate(
         break;
 
     case X86EMUL_OPC(0x0f, 0x08): /* invd */
-    case X86EMUL_OPC(0x0f, 0x09): /* wbinvd */
+    case X86EMUL_OPC(0x0f, 0x09): /* wbinvd / wbnoinvd */
         generate_exception_if(!mode_ring0(), EXC_GP, 0);
         fail_if(!ops->cache_op);
-        if ( (rc = ops->cache_op(b == 0x09 ? x86emul_wbinvd
+        if ( (rc = ops->cache_op(b == 0x09 ? !repe_prefix() ||
+                                             !vcpu_has_wbnoinvd()
+                                             ? x86emul_wbinvd
+                                             : x86emul_wbnoinvd
                                            : x86emul_invd,
                                  x86_seg_none, 0,
                                  ctxt)) != X86EMUL_OKAY )
--- a/xen/arch/x86/x86_emulate/x86_emulate.h
+++ b/xen/arch/x86/x86_emulate/x86_emulate.h
@@ -182,6 +182,7 @@ enum x86emul_cache_op {
     x86emul_clwb,
     x86emul_invd,
     x86emul_wbinvd,
+    x86emul_wbnoinvd,
 };
 
 struct x86_emulate_state;
--- a/xen/include/asm-x86/system.h
+++ b/xen/include/asm-x86/system.h
@@ -16,6 +16,11 @@ static inline void wbinvd(void)
     asm volatile ( "wbinvd" ::: "memory" );
 }
 
+static inline void wbnoinvd(void)
+{
+    asm volatile ( "repe; wbinvd" : : : "memory" );
+}
+
 static inline void clflush(const void *p)
 {
     asm volatile ( "clflush %0" :: "m" (*(const char *)p) );
--- a/xen/include/public/arch-x86/cpufeatureset.h
+++ b/xen/include/public/arch-x86/cpufeatureset.h
@@ -236,6 +236,7 @@ XEN_CPUFEATURE(AVX512_VNNI,   6*32+11) /
 XEN_CPUFEATURE(AVX512_BITALG, 6*32+12) /*A  Support for VPOPCNT[B,W] and VPSHUFBITQMB */
 XEN_CPUFEATURE(AVX512_VPOPCNTDQ, 6*32+14) /*A  POPCNT for vectors of DW/QW */
 XEN_CPUFEATURE(RDPID,         6*32+22) /*A  RDPID instruction */
+XEN_CPUFEATURE(CLDEMOTE,      6*32+25) /*A  CLDEMOTE instruction */
 
 /* AMD-defined CPU features, CPUID level 0x80000007.edx, word 7 */
 XEN_CPUFEATURE(ITSC,          7*32+ 8) /*   Invariant TSC */
@@ -243,6 +244,7 @@ XEN_CPUFEATURE(EFRO,          7*32+10) /
 
 /* AMD-defined CPU features, CPUID level 0x80000008.ebx, word 8 */
 XEN_CPUFEATURE(CLZERO,        8*32+ 0) /*A  CLZERO instruction */
+XEN_CPUFEATURE(WBNOINVD,      8*32+ 9) /*   WBNOINVD instruction */
 XEN_CPUFEATURE(IBPB,          8*32+12) /*A  IBPB support only (no IBRS, used by AMD) */
 
 /* Intel-defined CPU features, CPUID level 0x00000007:0.edx, word 9 */


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel
Re: [Xen-devel] [PATCH v3 1/8] x86emul: support WBNOINVD
Posted by Andrew Cooper 4 years, 6 months ago
On 03/09/2019 10:37, Jan Beulich wrote:
> Rev 037 of Intel's ISA extensions document does not state intercept
> behavior for the insn (I've been unofficially told that the distinction
> is going to be by exit qualification, as I would have assumed
> considering that this way it's sufficiently transparent to unaware
> software, as using WBINVD in place of WBNOINVD is always correct, just
> less efficient). Similarly AMD's PM volume 2 version 3.31 only states
> that both use the same VMEXIT, but not how to distinugish them (other
> than by decoding the insn). Therefore in the HVM case for now it'll be
> backed by the same ->wbinvd_intercept() handlers.
>
> Use this occasion and also add the two missing table entries for
> CLDEMOTE, which doesn't require any further changes to make work.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> Reviewed-by: Paul Durrant <paul.durrant@citrix.com>

Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>

I've put enabling WBNOINVD on the todo list, but there is still 0
feedback on the "docs/sphinx: todo/wishlist" thread towards the general
principle.

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel
[Xen-devel] [PATCH v3 2/8] x86/HVM: ignore guest INVD uses
Posted by Jan Beulich 4 years, 6 months ago
The only place we'd expect the insn to be sensibly used is in
(virtualization unaware) firmware.

Suggested-by: Andrew Cooper <andrew.cooper3@citrix.com>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v3: New.

--- a/xen/arch/x86/hvm/emulate.c
+++ b/xen/arch/x86/hvm/emulate.c
@@ -2210,11 +2210,18 @@ static int hvmemul_cache_op(
 
         hvmemul_unmap_linear_addr(mapping, addr, 0, hvmemul_ctxt);
         /* fall through */
-    case x86emul_invd:
     case x86emul_wbinvd:
     case x86emul_wbnoinvd:
         alternative_vcall(hvm_funcs.wbinvd_intercept);
         break;
+
+    case x86emul_invd:
+        /*
+         * Deliberately ignored: We don't want to issue INVD, and issuing WBINVD
+         * wouldn't match the request. And the only place we'd expect the insn
+         * to be sensibly used is in (virtualization unaware) firmware.
+         */
+        break;
     }
 
     return X86EMUL_OKAY;


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel
Re: [Xen-devel] [PATCH v3 2/8] x86/HVM: ignore guest INVD uses
Posted by Paul Durrant 4 years, 6 months ago
> -----Original Message-----
> From: Jan Beulich <jbeulich@suse.com>
> Sent: 03 September 2019 10:38
> To: xen-devel@lists.xenproject.org
> Cc: Andrew Cooper <Andrew.Cooper3@citrix.com>; Paul Durrant <Paul.Durrant@citrix.com>; Roger Pau Monne
> <roger.pau@citrix.com>; Wei Liu <wl@xen.org>
> Subject: [PATCH v3 2/8] x86/HVM: ignore guest INVD uses
> 
> The only place we'd expect the insn to be sensibly used is in
> (virtualization unaware) firmware.
> 
> Suggested-by: Andrew Cooper <andrew.cooper3@citrix.com>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Seems like a reasonable optimization.

Reviewed-by: Paul Durrant <paul.durrant@citrix.com>

> ---
> v3: New.
> 
> --- a/xen/arch/x86/hvm/emulate.c
> +++ b/xen/arch/x86/hvm/emulate.c
> @@ -2210,11 +2210,18 @@ static int hvmemul_cache_op(
> 
>          hvmemul_unmap_linear_addr(mapping, addr, 0, hvmemul_ctxt);
>          /* fall through */
> -    case x86emul_invd:
>      case x86emul_wbinvd:
>      case x86emul_wbnoinvd:
>          alternative_vcall(hvm_funcs.wbinvd_intercept);
>          break;
> +
> +    case x86emul_invd:
> +        /*
> +         * Deliberately ignored: We don't want to issue INVD, and issuing WBINVD
> +         * wouldn't match the request. And the only place we'd expect the insn
> +         * to be sensibly used is in (virtualization unaware) firmware.
> +         */
> +        break;
>      }
> 
>      return X86EMUL_OKAY;

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel
Re: [Xen-devel] [PATCH v3 2/8] x86/HVM: ignore guest INVD uses
Posted by Andrew Cooper 4 years, 6 months ago
On 03/09/2019 10:37, Jan Beulich wrote:
> The only place we'd expect the insn to be sensibly used is in
> (virtualization unaware) firmware.
>
> Suggested-by: Andrew Cooper <andrew.cooper3@citrix.com>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> ---
> v3: New.
>
> --- a/xen/arch/x86/hvm/emulate.c
> +++ b/xen/arch/x86/hvm/emulate.c
> @@ -2210,11 +2210,18 @@ static int hvmemul_cache_op(
>  
>          hvmemul_unmap_linear_addr(mapping, addr, 0, hvmemul_ctxt);
>          /* fall through */
> -    case x86emul_invd:
>      case x86emul_wbinvd:
>      case x86emul_wbnoinvd:
>          alternative_vcall(hvm_funcs.wbinvd_intercept);
>          break;
> +
> +    case x86emul_invd:
> +        /*
> +         * Deliberately ignored: We don't want to issue INVD, and issuing WBINVD

I'd phrase this more strongly.  We absolutely must not issue INVD or we
break cache coherency.

Ideally with this adjusted, Reviewed-by: Andrew Cooper
<andrew.cooper3@citrix.com>

> +         * wouldn't match the request. And the only place we'd expect the insn
> +         * to be sensibly used is in (virtualization unaware) firmware.
> +         */
> +        break;
>      }
>  
>      return X86EMUL_OKAY;
>


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel
Re: [Xen-devel] [PATCH v3 2/8] x86/HVM: ignore guest INVD uses
Posted by Jan Beulich 4 years, 6 months ago
On 03.09.2019 12:18, Andrew Cooper wrote:
> On 03/09/2019 10:37, Jan Beulich wrote:
>> The only place we'd expect the insn to be sensibly used is in
>> (virtualization unaware) firmware.
>>
>> Suggested-by: Andrew Cooper <andrew.cooper3@citrix.com>
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>> ---
>> v3: New.
>>
>> --- a/xen/arch/x86/hvm/emulate.c
>> +++ b/xen/arch/x86/hvm/emulate.c
>> @@ -2210,11 +2210,18 @@ static int hvmemul_cache_op(
>>  
>>          hvmemul_unmap_linear_addr(mapping, addr, 0, hvmemul_ctxt);
>>          /* fall through */
>> -    case x86emul_invd:
>>      case x86emul_wbinvd:
>>      case x86emul_wbnoinvd:
>>          alternative_vcall(hvm_funcs.wbinvd_intercept);
>>          break;
>> +
>> +    case x86emul_invd:
>> +        /*
>> +         * Deliberately ignored: We don't want to issue INVD, and issuing WBINVD
> 
> I'd phrase this more strongly.  We absolutely must not issue INVD or we
> break cache coherency.
> 
> Ideally with this adjusted, Reviewed-by: Andrew Cooper
> <andrew.cooper3@citrix.com>

Thanks, I've replaced "don't want to" with "mustn't".

Jan

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel
[Xen-devel] [PATCH v3 3/8] x86emul: generalize invlpg() hook
Posted by Jan Beulich 4 years, 6 months ago
The hook is already in use for INVLPGA as well. Rename the hook and add
parameters. For the moment INVLPGA with a non-zero ASID remains
unsupported, but the TODO item gets pushed into the actual hook handler.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Paul Durrant <paul.durrant@citrix.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
v2: New.

--- a/tools/fuzz/x86_instruction_emulator/fuzz-emul.c
+++ b/tools/fuzz/x86_instruction_emulator/fuzz-emul.c
@@ -370,16 +370,23 @@ static int fuzz_cmpxchg(
     return maybe_fail(ctxt, "cmpxchg", true);
 }
 
-static int fuzz_invlpg(
-    enum x86_segment seg,
-    unsigned long offset,
+static int fuzz_tlb_op(
+    enum x86emul_tlb_op op,
+    unsigned long addr,
+    unsigned long aux,
     struct x86_emulate_ctxt *ctxt)
 {
-    /* invlpg(), unlike all other hooks, may be called with x86_seg_none. */
-    assert(is_x86_user_segment(seg) || seg == x86_seg_none);
-    assert(ctxt->addr_size == 64 || !(offset >> 32));
+    switch ( op )
+    {
+    case x86emul_invlpg:
+        assert(is_x86_user_segment(aux));
+        /* fall through */
+    case x86emul_invlpga:
+        assert(ctxt->addr_size == 64 || !(addr >> 32));
+        break;
+    }
 
-    return maybe_fail(ctxt, "invlpg", false);
+    return maybe_fail(ctxt, "TLB-management", false);
 }
 
 static int fuzz_cache_op(
@@ -624,7 +631,7 @@ static const struct x86_emulate_ops all_
     SET(read_msr),
     SET(write_msr),
     SET(cache_op),
-    SET(invlpg),
+    SET(tlb_op),
     .get_fpu    = emul_test_get_fpu,
     .put_fpu    = emul_test_put_fpu,
     .cpuid      = emul_test_cpuid,
@@ -733,12 +740,12 @@ enum {
     HOOK_read_msr,
     HOOK_write_msr,
     HOOK_cache_op,
+    HOOK_tlb_op,
     HOOK_cpuid,
     HOOK_inject_hw_exception,
     HOOK_inject_sw_interrupt,
     HOOK_get_fpu,
     HOOK_put_fpu,
-    HOOK_invlpg,
     HOOK_vmfunc,
     CANONICALIZE_rip,
     CANONICALIZE_rsp,
@@ -777,9 +784,9 @@ static void disable_hooks(struct x86_emu
     MAYBE_DISABLE_HOOK(read_msr);
     MAYBE_DISABLE_HOOK(write_msr);
     MAYBE_DISABLE_HOOK(cache_op);
+    MAYBE_DISABLE_HOOK(tlb_op);
     MAYBE_DISABLE_HOOK(cpuid);
     MAYBE_DISABLE_HOOK(get_fpu);
-    MAYBE_DISABLE_HOOK(invlpg);
 }
 
 /*
--- a/xen/arch/x86/hvm/emulate.c
+++ b/xen/arch/x86/hvm/emulate.c
@@ -2356,36 +2356,52 @@ static void hvmemul_put_fpu(
     }
 }
 
-static int hvmemul_invlpg(
-    enum x86_segment seg,
-    unsigned long offset,
+static int hvmemul_tlb_op(
+    enum x86emul_tlb_op op,
+    unsigned long addr,
+    unsigned long aux,
     struct x86_emulate_ctxt *ctxt)
 {
     struct hvm_emulate_ctxt *hvmemul_ctxt =
         container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
-    unsigned long addr;
-    int rc;
-
-    rc = hvmemul_virtual_to_linear(
-        seg, offset, 1, NULL, hvm_access_none, hvmemul_ctxt, &addr);
+    int rc = X86EMUL_OKAY;
 
-    if ( rc == X86EMUL_EXCEPTION )
+    switch ( op )
     {
-        /*
-         * `invlpg` takes segment bases into account, but is not subject to
-         * faults from segment type/limit checks, and is specified as a NOP
-         * when issued on non-canonical addresses.
-         *
-         * hvmemul_virtual_to_linear() raises exceptions for type/limit
-         * violations, so squash them.
-         */
-        x86_emul_reset_event(ctxt);
-        rc = X86EMUL_OKAY;
+    case x86emul_invlpg:
+        rc = hvmemul_virtual_to_linear(aux, addr, 1, NULL, hvm_access_none,
+                                       hvmemul_ctxt, &addr);
+
+        if ( rc == X86EMUL_EXCEPTION )
+        {
+            /*
+             * `invlpg` takes segment bases into account, but is not subject
+             * to faults from segment type/limit checks, and is specified as
+             * a NOP when issued on non-canonical addresses.
+             *
+             * hvmemul_virtual_to_linear() raises exceptions for type/limit
+             * violations, so squash them.
+             */
+            x86_emul_reset_event(ctxt);
+            rc = X86EMUL_OKAY;
+        }
+
+        if ( rc == X86EMUL_OKAY )
+            paging_invlpg(current, addr);
+        break;
+
+    case x86emul_invlpga:
+        /* TODO: Support ASIDs. */
+        if ( !aux )
+            paging_invlpg(current, addr);
+        else
+        {
+            x86_emul_hw_exception(TRAP_invalid_op, X86_EVENT_NO_EC, ctxt);
+            rc = X86EMUL_EXCEPTION;
+        }
+        break;
     }
 
-    if ( rc == X86EMUL_OKAY )
-        paging_invlpg(current, addr);
-
     return rc;
 }
 
@@ -2425,10 +2441,10 @@ static const struct x86_emulate_ops hvm_
     .read_msr      = hvmemul_read_msr,
     .write_msr     = hvmemul_write_msr,
     .cache_op      = hvmemul_cache_op,
+    .tlb_op        = hvmemul_tlb_op,
     .cpuid         = x86emul_cpuid,
     .get_fpu       = hvmemul_get_fpu,
     .put_fpu       = hvmemul_put_fpu,
-    .invlpg        = hvmemul_invlpg,
     .vmfunc        = hvmemul_vmfunc,
 };
 
@@ -2452,10 +2468,10 @@ static const struct x86_emulate_ops hvm_
     .read_msr      = hvmemul_read_msr,
     .write_msr     = hvmemul_write_msr_discard,
     .cache_op      = hvmemul_cache_op_discard,
+    .tlb_op        = hvmemul_tlb_op,
     .cpuid         = x86emul_cpuid,
     .get_fpu       = hvmemul_get_fpu,
     .put_fpu       = hvmemul_put_fpu,
-    .invlpg        = hvmemul_invlpg,
     .vmfunc        = hvmemul_vmfunc,
 };
 
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -5590,10 +5590,9 @@ x86_emulate(
             generate_exception_if(!(msr_val & EFER_SVME) ||
                                   !in_protmode(ctxt, ops), EXC_UD);
             generate_exception_if(!mode_ring0(), EXC_GP, 0);
-            generate_exception_if(_regs.ecx, EXC_UD); /* TODO: Support ASIDs. */
-            fail_if(ops->invlpg == NULL);
-            if ( (rc = ops->invlpg(x86_seg_none, truncate_ea(_regs.r(ax)),
-                                   ctxt)) )
+            fail_if(!ops->tlb_op);
+            if ( (rc = ops->tlb_op(x86emul_invlpga, truncate_ea(_regs.r(ax)),
+                                   _regs.ecx, ctxt)) != X86EMUL_OKAY )
                 goto done;
             break;
 
@@ -5747,8 +5746,9 @@ x86_emulate(
         case GRP7_MEM(7): /* invlpg */
             ASSERT(ea.type == OP_MEM);
             generate_exception_if(!mode_ring0(), EXC_GP, 0);
-            fail_if(ops->invlpg == NULL);
-            if ( (rc = ops->invlpg(ea.mem.seg, ea.mem.off, ctxt)) )
+            fail_if(!ops->tlb_op);
+            if ( (rc = ops->tlb_op(x86emul_invlpg, ea.mem.off, ea.mem.seg,
+                                   ctxt)) != X86EMUL_OKAY )
                 goto done;
             break;
 
--- a/xen/arch/x86/x86_emulate/x86_emulate.h
+++ b/xen/arch/x86/x86_emulate/x86_emulate.h
@@ -185,6 +185,11 @@ enum x86emul_cache_op {
     x86emul_wbnoinvd,
 };
 
+enum x86emul_tlb_op {
+    x86emul_invlpg,
+    x86emul_invlpga,
+};
+
 struct x86_emulate_state;
 
 /*
@@ -472,6 +477,19 @@ struct x86_emulate_ops
         unsigned long offset,
         struct x86_emulate_ctxt *ctxt);
 
+    /*
+     * tlb_op: Invalidate paging structures which map addressed byte.
+     *
+     * @addr and @aux have @op-specific meaning:
+     * - INVLPG:  @aux:@addr represent seg:offset
+     * - INVLPGA: @addr is the linear address, @aux the ASID
+     */
+    int (*tlb_op)(
+        enum x86emul_tlb_op op,
+        unsigned long addr,
+        unsigned long aux,
+        struct x86_emulate_ctxt *ctxt);
+
     /* cpuid: Emulate CPUID via given set of EAX-EDX inputs/outputs. */
     int (*cpuid)(
         uint32_t leaf,
@@ -499,12 +517,6 @@ struct x86_emulate_ops
         enum x86_emulate_fpu_type backout,
         const struct x86_emul_fpu_aux *aux);
 
-    /* invlpg: Invalidate paging structures which map addressed byte. */
-    int (*invlpg)(
-        enum x86_segment seg,
-        unsigned long offset,
-        struct x86_emulate_ctxt *ctxt);
-
     /* vmfunc: Emulate VMFUNC via given set of EAX ECX inputs */
     int (*vmfunc)(
         struct x86_emulate_ctxt *ctxt);


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel
[Xen-devel] [PATCH v3 4/8] x86emul: support INVPCID
Posted by Jan Beulich 4 years, 6 months ago
Just like for INVLPGA the HVM hook only supports PCID 0 for the time
being for individual address invalidation. It also translates the other
types to a full flush, which is architecturally permitted and
performance-wise presumably not much worse because emulation is slow
anyway.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Paul Durrant <paul.durrant@citrix.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
v3: Re-base over X86_INVPCID_* name change.
v2: New.

--- a/tools/fuzz/x86_instruction_emulator/fuzz-emul.c
+++ b/tools/fuzz/x86_instruction_emulator/fuzz-emul.c
@@ -382,6 +382,7 @@ static int fuzz_tlb_op(
         assert(is_x86_user_segment(aux));
         /* fall through */
     case x86emul_invlpga:
+    case x86emul_invpcid:
         assert(ctxt->addr_size == 64 || !(addr >> 32));
         break;
     }
--- a/tools/tests/x86_emulator/test_x86_emulator.c
+++ b/tools/tests/x86_emulator/test_x86_emulator.c
@@ -684,6 +684,38 @@ static int read_msr(
     return X86EMUL_UNHANDLEABLE;
 }
 
+#define INVPCID_ADDR 0x12345678
+#define INVPCID_PCID 0x123
+
+static int read_cr_invpcid(
+    unsigned int reg,
+    unsigned long *val,
+    struct x86_emulate_ctxt *ctxt)
+{
+    int rc = emul_test_read_cr(reg, val, ctxt);
+
+    if ( rc == X86EMUL_OKAY && reg == 4 )
+        *val |= X86_CR4_PCIDE;
+
+    return rc;
+}
+
+static int tlb_op_invpcid(
+    enum x86emul_tlb_op op,
+    unsigned long addr,
+    unsigned long aux,
+    struct x86_emulate_ctxt *ctxt)
+{
+    static unsigned int seq;
+
+    if ( op != x86emul_invpcid || addr != INVPCID_ADDR ||
+         x86emul_invpcid_pcid(aux) != (seq < 4 ? 0 : INVPCID_PCID) ||
+         x86emul_invpcid_type(aux) != (seq++ & 3) )
+        return X86EMUL_UNHANDLEABLE;
+
+    return X86EMUL_OKAY;
+}
+
 static struct x86_emulate_ops emulops = {
     .read       = read,
     .insn_fetch = fetch,
@@ -4482,6 +4514,46 @@ int main(int argc, char **argv)
         printf("okay\n");
     }
     else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing invpcid 16(%ecx),%%edx...");
+    if ( stack_exec )
+    {
+        decl_insn(invpcid);
+
+        asm volatile ( put_insn(invpcid, "invpcid 16(%0), %1")
+                       :: "c" (NULL), "d" (0L) );
+
+        res[4] = 0;
+        res[5] = 0;
+        res[6] = INVPCID_ADDR;
+        res[7] = 0;
+        regs.ecx = (unsigned long)res;
+        emulops.tlb_op = tlb_op_invpcid;
+
+        for ( ; ; )
+        {
+            for ( regs.edx = 0; regs.edx < 4; ++regs.edx )
+            {
+                set_insn(invpcid);
+                rc = x86_emulate(&ctxt, &emulops);
+                if ( rc != X86EMUL_OKAY || !check_eip(invpcid) )
+                    goto fail;
+            }
+
+            if ( ctxt.addr_size < 64 || res[4] == INVPCID_PCID )
+                break;
+
+            emulops.read_cr = read_cr_invpcid;
+            res[4] = INVPCID_PCID;
+        }
+
+        emulops.read_cr = emul_test_read_cr;
+        emulops.tlb_op = NULL;
+
+        printf("okay\n");
+    }
+    else
         printf("skipped\n");
 
 #undef decl_insn
--- a/tools/tests/x86_emulator/x86-emulate.c
+++ b/tools/tests/x86_emulator/x86-emulate.c
@@ -72,6 +72,7 @@ bool emul_test_init(void)
      * them.
      */
     cp.basic.movbe = true;
+    cp.feat.invpcid = true;
     cp.feat.adx = true;
     cp.feat.avx512pf = cp.feat.avx512f;
     cp.feat.rdpid = true;
@@ -141,7 +142,7 @@ int emul_test_cpuid(
      */
     if ( leaf == 7 && subleaf == 0 )
     {
-        res->b |= 1U << 19;
+        res->b |= (1U << 10) | (1U << 19);
         if ( res->b & (1U << 16) )
             res->b |= 1U << 26;
         res->c |= 1U << 22;
--- a/xen/arch/x86/hvm/emulate.c
+++ b/xen/arch/x86/hvm/emulate.c
@@ -2390,8 +2390,16 @@ static int hvmemul_tlb_op(
             paging_invlpg(current, addr);
         break;
 
+    case x86emul_invpcid:
+        if ( x86emul_invpcid_type(aux) != X86_INVPCID_INDIV_ADDR )
+        {
+            hvm_asid_flush_vcpu(current);
+            break;
+        }
+        aux = x86emul_invpcid_pcid(aux);
+        /* fall through */
     case x86emul_invlpga:
-        /* TODO: Support ASIDs. */
+        /* TODO: Support ASIDs/PCIDs. */
         if ( !aux )
             paging_invlpg(current, addr);
         else
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -496,6 +496,7 @@ static const struct ext0f38_table {
     [0x7a ... 0x7c] = { .simd_size = simd_none, .two_op = 1 },
     [0x7d ... 0x7e] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
     [0x7f] = { .simd_size = simd_packed_fp, .d8s = d8s_vl },
+    [0x82] = { .simd_size = simd_other },
     [0x83] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
     [0x88] = { .simd_size = simd_packed_fp, .two_op = 1, .d8s = d8s_dq },
     [0x89] = { .simd_size = simd_packed_int, .two_op = 1, .d8s = d8s_dq },
@@ -1875,6 +1876,7 @@ in_protmode(
 #define vcpu_has_hle()         (ctxt->cpuid->feat.hle)
 #define vcpu_has_avx2()        (ctxt->cpuid->feat.avx2)
 #define vcpu_has_bmi2()        (ctxt->cpuid->feat.bmi2)
+#define vcpu_has_invpcid()     (ctxt->cpuid->feat.invpcid)
 #define vcpu_has_rtm()         (ctxt->cpuid->feat.rtm)
 #define vcpu_has_mpx()         (ctxt->cpuid->feat.mpx)
 #define vcpu_has_avx512f()     (ctxt->cpuid->feat.avx512f)
@@ -9124,6 +9126,48 @@ x86_emulate(
         ASSERT(!state->simd_size);
         break;
 
+    case X86EMUL_OPC_66(0x0f38, 0x82): /* invpcid reg,m128 */
+        vcpu_must_have(invpcid);
+        generate_exception_if(ea.type != OP_MEM, EXC_UD);
+        generate_exception_if(!mode_ring0(), EXC_GP, 0);
+
+        if ( (rc = ops->read(ea.mem.seg, ea.mem.off, mmvalp, 16,
+                             ctxt)) != X86EMUL_OKAY )
+            goto done;
+
+        generate_exception_if(mmvalp->xmm[0] & ~0xfff, EXC_GP, 0);
+        dst.val = mode_64bit() ? *dst.reg : (uint32_t)*dst.reg;
+
+        switch ( dst.val )
+        {
+        case X86_INVPCID_INDIV_ADDR:
+             generate_exception_if(!is_canonical_address(mmvalp->xmm[1]),
+                                   EXC_GP, 0);
+             /* fall through */
+        case X86_INVPCID_SINGLE_CTXT:
+             if ( !mode_64bit() || !ops->read_cr )
+                 cr4 = 0;
+             else if ( (rc = ops->read_cr(4, &cr4, ctxt)) != X86EMUL_OKAY )
+                 goto done;
+             generate_exception_if(!(cr4 & X86_CR4_PCIDE) && mmvalp->xmm[0],
+                                   EXC_GP, 0);
+             break;
+        case X86_INVPCID_ALL_INCL_GLOBAL:
+        case X86_INVPCID_ALL_NON_GLOBAL:
+             break;
+        default:
+             generate_exception(EXC_GP, 0);
+        }
+
+        fail_if(!ops->tlb_op);
+        if ( (rc = ops->tlb_op(x86emul_invpcid, truncate_ea(mmvalp->xmm[1]),
+                               x86emul_invpcid_aux(mmvalp->xmm[0], dst.val),
+                               ctxt)) != X86EMUL_OKAY )
+            goto done;
+
+        state->simd_size = simd_none;
+        break;
+
     case X86EMUL_OPC_EVEX_66(0x0f38, 0x83): /* vpmultishiftqb [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
         generate_exception_if(!evex.w, EXC_UD);
         host_and_vcpu_must_have(avx512_vbmi);
--- a/xen/arch/x86/x86_emulate/x86_emulate.h
+++ b/xen/arch/x86/x86_emulate/x86_emulate.h
@@ -188,8 +188,26 @@ enum x86emul_cache_op {
 enum x86emul_tlb_op {
     x86emul_invlpg,
     x86emul_invlpga,
+    x86emul_invpcid,
 };
 
+static inline unsigned int x86emul_invpcid_aux(unsigned int pcid,
+                                            unsigned int type)
+{
+    ASSERT(!(pcid & ~0xfff));
+    return (type << 12) | pcid;
+}
+
+static inline unsigned int x86emul_invpcid_pcid(unsigned int aux)
+{
+    return aux & 0xfff;
+}
+
+static inline unsigned int x86emul_invpcid_type(unsigned int aux)
+{
+    return aux >> 12;
+}
+
 struct x86_emulate_state;
 
 /*
@@ -483,6 +501,8 @@ struct x86_emulate_ops
      * @addr and @aux have @op-specific meaning:
      * - INVLPG:  @aux:@addr represent seg:offset
      * - INVLPGA: @addr is the linear address, @aux the ASID
+     * - INVPCID: @addr is the linear address, @aux the combination of
+     *            PCID and type (see x86emul_invpcid_*()).
      */
     int (*tlb_op)(
         enum x86emul_tlb_op op,


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel
[Xen-devel] [PATCH v3 5/8] x86emul: support MOVDIR{I,64B} insns
Posted by Jan Beulich 4 years, 6 months ago
Note that SDM revision 070 doesn't specify exception behavior for
ModRM.mod != 0b11; assuming #UD here.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v3: Update description.

--- a/tools/tests/x86_emulator/test_x86_emulator.c
+++ b/tools/tests/x86_emulator/test_x86_emulator.c
@@ -2196,6 +2196,36 @@ int main(int argc, char **argv)
         goto fail;
     printf("okay\n");
 
+    printf("%-40s", "Testing movdiri %edx,(%ecx)...");
+    instr[0] = 0x0f; instr[1] = 0x38; instr[2] = 0xf9; instr[3] = 0x11;
+    regs.eip = (unsigned long)&instr[0];
+    regs.ecx = (unsigned long)memset(res, -1, 16);
+    regs.edx = 0x44332211;
+    rc = x86_emulate(&ctxt, &emulops);
+    if ( (rc != X86EMUL_OKAY) ||
+         (regs.eip != (unsigned long)&instr[4]) ||
+         res[0] != 0x44332211 || ~res[1] )
+        goto fail;
+    printf("okay\n");
+
+    printf("%-40s", "Testing movdir64b 144(%edx),%ecx...");
+    instr[0] = 0x66; instr[1] = 0x0f; instr[2] = 0x38; instr[3] = 0xf8;
+    instr[4] = 0x8a; instr[5] = 0x90; instr[8] = instr[7] = instr[6] = 0;
+    regs.eip = (unsigned long)&instr[0];
+    for ( i = 0; i < 64; ++i )
+        res[i] = i - 20;
+    regs.edx = (unsigned long)res;
+    regs.ecx = (unsigned long)(res + 16);
+    rc = x86_emulate(&ctxt, &emulops);
+    if ( (rc != X86EMUL_OKAY) ||
+         (regs.eip != (unsigned long)&instr[9]) ||
+         res[15] != -5 || res[32] != 12 )
+        goto fail;
+    for ( i = 16; i < 32; ++i )
+        if ( res[i] != i )
+            goto fail;
+    printf("okay\n");
+
     printf("%-40s", "Testing movq %mm3,(%ecx)...");
     if ( stack_exec && cpu_has_mmx )
     {
--- a/tools/tests/x86_emulator/x86-emulate.c
+++ b/tools/tests/x86_emulator/x86-emulate.c
@@ -76,6 +76,8 @@ bool emul_test_init(void)
     cp.feat.adx = true;
     cp.feat.avx512pf = cp.feat.avx512f;
     cp.feat.rdpid = true;
+    cp.feat.movdiri = true;
+    cp.feat.movdir64b = true;
     cp.extd.clzero = true;
 
     if ( cpu_has_xsave )
@@ -137,15 +139,15 @@ int emul_test_cpuid(
         res->c |= 1U << 22;
 
     /*
-     * The emulator doesn't itself use ADCX/ADOX/RDPID nor the S/G prefetch
-     * insns, so we can always run the respective tests.
+     * The emulator doesn't itself use ADCX/ADOX/RDPID/MOVDIR* nor the S/G
+     * prefetch insns, so we can always run the respective tests.
      */
     if ( leaf == 7 && subleaf == 0 )
     {
         res->b |= (1U << 10) | (1U << 19);
         if ( res->b & (1U << 16) )
             res->b |= 1U << 26;
-        res->c |= 1U << 22;
+        res->c |= (1U << 22) | (1U << 27) | (1U << 28);
     }
 
     /*
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -548,6 +548,8 @@ static const struct ext0f38_table {
     [0xf1] = { .to_mem = 1, .two_op = 1 },
     [0xf2 ... 0xf3] = {},
     [0xf5 ... 0xf7] = {},
+    [0xf8] = { .simd_size = simd_other },
+    [0xf9] = { .to_mem = 1 },
 };
 
 /* Shift values between src and dst sizes of pmov{s,z}x{b,w,d}{w,d,q}. */
@@ -1902,6 +1904,8 @@ in_protmode(
 #define vcpu_has_avx512_bitalg() (ctxt->cpuid->feat.avx512_bitalg)
 #define vcpu_has_avx512_vpopcntdq() (ctxt->cpuid->feat.avx512_vpopcntdq)
 #define vcpu_has_rdpid()       (ctxt->cpuid->feat.rdpid)
+#define vcpu_has_movdiri()     (ctxt->cpuid->feat.movdiri)
+#define vcpu_has_movdir64b()   (ctxt->cpuid->feat.movdir64b)
 #define vcpu_has_avx512_4vnniw() (ctxt->cpuid->feat.avx512_4vnniw)
 #define vcpu_has_avx512_4fmaps() (ctxt->cpuid->feat.avx512_4fmaps)
 
@@ -2693,10 +2697,12 @@ x86_decode_0f38(
     {
     case 0x00 ... 0xef:
     case 0xf2 ... 0xf5:
-    case 0xf7 ... 0xff:
+    case 0xf7 ... 0xf8:
+    case 0xfa ... 0xff:
         op_bytes = 0;
         /* fall through */
     case 0xf6: /* adcx / adox */
+    case 0xf9: /* movdiri */
         ctxt->opcode |= MASK_INSR(vex.pfx, X86EMUL_OPC_PFX_MASK);
         break;
 
@@ -9896,6 +9902,32 @@ x86_emulate(
                             : "0" ((uint32_t)src.val), "rm" (_regs.edx) );
         break;
 
+    case X86EMUL_OPC_66(0x0f38, 0xf8): /* movdir64b r,m512 */
+        vcpu_must_have(movdir64b);
+        generate_exception_if(ea.type != OP_MEM, EXC_UD);
+        src.val = truncate_ea(*dst.reg);
+        generate_exception_if(!is_aligned(x86_seg_es, src.val, 64, ctxt, ops),
+                              EXC_GP, 0);
+        /* Ignore the non-temporal behavior for now. */
+        fail_if(!ops->write);
+        BUILD_BUG_ON(sizeof(*mmvalp) < 64);
+        if ( (rc = ops->read(ea.mem.seg, ea.mem.off, mmvalp, 64,
+                             ctxt)) != X86EMUL_OKAY ||
+             (rc = ops->write(x86_seg_es, src.val, mmvalp, 64,
+                              ctxt)) != X86EMUL_OKAY )
+            goto done;
+        state->simd_size = simd_none;
+        sfence = true;
+        break;
+
+    case X86EMUL_OPC(0x0f38, 0xf9): /* movdiri mem,r */
+        vcpu_must_have(movdiri);
+        generate_exception_if(dst.type != OP_MEM, EXC_UD);
+        /* Ignore the non-temporal behavior for now. */
+        dst.val = src.val;
+        sfence = true;
+        break;
+
     case X86EMUL_OPC_VEX_66(0x0f3a, 0x00): /* vpermq $imm8,ymm/m256,ymm */
     case X86EMUL_OPC_VEX_66(0x0f3a, 0x01): /* vpermpd $imm8,ymm/m256,ymm */
         generate_exception_if(!vex.l || !vex.w, EXC_UD);
--- a/xen/include/public/arch-x86/cpufeatureset.h
+++ b/xen/include/public/arch-x86/cpufeatureset.h
@@ -237,6 +237,8 @@ XEN_CPUFEATURE(AVX512_BITALG, 6*32+12) /
 XEN_CPUFEATURE(AVX512_VPOPCNTDQ, 6*32+14) /*A  POPCNT for vectors of DW/QW */
 XEN_CPUFEATURE(RDPID,         6*32+22) /*A  RDPID instruction */
 XEN_CPUFEATURE(CLDEMOTE,      6*32+25) /*A  CLDEMOTE instruction */
+XEN_CPUFEATURE(MOVDIRI,       6*32+27) /*A  MOVDIRI instruction */
+XEN_CPUFEATURE(MOVDIR64B,     6*32+28) /*A  MOVDIR64B instruction */
 
 /* AMD-defined CPU features, CPUID level 0x80000007.edx, word 7 */
 XEN_CPUFEATURE(ITSC,          7*32+ 8) /*   Invariant TSC */

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel
Re: [Xen-devel] [PATCH v3 5/8] x86emul: support MOVDIR{I, 64B} insns
Posted by Andrew Cooper 4 years, 6 months ago
On 03/09/2019 10:39, Jan Beulich wrote:
> Note that SDM revision 070 doesn't specify exception behavior for
> ModRM.mod != 0b11; assuming #UD here.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

What are we going to do about the ->write() hook atomicity?  I'm happy
to put it on the TODO list, but we can't simply ignore the problem.

~Andrew

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel
Re: [Xen-devel] [PATCH v3 5/8] x86emul: support MOVDIR{I, 64B} insns
Posted by Jan Beulich 4 years, 6 months ago
On 03.09.2019 12:28, Andrew Cooper wrote:
> On 03/09/2019 10:39, Jan Beulich wrote:
>> Note that SDM revision 070 doesn't specify exception behavior for
>> ModRM.mod != 0b11; assuming #UD here.
>>
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> 
> What are we going to do about the ->write() hook atomicity?  I'm happy
> to put it on the TODO list, but we can't simply ignore the problem.

So do you not agree with my assessment that our memcpy()
implementation satisfies the need, and it's just not very
nice that the ->write() hook is dependent upon this?

Jan

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel
Re: [Xen-devel] [PATCH v3 5/8] x86emul: support MOVDIR{I, 64B} insns
Posted by Andrew Cooper 4 years, 6 months ago
On 03/09/2019 13:25, Jan Beulich wrote:
> On 03.09.2019 12:28, Andrew Cooper wrote:
>> On 03/09/2019 10:39, Jan Beulich wrote:
>>> Note that SDM revision 070 doesn't specify exception behavior for
>>> ModRM.mod != 0b11; assuming #UD here.
>>>
>>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>> What are we going to do about the ->write() hook atomicity?  I'm happy
>> to put it on the TODO list, but we can't simply ignore the problem.
> So do you not agree with my assessment that our memcpy()
> implementation satisfies the need, and it's just not very
> nice that the ->write() hook is dependent upon this?

We use __builtin_memcpy(), not necessarily our own local implementation.

Our own copy uses rep movsq followed by rep movsb of up to 7 bytes,
which doesn't handle 2 and 4 byte copies atomically.  More generally,
rep movs doesn't provide guarantees about the external visibility of
component writes, owing to the many different ways that such writes may
be implemented, and optimised.

Even if the above were fixed (and I'm not sure could make it correct
even for misaligned writes), we cannot depend on our own memcpy never
changing in the future.  For one, it should be a straight rep movsb on
most Intel hardware these days, for performance.

~Andrew

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel
Re: [Xen-devel] [PATCH v3 5/8] x86emul: support MOVDIR{I, 64B} insns
Posted by Jan Beulich 4 years, 6 months ago
On 04.09.2019 12:19, Andrew Cooper wrote:
> On 03/09/2019 13:25, Jan Beulich wrote:
>> On 03.09.2019 12:28, Andrew Cooper wrote:
>>> On 03/09/2019 10:39, Jan Beulich wrote:
>>>> Note that SDM revision 070 doesn't specify exception behavior for
>>>> ModRM.mod != 0b11; assuming #UD here.
>>>>
>>>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>>> What are we going to do about the ->write() hook atomicity?  I'm happy
>>> to put it on the TODO list, but we can't simply ignore the problem.
>> So do you not agree with my assessment that our memcpy()
>> implementation satisfies the need, and it's just not very
>> nice that the ->write() hook is dependent upon this?
> 
> We use __builtin_memcpy(), not necessarily our own local implementation.
> 
> Our own copy uses rep movsq followed by rep movsb of up to 7 bytes,
> which doesn't handle 2 and 4 byte copies atomically.  More generally,
> rep movs doesn't provide guarantees about the external visibility of
> component writes, owing to the many different ways that such writes may
> be implemented, and optimised.
> 
> Even if the above were fixed (and I'm not sure could make it correct
> even for misaligned writes), we cannot depend on our own memcpy never
> changing in the future.  For one, it should be a straight rep movsb on
> most Intel hardware these days, for performance.

Well, okay, I'll add a prepatch making HVM's ->write() hook not use
memcpy anymore for small aligned accesses. I guess I should also do
this for ->read() then, if it's not the case already.

However, as an implication this means that MOVDIR64B can't use the
->write() hook at all. We'd need to introduce a new hook, but to be
honest I have no good idea how to model it (i.e. what other
operations it might cover later on); possibly we want to come back
to this when deciding how to implement large memory block accessing
insns (FXSAVE/FXRSTOR etc).

Furthermore I don't think we currently have means to make the split
behavior of MOVDIRI correct: By using ->write(), we can't guarantee
it'll be exactly two writes.

Jan

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel
[Xen-devel] [PATCH v3 6/8] x86/HVM: scale MPERF values reported to guests (on AMD)
Posted by Jan Beulich 4 years, 6 months ago
AMD's PM specifies that MPERF (and its r/o counterpart) reads are
affected by the TSC ratio. Hence when processing such reads in software
we too should scale the values. While we don't currently (yet) expose
the underlying feature flags, besides us allowing the MSRs to be read
nevertheless, RDPRU is going to expose the values even to user space.

Furthermore, due to the not exposed feature flags, this change has the
effect of making properly inaccessible (for reads) the two MSRs.

Note that writes to MPERF (and APERF) continue to be unsupported.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v3: New.
---
I did consider whether to put the code in guest_rdmsr() instead, but
decided that it's better to have it next to TSC handling.

--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -3413,6 +3413,22 @@ int hvm_msr_read_intercept(unsigned int
         *msr_content = v->arch.hvm.msr_tsc_adjust;
         break;
 
+    case MSR_MPERF_RD_ONLY:
+        if ( !d->arch.cpuid->extd.efro )
+        {
+            goto gp_fault;
+
+    case MSR_IA32_MPERF:
+            if ( !(d->arch.cpuid->basic.raw[6].c &
+                   CPUID6_ECX_APERFMPERF_CAPABILITY) )
+                goto gp_fault;
+        }
+        if ( rdmsr_safe(msr, *msr_content) )
+            goto gp_fault;
+        if ( d->arch.cpuid->x86_vendor & (X86_VENDOR_AMD | X86_VENDOR_HYGON) )
+            *msr_content = hvm_get_guest_tsc_fixed(v, *msr_content);
+        break;
+
     case MSR_APIC_BASE:
         *msr_content = vcpu_vlapic(v)->hw.apic_base_msr;
         break;
--- a/xen/include/asm-x86/msr-index.h
+++ b/xen/include/asm-x86/msr-index.h
@@ -359,6 +359,9 @@
 #define MSR_IA32_MPERF			0x000000e7
 #define MSR_IA32_APERF			0x000000e8
 
+#define MSR_MPERF_RD_ONLY		0xc00000e7
+#define MSR_APERF_RD_ONLY		0xc00000e8
+
 #define MSR_IA32_THERM_CONTROL		0x0000019a
 #define MSR_IA32_THERM_INTERRUPT	0x0000019b
 #define MSR_IA32_THERM_STATUS		0x0000019c


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel
[Xen-devel] [PATCH v3 7/8] x86emul: support RDPRU
Posted by Jan Beulich 4 years, 6 months ago
While the PM doesn't say so, this assumes that the MPERF value read this
way gets scaled similarly to its reading through RDMSR.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v3: New.

--- a/tools/libxl/libxl_cpuid.c
+++ b/tools/libxl/libxl_cpuid.c
@@ -257,6 +257,7 @@ int libxl_cpuid_parse_config(libxl_cpuid
 
         {"invtsc",       0x80000007, NA, CPUID_REG_EDX,  8,  1},
 
+        {"rdpru",        0x80000008, NA, CPUID_REG_EBX,  4,  1},
         {"wbnoinvd",     0x80000008, NA, CPUID_REG_EBX,  9,  1},
         {"ibpb",         0x80000008, NA, CPUID_REG_EBX, 12,  1},
         {"nc",           0x80000008, NA, CPUID_REG_ECX,  0,  8},
--- a/tools/misc/xen-cpuid.c
+++ b/tools/misc/xen-cpuid.c
@@ -146,6 +146,8 @@ static const char *const str_e8b[32] =
 {
     [ 0] = "clzero",
 
+    [ 4] = "rdpru",
+
     /* [ 8] */            [ 9] = "wbnoinvd",
 
     [12] = "ibpb",
--- a/tools/tests/x86_emulator/test_x86_emulator.c
+++ b/tools/tests/x86_emulator/test_x86_emulator.c
@@ -671,6 +671,13 @@ static int read_msr(
 {
     switch ( reg )
     {
+    case 0x000000e8: /* APERF */
+    case 0xc00000e8: /* APERF_RD_ONLY */
+#define APERF_LO_VALUE 0xAEAEAEAE
+#define APERF_HI_VALUE 0xEAEAEAEA
+        *val = ((uint64_t)APERF_HI_VALUE << 32) | APERF_LO_VALUE;
+        return X86EMUL_OKAY;
+
     case 0xc0000080: /* EFER */
         *val = ctxt->addr_size > 32 ? 0x500 /* LME|LMA */ : 0;
         return X86EMUL_OKAY;
@@ -2226,6 +2233,30 @@ int main(int argc, char **argv)
             goto fail;
     printf("okay\n");
 
+    printf("%-40s", "Testing rdpru...");
+    instr[0] = 0x0f; instr[1] = 0x01; instr[2] = 0xfd;
+    regs.eip = (unsigned long)&instr[0];
+    regs.ecx = 1;
+    regs.eflags = EFLAGS_ALWAYS_SET;
+    rc = x86_emulate(&ctxt, &emulops);
+    if ( (rc != X86EMUL_OKAY) ||
+         (regs.eax != APERF_LO_VALUE) || (regs.edx != APERF_HI_VALUE) ||
+         !(regs.eflags & X86_EFLAGS_CF) ||
+         (regs.eip != (unsigned long)&instr[3]) )
+        goto fail;
+    if ( ctxt.cpuid->extd.rdpru_max < 0xffff )
+    {
+        regs.eip = (unsigned long)&instr[0];
+        regs.ecx = ctxt.cpuid->extd.rdpru_max + 1;
+        regs.eflags = EFLAGS_ALWAYS_SET | X86_EFLAGS_CF;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) || regs.eax || regs.edx ||
+             (regs.eflags & X86_EFLAGS_CF) ||
+             (regs.eip != (unsigned long)&instr[3]) )
+            goto fail;
+    }
+    printf("okay\n");
+
     printf("%-40s", "Testing movq %mm3,(%ecx)...");
     if ( stack_exec && cpu_has_mmx )
     {
--- a/tools/tests/x86_emulator/x86-emulate.c
+++ b/tools/tests/x86_emulator/x86-emulate.c
@@ -79,6 +79,8 @@ bool emul_test_init(void)
     cp.feat.movdiri = true;
     cp.feat.movdir64b = true;
     cp.extd.clzero = true;
+    cp.extd.rdpru = true;
+    cp.extd.rdpru_max = 1;
 
     if ( cpu_has_xsave )
     {
@@ -151,11 +153,11 @@ int emul_test_cpuid(
     }
 
     /*
-     * The emulator doesn't itself use CLZERO, so we can always run the
+     * The emulator doesn't itself use CLZERO/RDPRU, so we can always run the
      * respective test(s).
      */
     if ( leaf == 0x80000008 )
-        res->b |= 1U << 0;
+        res->b |= (1U << 0) | (1U << 4);
 
     return X86EMUL_OKAY;
 }
--- a/xen/arch/x86/cpuid.c
+++ b/xen/arch/x86/cpuid.c
@@ -545,6 +545,11 @@ void recalculate_cpuid_policy(struct dom
 
     p->extd.maxlinaddr = p->extd.lm ? 48 : 32;
 
+    if ( p->extd.rdpru )
+        p->extd.rdpru_max = min(p->extd.rdpru_max, max->extd.rdpru_max);
+    else
+        p->extd.rdpru_max = 0;
+
     recalculate_xstate(p);
     recalculate_misc(p);
 
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -1872,6 +1872,7 @@ in_protmode(
 #define vcpu_has_fma4()        (ctxt->cpuid->extd.fma4)
 #define vcpu_has_tbm()         (ctxt->cpuid->extd.tbm)
 #define vcpu_has_clzero()      (ctxt->cpuid->extd.clzero)
+#define vcpu_has_rdpru()       (ctxt->cpuid->extd.rdpru)
 #define vcpu_has_wbnoinvd()    (ctxt->cpuid->extd.wbnoinvd)
 
 #define vcpu_has_bmi1()        (ctxt->cpuid->feat.bmi1)
@@ -5670,6 +5671,52 @@ x86_emulate(
                 limit -= sizeof(zero);
             }
             break;
+
+        case 0xfd: /* rdpru */
+            vcpu_must_have(rdpru);
+
+            if ( !mode_ring0() )
+            {
+                fail_if(!ops->read_cr);
+                if ( (rc = ops->read_cr(4, &cr4, ctxt)) != X86EMUL_OKAY )
+                    goto done;
+                generate_exception_if(cr4 & X86_CR4_TSD, EXC_UD);
+            }
+
+            switch ( _regs.ecx )
+            {
+            case 0:  n = MSR_IA32_MPERF; break;
+            case 1:  n = MSR_IA32_APERF; break;
+            default: n = 0; break;
+            }
+            if ( _regs.ecx > ctxt->cpuid->extd.rdpru_max )
+                n = 0;
+
+            _regs.eflags &= ~EFLAGS_MASK;
+            if ( n )
+            {
+                fail_if(!ops->read_msr);
+                switch ( rc = ops->read_msr(n, &msr_val, ctxt) )
+                {
+                case X86EMUL_OKAY:
+                    _regs.eflags |= X86_EFLAGS_CF;
+                    break;
+
+                case X86EMUL_EXCEPTION:
+                    x86_emul_reset_event(ctxt);
+                    rc = X86EMUL_OKAY;
+                    break;
+
+                default:
+                    goto done;
+                }
+            }
+
+            if ( !(_regs.eflags & X86_EFLAGS_CF) )
+                msr_val = 0;
+            _regs.r(dx) = msr_val >> 32;
+            _regs.r(ax) = (uint32_t)msr_val;
+            break;
         }
 
 #define _GRP7(mod, reg) \
--- a/xen/include/public/arch-x86/cpufeatureset.h
+++ b/xen/include/public/arch-x86/cpufeatureset.h
@@ -246,6 +246,7 @@ XEN_CPUFEATURE(EFRO,          7*32+10) /
 
 /* AMD-defined CPU features, CPUID level 0x80000008.ebx, word 8 */
 XEN_CPUFEATURE(CLZERO,        8*32+ 0) /*A  CLZERO instruction */
+XEN_CPUFEATURE(RDPRU,         8*32+ 4) /*A  RDPRU instruction */
 XEN_CPUFEATURE(WBNOINVD,      8*32+ 9) /*   WBNOINVD instruction */
 XEN_CPUFEATURE(IBPB,          8*32+12) /*A  IBPB support only (no IBRS, used by AMD) */
 
--- a/xen/include/xen/lib/x86/cpuid.h
+++ b/xen/include/xen/lib/x86/cpuid.h
@@ -259,7 +259,8 @@ struct cpuid_policy
                 uint32_t e8b;
                 struct { DECL_BITFIELD(e8b); };
             };
-            uint32_t /* c */:32, /* d */:32;
+            uint32_t /* c */:32;
+            uint16_t :16, rdpru_max;
         };
     } extd;
 

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel
Re: [Xen-devel] [PATCH v3 7/8] x86emul: support RDPRU
Posted by Andrew Cooper 4 years, 6 months ago
On 03/09/2019 10:41, Jan Beulich wrote:
> While the PM doesn't say so, this assumes that the MPERF value read this
> way gets scaled similarly to its reading through RDMSR.
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

This wants the following hunks merged, to at least keep the
intercept/exit codes up to date.  This is from my alternative series
which intercepted and terminated RDPRU with #UD.

diff --git a/xen/include/asm-x86/hvm/svm/vmcb.h
b/xen/include/asm-x86/hvm/svm/vmcb.h
index 5c710286f7..2bf0d50f6d 100644
--- a/xen/include/asm-x86/hvm/svm/vmcb.h
+++ b/xen/include/asm-x86/hvm/svm/vmcb.h
@@ -76,7 +76,8 @@ enum GenericIntercept2bits
     GENERAL2_INTERCEPT_MONITOR = 1 << 10,
     GENERAL2_INTERCEPT_MWAIT   = 1 << 11,
     GENERAL2_INTERCEPT_MWAIT_CONDITIONAL = 1 << 12,
-    GENERAL2_INTERCEPT_XSETBV  = 1 << 13
+    GENERAL2_INTERCEPT_XSETBV  = 1 << 13,
+    GENERAL2_INTERCEPT_RDPRU   = 1 << 14,
 };


@@ -300,6 +301,7 @@ enum VMEXIT_EXITCODE
     VMEXIT_MWAIT            = 139, /* 0x8b */
     VMEXIT_MWAIT_CONDITIONAL= 140, /* 0x8c */
     VMEXIT_XSETBV           = 141, /* 0x8d */
+    VMEXIT_RDPRU            = 142, /* 0x8e */
     VMEXIT_NPF              = 1024, /* 0x400, nested paging fault */
     VMEXIT_INVALID          =  -1
 };


> --- a/xen/arch/x86/cpuid.c
> +++ b/xen/arch/x86/cpuid.c
> @@ -545,6 +545,11 @@ void recalculate_cpuid_policy(struct dom
>  
>      p->extd.maxlinaddr = p->extd.lm ? 48 : 32;
>  
> +    if ( p->extd.rdpru )
> +        p->extd.rdpru_max = min(p->extd.rdpru_max, max->extd.rdpru_max);
> +    else
> +        p->extd.rdpru_max = 0;
> +
>      recalculate_xstate(p);
>      recalculate_misc(p);

The CPUID logic needs quite a bit more than this, and to be safe on
migrate.  For one, recalculate_xstate() unilaterally clobbers this to 0.

Shall I do a prep patch getting the CPUID side of things in order?

>  
> --- a/xen/arch/x86/x86_emulate/x86_emulate.c
> +++ b/xen/arch/x86/x86_emulate/x86_emulate.c
> @@ -5670,6 +5671,52 @@ x86_emulate(
>                  limit -= sizeof(zero);
>              }
>              break;
> +
> +        case 0xfd: /* rdpru */
> +            vcpu_must_have(rdpru);
> +
> +            if ( !mode_ring0() )
> +            {
> +                fail_if(!ops->read_cr);
> +                if ( (rc = ops->read_cr(4, &cr4, ctxt)) != X86EMUL_OKAY )
> +                    goto done;
> +                generate_exception_if(cr4 & X86_CR4_TSD, EXC_UD);
> +            }
> +
> +            switch ( _regs.ecx )
> +            {
> +            case 0:  n = MSR_IA32_MPERF; break;
> +            case 1:  n = MSR_IA32_APERF; break;
> +            default: n = 0; break;
> +            }
> +            if ( _regs.ecx > ctxt->cpuid->extd.rdpru_max )
> +                n = 0;

This can be folded into the switch statement.  Something like (
_regs.ecx | -(_regs.ecx > ctxt->cpuid->extd.rdpru_max) )

Also, the sentinel might better be -1, which is not in a defined MSR
block.  MSR 0 is a P5-compat MCE MSR, even on AMD hardware.

~Andrew

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel
Re: [Xen-devel] [PATCH v3 7/8] x86emul: support RDPRU
Posted by Jan Beulich 4 years, 6 months ago
On 03.09.2019 14:34, Andrew Cooper wrote:
> On 03/09/2019 10:41, Jan Beulich wrote:
>> While the PM doesn't say so, this assumes that the MPERF value read this
>> way gets scaled similarly to its reading through RDMSR.
>>
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> 
> This wants the following hunks merged, to at least keep the
> intercept/exit codes up to date.  This is from my alternative series
> which intercepted and terminated RDPRU with #UD.
> 
> diff --git a/xen/include/asm-x86/hvm/svm/vmcb.h
> b/xen/include/asm-x86/hvm/svm/vmcb.h
> index 5c710286f7..2bf0d50f6d 100644
> --- a/xen/include/asm-x86/hvm/svm/vmcb.h
> +++ b/xen/include/asm-x86/hvm/svm/vmcb.h
> @@ -76,7 +76,8 @@ enum GenericIntercept2bits
>      GENERAL2_INTERCEPT_MONITOR = 1 << 10,
>      GENERAL2_INTERCEPT_MWAIT   = 1 << 11,
>      GENERAL2_INTERCEPT_MWAIT_CONDITIONAL = 1 << 12,
> -    GENERAL2_INTERCEPT_XSETBV  = 1 << 13
> +    GENERAL2_INTERCEPT_XSETBV  = 1 << 13,
> +    GENERAL2_INTERCEPT_RDPRU   = 1 << 14,
>  };
> 
> 
> @@ -300,6 +301,7 @@ enum VMEXIT_EXITCODE
>      VMEXIT_MWAIT            = 139, /* 0x8b */
>      VMEXIT_MWAIT_CONDITIONAL= 140, /* 0x8c */
>      VMEXIT_XSETBV           = 141, /* 0x8d */
> +    VMEXIT_RDPRU            = 142, /* 0x8e */
>      VMEXIT_NPF              = 1024, /* 0x400, nested paging fault */
>      VMEXIT_INVALID          =  -1
>  };

I wouldn't think this belongs here, but since you ask for it, I
can fold it in.

>> --- a/xen/arch/x86/cpuid.c
>> +++ b/xen/arch/x86/cpuid.c
>> @@ -545,6 +545,11 @@ void recalculate_cpuid_policy(struct dom
>>  
>>      p->extd.maxlinaddr = p->extd.lm ? 48 : 32;
>>  
>> +    if ( p->extd.rdpru )
>> +        p->extd.rdpru_max = min(p->extd.rdpru_max, max->extd.rdpru_max);
>> +    else
>> +        p->extd.rdpru_max = 0;
>> +
>>      recalculate_xstate(p);
>>      recalculate_misc(p);
> 
> The CPUID logic needs quite a bit more than this, and to be safe on
> migrate.  For one, recalculate_xstate() unilaterally clobbers this to 0.

Oh, recalculate_misc() - yes, I see this now. And I have to admit
I don't see the migration-unsafety, so ...

> Shall I do a prep patch getting the CPUID side of things in order?

... yes, I'd appreciate you doing so.

>> --- a/xen/arch/x86/x86_emulate/x86_emulate.c
>> +++ b/xen/arch/x86/x86_emulate/x86_emulate.c
>> @@ -5670,6 +5671,52 @@ x86_emulate(
>>                  limit -= sizeof(zero);
>>              }
>>              break;
>> +
>> +        case 0xfd: /* rdpru */
>> +            vcpu_must_have(rdpru);
>> +
>> +            if ( !mode_ring0() )
>> +            {
>> +                fail_if(!ops->read_cr);
>> +                if ( (rc = ops->read_cr(4, &cr4, ctxt)) != X86EMUL_OKAY )
>> +                    goto done;
>> +                generate_exception_if(cr4 & X86_CR4_TSD, EXC_UD);
>> +            }
>> +
>> +            switch ( _regs.ecx )
>> +            {
>> +            case 0:  n = MSR_IA32_MPERF; break;
>> +            case 1:  n = MSR_IA32_APERF; break;
>> +            default: n = 0; break;
>> +            }
>> +            if ( _regs.ecx > ctxt->cpuid->extd.rdpru_max )
>> +                n = 0;
> 
> This can be folded into the switch statement.  Something like (
> _regs.ecx | -(_regs.ecx > ctxt->cpuid->extd.rdpru_max) )

Will do.

> Also, the sentinel might better be -1, which is not in a defined MSR
> block.  MSR 0 is a P5-compat MCE MSR, even on AMD hardware.

I did consider this, but decided there's a vanishingly small risk
for them to expose this MSR (and if they did we could still change
the code along the lines of what you say). A sentinel of zero is
slightly cheaper to have, after all.

Jan

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel
Re: [Xen-devel] [PATCH v3 7/8] x86emul: support RDPRU
Posted by Jan Beulich 4 years, 6 months ago
On 03.09.2019 14:34, Andrew Cooper wrote:
>> --- a/xen/arch/x86/cpuid.c
>> +++ b/xen/arch/x86/cpuid.c
>> @@ -545,6 +545,11 @@ void recalculate_cpuid_policy(struct dom
>>  
>>      p->extd.maxlinaddr = p->extd.lm ? 48 : 32;
>>  
>> +    if ( p->extd.rdpru )
>> +        p->extd.rdpru_max = min(p->extd.rdpru_max, max->extd.rdpru_max);
>> +    else
>> +        p->extd.rdpru_max = 0;
>> +
>>      recalculate_xstate(p);
>>      recalculate_misc(p);
> 
> The CPUID logic needs quite a bit more than this, and to be safe on
> migrate.  For one, recalculate_xstate() unilaterally clobbers this to 0.

I've looked again - recalculate_misc() clobbers .a, .b, and .c,
but not .d afaics. Anyway, just as a note, as you've said you'd
take care of this anyway, and I'll re-base afterwards.

Jan

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel
Re: [Xen-devel] [PATCH v3 7/8] x86emul: support RDPRU
Posted by Andrew Cooper 4 years, 6 months ago
On 04/09/2019 10:26, Jan Beulich wrote:
> On 03.09.2019 14:34, Andrew Cooper wrote:
>>> --- a/xen/arch/x86/cpuid.c
>>> +++ b/xen/arch/x86/cpuid.c
>>> @@ -545,6 +545,11 @@ void recalculate_cpuid_policy(struct dom
>>>  
>>>      p->extd.maxlinaddr = p->extd.lm ? 48 : 32;
>>>  
>>> +    if ( p->extd.rdpru )
>>> +        p->extd.rdpru_max = min(p->extd.rdpru_max, max->extd.rdpru_max);
>>> +    else
>>> +        p->extd.rdpru_max = 0;
>>> +
>>>      recalculate_xstate(p);
>>>      recalculate_misc(p);
>> The CPUID logic needs quite a bit more than this, and to be safe on
>> migrate.  For one, recalculate_xstate() unilaterally clobbers this to 0.
> I've looked again - recalculate_misc() clobbers .a, .b, and .c,
> but not .d afaics.

It is clobbered in the common section at the top.

...
    /* Most of Power/RAS hidden from guests. */
    p->extd.raw[0x7].a = p->extd.raw[0x7].b = p->extd.raw[0x7].c = 0;

    p->extd.raw[0x8].d = 0;

    switch ( p->x86_vendor )
    {
...

> Anyway, just as a note, as you've said you'd
> take care of this anyway, and I'll re-base afterwards.

I looked at this a bit yesterday, and it very ugly opencoding bits of
the policy work without that work.

I've got an idea for just enough skeleton policy work to avoid the
duplicated effort, which I think will be a more sensible way of making
progress.  It will certainly reduce the latency on being able to start
MSR_SPEC_CTRL work.

~Andrew

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel
[Xen-devel] [PATCH v3 8/8] x86/HVM: don't needlessly intercept APERF/MPERF/TSC MSR reads
Posted by Jan Beulich 4 years, 6 months ago
If the hardware can handle accesses, we should allow it to do so. This
way we can expose EFRO to HVM guests, and "all" that's left for exposing
APERF/MPERF is to figure out how to handle writes to these MSRs. (Note
that the leaf 6 guest CPUID checks will evaluate to false for now, as
recalculate_misc() zaps the entire leaf for now.)

For TSC I see little point in making the intercepts dynamic, hence they
get established right when a VMCB/VMCS gets created.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v3: New.

--- a/xen/arch/x86/domctl.c
+++ b/xen/arch/x86/domctl.c
@@ -53,6 +53,7 @@ static int update_domain_cpuid_info(stru
     const struct cpuid_leaf leaf = { ctl->eax, ctl->ebx, ctl->ecx, ctl->edx };
     int old_vendor = p->x86_vendor;
     unsigned int old_7d0 = p->feat.raw[0].d, old_e8b = p->extd.raw[8].b;
+    unsigned int old_6c = p->basic.raw[6].c, old_e7d = p->extd.raw[7].d;
     bool call_policy_changed = false; /* Avoid for_each_vcpu() unnecessarily */
 
     /*
@@ -209,6 +210,14 @@ static int update_domain_cpuid_info(stru
 
             d->arch.pv.cpuidmasks->_6c = mask;
         }
+
+        /*
+         * If the APERF/MPERF policy has changed, we need to recalculate the
+         * MSR interception bitmaps.
+         */
+        call_policy_changed = (is_hvm_domain(d) &&
+                               ((old_6c ^ p->basic.raw[6].c) &
+                                CPUID6_ECX_APERFMPERF_CAPABILITY));
         break;
 
     case 7:
@@ -314,6 +323,16 @@ static int update_domain_cpuid_info(stru
         }
         break;
 
+    case 0x80000007:
+        /*
+         * If the EFRO policy has changed, we need to recalculate the MSR
+         * interception bitmaps.
+         */
+        call_policy_changed = (is_hvm_domain(d) &&
+                               ((old_e7d ^ p->extd.raw[7].d) &
+                                cpufeat_mask(X86_FEATURE_EFRO)));
+        break;
+
     case 0x80000008:
         /*
          * If the IBPB policy has changed, we need to recalculate the MSR
--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -613,6 +613,7 @@ static void svm_cpuid_policy_changed(str
     struct vmcb_struct *vmcb = svm->vmcb;
     const struct cpuid_policy *cp = v->domain->arch.cpuid;
     u32 bitmap = vmcb_get_exception_intercepts(vmcb);
+    unsigned int mode;
 
     if ( opt_hvm_fep ||
          (v->domain->arch.cpuid->x86_vendor != boot_cpu_data.x86_vendor) )
@@ -625,6 +626,17 @@ static void svm_cpuid_policy_changed(str
     /* Give access to MSR_PRED_CMD if the guest has been told about it. */
     svm_intercept_msr(v, MSR_PRED_CMD,
                       cp->extd.ibpb ? MSR_INTERCEPT_NONE : MSR_INTERCEPT_RW);
+
+    /* Allow direct reads from APERF/MPERF if permitted by the policy. */
+    mode = cp->basic.raw[6].c & CPUID6_ECX_APERFMPERF_CAPABILITY
+           ? MSR_INTERCEPT_WRITE : MSR_INTERCEPT_RW;
+    svm_intercept_msr(v, MSR_IA32_APERF, mode);
+    svm_intercept_msr(v, MSR_IA32_MPERF, mode);
+
+    /* Allow direct access to their r/o counterparts if permitted. */
+    mode = cp->extd.efro ? MSR_INTERCEPT_NONE : MSR_INTERCEPT_RW;
+    svm_intercept_msr(v, MSR_APERF_RD_ONLY, mode);
+    svm_intercept_msr(v, MSR_MPERF_RD_ONLY, mode);
 }
 
 void svm_sync_vmcb(struct vcpu *v, enum vmcb_sync_state new_state)
--- a/xen/arch/x86/hvm/svm/vmcb.c
+++ b/xen/arch/x86/hvm/svm/vmcb.c
@@ -92,6 +92,7 @@ static int construct_vmcb(struct vcpu *v
         return -ENOMEM;
     memset(svm->msrpm, 0xff, MSRPM_SIZE);
 
+    svm_intercept_msr(v, MSR_IA32_TSC, MSR_INTERCEPT_WRITE);
     svm_disable_intercept_for_msr(v, MSR_FS_BASE);
     svm_disable_intercept_for_msr(v, MSR_GS_BASE);
     svm_disable_intercept_for_msr(v, MSR_SHADOW_GS_BASE);
--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -1081,6 +1081,7 @@ static int construct_vmcs(struct vcpu *v
         v->arch.hvm.vmx.msr_bitmap = msr_bitmap;
         __vmwrite(MSR_BITMAP, virt_to_maddr(msr_bitmap));
 
+        vmx_clear_msr_intercept(v, MSR_IA32_TSC, VMX_MSR_R);
         vmx_clear_msr_intercept(v, MSR_FS_BASE, VMX_MSR_RW);
         vmx_clear_msr_intercept(v, MSR_GS_BASE, VMX_MSR_RW);
         vmx_clear_msr_intercept(v, MSR_SHADOW_GS_BASE, VMX_MSR_RW);
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -578,6 +578,18 @@ static void vmx_cpuid_policy_changed(str
         vmx_clear_msr_intercept(v, MSR_FLUSH_CMD, VMX_MSR_RW);
     else
         vmx_set_msr_intercept(v, MSR_FLUSH_CMD, VMX_MSR_RW);
+
+    /* Allow direct reads from APERF/MPERF if permitted by the policy. */
+    if ( cp->basic.raw[6].c & CPUID6_ECX_APERFMPERF_CAPABILITY )
+    {
+        vmx_clear_msr_intercept(v, MSR_IA32_APERF, VMX_MSR_R);
+        vmx_clear_msr_intercept(v, MSR_IA32_MPERF, VMX_MSR_R);
+    }
+    else
+    {
+        vmx_set_msr_intercept(v, MSR_IA32_APERF, VMX_MSR_R);
+        vmx_set_msr_intercept(v, MSR_IA32_MPERF, VMX_MSR_R);
+    }
 }
 
 int vmx_guest_x86_mode(struct vcpu *v)
--- a/xen/include/public/arch-x86/cpufeatureset.h
+++ b/xen/include/public/arch-x86/cpufeatureset.h
@@ -242,7 +242,7 @@ XEN_CPUFEATURE(MOVDIR64B,     6*32+28) /
 
 /* AMD-defined CPU features, CPUID level 0x80000007.edx, word 7 */
 XEN_CPUFEATURE(ITSC,          7*32+ 8) /*   Invariant TSC */
-XEN_CPUFEATURE(EFRO,          7*32+10) /*   APERF/MPERF Read Only interface */
+XEN_CPUFEATURE(EFRO,          7*32+10) /*S  APERF/MPERF Read Only interface */
 
 /* AMD-defined CPU features, CPUID level 0x80000008.ebx, word 8 */
 XEN_CPUFEATURE(CLZERO,        8*32+ 0) /*A  CLZERO instruction */


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel
Re: [Xen-devel] [PATCH v3 8/8] x86/HVM: don't needlessly intercept APERF/MPERF/TSC MSR reads
Posted by Boris Ostrovsky 4 years, 6 months ago
On 9/3/19 5:42 AM, Jan Beulich wrote:
>
> For TSC I see little point in making the intercepts dynamic, hence they
> get established right when a VMCB/VMCS gets created.

Why is this not treated in the same manner as rdtsc intercepts?

-boris

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel
Re: [Xen-devel] [PATCH v3 8/8] x86/HVM: don't needlessly intercept APERF/MPERF/TSC MSR reads
Posted by Jan Beulich 4 years, 6 months ago
On 05.09.2019 01:16, Boris Ostrovsky wrote:
> On 9/3/19 5:42 AM, Jan Beulich wrote:
>>
>> For TSC I see little point in making the intercepts dynamic, hence they
>> get established right when a VMCB/VMCS gets created.
> 
> Why is this not treated in the same manner as rdtsc intercepts?

Oh, indeed - I'll change this.

Jan

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel