[XEN][PATCH 3/3] x86/hvm: vmx: refactor cache disable mode data

Posted by Grygorii Strashko 1 month, 2 weeks ago
From: Grygorii Strashko <grygorii_strashko@epam.com>

The Cache Disable mode data is used only by VMX code, so move it from
common HVM structures into VMX specific structures:
- move "uc_lock", "is_in_uc_mode" fields from struct hvm_domain to struct
vmx_domain;
- move "cache_mode" field from struct hvm_vcpu to struct vmx_vcpu.

Hence, the "is_in_uc_mode" field is used directly in mm/shadow/multi.c
_sh_propagate(), introduce the hvm_is_in_uc_mode() macro to avoid direct
access to this field and account for INTEL_VMX configuration.
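
A minimal sketch of the intended expansion (assuming using_vmx() mirrors the
using_svm() helper visible in the hvm.h hunk below, i.e. is gated on
CONFIG_INTEL_VMX; this assumption is illustrative, not part of the patch):

    /* Assumed shape of using_vmx(), paralleling using_svm(). */
    static inline bool using_vmx(void)
    {
        return IS_ENABLED(CONFIG_INTEL_VMX) && cpu_has_vmx;
    }

    /* The predicate then short-circuits to false for non-VMX domains. */
    #define hvm_is_in_uc_mode(d) \
        (using_vmx() && (d)->arch.hvm.vmx.is_in_uc_mode)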

Signed-off-by: Grygorii Strashko <grygorii_strashko@epam.com>
---
 xen/arch/x86/hvm/hvm.c                  |  1 -
 xen/arch/x86/hvm/vmx/vmx.c              | 29 +++++++++++++------------
 xen/arch/x86/include/asm/hvm/domain.h   |  6 -----
 xen/arch/x86/include/asm/hvm/hvm.h      |  3 +++
 xen/arch/x86/include/asm/hvm/vcpu.h     |  3 ---
 xen/arch/x86/include/asm/hvm/vmx/vmcs.h | 14 ++++++++++++
 xen/arch/x86/include/asm/mtrr.h         |  3 ---
 xen/arch/x86/mm/shadow/multi.c          |  2 +-
 8 files changed, 33 insertions(+), 28 deletions(-)

diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 9caca93e5f56..c09fb2ba6873 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -608,7 +608,6 @@ int hvm_domain_initialise(struct domain *d,
     }
 
     spin_lock_init(&d->arch.hvm.irq_lock);
-    spin_lock_init(&d->arch.hvm.uc_lock);
     spin_lock_init(&d->arch.hvm.write_map.lock);
     rwlock_init(&d->arch.hvm.mmcfg_lock);
     INIT_LIST_HEAD(&d->arch.hvm.write_map.list);
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 05b394840e59..23dbf60ec37f 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -583,6 +583,7 @@ static int cf_check vmx_domain_initialise(struct domain *d)
     int rc;
 
     d->arch.ctxt_switch = &csw;
+    spin_lock_init(&d->arch.hvm.vmx.uc_lock);
 
     /*
      * Work around CVE-2018-12207?  The hardware domain is already permitted
@@ -1432,7 +1433,7 @@ static bool domain_exit_uc_mode(struct vcpu *v)
     {
         if ( (vs == v) || !vs->is_initialised )
             continue;
-        if ( (vs->arch.hvm.cache_mode == NO_FILL_CACHE_MODE) ||
+        if ( (vs->arch.hvm.vmx.cache_mode == NO_FILL_CACHE_MODE) ||
              mtrr_pat_not_equal(vs, v) )
             return false;
     }
@@ -1442,7 +1443,7 @@ static bool domain_exit_uc_mode(struct vcpu *v)
 
 static void hvm_set_uc_mode(struct vcpu *v, bool is_in_uc_mode)
 {
-    v->domain->arch.hvm.is_in_uc_mode = is_in_uc_mode;
+    v->domain->arch.hvm.vmx.is_in_uc_mode = is_in_uc_mode;
     shadow_blow_tables_per_domain(v->domain);
 }
 
@@ -1451,10 +1452,10 @@ static void hvm_shadow_handle_cd(struct vcpu *v, unsigned long value)
     if ( value & X86_CR0_CD )
     {
         /* Entering no fill cache mode. */
-        spin_lock(&v->domain->arch.hvm.uc_lock);
-        v->arch.hvm.cache_mode = NO_FILL_CACHE_MODE;
+        spin_lock(&v->domain->arch.hvm.vmx.uc_lock);
+        v->arch.hvm.vmx.cache_mode = NO_FILL_CACHE_MODE;
 
-        if ( !v->domain->arch.hvm.is_in_uc_mode )
+        if ( !v->domain->arch.hvm.vmx.is_in_uc_mode )
         {
             domain_pause_nosync(v->domain);
 
@@ -1464,26 +1465,26 @@ static void hvm_shadow_handle_cd(struct vcpu *v, unsigned long value)
 
             domain_unpause(v->domain);
         }
-        spin_unlock(&v->domain->arch.hvm.uc_lock);
+        spin_unlock(&v->domain->arch.hvm.vmx.uc_lock);
     }
     else if ( !(value & X86_CR0_CD) &&
-              (v->arch.hvm.cache_mode == NO_FILL_CACHE_MODE) )
+              (v->arch.hvm.vmx.cache_mode == NO_FILL_CACHE_MODE) )
     {
         /* Exit from no fill cache mode. */
-        spin_lock(&v->domain->arch.hvm.uc_lock);
-        v->arch.hvm.cache_mode = NORMAL_CACHE_MODE;
+        spin_lock(&v->domain->arch.hvm.vmx.uc_lock);
+        v->arch.hvm.vmx.cache_mode = NORMAL_CACHE_MODE;
 
         if ( domain_exit_uc_mode(v) )
             hvm_set_uc_mode(v, false);
 
-        spin_unlock(&v->domain->arch.hvm.uc_lock);
+        spin_unlock(&v->domain->arch.hvm.vmx.uc_lock);
     }
 }
 
 static int cf_check vmx_set_guest_pat(struct vcpu *v, u64 gpat)
 {
     if ( !paging_mode_hap(v->domain) ||
-         unlikely(v->arch.hvm.cache_mode == NO_FILL_CACHE_MODE) )
+         unlikely(v->arch.hvm.vmx.cache_mode == NO_FILL_CACHE_MODE) )
         return 0;
 
     vmx_vmcs_enter(v);
@@ -1495,7 +1496,7 @@ static int cf_check vmx_set_guest_pat(struct vcpu *v, u64 gpat)
 static int cf_check vmx_get_guest_pat(struct vcpu *v, u64 *gpat)
 {
     if ( !paging_mode_hap(v->domain) ||
-         unlikely(v->arch.hvm.cache_mode == NO_FILL_CACHE_MODE) )
+         unlikely(v->arch.hvm.vmx.cache_mode == NO_FILL_CACHE_MODE) )
         return 0;
 
     vmx_vmcs_enter(v);
@@ -1541,11 +1542,11 @@ static void cf_check vmx_handle_cd(struct vcpu *v, unsigned long value)
 
             wbinvd();               /* flush possibly polluted cache */
             hvm_asid_flush_vcpu(v); /* invalidate memory type cached in TLB */
-            v->arch.hvm.cache_mode = NO_FILL_CACHE_MODE;
+            v->arch.hvm.vmx.cache_mode = NO_FILL_CACHE_MODE;
         }
         else
         {
-            v->arch.hvm.cache_mode = NORMAL_CACHE_MODE;
+            v->arch.hvm.vmx.cache_mode = NORMAL_CACHE_MODE;
             vmx_set_guest_pat(v, *pat);
             if ( !is_iommu_enabled(v->domain) || iommu_snoop )
                 vmx_clear_msr_intercept(v, MSR_IA32_CR_PAT, VMX_MSR_RW);
diff --git a/xen/arch/x86/include/asm/hvm/domain.h b/xen/arch/x86/include/asm/hvm/domain.h
index 95d9336a28f0..83be2bd1c29c 100644
--- a/xen/arch/x86/include/asm/hvm/domain.h
+++ b/xen/arch/x86/include/asm/hvm/domain.h
@@ -97,12 +97,6 @@ struct hvm_domain {
     /* VRAM dirty support.  Protect with the domain paging lock. */
     struct sh_dirty_vram *dirty_vram;
 
-    /* If one of vcpus of this domain is in no_fill_mode or
-     * mtrr/pat between vcpus is not the same, set is_in_uc_mode
-     */
-    spinlock_t             uc_lock;
-    bool                   is_in_uc_mode;
-
     bool                   is_s3_suspended;
 
     /* Compatibility setting for a bug in x2APIC LDR */
diff --git a/xen/arch/x86/include/asm/hvm/hvm.h b/xen/arch/x86/include/asm/hvm/hvm.h
index 6f174ef658f1..ce31401a5d53 100644
--- a/xen/arch/x86/include/asm/hvm/hvm.h
+++ b/xen/arch/x86/include/asm/hvm/hvm.h
@@ -392,6 +392,9 @@ static inline bool using_svm(void)
     return IS_ENABLED(CONFIG_AMD_SVM) && cpu_has_svm;
 }
 
+#define hvm_is_in_uc_mode(d) \
+    (using_vmx() && (d)->arch.hvm.vmx.is_in_uc_mode)
+
 #ifdef CONFIG_HVM
 
 #define hvm_get_guest_tsc(v) hvm_get_guest_tsc_fixed(v, 0)
diff --git a/xen/arch/x86/include/asm/hvm/vcpu.h b/xen/arch/x86/include/asm/hvm/vcpu.h
index 9ed9eaff3bc5..eae9ac53767b 100644
--- a/xen/arch/x86/include/asm/hvm/vcpu.h
+++ b/xen/arch/x86/include/asm/hvm/vcpu.h
@@ -168,9 +168,6 @@ struct hvm_vcpu {
 
     u8                  evtchn_upcall_vector;
 
-    /* Which cache mode is this VCPU in (CR0:CD/NW)? */
-    u8                  cache_mode;
-
     struct hvm_vcpu_io  hvm_io;
 
     /* Pending hw/sw interrupt (.vector = -1 means nothing pending). */
diff --git a/xen/arch/x86/include/asm/hvm/vmx/vmcs.h b/xen/arch/x86/include/asm/hvm/vmx/vmcs.h
index ecd91389302c..44a535d16207 100644
--- a/xen/arch/x86/include/asm/hvm/vmx/vmcs.h
+++ b/xen/arch/x86/include/asm/hvm/vmx/vmcs.h
@@ -46,7 +46,9 @@ struct ept_data {
 
 #define _VMX_DOMAIN_PML_ENABLED    0
 #define VMX_DOMAIN_PML_ENABLED     (1ul << _VMX_DOMAIN_PML_ENABLED)
+
 struct vmx_domain {
+    spinlock_t uc_lock;
     mfn_t apic_access_mfn;
     /* VMX_DOMAIN_* */
     unsigned int status;
@@ -56,6 +58,12 @@ struct vmx_domain {
      * around CVE-2018-12207 as appropriate.
      */
     bool exec_sp;
+    /*
+     * If one of vcpus of this domain is in no_fill_mode or
+     * mtrr/pat between vcpus is not the same, set is_in_uc_mode.
+     * Protected by uc_lock.
+     */
+    bool is_in_uc_mode;
 };
 
 /*
@@ -93,6 +101,9 @@ struct pi_blocking_vcpu {
     spinlock_t           *lock;
 };
 
+#define NORMAL_CACHE_MODE          0
+#define NO_FILL_CACHE_MODE         2
+
 struct vmx_vcpu {
     /* Physical address of VMCS. */
     paddr_t              vmcs_pa;
@@ -156,6 +167,9 @@ struct vmx_vcpu {
 
     uint8_t              lbr_flags;
 
+    /* Which cache mode is this VCPU in (CR0:CD/NW)? */
+    uint8_t              cache_mode;
+
     /* Bitmask of segments that we can't safely use in virtual 8086 mode */
     uint16_t             vm86_segment_mask;
     /* Shadow CS, SS, DS, ES, FS, GS, TR while in virtual 8086 mode */
diff --git a/xen/arch/x86/include/asm/mtrr.h b/xen/arch/x86/include/asm/mtrr.h
index 25d442659df2..3a5b4f5b6eec 100644
--- a/xen/arch/x86/include/asm/mtrr.h
+++ b/xen/arch/x86/include/asm/mtrr.h
@@ -7,9 +7,6 @@
 #define MEMORY_NUM_TYPES     MTRR_NUM_TYPES
 #define NO_HARDCODE_MEM_TYPE MTRR_NUM_TYPES
 
-#define NORMAL_CACHE_MODE          0
-#define NO_FILL_CACHE_MODE         2
-
 #define INVALID_MEM_TYPE X86_NUM_MT
 
 /* In the Intel processor's MTRR interface, the MTRR type is always held in
diff --git a/xen/arch/x86/mm/shadow/multi.c b/xen/arch/x86/mm/shadow/multi.c
index 7be9c180ec43..03be61e225c0 100644
--- a/xen/arch/x86/mm/shadow/multi.c
+++ b/xen/arch/x86/mm/shadow/multi.c
@@ -559,7 +559,7 @@ _sh_propagate(struct vcpu *v,
         if ( !mmio_mfn &&
              (type = hvm_get_mem_pinned_cacheattr(d, target_gfn, 0)) >= 0 )
             sflags |= pat_type_2_pte_flags(type);
-        else if ( d->arch.hvm.is_in_uc_mode )
+        else if ( hvm_is_in_uc_mode(d) )
             sflags |= pat_type_2_pte_flags(X86_MT_UC);
         else
             if ( iomem_access_permitted(d, mfn_x(target_mfn), mfn_x(target_mfn)) )
-- 
2.34.1
Re: [XEN][PATCH 3/3] x86/hvm: vmx: refactor cache disable mode data
Posted by Jan Beulich 1 month, 2 weeks ago
On 30.10.2025 00:54, Grygorii Strashko wrote:
> From: Grygorii Strashko <grygorii_strashko@epam.com>
> 
> The Cache Disable mode data is used only by VMX code, so move it from
> common HVM structures into VMX specific structures:
> - move "uc_lock", "is_in_uc_mode" fields from struct hvm_domain to struct
> vmx_domain;
> - move "cache_mode" field from struct hvm_vcpu to struct vmx_vcpu.
> 
> However, the "is_in_uc_mode" field is also used directly in mm/shadow/multi.c
> _sh_propagate(), so introduce the hvm_is_in_uc_mode() macro to avoid direct
> access to this field and to account for the INTEL_VMX configuration.
> 
> Signed-off-by: Grygorii Strashko <grygorii_strashko@epam.com>

Requested-by: Andrew ?

> --- a/xen/arch/x86/hvm/vmx/vmx.c
> +++ b/xen/arch/x86/hvm/vmx/vmx.c
> @@ -583,6 +583,7 @@ static int cf_check vmx_domain_initialise(struct domain *d)
>      int rc;
>  
>      d->arch.ctxt_switch = &csw;
> +    spin_lock_init(&d->arch.hvm.vmx.uc_lock);

I don't think this is the best place; in any event it wants to be separated from
adjacent code by a blank line. I'd prefer if it was put ...

>      /*
>       * Work around CVE-2018-12207?  The hardware domain is already permitted

... below this CVE workaround.
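
I.e. roughly (a placement sketch only; the workaround logic itself is elided
here and remains unchanged):

    /*
     * Work around CVE-2018-12207?  The hardware domain is already permitted
     * ...
     */
    ... /* existing CVE-2018-12207 workaround, unchanged */

    spin_lock_init(&d->arch.hvm.vmx.uc_lock);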

> --- a/xen/arch/x86/include/asm/hvm/vmx/vmcs.h
> +++ b/xen/arch/x86/include/asm/hvm/vmx/vmcs.h
> @@ -46,7 +46,9 @@ struct ept_data {
>  
>  #define _VMX_DOMAIN_PML_ENABLED    0
>  #define VMX_DOMAIN_PML_ENABLED     (1ul << _VMX_DOMAIN_PML_ENABLED)
> +
>  struct vmx_domain {
> +    spinlock_t uc_lock;
>      mfn_t apic_access_mfn;
>      /* VMX_DOMAIN_* */
>      unsigned int status;

Any reason to make this the very first field of the struct? It might better
live adjacent to the other field you move; there's going to be some padding
anyway, afaict.

> @@ -56,6 +58,12 @@ struct vmx_domain {
>       * around CVE-2018-12207 as appropriate.
>       */
>      bool exec_sp;
> +    /*
> +     * If one of vcpus of this domain is in no_fill_mode or
> +     * mtrr/pat between vcpus is not the same, set is_in_uc_mode.
> +     * Protected by uc_lock.
> +     */
> +    bool is_in_uc_mode;

Imo, while moving, the is_ prefix could also be dropped. It doesn't convey any
extra information on top of the in_, and I think we typically prefer is_*()
for function(-like) predicates. (I.e. in hvm_is_in_uc_mode() I'm fine with
the name.)

> @@ -93,6 +101,9 @@ struct pi_blocking_vcpu {
>      spinlock_t           *lock;
>  };
>  
> +#define NORMAL_CACHE_MODE          0
> +#define NO_FILL_CACHE_MODE         2

As you necessarily touch all use sites, could we switch to the more common
CACHE_MODE_* on this occasion? Also imo these want to live ...

> @@ -156,6 +167,9 @@ struct vmx_vcpu {
>  
>      uint8_t              lbr_flags;
>  
> +    /* Which cache mode is this VCPU in (CR0:CD/NW)? */
> +    uint8_t              cache_mode;

... right next to the field they belong to.
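
I.e. something along these lines (sketch only; exact names are of course up
for discussion):

    struct vmx_vcpu {
        ...
        uint8_t              lbr_flags;

    /* Which cache mode is this VCPU in (CR0:CD/NW)? */
    #define CACHE_MODE_NORMAL   0
    #define CACHE_MODE_NO_FILL  2
        uint8_t              cache_mode;
        ...
    };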

Jan
Re: [XEN][PATCH 3/3] x86/hvm: vmx: refactor cache disable mode data
Posted by Grygorii Strashko 1 month, 2 weeks ago
Hi Jan,

On 30.10.25 13:23, Jan Beulich wrote:
> On 30.10.2025 00:54, Grygorii Strashko wrote:
>> From: Grygorii Strashko <grygorii_strashko@epam.com>
>>
>> The Cache Disable mode data is used only by VMX code, so move it from
>> common HVM structures into VMX specific structures:
>> - move "uc_lock", "is_in_uc_mode" fields from struct hvm_domain to struct
>> vmx_domain;
>> - move "cache_mode" field from struct hvm_vcpu to struct vmx_vcpu.
>>
>> However, the "is_in_uc_mode" field is also used directly in mm/shadow/multi.c
>> _sh_propagate(), so introduce the hvm_is_in_uc_mode() macro to avoid direct
>> access to this field and to account for the INTEL_VMX configuration.
>>
>> Signed-off-by: Grygorii Strashko <grygorii_strashko@epam.com>
> 
> Requested-by: Andrew ?

ok

> 
>> --- a/xen/arch/x86/hvm/vmx/vmx.c
>> +++ b/xen/arch/x86/hvm/vmx/vmx.c
>> @@ -583,6 +583,7 @@ static int cf_check vmx_domain_initialise(struct domain *d)
>>       int rc;
>>   
>>       d->arch.ctxt_switch = &csw;
>> +    spin_lock_init(&d->arch.hvm.vmx.uc_lock);
> 
> I don't think this is the best place; in any event it wants to be separated from
> adjacent code by a blank line. I'd prefer if it was put ...
> 
>>       /*
>>        * Work around CVE-2018-12207?  The hardware domain is already permitted
> 
> ... below this CVE workaround.

ok

> 
>> --- a/xen/arch/x86/include/asm/hvm/vmx/vmcs.h
>> +++ b/xen/arch/x86/include/asm/hvm/vmx/vmcs.h
>> @@ -46,7 +46,9 @@ struct ept_data {
>>   
>>   #define _VMX_DOMAIN_PML_ENABLED    0
>>   #define VMX_DOMAIN_PML_ENABLED     (1ul << _VMX_DOMAIN_PML_ENABLED)
>> +
>>   struct vmx_domain {
>> +    spinlock_t uc_lock;
>>       mfn_t apic_access_mfn;
>>       /* VMX_DOMAIN_* */
>>       unsigned int status;
> 
> Any reason to make this the very first field of the struct? It might better
> live adjacent to the other field you move; there's going to be some padding
> anyway, afaict.

I've tried to put the fields into existing holes and checked the layout with
pahole.

With the current change it is:
struct vmx_domain {
	spinlock_t                 uc_lock;              /*     0     8 */
	mfn_t                      apic_access_mfn;      /*     8     8 */
	unsigned int               status;               /*    16     4 */
	_Bool                      exec_sp;              /*    20     1 */
	_Bool                      is_in_uc_mode;        /*    21     1 */

	/* size: 24, cachelines: 1, members: 5 */
	/* padding: 2 */
	/* last cacheline: 24 bytes */
};

It seems they can be grouped as below:
struct vmx_domain {
	mfn_t                      apic_access_mfn;      /*     0     8 */
	unsigned int               status;               /*     8     4 */
	_Bool                      exec_sp;              /*    12     1 */
	_Bool                      is_in_uc_mode;        /*    13     1 */

	/* XXX 2 bytes hole, try to pack */

	spinlock_t                 uc_lock;              /*    16     8 */

	/* size: 24, cachelines: 1, members: 5 */
	/* sum members: 22, holes: 1, sum holes: 2 */
	/* last cacheline: 24 bytes */
};

> 
>> @@ -56,6 +58,12 @@ struct vmx_domain {
>>        * around CVE-2018-12207 as appropriate.
>>        */
>>       bool exec_sp;
>> +    /*
>> +     * If one of vcpus of this domain is in no_fill_mode or
>> +     * mtrr/pat between vcpus is not the same, set is_in_uc_mode.
>> +     * Protected by uc_lock.
>> +     */
>> +    bool is_in_uc_mode;
> 
> Imo, while moving, the is_ prefix could also be dropped. It doesn't convey any
> extra information on top of the in_, and I think we typically prefer is_*()
> for function(-like) predicates. (I.e. in hvm_is_in_uc_mode() I'm fine with
> the name.)

ok

> 
>> @@ -93,6 +101,9 @@ struct pi_blocking_vcpu {
>>       spinlock_t           *lock;
>>   };
>>   
>> +#define NORMAL_CACHE_MODE          0
>> +#define NO_FILL_CACHE_MODE         2
> 
> As you necessarily touch all use sites, could we switch to the more common
> CACHE_MODE_* on this occasion? Also imo these want to live ...
> 
>> @@ -156,6 +167,9 @@ struct vmx_vcpu {
>>   
>>       uint8_t              lbr_flags;
>>   
>> +    /* Which cache mode is this VCPU in (CR0:CD/NW)? */
>> +    uint8_t              cache_mode;
> 
> ... right next to the field they belong to.

ok.

-- 
Best regards,
-grygorii
Re: [XEN][PATCH 3/3] x86/hvm: vmx: refactor cache disable mode data
Posted by Jan Beulich 1 month, 2 weeks ago
On 30.10.2025 13:26, Grygorii Strashko wrote:
> On 30.10.25 13:23, Jan Beulich wrote:
>> On 30.10.2025 00:54, Grygorii Strashko wrote:
>>> --- a/xen/arch/x86/include/asm/hvm/vmx/vmcs.h
>>> +++ b/xen/arch/x86/include/asm/hvm/vmx/vmcs.h
>>> @@ -46,7 +46,9 @@ struct ept_data {
>>>   
>>>   #define _VMX_DOMAIN_PML_ENABLED    0
>>>   #define VMX_DOMAIN_PML_ENABLED     (1ul << _VMX_DOMAIN_PML_ENABLED)
>>> +
>>>   struct vmx_domain {
>>> +    spinlock_t uc_lock;
>>>       mfn_t apic_access_mfn;
>>>       /* VMX_DOMAIN_* */
>>>       unsigned int status;
>>
>> Any reason to make this the very first field of the struct? It might better
>> live adjacent to the other field you move; there's going to be some padding
>> anyway, afaict.
> 
> I've tried to put the fields into existing holes and checked the layout with
> pahole.
> 
> With the current change it is:
> struct vmx_domain {
> 	spinlock_t                 uc_lock;              /*     0     8 */
> 	mfn_t                      apic_access_mfn;      /*     8     8 */
> 	unsigned int               status;               /*    16     4 */
> 	_Bool                      exec_sp;              /*    20     1 */
> 	_Bool                      is_in_uc_mode;        /*    21     1 */
> 
> 	/* size: 24, cachelines: 1, members: 5 */
> 	/* padding: 2 */
> 	/* last cacheline: 24 bytes */
> };
> 
> It seems they can be grouped as below:
> struct vmx_domain {
> 	mfn_t                      apic_access_mfn;      /*     0     8 */
> 	unsigned int               status;               /*     8     4 */
> 	_Bool                      exec_sp;              /*    12     1 */
> 	_Bool                      is_in_uc_mode;        /*    13     1 */
> 
> 	/* XXX 2 bytes hole, try to pack */
> 
> 	spinlock_t                 uc_lock;              /*    16     8 */
> 
> 	/* size: 24, cachelines: 1, members: 5 */
> 	/* sum members: 22, holes: 1, sum holes: 2 */
> 	/* last cacheline: 24 bytes */
> };

Yes, this is what I was thinking of.

Jan