From: Grygorii Strashko <grygorii_strashko@epam.com>
The Cache Disable mode data is used only by VMX code, so move it from
the common HVM structures into the VMX-specific ones:
- move the "uc_lock" and "is_in_uc_mode" fields from struct hvm_domain
  to struct vmx_domain;
- move the "cache_mode" field from struct hvm_vcpu to struct vmx_vcpu.
Since the "is_in_uc_mode" field is used directly in mm/shadow/multi.c's
_sh_propagate(), introduce the hvm_is_in_uc_mode() macro to avoid direct
access to this field and to account for the INTEL_VMX configuration.
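For reference, a minimal sketch of the guard this relies on, assuming
using_vmx() mirrors the existing using_svm() helper:

    static inline bool using_vmx(void)
    {
        /* Compile-time false when INTEL_VMX=n, runtime false without VMX. */
        return IS_ENABLED(CONFIG_INTEL_VMX) && cpu_has_vmx;
    }

This way hvm_is_in_uc_mode() reads as false on builds or hardware without
VMX, and the shadow code never needs the VMX-specific field in that case.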
Signed-off-by: Grygorii Strashko <grygorii_strashko@epam.com>
---
xen/arch/x86/hvm/hvm.c | 1 -
xen/arch/x86/hvm/vmx/vmx.c | 29 +++++++++++++------------
xen/arch/x86/include/asm/hvm/domain.h | 6 -----
xen/arch/x86/include/asm/hvm/hvm.h | 3 +++
xen/arch/x86/include/asm/hvm/vcpu.h | 3 ---
xen/arch/x86/include/asm/hvm/vmx/vmcs.h | 14 ++++++++++++
xen/arch/x86/include/asm/mtrr.h | 3 ---
xen/arch/x86/mm/shadow/multi.c | 2 +-
8 files changed, 33 insertions(+), 28 deletions(-)
diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 9caca93e5f56..c09fb2ba6873 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -608,7 +608,6 @@ int hvm_domain_initialise(struct domain *d,
}
spin_lock_init(&d->arch.hvm.irq_lock);
- spin_lock_init(&d->arch.hvm.uc_lock);
spin_lock_init(&d->arch.hvm.write_map.lock);
rwlock_init(&d->arch.hvm.mmcfg_lock);
INIT_LIST_HEAD(&d->arch.hvm.write_map.list);
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 05b394840e59..23dbf60ec37f 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -583,6 +583,7 @@ static int cf_check vmx_domain_initialise(struct domain *d)
int rc;
d->arch.ctxt_switch = &csw;
+ spin_lock_init(&d->arch.hvm.vmx.uc_lock);
/*
* Work around CVE-2018-12207? The hardware domain is already permitted
@@ -1432,7 +1433,7 @@ static bool domain_exit_uc_mode(struct vcpu *v)
{
if ( (vs == v) || !vs->is_initialised )
continue;
- if ( (vs->arch.hvm.cache_mode == NO_FILL_CACHE_MODE) ||
+ if ( (vs->arch.hvm.vmx.cache_mode == NO_FILL_CACHE_MODE) ||
mtrr_pat_not_equal(vs, v) )
return false;
}
@@ -1442,7 +1443,7 @@ static bool domain_exit_uc_mode(struct vcpu *v)
static void hvm_set_uc_mode(struct vcpu *v, bool is_in_uc_mode)
{
- v->domain->arch.hvm.is_in_uc_mode = is_in_uc_mode;
+ v->domain->arch.hvm.vmx.is_in_uc_mode = is_in_uc_mode;
shadow_blow_tables_per_domain(v->domain);
}
@@ -1451,10 +1452,10 @@ static void hvm_shadow_handle_cd(struct vcpu *v, unsigned long value)
if ( value & X86_CR0_CD )
{
/* Entering no fill cache mode. */
- spin_lock(&v->domain->arch.hvm.uc_lock);
- v->arch.hvm.cache_mode = NO_FILL_CACHE_MODE;
+ spin_lock(&v->domain->arch.hvm.vmx.uc_lock);
+ v->arch.hvm.vmx.cache_mode = NO_FILL_CACHE_MODE;
- if ( !v->domain->arch.hvm.is_in_uc_mode )
+ if ( !v->domain->arch.hvm.vmx.is_in_uc_mode )
{
domain_pause_nosync(v->domain);
@@ -1464,26 +1465,26 @@ static void hvm_shadow_handle_cd(struct vcpu *v, unsigned long value)
domain_unpause(v->domain);
}
- spin_unlock(&v->domain->arch.hvm.uc_lock);
+ spin_unlock(&v->domain->arch.hvm.vmx.uc_lock);
}
else if ( !(value & X86_CR0_CD) &&
- (v->arch.hvm.cache_mode == NO_FILL_CACHE_MODE) )
+ (v->arch.hvm.vmx.cache_mode == NO_FILL_CACHE_MODE) )
{
/* Exit from no fill cache mode. */
- spin_lock(&v->domain->arch.hvm.uc_lock);
- v->arch.hvm.cache_mode = NORMAL_CACHE_MODE;
+ spin_lock(&v->domain->arch.hvm.vmx.uc_lock);
+ v->arch.hvm.vmx.cache_mode = NORMAL_CACHE_MODE;
if ( domain_exit_uc_mode(v) )
hvm_set_uc_mode(v, false);
- spin_unlock(&v->domain->arch.hvm.uc_lock);
+ spin_unlock(&v->domain->arch.hvm.vmx.uc_lock);
}
}
static int cf_check vmx_set_guest_pat(struct vcpu *v, u64 gpat)
{
if ( !paging_mode_hap(v->domain) ||
- unlikely(v->arch.hvm.cache_mode == NO_FILL_CACHE_MODE) )
+ unlikely(v->arch.hvm.vmx.cache_mode == NO_FILL_CACHE_MODE) )
return 0;
vmx_vmcs_enter(v);
@@ -1495,7 +1496,7 @@ static int cf_check vmx_set_guest_pat(struct vcpu *v, u64 gpat)
static int cf_check vmx_get_guest_pat(struct vcpu *v, u64 *gpat)
{
if ( !paging_mode_hap(v->domain) ||
- unlikely(v->arch.hvm.cache_mode == NO_FILL_CACHE_MODE) )
+ unlikely(v->arch.hvm.vmx.cache_mode == NO_FILL_CACHE_MODE) )
return 0;
vmx_vmcs_enter(v);
@@ -1541,11 +1542,11 @@ static void cf_check vmx_handle_cd(struct vcpu *v, unsigned long value)
wbinvd(); /* flush possibly polluted cache */
hvm_asid_flush_vcpu(v); /* invalidate memory type cached in TLB */
- v->arch.hvm.cache_mode = NO_FILL_CACHE_MODE;
+ v->arch.hvm.vmx.cache_mode = NO_FILL_CACHE_MODE;
}
else
{
- v->arch.hvm.cache_mode = NORMAL_CACHE_MODE;
+ v->arch.hvm.vmx.cache_mode = NORMAL_CACHE_MODE;
vmx_set_guest_pat(v, *pat);
if ( !is_iommu_enabled(v->domain) || iommu_snoop )
vmx_clear_msr_intercept(v, MSR_IA32_CR_PAT, VMX_MSR_RW);
diff --git a/xen/arch/x86/include/asm/hvm/domain.h b/xen/arch/x86/include/asm/hvm/domain.h
index 95d9336a28f0..83be2bd1c29c 100644
--- a/xen/arch/x86/include/asm/hvm/domain.h
+++ b/xen/arch/x86/include/asm/hvm/domain.h
@@ -97,12 +97,6 @@ struct hvm_domain {
/* VRAM dirty support. Protect with the domain paging lock. */
struct sh_dirty_vram *dirty_vram;
- /* If one of vcpus of this domain is in no_fill_mode or
- * mtrr/pat between vcpus is not the same, set is_in_uc_mode
- */
- spinlock_t uc_lock;
- bool is_in_uc_mode;
-
bool is_s3_suspended;
/* Compatibility setting for a bug in x2APIC LDR */
diff --git a/xen/arch/x86/include/asm/hvm/hvm.h b/xen/arch/x86/include/asm/hvm/hvm.h
index 6f174ef658f1..ce31401a5d53 100644
--- a/xen/arch/x86/include/asm/hvm/hvm.h
+++ b/xen/arch/x86/include/asm/hvm/hvm.h
@@ -392,6 +392,9 @@ static inline bool using_svm(void)
return IS_ENABLED(CONFIG_AMD_SVM) && cpu_has_svm;
}
+#define hvm_is_in_uc_mode(d) \
+ (using_vmx() && (d)->arch.hvm.vmx.is_in_uc_mode)
+
#ifdef CONFIG_HVM
#define hvm_get_guest_tsc(v) hvm_get_guest_tsc_fixed(v, 0)
diff --git a/xen/arch/x86/include/asm/hvm/vcpu.h b/xen/arch/x86/include/asm/hvm/vcpu.h
index 9ed9eaff3bc5..eae9ac53767b 100644
--- a/xen/arch/x86/include/asm/hvm/vcpu.h
+++ b/xen/arch/x86/include/asm/hvm/vcpu.h
@@ -168,9 +168,6 @@ struct hvm_vcpu {
u8 evtchn_upcall_vector;
- /* Which cache mode is this VCPU in (CR0:CD/NW)? */
- u8 cache_mode;
-
struct hvm_vcpu_io hvm_io;
/* Pending hw/sw interrupt (.vector = -1 means nothing pending). */
diff --git a/xen/arch/x86/include/asm/hvm/vmx/vmcs.h b/xen/arch/x86/include/asm/hvm/vmx/vmcs.h
index ecd91389302c..44a535d16207 100644
--- a/xen/arch/x86/include/asm/hvm/vmx/vmcs.h
+++ b/xen/arch/x86/include/asm/hvm/vmx/vmcs.h
@@ -46,7 +46,9 @@ struct ept_data {
#define _VMX_DOMAIN_PML_ENABLED 0
#define VMX_DOMAIN_PML_ENABLED (1ul << _VMX_DOMAIN_PML_ENABLED)
+
struct vmx_domain {
+ spinlock_t uc_lock;
mfn_t apic_access_mfn;
/* VMX_DOMAIN_* */
unsigned int status;
@@ -56,6 +58,12 @@ struct vmx_domain {
* around CVE-2018-12207 as appropriate.
*/
bool exec_sp;
+ /*
+ * If one of vcpus of this domain is in no_fill_mode or
+ * mtrr/pat between vcpus is not the same, set is_in_uc_mode.
+ * Protected by uc_lock.
+ */
+ bool is_in_uc_mode;
};
/*
@@ -93,6 +101,9 @@ struct pi_blocking_vcpu {
spinlock_t *lock;
};
+#define NORMAL_CACHE_MODE 0
+#define NO_FILL_CACHE_MODE 2
+
struct vmx_vcpu {
/* Physical address of VMCS. */
paddr_t vmcs_pa;
@@ -156,6 +167,9 @@ struct vmx_vcpu {
uint8_t lbr_flags;
+ /* Which cache mode is this VCPU in (CR0:CD/NW)? */
+ uint8_t cache_mode;
+
/* Bitmask of segments that we can't safely use in virtual 8086 mode */
uint16_t vm86_segment_mask;
/* Shadow CS, SS, DS, ES, FS, GS, TR while in virtual 8086 mode */
diff --git a/xen/arch/x86/include/asm/mtrr.h b/xen/arch/x86/include/asm/mtrr.h
index 25d442659df2..3a5b4f5b6eec 100644
--- a/xen/arch/x86/include/asm/mtrr.h
+++ b/xen/arch/x86/include/asm/mtrr.h
@@ -7,9 +7,6 @@
#define MEMORY_NUM_TYPES MTRR_NUM_TYPES
#define NO_HARDCODE_MEM_TYPE MTRR_NUM_TYPES
-#define NORMAL_CACHE_MODE 0
-#define NO_FILL_CACHE_MODE 2
-
#define INVALID_MEM_TYPE X86_NUM_MT
/* In the Intel processor's MTRR interface, the MTRR type is always held in
diff --git a/xen/arch/x86/mm/shadow/multi.c b/xen/arch/x86/mm/shadow/multi.c
index 7be9c180ec43..03be61e225c0 100644
--- a/xen/arch/x86/mm/shadow/multi.c
+++ b/xen/arch/x86/mm/shadow/multi.c
@@ -559,7 +559,7 @@ _sh_propagate(struct vcpu *v,
if ( !mmio_mfn &&
(type = hvm_get_mem_pinned_cacheattr(d, target_gfn, 0)) >= 0 )
sflags |= pat_type_2_pte_flags(type);
- else if ( d->arch.hvm.is_in_uc_mode )
+ else if ( hvm_is_in_uc_mode(d) )
sflags |= pat_type_2_pte_flags(X86_MT_UC);
else
if ( iomem_access_permitted(d, mfn_x(target_mfn), mfn_x(target_mfn)) )
--
2.34.1
On 30.10.2025 00:54, Grygorii Strashko wrote:
> From: Grygorii Strashko <grygorii_strashko@epam.com>
>
> The Cache Disable mode data is used only by VMX code, so move it from
> the common HVM structures into the VMX-specific ones:
> - move the "uc_lock" and "is_in_uc_mode" fields from struct hvm_domain
>   to struct vmx_domain;
> - move the "cache_mode" field from struct hvm_vcpu to struct vmx_vcpu.
>
> Since the "is_in_uc_mode" field is used directly in mm/shadow/multi.c's
> _sh_propagate(), introduce the hvm_is_in_uc_mode() macro to avoid direct
> access to this field and to account for the INTEL_VMX configuration.
>
> Signed-off-by: Grygorii Strashko <grygorii_strashko@epam.com>
Requested-by: Andrew ?
> --- a/xen/arch/x86/hvm/vmx/vmx.c
> +++ b/xen/arch/x86/hvm/vmx/vmx.c
> @@ -583,6 +583,7 @@ static int cf_check vmx_domain_initialise(struct domain *d)
> int rc;
>
> d->arch.ctxt_switch = &csw;
> + spin_lock_init(&d->arch.hvm.vmx.uc_lock);
I don't think this is the best place; in any event it wants to be separated from
adjacent code by a blank line. I'd prefer if it was put ...
> /*
> * Work around CVE-2018-12207? The hardware domain is already permitted
... below this CVE workaround.
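I.e. something along these lines (sketch only; surrounding code abbreviated):

    /*
     * Work around CVE-2018-12207?  The hardware domain is already permitted
     * ...
     */
    d->arch.hvm.vmx.exec_sp = ...;

    spin_lock_init(&d->arch.hvm.vmx.uc_lock);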
> --- a/xen/arch/x86/include/asm/hvm/vmx/vmcs.h
> +++ b/xen/arch/x86/include/asm/hvm/vmx/vmcs.h
> @@ -46,7 +46,9 @@ struct ept_data {
>
> #define _VMX_DOMAIN_PML_ENABLED 0
> #define VMX_DOMAIN_PML_ENABLED (1ul << _VMX_DOMAIN_PML_ENABLED)
> +
> struct vmx_domain {
> + spinlock_t uc_lock;
> mfn_t apic_access_mfn;
> /* VMX_DOMAIN_* */
> unsigned int status;
Any reason to make this the very first field of the struct? It might better
live adjacent to the other field you move; there's going to be some padding
anyway, afaict.
> @@ -56,6 +58,12 @@ struct vmx_domain {
> * around CVE-2018-12207 as appropriate.
> */
> bool exec_sp;
> + /*
> + * If one of vcpus of this domain is in no_fill_mode or
> + * mtrr/pat between vcpus is not the same, set is_in_uc_mode.
> + * Protected by uc_lock.
> + */
> + bool is_in_uc_mode;
Imo while moving, the is_ prefix could also be dropped. It doesn't convey any
extra information on top of the in_, and I think we prefer is_*() also as
typically function(-like) predicates. (I.e. in hvm_is_in_uc_mode() I'm fine
with the name.)
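Concretely, the field would become (sketch):

    bool in_uc_mode;

with the wrapper keeping its predicate-style name:

    #define hvm_is_in_uc_mode(d) \
        (using_vmx() && (d)->arch.hvm.vmx.in_uc_mode)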
> @@ -93,6 +101,9 @@ struct pi_blocking_vcpu {
> spinlock_t *lock;
> };
>
> +#define NORMAL_CACHE_MODE 0
> +#define NO_FILL_CACHE_MODE 2
As you necessarily touch all use sites, could we switch to the more common
CACHE_MODE_* at this occasion? Also imo these want to live ...
> @@ -156,6 +167,9 @@ struct vmx_vcpu {
>
> uint8_t lbr_flags;
>
> + /* Which cache mode is this VCPU in (CR0:CD/NW)? */
> + uint8_t cache_mode;
... right next to the field they belong to.
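Taken together, something like (sketch; exact spellings are your call):

    /* CR0:CD/NW cache modes. */
    #define CACHE_MODE_NORMAL   0
    #define CACHE_MODE_NO_FILL  2

    /* Which cache mode is this VCPU in (CR0:CD/NW)? */
    uint8_t cache_mode;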
Jan
Hi Jan,
On 30.10.25 13:23, Jan Beulich wrote:
> On 30.10.2025 00:54, Grygorii Strashko wrote:
>> From: Grygorii Strashko <grygorii_strashko@epam.com>
>>
>> The Cache Disable mode data is used only by VMX code, so move it from
>> the common HVM structures into the VMX-specific ones:
>> - move the "uc_lock" and "is_in_uc_mode" fields from struct hvm_domain
>>   to struct vmx_domain;
>> - move the "cache_mode" field from struct hvm_vcpu to struct vmx_vcpu.
>>
>> Since the "is_in_uc_mode" field is used directly in mm/shadow/multi.c's
>> _sh_propagate(), introduce the hvm_is_in_uc_mode() macro to avoid direct
>> access to this field and to account for the INTEL_VMX configuration.
>>
>> Signed-off-by: Grygorii Strashko <grygorii_strashko@epam.com>
>
> Requested-by: Andrew ?
ok
>
>> --- a/xen/arch/x86/hvm/vmx/vmx.c
>> +++ b/xen/arch/x86/hvm/vmx/vmx.c
>> @@ -583,6 +583,7 @@ static int cf_check vmx_domain_initialise(struct domain *d)
>> int rc;
>>
>> d->arch.ctxt_switch = &csw;
>> + spin_lock_init(&d->arch.hvm.vmx.uc_lock);
>
> I don't think this is the best place; in any event it wants to be separated from
> adjacent code by a blank line. I'd prefer if it was put ...
>
>> /*
>> * Work around CVE-2018-12207? The hardware domain is already permitted
>
> ... below this CVE workaround.
ok
>
>> --- a/xen/arch/x86/include/asm/hvm/vmx/vmcs.h
>> +++ b/xen/arch/x86/include/asm/hvm/vmx/vmcs.h
>> @@ -46,7 +46,9 @@ struct ept_data {
>>
>> #define _VMX_DOMAIN_PML_ENABLED 0
>> #define VMX_DOMAIN_PML_ENABLED (1ul << _VMX_DOMAIN_PML_ENABLED)
>> +
>> struct vmx_domain {
>> + spinlock_t uc_lock;
>> mfn_t apic_access_mfn;
>> /* VMX_DOMAIN_* */
>> unsigned int status;
>
> Any reason to make this the very first field of the struct? It might better
> live adjacent to the other field you move; there's going to be some padding
> anyway, afaict.
I've tried to put the fields into existing holes and checked with pahole.
With the current change it is:
struct vmx_domain {
spinlock_t uc_lock; /* 0 8 */
mfn_t apic_access_mfn; /* 8 8 */
unsigned int status; /* 16 4 */
_Bool exec_sp; /* 20 1 */
_Bool is_in_uc_mode; /* 21 1 */
/* size: 24, cachelines: 1, members: 5 */
/* padding: 2 */
/* last cacheline: 24 bytes */
};
It seems they can be grouped like below:
struct vmx_domain {
mfn_t apic_access_mfn; /* 0 8 */
unsigned int status; /* 8 4 */
_Bool exec_sp; /* 12 1 */
_Bool is_in_uc_mode; /* 13 1 */
/* XXX 2 bytes hole, try to pack */
spinlock_t uc_lock; /* 16 8 */
/* size: 24, cachelines: 1, members: 5 */
/* sum members: 22, holes: 1, sum holes: 2 */
/* last cacheline: 24 bytes */
};
>
>> @@ -56,6 +58,12 @@ struct vmx_domain {
>> * around CVE-2018-12207 as appropriate.
>> */
>> bool exec_sp;
>> + /*
>> + * If one of vcpus of this domain is in no_fill_mode or
>> + * mtrr/pat between vcpus is not the same, set is_in_uc_mode.
>> + * Protected by uc_lock.
>> + */
>> + bool is_in_uc_mode;
>
> Imo while moving, the is_ prefix could also be dropped. It doesn't convey any
> extra information on top of the in_, and I think we prefer is_*() also as
> typically function(-like) predicates. (I.e. in hvm_is_in_uc_mode() I'm fine
> with the name.)
ok
>
>> @@ -93,6 +101,9 @@ struct pi_blocking_vcpu {
>> spinlock_t *lock;
>> };
>>
>> +#define NORMAL_CACHE_MODE 0
>> +#define NO_FILL_CACHE_MODE 2
>
> As you necessarily touch all use sites, could we switch to the more common
> CACHE_MODE_* at this occasion? Also imo these want to live ...
>
>> @@ -156,6 +167,9 @@ struct vmx_vcpu {
>>
>> uint8_t lbr_flags;
>>
>> + /* Which cache mode is this VCPU in (CR0:CD/NW)? */
>> + uint8_t cache_mode;
>
> ... right next to the field they belong to.
ok.
--
Best regards,
-grygorii
On 30.10.2025 13:26, Grygorii Strashko wrote:
> On 30.10.25 13:23, Jan Beulich wrote:
>> On 30.10.2025 00:54, Grygorii Strashko wrote:
>>> --- a/xen/arch/x86/include/asm/hvm/vmx/vmcs.h
>>> +++ b/xen/arch/x86/include/asm/hvm/vmx/vmcs.h
>>> @@ -46,7 +46,9 @@ struct ept_data {
>>>
>>> #define _VMX_DOMAIN_PML_ENABLED 0
>>> #define VMX_DOMAIN_PML_ENABLED (1ul << _VMX_DOMAIN_PML_ENABLED)
>>> +
>>> struct vmx_domain {
>>> + spinlock_t uc_lock;
>>> mfn_t apic_access_mfn;
>>> /* VMX_DOMAIN_* */
>>> unsigned int status;
>>
>> Any reason to make this the very first field of the struct? It might better
>> live adjacent to the other field you move; there's going to be some padding
>> anyway, afaict.
>
> I've tried to put the fields into existing holes and checked with pahole.
>
> With the current change it is:
> struct vmx_domain {
> spinlock_t uc_lock; /* 0 8 */
> mfn_t apic_access_mfn; /* 8 8 */
> unsigned int status; /* 16 4 */
> _Bool exec_sp; /* 20 1 */
> _Bool is_in_uc_mode; /* 21 1 */
>
> /* size: 24, cachelines: 1, members: 5 */
> /* padding: 2 */
> /* last cacheline: 24 bytes */
> };
>
> It seems they can be grouped like below:
> struct vmx_domain {
> mfn_t apic_access_mfn; /* 0 8 */
> unsigned int status; /* 8 4 */
> _Bool exec_sp; /* 12 1 */
> _Bool is_in_uc_mode; /* 13 1 */
>
> /* XXX 2 bytes hole, try to pack */
>
> spinlock_t uc_lock; /* 16 8 */
>
> /* size: 24, cachelines: 1, members: 5 */
> /* sum members: 22, holes: 1, sum holes: 2 */
> /* last cacheline: 24 bytes */
> };
Yes, this is what I was thinking of.
Jan