hvm_{get,set}_guest_bndcfgs() are thin wrappers around accessing MSR_BNDCFGS.
MPX was implemented on Skylake uarch CPUs and dropped in subsequent CPUs, and
is disabled by default in Xen VMs.
It would be nice to move all the logic into vmx_msr_{read,write}_intercept(),
but the common HVM migration code uses guest_{rd,wr}msr(). Therefore, use
{get,set}_regs() to reduce the quantity of "common" HVM code.
In lieu of having hvm_set_guest_bndcfgs() split out, use some #ifdef
CONFIG_HVM in guest_wrmsr(). In vmx_{get,set}_regs(), split the switch
statements into two depending on whether they require remote VMCS acquisition
or not.
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: Jan Beulich <JBeulich@suse.com>
CC: Roger Pau Monné <roger.pau@citrix.com>
CC: Wei Liu <wl@xen.org>
CC: Jun Nakajima <jun.nakajima@intel.com>
CC: Kevin Tian <kevin.tian@intel.com>
This counteracts the hvm_funcs size increase from {get,set}_regs(), and shows
how to use the new functionality to clean the HVM logic up.
---
xen/arch/x86/hvm/hvm.c | 37 --------------------------
xen/arch/x86/hvm/vmx/vmx.c | 54 ++++++++++++++++++--------------------
xen/arch/x86/include/asm/hvm/hvm.h | 12 ---------
xen/arch/x86/msr.c | 34 +++++++++++++++++++-----
4 files changed, 53 insertions(+), 84 deletions(-)
diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index b530e986e86c..d7d3299b431e 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -324,43 +324,6 @@ int hvm_set_guest_pat(struct vcpu *v, uint64_t guest_pat)
return 1;
}
-bool hvm_set_guest_bndcfgs(struct vcpu *v, u64 val)
-{
- if ( !hvm_funcs.set_guest_bndcfgs ||
- !is_canonical_address(val) ||
- (val & IA32_BNDCFGS_RESERVED) )
- return false;
-
- /*
- * While MPX instructions are supposed to be gated on XCR0.BND*, let's
- * nevertheless force the relevant XCR0 bits on when the feature is being
- * enabled in BNDCFGS.
- */
- if ( (val & IA32_BNDCFGS_ENABLE) &&
- !(v->arch.xcr0_accum & (X86_XCR0_BNDREGS | X86_XCR0_BNDCSR)) )
- {
- uint64_t xcr0 = get_xcr0();
- int rc;
-
- if ( v != current )
- return false;
-
- rc = handle_xsetbv(XCR_XFEATURE_ENABLED_MASK,
- xcr0 | X86_XCR0_BNDREGS | X86_XCR0_BNDCSR);
-
- if ( rc )
- {
- HVM_DBG_LOG(DBG_LEVEL_1, "Failed to force XCR0.BND*: %d", rc);
- return false;
- }
-
- if ( handle_xsetbv(XCR_XFEATURE_ENABLED_MASK, xcr0) )
- /* nothing, best effort only */;
- }
-
- return alternative_call(hvm_funcs.set_guest_bndcfgs, v, val);
-}
-
/*
* Get the ratio to scale host TSC frequency to gtsc_khz. zero will be
* returned if TSC scaling is unavailable or ratio cannot be handled
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 69e38d0fa8f9..8c55e56cbddb 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -1212,28 +1212,6 @@ static int vmx_get_guest_pat(struct vcpu *v, u64 *gpat)
return 1;
}
-static bool vmx_set_guest_bndcfgs(struct vcpu *v, u64 val)
-{
- ASSERT(cpu_has_mpx && cpu_has_vmx_mpx);
-
- vmx_vmcs_enter(v);
- __vmwrite(GUEST_BNDCFGS, val);
- vmx_vmcs_exit(v);
-
- return true;
-}
-
-static bool vmx_get_guest_bndcfgs(struct vcpu *v, u64 *val)
-{
- ASSERT(cpu_has_mpx && cpu_has_vmx_mpx);
-
- vmx_vmcs_enter(v);
- __vmread(GUEST_BNDCFGS, val);
- vmx_vmcs_exit(v);
-
- return true;
-}
-
static void vmx_handle_cd(struct vcpu *v, unsigned long value)
{
if ( !paging_mode_hap(v->domain) )
@@ -2432,6 +2410,7 @@ static uint64_t vmx_get_reg(struct vcpu *v, unsigned int reg)
uint64_t val = 0;
int rc;
+ /* Logic which doesn't require remote VMCS acquisition. */
switch ( reg )
{
case MSR_SPEC_CTRL:
@@ -2443,13 +2422,25 @@ static uint64_t vmx_get_reg(struct vcpu *v, unsigned int reg)
domain_crash(d);
}
return val;
+ }
+
+ /* Logic which maybe requires remote VMCS acquisition. */
+ vmx_vmcs_enter(v);
+ switch ( reg )
+ {
+ case MSR_IA32_BNDCFGS:
+ __vmread(GUEST_BNDCFGS, &val);
+ break;
default:
printk(XENLOG_G_ERR "%s(%pv, 0x%08x) Bad register\n",
__func__, v, reg);
domain_crash(d);
- return 0;
+ break;
}
+ vmx_vmcs_exit(v);
+
+ return val;
}
static void vmx_set_reg(struct vcpu *v, unsigned int reg, uint64_t val)
@@ -2457,6 +2448,7 @@ static void vmx_set_reg(struct vcpu *v, unsigned int reg, uint64_t val)
struct domain *d = v->domain;
int rc;
+ /* Logic which doesn't require remote VMCS acquisition. */
switch ( reg )
{
case MSR_SPEC_CTRL:
@@ -2467,6 +2459,15 @@ static void vmx_set_reg(struct vcpu *v, unsigned int reg, uint64_t val)
__func__, v, reg, rc);
domain_crash(d);
}
+ return;
+ }
+
+ /* Logic which maybe requires remote VMCS acquisition. */
+ vmx_vmcs_enter(v);
+ switch ( reg )
+ {
+ case MSR_IA32_BNDCFGS:
+ __vmwrite(GUEST_BNDCFGS, val);
break;
default:
@@ -2474,6 +2475,7 @@ static void vmx_set_reg(struct vcpu *v, unsigned int reg, uint64_t val)
__func__, v, reg, val);
domain_crash(d);
}
+ vmx_vmcs_exit(v);
}
static struct hvm_function_table __initdata vmx_function_table = {
@@ -2796,12 +2798,6 @@ const struct hvm_function_table * __init start_vmx(void)
vmx_function_table.tsc_scaling.setup = vmx_setup_tsc_scaling;
}
- if ( cpu_has_mpx && cpu_has_vmx_mpx )
- {
- vmx_function_table.set_guest_bndcfgs = vmx_set_guest_bndcfgs;
- vmx_function_table.get_guest_bndcfgs = vmx_get_guest_bndcfgs;
- }
-
lbr_tsx_fixup_check();
ler_to_fixup_check();
diff --git a/xen/arch/x86/include/asm/hvm/hvm.h b/xen/arch/x86/include/asm/hvm/hvm.h
index c8b62b514b42..7bb7d0b77d32 100644
--- a/xen/arch/x86/include/asm/hvm/hvm.h
+++ b/xen/arch/x86/include/asm/hvm/hvm.h
@@ -148,9 +148,6 @@ struct hvm_function_table {
int (*get_guest_pat)(struct vcpu *v, u64 *);
int (*set_guest_pat)(struct vcpu *v, u64);
- bool (*get_guest_bndcfgs)(struct vcpu *v, u64 *);
- bool (*set_guest_bndcfgs)(struct vcpu *v, u64);
-
void (*set_tsc_offset)(struct vcpu *v, u64 offset, u64 at_tsc);
void (*inject_event)(const struct x86_event *event);
@@ -291,8 +288,6 @@ void hvm_set_segment_register(struct vcpu *v, enum x86_segment seg,
void hvm_set_info_guest(struct vcpu *v);
-bool hvm_set_guest_bndcfgs(struct vcpu *v, u64 val);
-
int hvm_vmexit_cpuid(struct cpu_user_regs *regs, unsigned int inst_len);
void hvm_migrate_timers(struct vcpu *v);
void hvm_do_resume(struct vcpu *v);
@@ -479,12 +474,6 @@ static inline unsigned long hvm_get_shadow_gs_base(struct vcpu *v)
return alternative_call(hvm_funcs.get_shadow_gs_base, v);
}
-static inline bool hvm_get_guest_bndcfgs(struct vcpu *v, u64 *val)
-{
- return hvm_funcs.get_guest_bndcfgs &&
- alternative_call(hvm_funcs.get_guest_bndcfgs, v, val);
-}
-
#define has_hvm_params(d) \
((d)->arch.hvm.params != NULL)
@@ -768,7 +757,6 @@ int hvm_guest_x86_mode(struct vcpu *v);
unsigned long hvm_get_shadow_gs_base(struct vcpu *v);
void hvm_cpuid_policy_changed(struct vcpu *v);
void hvm_set_tsc_offset(struct vcpu *v, uint64_t offset, uint64_t at_tsc);
-bool hvm_get_guest_bndcfgs(struct vcpu *v, uint64_t *val);
/* End of prototype list */
diff --git a/xen/arch/x86/msr.c b/xen/arch/x86/msr.c
index fd4012808472..9e22404eb24a 100644
--- a/xen/arch/x86/msr.c
+++ b/xen/arch/x86/msr.c
@@ -30,6 +30,7 @@
#include <asm/msr.h>
#include <asm/pv/domain.h>
#include <asm/setup.h>
+#include <asm/xstate.h>
#include <public/hvm/params.h>
@@ -323,10 +324,9 @@ int guest_rdmsr(struct vcpu *v, uint32_t msr, uint64_t *val)
break;
case MSR_IA32_BNDCFGS:
- if ( !cp->feat.mpx || !is_hvm_domain(d) ||
- !hvm_get_guest_bndcfgs(v, val) )
+ if ( !cp->feat.mpx ) /* Implies Intel HVM only */
goto gp_fault;
- break;
+ goto get_reg;
case MSR_IA32_XSS:
if ( !cp->xstate.xsaves )
@@ -594,11 +594,33 @@ int guest_wrmsr(struct vcpu *v, uint32_t msr, uint64_t val)
ret = guest_wrmsr_x2apic(v, msr, val);
break;
+#ifdef CONFIG_HVM
case MSR_IA32_BNDCFGS:
- if ( !cp->feat.mpx || !is_hvm_domain(d) ||
- !hvm_set_guest_bndcfgs(v, val) )
+ if ( !cp->feat.mpx || /* Implies Intel HVM only */
+ !is_canonical_address(val) || (val & IA32_BNDCFGS_RESERVED) )
goto gp_fault;
- break;
+
+ /*
+ * While MPX instructions are supposed to be gated on XCR0.BND*, let's
+ * nevertheless force the relevant XCR0 bits on when the feature is
+ * being enabled in BNDCFGS.
+ */
+ if ( (val & IA32_BNDCFGS_ENABLE) &&
+ !(v->arch.xcr0_accum & (X86_XCR0_BNDREGS | X86_XCR0_BNDCSR)) )
+ {
+ uint64_t xcr0 = get_xcr0();
+
+ if ( v != current ||
+ handle_xsetbv(XCR_XFEATURE_ENABLED_MASK,
+ xcr0 | X86_XCR0_BNDREGS | X86_XCR0_BNDCSR) )
+ goto gp_fault;
+
+ if ( handle_xsetbv(XCR_XFEATURE_ENABLED_MASK, xcr0) )
+ /* nothing, best effort only */;
+ }
+
+ goto set_reg;
+#endif /* CONFIG_HVM */
case MSR_IA32_XSS:
if ( !cp->xstate.xsaves )
--
2.11.0
On 17.01.2022 20:25, Andrew Cooper wrote:
> hvm_{get,set}_guest_bndcfgs() are thin wrappers around accessing MSR_BNDCFGS.
>
> MPX was implemented on Skylake uarch CPUs and dropped in subsequent CPUs, and
> is disabled by default in Xen VMs.
>
> It would be nice to move all the logic into vmx_msr_{read,write}_intercept(),
> but the common HVM migration code uses guest_{rd,wr}msr(). Therefore, use
> {get,set}_regs() to reduce the quantity of "common" HVM code.
>
> In lieu of having hvm_set_guest_bndcfgs() split out, use some #ifdef
> CONFIG_HVM in guest_wrmsr(). In vmx_{get,set}_regs(), split the switch
> statements into two depending on whether they require remote VMCS acquisition
> or not.
>
> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
One remark:
> @@ -323,10 +324,9 @@ int guest_rdmsr(struct vcpu *v, uint32_t msr, uint64_t *val)
> break;
>
> case MSR_IA32_BNDCFGS:
> - if ( !cp->feat.mpx || !is_hvm_domain(d) ||
> - !hvm_get_guest_bndcfgs(v, val) )
> + if ( !cp->feat.mpx ) /* Implies Intel HVM only */
Wouldn't it make sense to accompany this comment by ...
> goto gp_fault;
> - break;
ASSERT(is_hvm_domain(d));
(and then the same on the "set" path)?
Jan
On 19/01/2022 13:50, Jan Beulich wrote:
> On 17.01.2022 20:25, Andrew Cooper wrote:
>> hvm_{get,set}_guest_bndcfgs() are thin wrappers around accessing MSR_BNDCFGS.
>>
>> MPX was implemented on Skylake uarch CPUs and dropped in subsequent CPUs, and
>> is disabled by default in Xen VMs.
>>
>> It would be nice to move all the logic into vmx_msr_{read,write}_intercept(),
>> but the common HVM migration code uses guest_{rd,wr}msr(). Therefore, use
>> {get,set}_regs() to reduce the quantity of "common" HVM code.
>>
>> In lieu of having hvm_set_guest_bndcfgs() split out, use some #ifdef
>> CONFIG_HVM in guest_wrmsr(). In vmx_{get,set}_regs(), split the switch
>> statements into two depending on whether they require remote VMCS acquisition
>> or not.
>>
>> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
> Reviewed-by: Jan Beulich <jbeulich@suse.com>
>
> One remark:
>
>> @@ -323,10 +324,9 @@ int guest_rdmsr(struct vcpu *v, uint32_t msr, uint64_t *val)
>> break;
>>
>> case MSR_IA32_BNDCFGS:
>> - if ( !cp->feat.mpx || !is_hvm_domain(d) ||
>> - !hvm_get_guest_bndcfgs(v, val) )
>> + if ( !cp->feat.mpx ) /* Implies Intel HVM only */
> Wouldn't it make sense to accompany this comment by ...
>
>> goto gp_fault;
>> - break;
> ASSERT(is_hvm_domain(d));
>
> (and then the same on the "set" path)?
So this is the reason for the default logic in the {get,set}_reg()
path. The absence of MSR_BNDCFGS in the PV and SVM paths will cause the
VM to be crashed cleanly. If you're on a VMX on a non-MPX capable
system, the VMREAD/VMWRITE will hit a BUG (which in due course I want to
downgrade to a domain crash).
It's a bit more friendly than an ASSERT() (doesn't take the system
down), is present in release builds too, and more precise as it excludes
SVM too.
~Andrew
P.S. I'm still trying to decide on an acceptable name to hide {
ASSERT_UNREACHABLE(); gprintk(); domain_crash() } behind, so we can
downgrade more BUG()/etc to more runtime-friendly options.
On 19.01.2022 17:53, Andrew Cooper wrote:
> On 19/01/2022 13:50, Jan Beulich wrote:
>> On 17.01.2022 20:25, Andrew Cooper wrote:
>>> @@ -323,10 +324,9 @@ int guest_rdmsr(struct vcpu *v, uint32_t msr, uint64_t *val)
>>> break;
>>>
>>> case MSR_IA32_BNDCFGS:
>>> - if ( !cp->feat.mpx || !is_hvm_domain(d) ||
>>> - !hvm_get_guest_bndcfgs(v, val) )
>>> + if ( !cp->feat.mpx ) /* Implies Intel HVM only */
>> Wouldn't it make sense to accompany this comment by ...
>>
>>> goto gp_fault;
>>> - break;
>> ASSERT(is_hvm_domain(d));
>>
>> (and then the same on the "set" path)?
>
> So this is the reason for the default logic in the {get,set}_reg()
> path. The absence of MSR_BNDCFGS in the PV and SVM paths will cause the
> VM to be crashed cleanly. If you're on a VMX on a non-MPX capable
> system, the VMREAD/VMWRITE will hit a BUG (which in due course I want to
> downgrade to a domain crash).
>
> It's a bit more friendly than an ASSERT() (doesn't take the system
> down), is present in release builds too, and more precise as it excludes
> SVM too.
I see, makes sense.
Jan
© 2016 - 2026 Red Hat, Inc.