ERETU, unlike IRET, requires the sticky-1 bit (bit 1, mask 0x2) to be set, and
reserved bits to be clear. Notably this means that dom0_construct() must set
X86_EFLAGS_MBS in order for a PV dom0 to start.
Xen has been overly lax with reserved bit handling. Adjust
arch_set_info_guest*() and hypercall_iret() which consume flags to clamp the
reserved bits for all guest types.
This is a minor ABI change, but by the same argument as commit
9f892f84c279 ("x86/domctl: Stop using XLAT_cpu_user_regs()"), the reserved
bits would get clamped naturally by hardware when the vCPU is run.
This allows PV guests to start when Xen is using FRED mode.
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: Jan Beulich <JBeulich@suse.com>
CC: Roger Pau Monné <roger.pau@citrix.com>
v3:
* Rewrite the commit message.
v2:
* New
The handling of VM is complicated.
It turns out that it's simply ignored by IRET in Long Mode (i.e. clearing it in
commit 0e47f92b0725 ("x86: force EFLAGS.IF on when exiting to PV guests")
wasn't actually necessary) but ERETU does care.
But, it's unclear how to handle this in arch_set_info(). We must preserve
it for HVM guests (which can use vm86 mode). PV32 has special handling but
only in hypercall_iret(), not in arch_set_info().
---
xen/arch/x86/domain.c | 4 ++--
xen/arch/x86/hvm/domain.c | 4 ++--
xen/arch/x86/include/asm/x86-defns.h | 7 +++++++
xen/arch/x86/pv/dom0_build.c | 2 +-
xen/arch/x86/pv/iret.c | 8 +++++---
5 files changed, 17 insertions(+), 8 deletions(-)
diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index ce08f91be3af..423d0a6af4f3 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -1273,7 +1273,7 @@ int arch_set_info_guest(
v->arch.user_regs.rax = c.nat->user_regs.rax;
v->arch.user_regs.rip = c.nat->user_regs.rip;
v->arch.user_regs.cs = c.nat->user_regs.cs;
- v->arch.user_regs.rflags = c.nat->user_regs.rflags;
+ v->arch.user_regs.rflags = (c.nat->user_regs.rflags & X86_EFLAGS_ALL) | X86_EFLAGS_MBS;
v->arch.user_regs.rsp = c.nat->user_regs.rsp;
v->arch.user_regs.ss = c.nat->user_regs.ss;
v->arch.pv.es = c.nat->user_regs.es;
@@ -1297,7 +1297,7 @@ int arch_set_info_guest(
v->arch.user_regs.eax = c.cmp->user_regs.eax;
v->arch.user_regs.eip = c.cmp->user_regs.eip;
v->arch.user_regs.cs = c.cmp->user_regs.cs;
- v->arch.user_regs.eflags = c.cmp->user_regs.eflags;
+ v->arch.user_regs.eflags = (c.cmp->user_regs.eflags & X86_EFLAGS_ALL) | X86_EFLAGS_MBS;
v->arch.user_regs.esp = c.cmp->user_regs.esp;
v->arch.user_regs.ss = c.cmp->user_regs.ss;
v->arch.pv.es = c.cmp->user_regs.es;
diff --git a/xen/arch/x86/hvm/domain.c b/xen/arch/x86/hvm/domain.c
index 048f29ae4911..1e874d598952 100644
--- a/xen/arch/x86/hvm/domain.c
+++ b/xen/arch/x86/hvm/domain.c
@@ -194,7 +194,7 @@ int arch_set_info_hvm_guest(struct vcpu *v, const struct vcpu_hvm_context *ctx)
uregs->rsi = regs->esi;
uregs->rdi = regs->edi;
uregs->rip = regs->eip;
- uregs->rflags = regs->eflags;
+ uregs->rflags = (regs->eflags & X86_EFLAGS_ALL) | X86_EFLAGS_MBS;
v->arch.hvm.guest_cr[0] = regs->cr0;
v->arch.hvm.guest_cr[3] = regs->cr3;
@@ -245,7 +245,7 @@ int arch_set_info_hvm_guest(struct vcpu *v, const struct vcpu_hvm_context *ctx)
uregs->rsi = regs->rsi;
uregs->rdi = regs->rdi;
uregs->rip = regs->rip;
- uregs->rflags = regs->rflags;
+ uregs->rflags = (regs->rflags & X86_EFLAGS_ALL) | X86_EFLAGS_MBS;
v->arch.hvm.guest_cr[0] = regs->cr0;
v->arch.hvm.guest_cr[3] = regs->cr3;
diff --git a/xen/arch/x86/include/asm/x86-defns.h b/xen/arch/x86/include/asm/x86-defns.h
index 0a0ba83de786..edeb0b4ff95a 100644
--- a/xen/arch/x86/include/asm/x86-defns.h
+++ b/xen/arch/x86/include/asm/x86-defns.h
@@ -27,6 +27,13 @@
(X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF | \
X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_OF)
+#define X86_EFLAGS_ALL \
+ (X86_EFLAGS_ARITH_MASK | X86_EFLAGS_TF | X86_EFLAGS_IF | \
+ X86_EFLAGS_DF | X86_EFLAGS_OF | X86_EFLAGS_IOPL | \
+ X86_EFLAGS_NT | X86_EFLAGS_RF | X86_EFLAGS_VM | \
+ X86_EFLAGS_AC | X86_EFLAGS_VIF | X86_EFLAGS_VIP | \
+ X86_EFLAGS_ID)
+
/*
* Intel CPU flags in CR0
*/
diff --git a/xen/arch/x86/pv/dom0_build.c b/xen/arch/x86/pv/dom0_build.c
index 21158ce1812e..f9bbbea2ff70 100644
--- a/xen/arch/x86/pv/dom0_build.c
+++ b/xen/arch/x86/pv/dom0_build.c
@@ -1021,7 +1021,7 @@ static int __init dom0_construct(const struct boot_domain *bd)
regs->rip = parms.virt_entry;
regs->rsp = vstack_end;
regs->rsi = vstartinfo_start;
- regs->eflags = X86_EFLAGS_IF;
+ regs->eflags = X86_EFLAGS_IF | X86_EFLAGS_MBS;
/*
* We don't call arch_set_info_guest(), so some initialisation needs doing
diff --git a/xen/arch/x86/pv/iret.c b/xen/arch/x86/pv/iret.c
index d3a1fb2c685b..39ce316b8d91 100644
--- a/xen/arch/x86/pv/iret.c
+++ b/xen/arch/x86/pv/iret.c
@@ -80,8 +80,9 @@ long do_iret(void)
regs->rip = iret_saved.rip;
regs->cs = iret_saved.cs | 3; /* force guest privilege */
- regs->rflags = ((iret_saved.rflags & ~(X86_EFLAGS_IOPL|X86_EFLAGS_VM))
- | X86_EFLAGS_IF);
+ regs->rflags = ((iret_saved.rflags & X86_EFLAGS_ALL &
+ ~(X86_EFLAGS_IOPL | X86_EFLAGS_VM)) |
+ X86_EFLAGS_IF | X86_EFLAGS_MBS);
regs->rsp = iret_saved.rsp;
regs->ss = iret_saved.ss | 3; /* force guest privilege */
@@ -143,7 +144,8 @@ int compat_iret(void)
if ( VM_ASSIST(v->domain, architectural_iopl) )
v->arch.pv.iopl = eflags & X86_EFLAGS_IOPL;
- regs->eflags = (eflags & ~X86_EFLAGS_IOPL) | X86_EFLAGS_IF;
+ regs->eflags = ((eflags & X86_EFLAGS_ALL & ~X86_EFLAGS_IOPL) |
+ X86_EFLAGS_IF | X86_EFLAGS_MBS);
if ( unlikely(eflags & X86_EFLAGS_VM) )
{
--
2.39.5