Some VM types have characteristics in common; in fact, the only use
of VM types right now is kvm_arch_has_private_mem and it assumes that
_all_ VM types have private memory.
So, let the low bits specify the characteristics of the VM type. As
of we have two special things: whether memory is private, and whether
guest state is protected. The latter is similar to
kvm->arch.guest_state_protected, but the latter is only set on a fully
initialized VM. If both are set, ioctls to set registers will cause
an error---SEV-ES did not do so, which is a problematic API.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20240209183743.22030-7-pbonzini@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
arch/x86/include/asm/kvm_host.h | 9 +++-
arch/x86/kvm/x86.c | 93 +++++++++++++++++++++++++++------
2 files changed, 85 insertions(+), 17 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 0bcd9ae16097..15db2697863c 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -2135,8 +2135,15 @@ void kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd);
void kvm_configure_mmu(bool enable_tdp, int tdp_forced_root_level,
int tdp_max_root_level, int tdp_huge_page_level);
+
+/* Low bits of VM types provide confidential computing capabilities. */
+#define __KVM_X86_PRIVATE_MEM_TYPE 1
+#define __KVM_X86_PROTECTED_STATE_TYPE 2
+#define __KVM_X86_VM_TYPE_FEATURES 3
+static_assert((KVM_X86_SW_PROTECTED_VM & __KVM_X86_VM_TYPE_FEATURES) == __KVM_X86_PRIVATE_MEM_TYPE);
+
#ifdef CONFIG_KVM_PRIVATE_MEM
-#define kvm_arch_has_private_mem(kvm) ((kvm)->arch.vm_type != KVM_X86_DEFAULT_VM)
+#define kvm_arch_has_private_mem(kvm) ((kvm)->arch.vm_type & __KVM_X86_PRIVATE_MEM_TYPE)
#else
#define kvm_arch_has_private_mem(kvm) false
#endif
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 8746530930d5..e634e5b67516 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5526,21 +5526,30 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
return 0;
}
-static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu,
- struct kvm_debugregs *dbgregs)
+static int kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu,
+ struct kvm_debugregs *dbgregs)
{
unsigned long val;
+ if ((vcpu->kvm->arch.vm_type & __KVM_X86_PROTECTED_STATE_TYPE) &&
+ vcpu->arch.guest_state_protected)
+ return -EINVAL;
+
memset(dbgregs, 0, sizeof(*dbgregs));
memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db));
kvm_get_dr(vcpu, 6, &val);
dbgregs->dr6 = val;
dbgregs->dr7 = vcpu->arch.dr7;
+ return 0;
}
static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
struct kvm_debugregs *dbgregs)
{
+ if ((vcpu->kvm->arch.vm_type & __KVM_X86_PROTECTED_STATE_TYPE) &&
+ vcpu->arch.guest_state_protected)
+ return -EINVAL;
+
if (dbgregs->flags)
return -EINVAL;
@@ -5559,9 +5568,13 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
}
-static void kvm_vcpu_ioctl_x86_get_xsave2(struct kvm_vcpu *vcpu,
- u8 *state, unsigned int size)
+static int kvm_vcpu_ioctl_x86_get_xsave2(struct kvm_vcpu *vcpu,
+ u8 *state, unsigned int size)
{
+ if ((vcpu->kvm->arch.vm_type & __KVM_X86_PROTECTED_STATE_TYPE) &&
+ fpstate_is_confidential(&vcpu->arch.guest_fpu))
+ return -EINVAL;
+
/*
* Only copy state for features that are enabled for the guest. The
* state itself isn't problematic, but setting bits in the header for
@@ -5578,22 +5591,27 @@ static void kvm_vcpu_ioctl_x86_get_xsave2(struct kvm_vcpu *vcpu,
XFEATURE_MASK_FPSSE;
if (fpstate_is_confidential(&vcpu->arch.guest_fpu))
- return;
+ return 0;
fpu_copy_guest_fpstate_to_uabi(&vcpu->arch.guest_fpu, state, size,
supported_xcr0, vcpu->arch.pkru);
+ return 0;
}
-static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
- struct kvm_xsave *guest_xsave)
+static int kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
+ struct kvm_xsave *guest_xsave)
{
- kvm_vcpu_ioctl_x86_get_xsave2(vcpu, (void *)guest_xsave->region,
- sizeof(guest_xsave->region));
+ return kvm_vcpu_ioctl_x86_get_xsave2(vcpu, (void *)guest_xsave->region,
+ sizeof(guest_xsave->region));
}
static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
struct kvm_xsave *guest_xsave)
{
+ if ((vcpu->kvm->arch.vm_type & __KVM_X86_PROTECTED_STATE_TYPE) &&
+ fpstate_is_confidential(&vcpu->arch.guest_fpu))
+ return -EINVAL;
+
if (fpstate_is_confidential(&vcpu->arch.guest_fpu))
return 0;
@@ -5603,18 +5621,23 @@ static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
&vcpu->arch.pkru);
}
-static void kvm_vcpu_ioctl_x86_get_xcrs(struct kvm_vcpu *vcpu,
- struct kvm_xcrs *guest_xcrs)
+static int kvm_vcpu_ioctl_x86_get_xcrs(struct kvm_vcpu *vcpu,
+ struct kvm_xcrs *guest_xcrs)
{
+ if ((vcpu->kvm->arch.vm_type & __KVM_X86_PROTECTED_STATE_TYPE) &&
+ vcpu->arch.guest_state_protected)
+ return -EINVAL;
+
if (!boot_cpu_has(X86_FEATURE_XSAVE)) {
guest_xcrs->nr_xcrs = 0;
- return;
+ return 0;
}
guest_xcrs->nr_xcrs = 1;
guest_xcrs->flags = 0;
guest_xcrs->xcrs[0].xcr = XCR_XFEATURE_ENABLED_MASK;
guest_xcrs->xcrs[0].value = vcpu->arch.xcr0;
+ return 0;
}
static int kvm_vcpu_ioctl_x86_set_xcrs(struct kvm_vcpu *vcpu,
@@ -5622,6 +5645,10 @@ static int kvm_vcpu_ioctl_x86_set_xcrs(struct kvm_vcpu *vcpu,
{
int i, r = 0;
+ if ((vcpu->kvm->arch.vm_type & __KVM_X86_PROTECTED_STATE_TYPE) &&
+ vcpu->arch.guest_state_protected)
+ return -EINVAL;
+
if (!boot_cpu_has(X86_FEATURE_XSAVE))
return -EINVAL;
@@ -6010,7 +6037,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
case KVM_GET_DEBUGREGS: {
struct kvm_debugregs dbgregs;
- kvm_vcpu_ioctl_x86_get_debugregs(vcpu, &dbgregs);
+ r = kvm_vcpu_ioctl_x86_get_debugregs(vcpu, &dbgregs);
+ if (r < 0)
+ break;
r = -EFAULT;
if (copy_to_user(argp, &dbgregs,
@@ -6040,7 +6069,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
if (!u.xsave)
break;
- kvm_vcpu_ioctl_x86_get_xsave(vcpu, u.xsave);
+ r = kvm_vcpu_ioctl_x86_get_xsave(vcpu, u.xsave);
+ if (r < 0)
+ break;
r = -EFAULT;
if (copy_to_user(argp, u.xsave, sizeof(struct kvm_xsave)))
@@ -6069,7 +6100,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
if (!u.xsave)
break;
- kvm_vcpu_ioctl_x86_get_xsave2(vcpu, u.buffer, size);
+ r = kvm_vcpu_ioctl_x86_get_xsave2(vcpu, u.buffer, size);
+ if (r < 0)
+ break;
r = -EFAULT;
if (copy_to_user(argp, u.xsave, size))
@@ -6085,7 +6118,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
if (!u.xcrs)
break;
- kvm_vcpu_ioctl_x86_get_xcrs(vcpu, u.xcrs);
+ r = kvm_vcpu_ioctl_x86_get_xcrs(vcpu, u.xcrs);
+ if (r < 0)
+ break;
r = -EFAULT;
if (copy_to_user(argp, u.xcrs,
@@ -6229,6 +6264,11 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
}
#endif
case KVM_GET_SREGS2: {
+ r = -EINVAL;
+ if ((vcpu->kvm->arch.vm_type & __KVM_X86_PROTECTED_STATE_TYPE) &&
+ vcpu->arch.guest_state_protected)
+ goto out;
+
u.sregs2 = kzalloc(sizeof(struct kvm_sregs2), GFP_KERNEL);
r = -ENOMEM;
if (!u.sregs2)
@@ -6241,6 +6281,11 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
break;
}
case KVM_SET_SREGS2: {
+ r = -EINVAL;
+ if ((vcpu->kvm->arch.vm_type & __KVM_X86_PROTECTED_STATE_TYPE) &&
+ vcpu->arch.guest_state_protected)
+ goto out;
+
u.sregs2 = memdup_user(argp, sizeof(struct kvm_sregs2));
if (IS_ERR(u.sregs2)) {
r = PTR_ERR(u.sregs2);
@@ -11466,6 +11511,10 @@ static void __get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
+ if ((vcpu->kvm->arch.vm_type & __KVM_X86_PROTECTED_STATE_TYPE) &&
+ vcpu->arch.guest_state_protected)
+ return -EINVAL;
+
vcpu_load(vcpu);
__get_regs(vcpu, regs);
vcpu_put(vcpu);
@@ -11507,6 +11556,10 @@ static void __set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
+ if ((vcpu->kvm->arch.vm_type & __KVM_X86_PROTECTED_STATE_TYPE) &&
+ vcpu->arch.guest_state_protected)
+ return -EINVAL;
+
vcpu_load(vcpu);
__set_regs(vcpu, regs);
vcpu_put(vcpu);
@@ -11579,6 +11632,10 @@ static void __get_sregs2(struct kvm_vcpu *vcpu, struct kvm_sregs2 *sregs2)
int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
struct kvm_sregs *sregs)
{
+ if ((vcpu->kvm->arch.vm_type & __KVM_X86_PROTECTED_STATE_TYPE) &&
+ vcpu->arch.guest_state_protected)
+ return -EINVAL;
+
vcpu_load(vcpu);
__get_sregs(vcpu, sregs);
vcpu_put(vcpu);
@@ -11846,6 +11903,10 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
{
int ret;
+ if ((vcpu->kvm->arch.vm_type & __KVM_X86_PROTECTED_STATE_TYPE) &&
+ vcpu->arch.guest_state_protected)
+ return -EINVAL;
+
vcpu_load(vcpu);
ret = __set_sregs(vcpu, sregs);
vcpu_put(vcpu);
--
2.39.1
On Fri, Feb 23, 2024, Paolo Bonzini wrote:
> Some VM types have characteristics in common; in fact, the only use
> of VM types right now is kvm_arch_has_private_mem and it assumes that
> _all_ VM types have private memory.
>
> So, let the low bits specify the characteristics of the VM type. As
> of we have two special things: whether memory is private, and whether
> guest state is protected. The latter is similar to
> kvm->arch.guest_state_protected, but the latter is only set on a fully
> initialized VM. If both are set, ioctls to set registers will cause
> an error---SEV-ES did not do so, which is a problematic API.
>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> Message-Id: <20240209183743.22030-7-pbonzini@redhat.com>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
> arch/x86/include/asm/kvm_host.h | 9 +++-
> arch/x86/kvm/x86.c | 93 +++++++++++++++++++++++++++------
> 2 files changed, 85 insertions(+), 17 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 0bcd9ae16097..15db2697863c 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -2135,8 +2135,15 @@ void kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd);
> void kvm_configure_mmu(bool enable_tdp, int tdp_forced_root_level,
> int tdp_max_root_level, int tdp_huge_page_level);
>
> +
> +/* Low bits of VM types provide confidential computing capabilities. */
> +#define __KVM_X86_PRIVATE_MEM_TYPE 1
> +#define __KVM_X86_PROTECTED_STATE_TYPE 2
> +#define __KVM_X86_VM_TYPE_FEATURES 3
> +static_assert((KVM_X86_SW_PROTECTED_VM & __KVM_X86_VM_TYPE_FEATURES) == __KVM_X86_PRIVATE_MEM_TYPE);
Aliasing bit 0 to KVM_X86_SW_PROTECTED_VM is gross, e.g. this
#define KVM_X86_DEFAULT_VM 0
#define KVM_X86_SW_PROTECTED_VM 1
+#define KVM_X86_SEV_VM 8
+#define KVM_X86_SEV_ES_VM 10
is _super_ confusing and bound to cause problems.
Oh, good gravy, you're also aliasing __KVM_X86_PROTECTED_STATE_TYPE into SEV_ES_VM.
Curse you and your Rami Code induced decimal-based bitwise shenanigans!!!
I don't see any reason to bleed the flags into KVM's ABI. Even shoving the flags
into kvm->arch.vm_type is unncessary. Aha! As is storing vm_type as an "unsigned
long", since (a) it can't ever be bigger than u32, and (b) in practice a u8 will
suffice since as Mike pointed out we're effectively limited to 31 types before
kvm_vm_ioctl_check_extension() starts returning error codes.
So I vote to skip the aliasing and simply do:
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index ff23712f1f3f..27265ff5fd29 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1279,7 +1279,9 @@ enum kvm_apicv_inhibit {
};
struct kvm_arch {
- unsigned long vm_type;
+ u8 vm_type;
+ bool has_private_memory;
+ bool has_protected_state;
unsigned long n_used_mmu_pages;
unsigned long n_requested_mmu_pages;
unsigned long n_max_mmu_pages;
and then just use straight incrementing values for types, i.e.
#define KVM_X86_DEFAULT_VM 0
#define KVM_X86_SW_PROTECTED_VM 1
#define KVM_X86_SEV_VM 2
#define KVM_X86_SEV_ES_VM 3
It'll require a bit of extra work in kvm_arch_init_vm(), but I think the end result
will be signifincatly easier to follow.
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 8746530930d5..e634e5b67516 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -5526,21 +5526,30 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
> return 0;
> }
>
> -static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu,
> - struct kvm_debugregs *dbgregs)
> +static int kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu,
> + struct kvm_debugregs *dbgregs)
> {
> unsigned long val;
>
> + if ((vcpu->kvm->arch.vm_type & __KVM_X86_PROTECTED_STATE_TYPE) &&
> + vcpu->arch.guest_state_protected)
> + return -EINVAL;
> +
> memset(dbgregs, 0, sizeof(*dbgregs));
> memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db));
> kvm_get_dr(vcpu, 6, &val);
> dbgregs->dr6 = val;
> dbgregs->dr7 = vcpu->arch.dr7;
> + return 0;
> }
> static int kvm_vcpu_ioctl_x86_set_xcrs(struct kvm_vcpu *vcpu,
> @@ -5622,6 +5645,10 @@ static int kvm_vcpu_ioctl_x86_set_xcrs(struct kvm_vcpu *vcpu,
> {
> int i, r = 0;
>
> + if ((vcpu->kvm->arch.vm_type & __KVM_X86_PROTECTED_STATE_TYPE) &&
> + vcpu->arch.guest_state_protected)
> + return -EINVAL;
> +
> if (!boot_cpu_has(X86_FEATURE_XSAVE))
> return -EINVAL;
>
> @@ -6010,7 +6037,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
> case KVM_GET_DEBUGREGS: {
> struct kvm_debugregs dbgregs;
>
> - kvm_vcpu_ioctl_x86_get_debugregs(vcpu, &dbgregs);
> + r = kvm_vcpu_ioctl_x86_get_debugregs(vcpu, &dbgregs);
> + if (r < 0)
I would strongly prefer to just do
if (r)
as "r < 0" implies that postive return values are possible/allowed.
That said, rather than a mix of open coding checks in kvm_arch_vcpu_ioctl() versus
burying checks in helpers, what about adding a dedicated helper to take care of
everything in one shot? E.g.
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 3bc69b0c9822..f5ca78e75eec 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5864,6 +5864,16 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
}
}
+static bool kvm_ioctl_accesses_guest_state(unsigned int ioctl)
+{
+ switch (ioctl) {
+ case <...>:
+ return true;
+ default:
+ return false:
+ }
+}
+
long kvm_arch_vcpu_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
@@ -5878,6 +5888,11 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
void *buffer;
} u;
+ if (vcpu->kvm->arch.has_protected_state &&
+ vcpu->arch.guest_state_protected &&
+ kvm_ioctl_accesses_guest_state(ioctl))
+ return -EINVAL;
+
vcpu_load(vcpu);
u.buffer = NULL;
It'll be a much smaller diff, and hopefully easier to audit, too.
On Fri, Feb 23, 2024 at 5:46 PM Sean Christopherson <seanjc@google.com> wrote: > > On Fri, Feb 23, 2024, Paolo Bonzini wrote: > > Some VM types have characteristics in common; in fact, the only use > > of VM types right now is kvm_arch_has_private_mem and it assumes that > > _all_ VM types have private memory. > > > > So, let the low bits specify the characteristics of the VM type. As > > of we have two special things: whether memory is private, and whether > > guest state is protected. The latter is similar to > > kvm->arch.guest_state_protected, but the latter is only set on a fully > > initialized VM. If both are set, ioctls to set registers will cause > > an error---SEV-ES did not do so, which is a problematic API. > > > > Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> > > Message-Id: <20240209183743.22030-7-pbonzini@redhat.com> > > Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> > > --- > > arch/x86/include/asm/kvm_host.h | 9 +++- > > arch/x86/kvm/x86.c | 93 +++++++++++++++++++++++++++------ > > 2 files changed, 85 insertions(+), 17 deletions(-) > > > > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h > > index 0bcd9ae16097..15db2697863c 100644 > > --- a/arch/x86/include/asm/kvm_host.h > > +++ b/arch/x86/include/asm/kvm_host.h > > @@ -2135,8 +2135,15 @@ void kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd); > > void kvm_configure_mmu(bool enable_tdp, int tdp_forced_root_level, > > int tdp_max_root_level, int tdp_huge_page_level); > > > > + > > +/* Low bits of VM types provide confidential computing capabilities. */ > > +#define __KVM_X86_PRIVATE_MEM_TYPE 1 > > +#define __KVM_X86_PROTECTED_STATE_TYPE 2 > > +#define __KVM_X86_VM_TYPE_FEATURES 3 > > +static_assert((KVM_X86_SW_PROTECTED_VM & __KVM_X86_VM_TYPE_FEATURES) == __KVM_X86_PRIVATE_MEM_TYPE); > > Aliasing bit 0 to KVM_X86_SW_PROTECTED_VM is gross, e.g. this > > #define KVM_X86_DEFAULT_VM 0 > #define KVM_X86_SW_PROTECTED_VM 1 > +#define KVM_X86_SEV_VM 8 > +#define KVM_X86_SEV_ES_VM 10 > > is _super_ confusing and bound to cause problems. > > Oh, good gravy, you're also aliasing __KVM_X86_PROTECTED_STATE_TYPE into SEV_ES_VM. > Curse you and your Rami Code induced decimal-based bitwise shenanigans!!! v1 was less gross but Mike talked me into this one. :) > I don't see any reason to bleed the flags into KVM's ABI. Even shoving the flags > into kvm->arch.vm_type is unncessary. Aha! As is storing vm_type as an "unsigned > long", since (a) it can't ever be bigger than u32, and (b) in practice a u8 will > suffice since as Mike pointed out we're effectively limited to 31 types before > kvm_vm_ioctl_check_extension() starts returning error codes. > > So I vote to skip the aliasing and simply do: Fair enough. Paolo
© 2016 - 2026 Red Hat, Inc.