[PATCH v14 01/22] KVM: x86: Introduce KVM_{G,S}ET_ONE_REG uAPIs support

Chao Gao posted 22 patches 3 weeks, 2 days ago
[PATCH v14 01/22] KVM: x86: Introduce KVM_{G,S}ET_ONE_REG uAPIs support
Posted by Chao Gao 3 weeks, 2 days ago
From: Yang Weijiang <weijiang.yang@intel.com>

Enable KVM_{G,S}ET_ONE_REG uAPIs so that userspace can access MSRs and
other non-MSR registers through them.

This is in preparation for allowing userspace to read/write the guest SSP
register, which is needed for the upcoming CET virtualization support.

Currently, two types of registers are supported: KVM_X86_REG_TYPE_MSR and
KVM_X86_REG_TYPE_KVM. All MSRs are in the former type; the latter type is
added for registers that lack existing KVM uAPIs to access them. The "KVM"
in the name is intended to be vague to give KVM flexibility to include
other potential registers. We considered some specific names, like
"SYNTHETIC" and "SYNTHETIC_MSR" before, but both are confusing and may put
KVM itself into a corner.

Suggested-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Yang Weijiang <weijiang.yang@intel.com>
Link: https://lore.kernel.org/all/20240219074733.122080-18-weijiang.yang@intel.com/ [1]
Tested-by: Mathias Krause <minipli@grsecurity.net>
Tested-by: John Allen <john.allen@amd.com>
Tested-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
Signed-off-by: Chao Gao <chao.gao@intel.com>
---
v14:
- Rename the group type of guest SSP register to KVM_X86_REG_KVM
- Add docs for id patterns for x86 in api.rst
- Update commit message
---
 Documentation/virt/kvm/api.rst  |  2 +
 arch/x86/include/uapi/asm/kvm.h | 26 +++++++++++
 arch/x86/kvm/x86.c              | 78 +++++++++++++++++++++++++++++++++
 3 files changed, 106 insertions(+)

diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 6aa40ee05a4a..28fc12b46eeb 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -2908,6 +2908,8 @@ such as set vcpu counter or reset vcpu, and they have the following id bit patte
 
   0x9030 0000 0002 <reg:16>
 
+x86 MSR registers have the following id bit patterns::
+  0x2030 0002 <msr number:32>
 
 4.69 KVM_GET_ONE_REG
 --------------------
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index 0f15d683817d..508b713ca52e 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -411,6 +411,32 @@ struct kvm_xcrs {
 	__u64 padding[16];
 };
 
+#define KVM_X86_REG_TYPE_MSR		2
+#define KVM_X86_REG_TYPE_KVM		3
+
+#define KVM_X86_KVM_REG_SIZE(reg)						\
+({										\
+	reg == KVM_REG_GUEST_SSP ? KVM_REG_SIZE_U64 : 0;			\
+})
+
+#define KVM_X86_REG_TYPE_SIZE(type, reg)					\
+({										\
+	__u64 type_size = (__u64)type << 32;					\
+										\
+	type_size |= type == KVM_X86_REG_TYPE_MSR ? KVM_REG_SIZE_U64 :		\
+		     type == KVM_X86_REG_TYPE_KVM ? KVM_X86_KVM_REG_SIZE(reg) :	\
+		     0;								\
+	type_size;								\
+})
+
+#define KVM_X86_REG_ENCODE(type, index)				\
+	(KVM_REG_X86 | KVM_X86_REG_TYPE_SIZE(type, index) | index)
+
+#define KVM_X86_REG_MSR(index)					\
+	KVM_X86_REG_ENCODE(KVM_X86_REG_TYPE_MSR, index)
+#define KVM_X86_REG_KVM(index)					\
+	KVM_X86_REG_ENCODE(KVM_X86_REG_TYPE_KVM, index)
+
 #define KVM_SYNC_X86_REGS      (1UL << 0)
 #define KVM_SYNC_X86_SREGS     (1UL << 1)
 #define KVM_SYNC_X86_EVENTS    (1UL << 2)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 7ba2cdfdac44..f32d3edfc7b1 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2254,6 +2254,31 @@ static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
 	return kvm_set_msr_ignored_check(vcpu, index, *data, true);
 }
 
+static int kvm_get_one_msr(struct kvm_vcpu *vcpu, u32 msr, u64 __user *value)
+{
+	u64 val;
+	int r;
+
+	r = do_get_msr(vcpu, msr, &val);
+	if (r)
+		return r;
+
+	if (put_user(val, value))
+		return -EFAULT;
+
+	return 0;
+}
+
+static int kvm_set_one_msr(struct kvm_vcpu *vcpu, u32 msr, u64 __user *value)
+{
+	u64 val;
+
+	if (get_user(val, value))
+		return -EFAULT;
+
+	return do_set_msr(vcpu, msr, &val);
+}
+
 #ifdef CONFIG_X86_64
 struct pvclock_clock {
 	int vclock_mode;
@@ -4737,6 +4762,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_IRQFD_RESAMPLE:
 	case KVM_CAP_MEMORY_FAULT_INFO:
 	case KVM_CAP_X86_GUEST_MODE:
+	case KVM_CAP_ONE_REG:
 		r = 1;
 		break;
 	case KVM_CAP_PRE_FAULT_MEMORY:
@@ -5915,6 +5941,20 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
 	}
 }
 
+struct kvm_x86_reg_id {
+	__u32 index;
+	__u8  type;
+	__u8  rsvd1;
+	__u8  rsvd2:4;
+	__u8  size:4;
+	__u8  x86;
+};
+
+static int kvm_translate_kvm_reg(struct kvm_x86_reg_id *reg)
+{
+	return -EINVAL;
+}
+
 long kvm_arch_vcpu_ioctl(struct file *filp,
 			 unsigned int ioctl, unsigned long arg)
 {
@@ -6031,6 +6071,44 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 		srcu_read_unlock(&vcpu->kvm->srcu, idx);
 		break;
 	}
+	case KVM_GET_ONE_REG:
+	case KVM_SET_ONE_REG: {
+		struct kvm_x86_reg_id *id;
+		struct kvm_one_reg reg;
+		u64 __user *value;
+
+		r = -EFAULT;
+		if (copy_from_user(&reg, argp, sizeof(reg)))
+			break;
+
+		r = -EINVAL;
+		if ((reg.id & KVM_REG_ARCH_MASK) != KVM_REG_X86)
+			break;
+
+		id = (struct kvm_x86_reg_id *)&reg.id;
+		if (id->rsvd1 || id->rsvd2)
+			break;
+
+		if (id->type == KVM_X86_REG_TYPE_KVM) {
+			r = kvm_translate_kvm_reg(id);
+			if (r)
+				break;
+		}
+
+		r = -EINVAL;
+		if (id->type != KVM_X86_REG_TYPE_MSR)
+			break;
+
+		if ((reg.id & KVM_REG_SIZE_MASK) != KVM_REG_SIZE_U64)
+			break;
+
+		value = u64_to_user_ptr(reg.addr);
+		if (ioctl == KVM_GET_ONE_REG)
+			r = kvm_get_one_msr(vcpu, id->index, value);
+		else
+			r = kvm_set_one_msr(vcpu, id->index, value);
+		break;
+	}
 	case KVM_TPR_ACCESS_REPORTING: {
 		struct kvm_tpr_access_ctl tac;
 
-- 
2.47.3
Re: [PATCH v14 01/22] KVM: x86: Introduce KVM_{G,S}ET_ONE_REG uAPIs support
Posted by Sean Christopherson 3 weeks, 1 day ago
On Tue, Sep 09, 2025, Chao Gao wrote:
> @@ -6031,6 +6071,44 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
>  		srcu_read_unlock(&vcpu->kvm->srcu, idx);
>  		break;
>  	}
> +	case KVM_GET_ONE_REG:
> +	case KVM_SET_ONE_REG: {
> +		struct kvm_x86_reg_id *id;
> +		struct kvm_one_reg reg;
> +		u64 __user *value;
> +
> +		r = -EFAULT;
> +		if (copy_from_user(&reg, argp, sizeof(reg)))
> +			break;
> +
> +		r = -EINVAL;
> +		if ((reg.id & KVM_REG_ARCH_MASK) != KVM_REG_X86)
> +			break;
> +
> +		id = (struct kvm_x86_reg_id *)&reg.id;
> +		if (id->rsvd1 || id->rsvd2)
> +			break;
> +
> +		if (id->type == KVM_X86_REG_TYPE_KVM) {
> +			r = kvm_translate_kvm_reg(id);
> +			if (r)
> +				break;
> +		}
> +
> +		r = -EINVAL;
> +		if (id->type != KVM_X86_REG_TYPE_MSR)
> +			break;
> +
> +		if ((reg.id & KVM_REG_SIZE_MASK) != KVM_REG_SIZE_U64)
> +			break;
> +

Almost forgot.  I think it makes sense to grab kvm->srcu here.  I'm not entirely
positive that's necessary these days, e.g. after commit 3617c0ee7dec ("KVM: x86/xen:
Only write Xen hypercall page for guest writes to MSR")but there are path, but
_proving_ that there are no memory or MSR/PMU filter accesses is practically
impossible given how much code is reachable via MSR emulation.

If someone wants to put in the effort to prove SRCU isn't needed, then we should
also drop SRCU protection from KVM_{G,S}ET_MSRS.

> +		value = u64_to_user_ptr(reg.addr);
> +		if (ioctl == KVM_GET_ONE_REG)
> +			r = kvm_get_one_msr(vcpu, id->index, value);
> +		else
> +			r = kvm_set_one_msr(vcpu, id->index, value);
> +		break;
> +	}
>  	case KVM_TPR_ACCESS_REPORTING: {
>  		struct kvm_tpr_access_ctl tac;
>  
> -- 
> 2.47.3
>
Re: [PATCH v14 01/22] KVM: x86: Introduce KVM_{G,S}ET_ONE_REG uAPIs support
Posted by Sean Christopherson 3 weeks, 1 day ago
On Tue, Sep 09, 2025, Chao Gao wrote:
> +static int kvm_set_one_msr(struct kvm_vcpu *vcpu, u32 msr, u64 __user *value)
> +{
> +	u64 val;
> +
> +	if (get_user(val, value))
> +		return -EFAULT;
> +
> +	return do_set_msr(vcpu, msr, &val);

This needs to explicitly return -EINVAL on failure, otherwise KVM will return
semi-arbitrary positive values to userspace.

> +}
> +
>  #ifdef CONFIG_X86_64
>  struct pvclock_clock {
>  	int vclock_mode;
> @@ -4737,6 +4762,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
>  	case KVM_CAP_IRQFD_RESAMPLE:
>  	case KVM_CAP_MEMORY_FAULT_INFO:
>  	case KVM_CAP_X86_GUEST_MODE:
> +	case KVM_CAP_ONE_REG:

We should add (partial) support for KVM_GET_REG_LIST, otherwise the ABI for
handling GUEST_SSP is effectively undefined.  And because the ioctl is per-vCPU,
utilizing KVM_GET_REG_LIST gives us the opportunity to avoid the horrors we
created with KVM_GET_MSR_INDEX_LIST, where KVM enumerates MSRs that aren't fully
supported by the vCPU.

If we don't enumerate GUEST_SSP via KVM_GET_REG_LIST, then trying to do
KVM_{G,S}ET_ONE_REG will "unexpectedly" fail if the vCPU doesn't have SHSTK.
By enumerating GUEST_SSP in KVM_GET_REG_LIST _if and only if_ it's fully supported,
we'll have a much more explicit ABI than we do for MSRs.  And if we don't do that,
we'd have to special case MSR_KVM_INTERNAL_GUEST_SSP in kvm_is_advertised_msr().

As for MSRs, that's where "partial" support comes in.  For MSRs, I think the least
awful option is to keep using KVM_GET_MSR_INDEX_LIST for enumerating MSRs, and
document that any MSRs that can be accessed via KVM_{G,S}ET_MSRS can be accessed
via KVM_{G,S}ET_ONE_REG.  That avoids having to bake in different behavior for
MSR vs. ONE_REG accesses (and avoids having to add a pile of code to precisely
enumerate support for per-vCPU MSRs).

>  		r = 1;
>  		break;
>  	case KVM_CAP_PRE_FAULT_MEMORY:
> @@ -5915,6 +5941,20 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
>  	}
>  }
>  
> +struct kvm_x86_reg_id {
> +	__u32 index;
> +	__u8  type;
> +	__u8  rsvd1;
> +	__u8  rsvd2:4;
> +	__u8  size:4;
> +	__u8  x86;
> +};
> +
> +static int kvm_translate_kvm_reg(struct kvm_x86_reg_id *reg)
> +{
> +	return -EINVAL;
> +}
> +
>  long kvm_arch_vcpu_ioctl(struct file *filp,
>  			 unsigned int ioctl, unsigned long arg)
>  {
> @@ -6031,6 +6071,44 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
>  		srcu_read_unlock(&vcpu->kvm->srcu, idx);
>  		break;
>  	}
> +	case KVM_GET_ONE_REG:
> +	case KVM_SET_ONE_REG: {
> +		struct kvm_x86_reg_id *id;
> +		struct kvm_one_reg reg;
> +		u64 __user *value;
> +
> +		r = -EFAULT;
> +		if (copy_from_user(&reg, argp, sizeof(reg)))
> +			break;
> +
> +		r = -EINVAL;
> +		if ((reg.id & KVM_REG_ARCH_MASK) != KVM_REG_X86)
> +			break;
> +
> +		id = (struct kvm_x86_reg_id *)&reg.id;
> +		if (id->rsvd1 || id->rsvd2)
> +			break;
> +
> +		if (id->type == KVM_X86_REG_TYPE_KVM) {
> +			r = kvm_translate_kvm_reg(id);
> +			if (r)
> +				break;
> +		}
> +
> +		r = -EINVAL;
> +		if (id->type != KVM_X86_REG_TYPE_MSR)
> +			break;
> +
> +		if ((reg.id & KVM_REG_SIZE_MASK) != KVM_REG_SIZE_U64)
> +			break;
> +
> +		value = u64_to_user_ptr(reg.addr);
> +		if (ioctl == KVM_GET_ONE_REG)
> +			r = kvm_get_one_msr(vcpu, id->index, value);
> +		else
> +			r = kvm_set_one_msr(vcpu, id->index, value);
> +		break;
> +	}

I think it makes sense to put this in a separate helper, if only so that the
error returns are more obvious.


>  	case KVM_TPR_ACCESS_REPORTING: {
>  		struct kvm_tpr_access_ctl tac;
>  
> -- 
> 2.47.3
>
Re: [PATCH v14 01/22] KVM: x86: Introduce KVM_{G,S}ET_ONE_REG uAPIs support
Posted by Xiaoyao Li 3 weeks, 1 day ago
On 9/9/2025 5:39 PM, Chao Gao wrote:
> From: Yang Weijiang <weijiang.yang@intel.com>
> 
> Enable KVM_{G,S}ET_ONE_REG uAPIs so that userspace can access MSRs and
> other non-MSR registers through them.
> 
> This is in preparation for allowing userspace to read/write the guest SSP
> register, which is needed for the upcoming CET virtualization support.
> 
> Currently, two types of registers are supported: KVM_X86_REG_TYPE_MSR and
> KVM_X86_REG_TYPE_KVM. All MSRs are in the former type; the latter type is
> added for registers that lack existing KVM uAPIs to access them. The "KVM"
> in the name is intended to be vague to give KVM flexibility to include
> other potential registers. We considered some specific names, like
> "SYNTHETIC" and "SYNTHETIC_MSR" before, but both are confusing and may put
> KVM itself into a corner.
> 
> Suggested-by: Sean Christopherson <seanjc@google.com>
> Signed-off-by: Yang Weijiang <weijiang.yang@intel.com>
> Link: https://lore.kernel.org/all/20240219074733.122080-18-weijiang.yang@intel.com/ [1]
> Tested-by: Mathias Krause <minipli@grsecurity.net>
> Tested-by: John Allen <john.allen@amd.com>
> Tested-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
> Signed-off-by: Chao Gao <chao.gao@intel.com>

Reviewed-by: Xiaoyao Li <xiaoyao.li@intel.com>