[RFC PATCH 1/7] KVM: SVM: Implement GET_AP_APIC_IDS NAE event

Tom Lendacky posted 7 patches 1 year, 3 months ago
[RFC PATCH 1/7] KVM: SVM: Implement GET_AP_APIC_IDS NAE event
Posted by Tom Lendacky 1 year, 3 months ago
Implement the GET_APIC_IDS NAE event to gather and return the list of
APIC IDs for all vCPUs in the guest.

Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
---
 arch/x86/include/asm/sev-common.h |  1 +
 arch/x86/include/uapi/asm/svm.h   |  1 +
 arch/x86/kvm/svm/sev.c            | 84 ++++++++++++++++++++++++++++++-
 3 files changed, 85 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/sev-common.h b/arch/x86/include/asm/sev-common.h
index 98726c2b04f8..d63c861ef91f 100644
--- a/arch/x86/include/asm/sev-common.h
+++ b/arch/x86/include/asm/sev-common.h
@@ -136,6 +136,7 @@ enum psc_op {
 
 #define GHCB_HV_FT_SNP			BIT_ULL(0)
 #define GHCB_HV_FT_SNP_AP_CREATION	BIT_ULL(1)
+#define GHCB_HV_FT_APIC_ID_LIST		BIT_ULL(4)
 #define GHCB_HV_FT_SNP_MULTI_VMPL	BIT_ULL(5)
 
 /*
diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h
index 1814b413fd57..f8fa3c4c0322 100644
--- a/arch/x86/include/uapi/asm/svm.h
+++ b/arch/x86/include/uapi/asm/svm.h
@@ -115,6 +115,7 @@
 #define SVM_VMGEXIT_AP_CREATE_ON_INIT		0
 #define SVM_VMGEXIT_AP_CREATE			1
 #define SVM_VMGEXIT_AP_DESTROY			2
+#define SVM_VMGEXIT_GET_APIC_IDS		0x80000017
 #define SVM_VMGEXIT_SNP_RUN_VMPL		0x80000018
 #define SVM_VMGEXIT_HV_FEATURES			0x8000fffd
 #define SVM_VMGEXIT_TERM_REQUEST		0x8000fffe
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 532df12b43c5..199bdc7c7db1 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -39,7 +39,9 @@
 #define GHCB_VERSION_DEFAULT	2ULL
 #define GHCB_VERSION_MIN	1ULL
 
-#define GHCB_HV_FT_SUPPORTED	(GHCB_HV_FT_SNP | GHCB_HV_FT_SNP_AP_CREATION)
+#define GHCB_HV_FT_SUPPORTED	(GHCB_HV_FT_SNP			| \
+				 GHCB_HV_FT_SNP_AP_CREATION	| \
+				 GHCB_HV_FT_APIC_ID_LIST)
 
 /* enable/disable SEV support */
 static bool sev_enabled = true;
@@ -3390,6 +3392,10 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
 			if (!kvm_ghcb_rax_is_valid(svm))
 				goto vmgexit_err;
 		break;
+	case SVM_VMGEXIT_GET_APIC_IDS:
+		if (!kvm_ghcb_rax_is_valid(svm))
+			goto vmgexit_err;
+		break;
 	case SVM_VMGEXIT_NMI_COMPLETE:
 	case SVM_VMGEXIT_AP_HLT_LOOP:
 	case SVM_VMGEXIT_AP_JUMP_TABLE:
@@ -4124,6 +4130,77 @@ static int snp_handle_ext_guest_req(struct vcpu_svm *svm, gpa_t req_gpa, gpa_t r
 	return 1; /* resume guest */
 }
 
+struct sev_apic_id_desc {
+	u32	num_entries;
+	u32	apic_ids[];
+};
+
+static void sev_get_apic_ids(struct vcpu_svm *svm)
+{
+	struct ghcb *ghcb = svm->sev_es.ghcb;
+	struct kvm_vcpu *vcpu = &svm->vcpu, *loop_vcpu;
+	struct kvm *kvm = vcpu->kvm;
+	unsigned int id_desc_size;
+	struct sev_apic_id_desc *desc;
+	kvm_pfn_t pfn;
+	gpa_t gpa;
+	u64 pages;
+	unsigned long i;
+	int n;
+
+	pages = vcpu->arch.regs[VCPU_REGS_RAX];
+
+	/* Each APIC ID is 32-bits in size, so make sure there is room */
+	n = atomic_read(&kvm->online_vcpus);
+	/*TODO: is this possible? */
+	if (n < 0)
+		return;
+
+	id_desc_size = sizeof(*desc);
+	id_desc_size += n * sizeof(desc->apic_ids[0]);
+	if (id_desc_size > (pages * PAGE_SIZE)) {
+		vcpu->arch.regs[VCPU_REGS_RAX] = PFN_UP(id_desc_size);
+		return;
+	}
+
+	gpa = svm->vmcb->control.exit_info_1;
+
+	ghcb_set_sw_exit_info_1(ghcb, 2);
+	ghcb_set_sw_exit_info_2(ghcb, 5);
+
+	if (!page_address_valid(vcpu, gpa))
+		return;
+
+	pfn = gfn_to_pfn(kvm, gpa_to_gfn(gpa));
+	if (is_error_noslot_pfn(pfn))
+		return;
+
+	if (!pages)
+		return;
+
+	/* Allocate a buffer to hold the APIC IDs */
+	desc = kvzalloc(id_desc_size, GFP_KERNEL_ACCOUNT);
+	if (!desc)
+		return;
+
+	desc->num_entries = n;
+	kvm_for_each_vcpu(i, loop_vcpu, kvm) {
+		/*TODO: is this possible? */
+		if (i > n)
+			break;
+
+		desc->apic_ids[i] = loop_vcpu->vcpu_id;
+	}
+
+	if (!kvm_write_guest(kvm, gpa, desc, id_desc_size)) {
+		/* IDs were successfully written */
+		ghcb_set_sw_exit_info_1(ghcb, 0);
+		ghcb_set_sw_exit_info_2(ghcb, 0);
+	}
+
+	kvfree(desc);
+}
+
 static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm)
 {
 	struct vmcb_control_area *control = &svm->vmcb->control;
@@ -4404,6 +4481,11 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
 	case SVM_VMGEXIT_EXT_GUEST_REQUEST:
 		ret = snp_handle_ext_guest_req(svm, control->exit_info_1, control->exit_info_2);
 		break;
+	case SVM_VMGEXIT_GET_APIC_IDS:
+		sev_get_apic_ids(svm);
+
+		ret = 1;
+		break;
 	case SVM_VMGEXIT_UNSUPPORTED_EVENT:
 		vcpu_unimpl(vcpu,
 			    "vmgexit: unsupported event - exit_info_1=%#llx, exit_info_2=%#llx\n",
-- 
2.43.2
Re: [RFC PATCH 1/7] KVM: SVM: Implement GET_AP_APIC_IDS NAE event
Posted by Borislav Petkov 1 year ago
On Tue, Aug 27, 2024 at 04:59:25PM -0500, Tom Lendacky wrote:
> @@ -4124,6 +4130,77 @@ static int snp_handle_ext_guest_req(struct vcpu_svm *svm, gpa_t req_gpa, gpa_t r
>  	return 1; /* resume guest */
>  }
>  
> +struct sev_apic_id_desc {
> +	u32	num_entries;

"count" - like in the spec. :-P

> +	u32	apic_ids[];
> +};
> +
> +static void sev_get_apic_ids(struct vcpu_svm *svm)
> +{
> +	struct ghcb *ghcb = svm->sev_es.ghcb;
> +	struct kvm_vcpu *vcpu = &svm->vcpu, *loop_vcpu;
> +	struct kvm *kvm = vcpu->kvm;
> +	unsigned int id_desc_size;
> +	struct sev_apic_id_desc *desc;
> +	kvm_pfn_t pfn;
> +	gpa_t gpa;
> +	u64 pages;
> +	unsigned long i;
> +	int n;
> +
> +	pages = vcpu->arch.regs[VCPU_REGS_RAX];

Probably should be "num_pages" and a comment should explain what it is:

"State to Hypervisor: is the
number of guest contiguous pages
provided to hold the list of APIC
IDs"

Makes it much easier to follow the code.

> +	/* Each APIC ID is 32-bits in size, so make sure there is room */
> +	n = atomic_read(&kvm->online_vcpus);
> +	/*TODO: is this possible? */
> +	if (n < 0)
> +		return;

It doesn't look like it but if you wanna be real paranoid you can slap
a WARN_ONCE() here or so to scream loudly.

> +	id_desc_size = sizeof(*desc);
> +	id_desc_size += n * sizeof(desc->apic_ids[0]);
> +	if (id_desc_size > (pages * PAGE_SIZE)) {
> +		vcpu->arch.regs[VCPU_REGS_RAX] = PFN_UP(id_desc_size);
> +		return;
> +	}
> +
> +	gpa = svm->vmcb->control.exit_info_1;
> +
> +	ghcb_set_sw_exit_info_1(ghcb, 2);
> +	ghcb_set_sw_exit_info_2(ghcb, 5);

Uuh, more magic numbers. I guess we need this:

https://lore.kernel.org/r/20241113204425.889854-1-huibo.wang@amd.com

and more.

And can we write those only once at the end of the function?

> +	if (!page_address_valid(vcpu, gpa))
> +		return;
> +
> +	pfn = gfn_to_pfn(kvm, gpa_to_gfn(gpa));

Looking at the tree, that gfn_to_pfn() thing is gone now and we're supposed to
it this way.  Not tested ofc:

diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 5af227ba15a3..47e1f72a574d 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -4134,7 +4134,7 @@ static void sev_get_apic_ids(struct vcpu_svm *svm)
 	struct kvm *kvm = vcpu->kvm;
 	unsigned int id_desc_size;
 	struct sev_apic_id_desc *desc;
-	kvm_pfn_t pfn;
+	struct page *page;
 	gpa_t gpa;
 	u64 pages;
 	unsigned long i;
@@ -4163,8 +4163,8 @@ static void sev_get_apic_ids(struct vcpu_svm *svm)
 	if (!page_address_valid(vcpu, gpa))
 		return;
 
-	pfn = gfn_to_pfn(kvm, gpa_to_gfn(gpa));
-	if (is_error_noslot_pfn(pfn))
+	page = gfn_to_page(kvm, gpa_to_gfn(gpa));
+	if (!page)
 		return;
 
 	if (!pages)

> +	if (is_error_noslot_pfn(pfn))
> +		return;
> +
> +	if (!pages)
> +		return;

That test needs to go right under the assignment of "pages".

> +	/* Allocate a buffer to hold the APIC IDs */
> +	desc = kvzalloc(id_desc_size, GFP_KERNEL_ACCOUNT);
> +	if (!desc)
> +		return;
> +
> +	desc->num_entries = n;
> +	kvm_for_each_vcpu(i, loop_vcpu, kvm) {
> +		/*TODO: is this possible? */

Well:

#define kvm_for_each_vcpu(idx, vcpup, kvm)                 \
        xa_for_each_range(&kvm->vcpu_array, idx, vcpup, 0, \
                          (atomic_read(&kvm->online_vcpus) - 1))
			   ^^^^^^^^^^^^^^

but, what's stopping kvm_vm_ioctl_create_vcpu() from incrementing it?

I'm guessing this would happen when you start the guest only but I haz no
idea.

> +		if (i > n)
> +			break;
> +
> +		desc->apic_ids[i] = loop_vcpu->vcpu_id;
> +	}
> +
> +	if (!kvm_write_guest(kvm, gpa, desc, id_desc_size)) {
> +		/* IDs were successfully written */
> +		ghcb_set_sw_exit_info_1(ghcb, 0);
> +		ghcb_set_sw_exit_info_2(ghcb, 0);
> +	}
> +
> +	kvfree(desc);
> +}


-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette