[v1] KVM: iommu: Overhaul device posted IRQs support

[PATCH 33/67] KVM: x86: Dedup AVIC vs. PI code for identifying target vCPU

Posted by Sean Christopherson 10 months, 1 week ago

Hoist the logic for identifying the target vCPU for a posted interrupt
into common x86.  The code is functionally identical between Intel and
AMD.

Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 arch/x86/include/asm/kvm_host.h |  3 +-
 arch/x86/kvm/svm/avic.c         | 83 ++++++++-------------------------
 arch/x86/kvm/svm/svm.h          |  3 +-
 arch/x86/kvm/vmx/posted_intr.c  | 56 ++++++----------------
 arch/x86/kvm/vmx/posted_intr.h  |  3 +-
 arch/x86/kvm/x86.c              | 46 +++++++++++++++---
 6 files changed, 81 insertions(+), 113 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 85f45fc5156d..cb98d8d3c6c2 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1838,7 +1838,8 @@ struct kvm_x86_ops {
 
 	int (*pi_update_irte)(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm,
 			      unsigned int host_irq, uint32_t guest_irq,
-			      struct kvm_kernel_irq_routing_entry *new);
+			      struct kvm_kernel_irq_routing_entry *new,
+			      struct kvm_vcpu *vcpu, u32 vector);
 	void (*pi_start_assignment)(struct kvm *kvm);
 	void (*apicv_pre_state_restore)(struct kvm_vcpu *vcpu);
 	void (*apicv_post_state_restore)(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index ea6eae72b941..666f518340a7 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -812,52 +812,13 @@ static int svm_ir_list_add(struct vcpu_svm *svm,
 	return 0;
 }
 
-/*
- * Note:
- * The HW cannot support posting multicast/broadcast
- * interrupts to a vCPU. So, we still use legacy interrupt
- * remapping for these kind of interrupts.
- *
- * For lowest-priority interrupts, we only support
- * those with single CPU as the destination, e.g. user
- * configures the interrupts via /proc/irq or uses
- * irqbalance to make the interrupts single-CPU.
- */
-static int
-get_pi_vcpu_info(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e,
-		 struct vcpu_data *vcpu_info, struct kvm_vcpu **vcpu)
-{
-	struct kvm_lapic_irq irq;
-	*vcpu = NULL;
-
-	kvm_set_msi_irq(kvm, e, &irq);
-
-	if (!kvm_intr_is_single_vcpu(kvm, &irq, vcpu) ||
-	    !kvm_irq_is_postable(&irq)) {
-		pr_debug("SVM: %s: use legacy intr remap mode for irq %u\n",
-			 __func__, irq.vector);
-		return -1;
-	}
-
-	pr_debug("SVM: %s: use GA mode for irq %u\n", __func__,
-		 irq.vector);
-	vcpu_info->vector = irq.vector;
-
-	return 0;
-}
-
 int avic_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm,
 			unsigned int host_irq, uint32_t guest_irq,
-			struct kvm_kernel_irq_routing_entry *new)
+			struct kvm_kernel_irq_routing_entry *new,
+			struct kvm_vcpu *vcpu, u32 vector)
 {
-	bool enable_remapped_mode = true;
-	struct vcpu_data vcpu_info;
-	struct kvm_vcpu *vcpu = NULL;
 	int ret = 0;
 
-	if (!kvm_arch_has_assigned_device(kvm) || !kvm_arch_has_irq_bypass())
-		return 0;
-
 	/*
 	 * If the IRQ was affined to a different vCPU, remove the IRTE metadata
 	 * from the *previous* vCPU's list.
@@ -865,7 +826,7 @@ int avic_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm,
 	svm_ir_list_del(irqfd);
 
 	pr_debug("SVM: %s: host_irq=%#x, guest_irq=%#x, set=%#x\n",
-		 __func__, host_irq, guest_irq, !!new);
+		 __func__, host_irq, guest_irq, !!vcpu);
 
 	/**
 	 * Here, we setup with legacy mode in the following cases:
@@ -874,23 +835,23 @@ int avic_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm,
 	 * 3. APIC virtualization is disabled for the vcpu.
 	 * 4. IRQ has incompatible delivery mode (SMI, INIT, etc)
 	 */
-	if (new && new && new->type == KVM_IRQ_ROUTING_MSI &&
-	    !get_pi_vcpu_info(kvm, new, &vcpu_info, &vcpu) &&
-	    kvm_vcpu_apicv_active(vcpu)) {
-		struct amd_iommu_pi_data pi;
-
-		enable_remapped_mode = false;
-
-		vcpu_info.pi_desc_addr = avic_get_backing_page_address(to_svm(vcpu));
-
+	if (vcpu && kvm_vcpu_apicv_active(vcpu)) {
 		/*
 		 * Try to enable guest_mode in IRTE.  Note, the address
 		 * of the vCPU's AVIC backing page is passed to the
 		 * IOMMU via vcpu_info->pi_desc_addr.
 		 */
-		pi.ga_tag = AVIC_GATAG(to_kvm_svm(kvm)->avic_vm_id, vcpu->vcpu_id);
-		pi.is_guest_mode = true;
-		pi.vcpu_data = &vcpu_info;
+		struct vcpu_data vcpu_info = {
+			.pi_desc_addr = avic_get_backing_page_address(to_svm(vcpu)),
+			.vector = vector,
+		};
+
+		struct amd_iommu_pi_data pi = {
+			.ga_tag = AVIC_GATAG(to_kvm_svm(kvm)->avic_vm_id, vcpu->vcpu_id),
+			.is_guest_mode = true,
+			.vcpu_data = &vcpu_info,
+		};
+
 		ret = irq_set_vcpu_affinity(host_irq, &pi);
 
 		/**
@@ -902,12 +863,11 @@ int avic_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm,
 		 */
 		if (!ret)
 			ret = svm_ir_list_add(to_svm(vcpu), irqfd, &pi);
-	}
 
-	if (!ret && vcpu) {
-		trace_kvm_pi_irte_update(host_irq, vcpu->vcpu_id,
-					 guest_irq, vcpu_info.vector,
-					 vcpu_info.pi_desc_addr, !!new);
+		trace_kvm_pi_irte_update(host_irq, vcpu->vcpu_id, guest_irq,
+					 vector, vcpu_info.pi_desc_addr, true);
+	} else {
+		ret = irq_set_vcpu_affinity(host_irq, NULL);
 	}
 
 	if (ret < 0) {
@@ -915,10 +875,7 @@ int avic_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm,
 		goto out;
 	}
 
-	if (enable_remapped_mode)
-		ret = irq_set_vcpu_affinity(host_irq, NULL);
-	else
-		ret = 0;
+	ret = 0;
 out:
 	return ret;
 }
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 6ad0aa86f78d..5ce240085ee0 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -741,7 +741,8 @@ void avic_apicv_post_state_restore(struct kvm_vcpu *vcpu);
 void avic_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu);
 int avic_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm,
 			unsigned int host_irq, uint32_t guest_irq,
-			struct kvm_kernel_irq_routing_entry *new);
+			struct kvm_kernel_irq_routing_entry *new,
+			struct kvm_vcpu *vcpu, u32 vector);
 void avic_vcpu_blocking(struct kvm_vcpu *vcpu);
 void avic_vcpu_unblocking(struct kvm_vcpu *vcpu);
 void avic_ring_doorbell(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/vmx/posted_intr.c b/arch/x86/kvm/vmx/posted_intr.c
index 786912cee3f8..fd5f6a125614 100644
--- a/arch/x86/kvm/vmx/posted_intr.c
+++ b/arch/x86/kvm/vmx/posted_intr.c
@@ -266,46 +266,20 @@ void vmx_pi_start_assignment(struct kvm *kvm)
 
 int vmx_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm,
 		       unsigned int host_irq, uint32_t guest_irq,
-		       struct kvm_kernel_irq_routing_entry *new)
+		       struct kvm_kernel_irq_routing_entry *new,
+		       struct kvm_vcpu *vcpu, u32 vector)
 {
-	struct kvm_lapic_irq irq;
-	struct kvm_vcpu *vcpu;
-	struct vcpu_data vcpu_info;
-
-	if (!vmx_can_use_vtd_pi(kvm))
-		return 0;
-
-	/*
-	 * VT-d PI cannot support posting multicast/broadcast
-	 * interrupts to a vCPU, we still use interrupt remapping
-	 * for these kind of interrupts.
-	 *
-	 * For lowest-priority interrupts, we only support
-	 * those with single CPU as the destination, e.g. user
-	 * configures the interrupts via /proc/irq or uses
-	 * irqbalance to make the interrupts single-CPU.
-	 *
-	 * We will support full lowest-priority interrupt later.
-	 *
-	 * In addition, we can only inject generic interrupts using
-	 * the PI mechanism, refuse to route others through it.
-	 */
-	if (!new || new->type != KVM_IRQ_ROUTING_MSI)
-		goto do_remapping;
-
-	kvm_set_msi_irq(kvm, new, &irq);
-
-	if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu) ||
-	    !kvm_irq_is_postable(&irq))
-		goto do_remapping;
-
-	vcpu_info.pi_desc_addr = __pa(vcpu_to_pi_desc(vcpu));
-	vcpu_info.vector = irq.vector;
-
-	trace_kvm_pi_irte_update(host_irq, vcpu->vcpu_id, guest_irq,
-				 vcpu_info.vector, vcpu_info.pi_desc_addr, true);
-
-	return irq_set_vcpu_affinity(host_irq, &vcpu_info);
-do_remapping:
-	return irq_set_vcpu_affinity(host_irq, NULL);
+	if (vcpu) {
+		struct vcpu_data vcpu_info = {
+			.pi_desc_addr = __pa(vcpu_to_pi_desc(vcpu)),
+			.vector = vector,
+		};
+
+		trace_kvm_pi_irte_update(host_irq, vcpu->vcpu_id, guest_irq,
+					 vcpu_info.vector, vcpu_info.pi_desc_addr, true);
+
+		return irq_set_vcpu_affinity(host_irq, &vcpu_info);
+	} else {
+		return irq_set_vcpu_affinity(host_irq, NULL);
+	}
 }
diff --git a/arch/x86/kvm/vmx/posted_intr.h b/arch/x86/kvm/vmx/posted_intr.h
index a586d6aaf862..ee3e19e976ac 100644
--- a/arch/x86/kvm/vmx/posted_intr.h
+++ b/arch/x86/kvm/vmx/posted_intr.h
@@ -15,7 +15,8 @@ void __init pi_init_cpu(int cpu);
 bool pi_has_pending_interrupt(struct kvm_vcpu *vcpu);
 int vmx_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm,
 		       unsigned int host_irq, uint32_t guest_irq,
-		       struct kvm_kernel_irq_routing_entry *new);
+		       struct kvm_kernel_irq_routing_entry *new,
+		       struct kvm_vcpu *vcpu, u32 vector);
 void vmx_pi_start_assignment(struct kvm *kvm);
 
 static inline int pi_find_highest_vector(struct pi_desc *pi_desc)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index b8b259847d05..0ab818bba743 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -13567,6 +13567,43 @@ bool kvm_arch_has_irq_bypass(void)
 }
 EXPORT_SYMBOL_GPL(kvm_arch_has_irq_bypass);
 
+static int kvm_pi_update_irte(struct kvm_kernel_irqfd *irqfd,
+			      struct kvm_kernel_irq_routing_entry *old,
+			      struct kvm_kernel_irq_routing_entry *new)
+{
+	struct kvm *kvm = irqfd->kvm;
+	struct kvm_vcpu *vcpu = NULL;
+	struct kvm_lapic_irq irq;
+
+	if (!irqchip_in_kernel(kvm) ||
+	    !kvm_arch_has_irq_bypass() ||
+	    !kvm_arch_has_assigned_device(kvm))
+		return 0;
+
+	if (new && new->type == KVM_IRQ_ROUTING_MSI) {
+		kvm_set_msi_irq(kvm, new, &irq);
+
+		/*
+		 * Force remapped mode if hardware doesn't support posting the
+		 * virtual interrupt to a vCPU.  Only IRQs are postable (NMIs,
+		 * SMIs, etc. are not), and neither AMD nor Intel IOMMUs support
+		 * posting multicast/broadcast IRQs.  If the interrupt can't be
+		 * posted, the device MSI needs to be routed to the host so that
+		 * the guest's desired interrupt can be synthesized by KVM.
+		 *
+		 * This means that KVM can only post lowest-priority interrupts
+		 * if they have a single CPU as the destination, e.g. only if
+		 * the guest has affined the interrupt to a single vCPU.
+		 */
+		if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu) ||
+		    !kvm_irq_is_postable(&irq))
+			vcpu = NULL;
+	}
+
+	return kvm_x86_call(pi_update_irte)(irqfd, irqfd->kvm, irqfd->producer->irq,
+					    irqfd->gsi, new, vcpu, irq.vector);
+}
+
 int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
 				      struct irq_bypass_producer *prod)
 {
@@ -13581,8 +13618,7 @@ int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
 	irqfd->producer = prod;
 
 	if (irqfd->irq_entry.type == KVM_IRQ_ROUTING_MSI) {
-		ret = kvm_x86_call(pi_update_irte)(irqfd, irqfd->kvm, prod->irq,
-						   irqfd->gsi, &irqfd->irq_entry);
+		ret = kvm_pi_update_irte(irqfd, NULL, &irqfd->irq_entry);
 		if (ret)
 			kvm_arch_end_assignment(irqfd->kvm);
 	}
@@ -13610,8 +13646,7 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
 	spin_lock_irq(&kvm->irqfds.lock);
 
 	if (irqfd->irq_entry.type == KVM_IRQ_ROUTING_MSI) {
-		ret = kvm_x86_call(pi_update_irte)(irqfd, irqfd->kvm, prod->irq,
-						   irqfd->gsi, NULL);
+		ret = kvm_pi_update_irte(irqfd, &irqfd->irq_entry, NULL);
 		if (ret)
 			pr_info("irq bypass consumer (token %p) unregistration fails: %d\n",
 				irqfd->consumer.token, ret);
@@ -13628,8 +13663,7 @@ int kvm_arch_update_irqfd_routing(struct kvm_kernel_irqfd *irqfd,
 				  struct kvm_kernel_irq_routing_entry *old,
 				  struct kvm_kernel_irq_routing_entry *new)
 {
-	return kvm_x86_call(pi_update_irte)(irqfd, irqfd->kvm, irqfd->producer->irq,
-					    irqfd->gsi, new);
+	return kvm_pi_update_irte(irqfd, old, new);
 }
 
 bool kvm_arch_irqfd_route_changed(struct kvm_kernel_irq_routing_entry *old,
-- 
2.49.0.504.g3bcea36a83-goog

Re: [PATCH 33/67] KVM: x86: Dedup AVIC vs. PI code for identifying target vCPU

Posted by Sairaj Kodilkar 9 months, 3 weeks ago

On 4/5/2025 1:08 AM, Sean Christopherson wrote:
> Hoist the logic for identifying the target vCPU for a posted interrupt
> into common x86.  The code is functionally identical between Intel and
> AMD.
> 
> Signed-off-by: Sean Christopherson <seanjc@google.com>
> ---
>   arch/x86/include/asm/kvm_host.h |  3 +-
>   arch/x86/kvm/svm/avic.c         | 83 ++++++++-------------------------
>   arch/x86/kvm/svm/svm.h          |  3 +-
>   arch/x86/kvm/vmx/posted_intr.c  | 56 ++++++----------------
>   arch/x86/kvm/vmx/posted_intr.h  |  3 +-
>   arch/x86/kvm/x86.c              | 46 +++++++++++++++---
>   6 files changed, 81 insertions(+), 113 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 85f45fc5156d..cb98d8d3c6c2 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1838,7 +1838,8 @@ struct kvm_x86_ops {
>   
>   	int (*pi_update_irte)(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm,
>   			      unsigned int host_irq, uint32_t guest_irq,
> -			      struct kvm_kernel_irq_routing_entry *new);
> +			      struct kvm_kernel_irq_routing_entry *new,
> +			      struct kvm_vcpu *vcpu, u32 vector);
>   	void (*pi_start_assignment)(struct kvm *kvm);
>   	void (*apicv_pre_state_restore)(struct kvm_vcpu *vcpu);
>   	void (*apicv_post_state_restore)(struct kvm_vcpu *vcpu);
> diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
> index ea6eae72b941..666f518340a7 100644
> --- a/arch/x86/kvm/svm/avic.c
> +++ b/arch/x86/kvm/svm/avic.c
> @@ -812,52 +812,13 @@ static int svm_ir_list_add(struct vcpu_svm *svm,
>   	return 0;
>   }
>   
> -/*
> - * Note:
> - * The HW cannot support posting multicast/broadcast
> - * interrupts to a vCPU. So, we still use legacy interrupt
> - * remapping for these kind of interrupts.
> - *
> - * For lowest-priority interrupts, we only support
> - * those with single CPU as the destination, e.g. user
> - * configures the interrupts via /proc/irq or uses
> - * irqbalance to make the interrupts single-CPU.
> - */
> -static int
> -get_pi_vcpu_info(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e,
> -		 struct vcpu_data *vcpu_info, struct kvm_vcpu **vcpu)
> -{
> -	struct kvm_lapic_irq irq;
> -	*vcpu = NULL;
> -
> -	kvm_set_msi_irq(kvm, e, &irq);
> -
> -	if (!kvm_intr_is_single_vcpu(kvm, &irq, vcpu) ||
> -	    !kvm_irq_is_postable(&irq)) {
> -		pr_debug("SVM: %s: use legacy intr remap mode for irq %u\n",
> -			 __func__, irq.vector);
> -		return -1;
> -	}
> -
> -	pr_debug("SVM: %s: use GA mode for irq %u\n", __func__,
> -		 irq.vector);
> -	vcpu_info->vector = irq.vector;
> -
> -	return 0;
> -}
> -
>   int avic_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm,
>   			unsigned int host_irq, uint32_t guest_irq,
> -			struct kvm_kernel_irq_routing_entry *new)
> +			struct kvm_kernel_irq_routing_entry *new,
> +			struct kvm_vcpu *vcpu, u32 vector)
>   {
> -	bool enable_remapped_mode = true;
> -	struct vcpu_data vcpu_info;
> -	struct kvm_vcpu *vcpu = NULL;
>   	int ret = 0;
>   
> -	if (!kvm_arch_has_assigned_device(kvm) || !kvm_arch_has_irq_bypass())
> -		return 0;
> -
>   	/*
>   	 * If the IRQ was affined to a different vCPU, remove the IRTE metadata
>   	 * from the *previous* vCPU's list.
> @@ -865,7 +826,7 @@ int avic_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm,
>   	svm_ir_list_del(irqfd);
>   
>   	pr_debug("SVM: %s: host_irq=%#x, guest_irq=%#x, set=%#x\n",
> -		 __func__, host_irq, guest_irq, !!new);
> +		 __func__, host_irq, guest_irq, !!vcpu);
>   
>   	/**
>   	 * Here, we setup with legacy mode in the following cases:
> @@ -874,23 +835,23 @@ int avic_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm,
>   	 * 3. APIC virtualization is disabled for the vcpu.
>   	 * 4. IRQ has incompatible delivery mode (SMI, INIT, etc)
>   	 */
> -	if (new && new && new->type == KVM_IRQ_ROUTING_MSI &&
> -	    !get_pi_vcpu_info(kvm, new, &vcpu_info, &vcpu) &&
> -	    kvm_vcpu_apicv_active(vcpu)) {
> -		struct amd_iommu_pi_data pi;
> -
> -		enable_remapped_mode = false;
> -
> -		vcpu_info.pi_desc_addr = avic_get_backing_page_address(to_svm(vcpu));
> -
> +	if (vcpu && kvm_vcpu_apicv_active(vcpu)) {
>   		/*
>   		 * Try to enable guest_mode in IRTE.  Note, the address
>   		 * of the vCPU's AVIC backing page is passed to the
>   		 * IOMMU via vcpu_info->pi_desc_addr.
>   		 */
> -		pi.ga_tag = AVIC_GATAG(to_kvm_svm(kvm)->avic_vm_id, vcpu->vcpu_id);
> -		pi.is_guest_mode = true;
> -		pi.vcpu_data = &vcpu_info;
> +		struct vcpu_data vcpu_info = {
> +			.pi_desc_addr = avic_get_backing_page_address(to_svm(vcpu)),
> +			.vector = vector,
> +		};
> +
> +		struct amd_iommu_pi_data pi = {
> +			.ga_tag = AVIC_GATAG(to_kvm_svm(kvm)->avic_vm_id, vcpu->vcpu_id),
> +			.is_guest_mode = true,
> +			.vcpu_data = &vcpu_info,
> +		};
> +
>   		ret = irq_set_vcpu_affinity(host_irq, &pi);
>   
>   		/**
> @@ -902,12 +863,11 @@ int avic_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm,
>   		 */
>   		if (!ret)
>   			ret = svm_ir_list_add(to_svm(vcpu), irqfd, &pi);
> -	}
>   
> -	if (!ret && vcpu) {
> -		trace_kvm_pi_irte_update(host_irq, vcpu->vcpu_id,
> -					 guest_irq, vcpu_info.vector,
> -					 vcpu_info.pi_desc_addr, !!new);
> +		trace_kvm_pi_irte_update(host_irq, vcpu->vcpu_id, guest_irq,
> +					 vector, vcpu_info.pi_desc_addr, true);
> +	} else {
> +		ret = irq_set_vcpu_affinity(host_irq, NULL);
>   	}
>   
>   	if (ret < 0) {
> @@ -915,10 +875,7 @@ int avic_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm,
>   		goto out;
>   	}
>   
> -	if (enable_remapped_mode)
> -		ret = irq_set_vcpu_affinity(host_irq, NULL);
> -	else
> -		ret = 0;
> +	ret = 0;
>   out:
>   	return ret;
>   }
> diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
> index 6ad0aa86f78d..5ce240085ee0 100644
> --- a/arch/x86/kvm/svm/svm.h
> +++ b/arch/x86/kvm/svm/svm.h
> @@ -741,7 +741,8 @@ void avic_apicv_post_state_restore(struct kvm_vcpu *vcpu);
>   void avic_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu);
>   int avic_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm,
>   			unsigned int host_irq, uint32_t guest_irq,
> -			struct kvm_kernel_irq_routing_entry *new);
> +			struct kvm_kernel_irq_routing_entry *new,
> +			struct kvm_vcpu *vcpu, u32 vector);
>   void avic_vcpu_blocking(struct kvm_vcpu *vcpu);
>   void avic_vcpu_unblocking(struct kvm_vcpu *vcpu);
>   void avic_ring_doorbell(struct kvm_vcpu *vcpu);
> diff --git a/arch/x86/kvm/vmx/posted_intr.c b/arch/x86/kvm/vmx/posted_intr.c
> index 786912cee3f8..fd5f6a125614 100644
> --- a/arch/x86/kvm/vmx/posted_intr.c
> +++ b/arch/x86/kvm/vmx/posted_intr.c
> @@ -266,46 +266,20 @@ void vmx_pi_start_assignment(struct kvm *kvm)
>   
>   int vmx_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm,
>   		       unsigned int host_irq, uint32_t guest_irq,
> -		       struct kvm_kernel_irq_routing_entry *new)
> +		       struct kvm_kernel_irq_routing_entry *new,
> +		       struct kvm_vcpu *vcpu, u32 vector)
>   {
> -	struct kvm_lapic_irq irq;
> -	struct kvm_vcpu *vcpu;
> -	struct vcpu_data vcpu_info;
> -
> -	if (!vmx_can_use_vtd_pi(kvm))
> -		return 0;
> -
> -	/*
> -	 * VT-d PI cannot support posting multicast/broadcast
> -	 * interrupts to a vCPU, we still use interrupt remapping
> -	 * for these kind of interrupts.
> -	 *
> -	 * For lowest-priority interrupts, we only support
> -	 * those with single CPU as the destination, e.g. user
> -	 * configures the interrupts via /proc/irq or uses
> -	 * irqbalance to make the interrupts single-CPU.
> -	 *
> -	 * We will support full lowest-priority interrupt later.
> -	 *
> -	 * In addition, we can only inject generic interrupts using
> -	 * the PI mechanism, refuse to route others through it.
> -	 */
> -	if (!new || new->type != KVM_IRQ_ROUTING_MSI)
> -		goto do_remapping;
> -
> -	kvm_set_msi_irq(kvm, new, &irq);
> -
> -	if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu) ||
> -	    !kvm_irq_is_postable(&irq))
> -		goto do_remapping;
> -
> -	vcpu_info.pi_desc_addr = __pa(vcpu_to_pi_desc(vcpu));
> -	vcpu_info.vector = irq.vector;
> -
> -	trace_kvm_pi_irte_update(host_irq, vcpu->vcpu_id, guest_irq,
> -				 vcpu_info.vector, vcpu_info.pi_desc_addr, true);
> -
> -	return irq_set_vcpu_affinity(host_irq, &vcpu_info);
> -do_remapping:
> -	return irq_set_vcpu_affinity(host_irq, NULL);
> +	if (vcpu) {
> +		struct vcpu_data vcpu_info = {
> +			.pi_desc_addr = __pa(vcpu_to_pi_desc(vcpu)),
> +			.vector = vector,
> +		};
> +
> +		trace_kvm_pi_irte_update(host_irq, vcpu->vcpu_id, guest_irq,
> +					 vcpu_info.vector, vcpu_info.pi_desc_addr, true);
> +
> +		return irq_set_vcpu_affinity(host_irq, &vcpu_info);
> +	} else {
> +		return irq_set_vcpu_affinity(host_irq, NULL);
> +	}
>   }
> diff --git a/arch/x86/kvm/vmx/posted_intr.h b/arch/x86/kvm/vmx/posted_intr.h
> index a586d6aaf862..ee3e19e976ac 100644
> --- a/arch/x86/kvm/vmx/posted_intr.h
> +++ b/arch/x86/kvm/vmx/posted_intr.h
> @@ -15,7 +15,8 @@ void __init pi_init_cpu(int cpu);
>   bool pi_has_pending_interrupt(struct kvm_vcpu *vcpu);
>   int vmx_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm,
>   		       unsigned int host_irq, uint32_t guest_irq,
> -		       struct kvm_kernel_irq_routing_entry *new);
> +		       struct kvm_kernel_irq_routing_entry *new,
> +		       struct kvm_vcpu *vcpu, u32 vector);
>   void vmx_pi_start_assignment(struct kvm *kvm);
>   
>   static inline int pi_find_highest_vector(struct pi_desc *pi_desc)
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index b8b259847d05..0ab818bba743 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -13567,6 +13567,43 @@ bool kvm_arch_has_irq_bypass(void)
>   }
>   EXPORT_SYMBOL_GPL(kvm_arch_has_irq_bypass);
>   
> +static int kvm_pi_update_irte(struct kvm_kernel_irqfd *irqfd,
> +			      struct kvm_kernel_irq_routing_entry *old,

The argument 'old' is never used in this function, so it is redundant.

Regards
Sairaj Kodilkar

> +			      struct kvm_kernel_irq_routing_entry *new)
> +{
> +	struct kvm *kvm = irqfd->kvm;
> +	struct kvm_vcpu *vcpu = NULL;
> +	struct kvm_lapic_irq irq;
> +
> +	if (!irqchip_in_kernel(kvm) ||
> +	    !kvm_arch_has_irq_bypass() ||
> +	    !kvm_arch_has_assigned_device(kvm))
> +		return 0;
> +
> +	if (new && new->type == KVM_IRQ_ROUTING_MSI) {
> +		kvm_set_msi_irq(kvm, new, &irq);
> +
> +		/*
> +		 * Force remapped mode if hardware doesn't support posting the
> +		 * virtual interrupt to a vCPU.  Only IRQs are postable (NMIs,
> +		 * SMIs, etc. are not), and neither AMD nor Intel IOMMUs support
> +		 * posting multicast/broadcast IRQs.  If the interrupt can't be
> +		 * posted, the device MSI needs to be routed to the host so that
> +		 * the guest's desired interrupt can be synthesized by KVM.
> +		 *
> +		 * This means that KVM can only post lowest-priority interrupts
> +		 * if they have a single CPU as the destination, e.g. only if
> +		 * the guest has affined the interrupt to a single vCPU.
> +		 */
> +		if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu) ||
> +		    !kvm_irq_is_postable(&irq))
> +			vcpu = NULL;
> +	}
> +
> +	return kvm_x86_call(pi_update_irte)(irqfd, irqfd->kvm, irqfd->producer->irq,
> +					    irqfd->gsi, new, vcpu, irq.vector);
> +}
> +
>   int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
>   				      struct irq_bypass_producer *prod)
>   {
> @@ -13581,8 +13618,7 @@ int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
>   	irqfd->producer = prod;
>   
>   	if (irqfd->irq_entry.type == KVM_IRQ_ROUTING_MSI) {
> -		ret = kvm_x86_call(pi_update_irte)(irqfd, irqfd->kvm, prod->irq,
> -						   irqfd->gsi, &irqfd->irq_entry);
> +		ret = kvm_pi_update_irte(irqfd, NULL, &irqfd->irq_entry);
>   		if (ret)
>   			kvm_arch_end_assignment(irqfd->kvm);
>   	}
> @@ -13610,8 +13646,7 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
>   	spin_lock_irq(&kvm->irqfds.lock);
>   
>   	if (irqfd->irq_entry.type == KVM_IRQ_ROUTING_MSI) {
> -		ret = kvm_x86_call(pi_update_irte)(irqfd, irqfd->kvm, prod->irq,
> -						   irqfd->gsi, NULL);
> +		ret = kvm_pi_update_irte(irqfd, &irqfd->irq_entry, NULL);
>   		if (ret)
>   			pr_info("irq bypass consumer (token %p) unregistration fails: %d\n",
>   				irqfd->consumer.token, ret);
> @@ -13628,8 +13663,7 @@ int kvm_arch_update_irqfd_routing(struct kvm_kernel_irqfd *irqfd,
>   				  struct kvm_kernel_irq_routing_entry *old,
>   				  struct kvm_kernel_irq_routing_entry *new)
>   {
> -	return kvm_x86_call(pi_update_irte)(irqfd, irqfd->kvm, irqfd->producer->irq,
> -					    irqfd->gsi, new);
> +	return kvm_pi_update_irte(irqfd, old, new);
>   }
>   
>   bool kvm_arch_irqfd_route_changed(struct kvm_kernel_irq_routing_entry *old,

Re: [PATCH 33/67] KVM: x86: Dedup AVIC vs. PI code for identifying target vCPU

Posted by Sean Christopherson 9 months, 2 weeks ago

On Thu, Apr 24, 2025, Sairaj Kodilkar wrote:
> On 4/5/2025 1:08 AM, Sean Christopherson wrote:
> > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > index b8b259847d05..0ab818bba743 100644
> > --- a/arch/x86/kvm/x86.c
> > +++ b/arch/x86/kvm/x86.c
> > @@ -13567,6 +13567,43 @@ bool kvm_arch_has_irq_bypass(void)
> >   }
> >   EXPORT_SYMBOL_GPL(kvm_arch_has_irq_bypass);
> > +static int kvm_pi_update_irte(struct kvm_kernel_irqfd *irqfd,
> > +			      struct kvm_kernel_irq_routing_entry *old,
> 
> the argument 'old' is redundant in this function.

Ooh, and the @new parameter of kvm_x86_ops.pi_update_irte() is also unused.  I'll
get rid of them both.  I went through multiple iterations of hacking to figure
out how to dedup the code, and (obviously) missed a few things when tidying up
after the fact.

Good eyes, and thanks again for the reviews!

P.S. Please trim your replies.

Re: [PATCH 33/67] KVM: x86: Dedup AVIC vs. PI code for identifying target vCPU

Posted by Paolo Bonzini 10 months ago

On 4/4/25 21:38, Sean Christopherson wrote:
> Hoist the logic for identifying the target vCPU for a posted interrupt
> into common x86.  The code is functionally identical between Intel and
> AMD.
> 
> Signed-off-by: Sean Christopherson <seanjc@google.com>
> ---
>   arch/x86/include/asm/kvm_host.h |  3 +-
>   arch/x86/kvm/svm/avic.c         | 83 ++++++++-------------------------
>   arch/x86/kvm/svm/svm.h          |  3 +-
>   arch/x86/kvm/vmx/posted_intr.c  | 56 ++++++----------------
>   arch/x86/kvm/vmx/posted_intr.h  |  3 +-
>   arch/x86/kvm/x86.c              | 46 +++++++++++++++---

Please put the new helper in irq.c rather than x86.c, since (for once) there
is a file other than x86.c that can be used.

Bonus points for merging irq_comm.c into irq.c (IIRC irq_comm.c was 
"common" between ia64 and x86 :)).

Paolo

>   6 files changed, 81 insertions(+), 113 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 85f45fc5156d..cb98d8d3c6c2 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1838,7 +1838,8 @@ struct kvm_x86_ops {
>   
>   	int (*pi_update_irte)(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm,
>   			      unsigned int host_irq, uint32_t guest_irq,
> -			      struct kvm_kernel_irq_routing_entry *new);
> +			      struct kvm_kernel_irq_routing_entry *new,
> +			      struct kvm_vcpu *vcpu, u32 vector);
>   	void (*pi_start_assignment)(struct kvm *kvm);
>   	void (*apicv_pre_state_restore)(struct kvm_vcpu *vcpu);
>   	void (*apicv_post_state_restore)(struct kvm_vcpu *vcpu);
> diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
> index ea6eae72b941..666f518340a7 100644
> --- a/arch/x86/kvm/svm/avic.c
> +++ b/arch/x86/kvm/svm/avic.c
> @@ -812,52 +812,13 @@ static int svm_ir_list_add(struct vcpu_svm *svm,
>   	return 0;
>   }
>   
> -/*
> - * Note:
> - * The HW cannot support posting multicast/broadcast
> - * interrupts to a vCPU. So, we still use legacy interrupt
> - * remapping for these kind of interrupts.
> - *
> - * For lowest-priority interrupts, we only support
> - * those with single CPU as the destination, e.g. user
> - * configures the interrupts via /proc/irq or uses
> - * irqbalance to make the interrupts single-CPU.
> - */
> -static int
> -get_pi_vcpu_info(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e,
> -		 struct vcpu_data *vcpu_info, struct kvm_vcpu **vcpu)
> -{
> -	struct kvm_lapic_irq irq;
> -	*vcpu = NULL;
> -
> -	kvm_set_msi_irq(kvm, e, &irq);
> -
> -	if (!kvm_intr_is_single_vcpu(kvm, &irq, vcpu) ||
> -	    !kvm_irq_is_postable(&irq)) {
> -		pr_debug("SVM: %s: use legacy intr remap mode for irq %u\n",
> -			 __func__, irq.vector);
> -		return -1;
> -	}
> -
> -	pr_debug("SVM: %s: use GA mode for irq %u\n", __func__,
> -		 irq.vector);
> -	vcpu_info->vector = irq.vector;
> -
> -	return 0;
> -}
> -
>   int avic_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm,
>   			unsigned int host_irq, uint32_t guest_irq,
> -			struct kvm_kernel_irq_routing_entry *new)
> +			struct kvm_kernel_irq_routing_entry *new,
> +			struct kvm_vcpu *vcpu, u32 vector)
>   {
> -	bool enable_remapped_mode = true;
> -	struct vcpu_data vcpu_info;
> -	struct kvm_vcpu *vcpu = NULL;
>   	int ret = 0;
>   
> -	if (!kvm_arch_has_assigned_device(kvm) || !kvm_arch_has_irq_bypass())
> -		return 0;
> -
>   	/*
>   	 * If the IRQ was affined to a different vCPU, remove the IRTE metadata
>   	 * from the *previous* vCPU's list.
> @@ -865,7 +826,7 @@ int avic_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm,
>   	svm_ir_list_del(irqfd);
>   
>   	pr_debug("SVM: %s: host_irq=%#x, guest_irq=%#x, set=%#x\n",
> -		 __func__, host_irq, guest_irq, !!new);
> +		 __func__, host_irq, guest_irq, !!vcpu);
>   
>   	/**
>   	 * Here, we setup with legacy mode in the following cases:
> @@ -874,23 +835,23 @@ int avic_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm,
>   	 * 3. APIC virtualization is disabled for the vcpu.
>   	 * 4. IRQ has incompatible delivery mode (SMI, INIT, etc)
>   	 */
> -	if (new && new && new->type == KVM_IRQ_ROUTING_MSI &&
> -	    !get_pi_vcpu_info(kvm, new, &vcpu_info, &vcpu) &&
> -	    kvm_vcpu_apicv_active(vcpu)) {
> -		struct amd_iommu_pi_data pi;
> -
> -		enable_remapped_mode = false;
> -
> -		vcpu_info.pi_desc_addr = avic_get_backing_page_address(to_svm(vcpu));
> -
> +	if (vcpu && kvm_vcpu_apicv_active(vcpu)) {
>   		/*
>   		 * Try to enable guest_mode in IRTE.  Note, the address
>   		 * of the vCPU's AVIC backing page is passed to the
>   		 * IOMMU via vcpu_info->pi_desc_addr.
>   		 */
> -		pi.ga_tag = AVIC_GATAG(to_kvm_svm(kvm)->avic_vm_id, vcpu->vcpu_id);
> -		pi.is_guest_mode = true;
> -		pi.vcpu_data = &vcpu_info;
> +		struct vcpu_data vcpu_info = {
> +			.pi_desc_addr = avic_get_backing_page_address(to_svm(vcpu)),
> +			.vector = vector,
> +		};
> +
> +		struct amd_iommu_pi_data pi = {
> +			.ga_tag = AVIC_GATAG(to_kvm_svm(kvm)->avic_vm_id, vcpu->vcpu_id),
> +			.is_guest_mode = true,
> +			.vcpu_data = &vcpu_info,
> +		};
> +
>   		ret = irq_set_vcpu_affinity(host_irq, &pi);
>   
>   		/**
> @@ -902,12 +863,11 @@ int avic_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm,
>   		 */
>   		if (!ret)
>   			ret = svm_ir_list_add(to_svm(vcpu), irqfd, &pi);
> -	}
>   
> -	if (!ret && vcpu) {
> -		trace_kvm_pi_irte_update(host_irq, vcpu->vcpu_id,
> -					 guest_irq, vcpu_info.vector,
> -					 vcpu_info.pi_desc_addr, !!new);
> +		trace_kvm_pi_irte_update(host_irq, vcpu->vcpu_id, guest_irq,
> +					 vector, vcpu_info.pi_desc_addr, true);
> +	} else {
> +		ret = irq_set_vcpu_affinity(host_irq, NULL);
>   	}
>   
>   	if (ret < 0) {
> @@ -915,10 +875,7 @@ int avic_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm,
>   		goto out;
>   	}
>   
> -	if (enable_remapped_mode)
> -		ret = irq_set_vcpu_affinity(host_irq, NULL);
> -	else
> -		ret = 0;
> +	ret = 0;
>   out:
>   	return ret;
>   }
> diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
> index 6ad0aa86f78d..5ce240085ee0 100644
> --- a/arch/x86/kvm/svm/svm.h
> +++ b/arch/x86/kvm/svm/svm.h
> @@ -741,7 +741,8 @@ void avic_apicv_post_state_restore(struct kvm_vcpu *vcpu);
>   void avic_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu);
>   int avic_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm,
>   			unsigned int host_irq, uint32_t guest_irq,
> -			struct kvm_kernel_irq_routing_entry *new);
> +			struct kvm_kernel_irq_routing_entry *new,
> +			struct kvm_vcpu *vcpu, u32 vector);
>   void avic_vcpu_blocking(struct kvm_vcpu *vcpu);
>   void avic_vcpu_unblocking(struct kvm_vcpu *vcpu);
>   void avic_ring_doorbell(struct kvm_vcpu *vcpu);
> diff --git a/arch/x86/kvm/vmx/posted_intr.c b/arch/x86/kvm/vmx/posted_intr.c
> index 786912cee3f8..fd5f6a125614 100644
> --- a/arch/x86/kvm/vmx/posted_intr.c
> +++ b/arch/x86/kvm/vmx/posted_intr.c
> @@ -266,46 +266,20 @@ void vmx_pi_start_assignment(struct kvm *kvm)
>   
>   int vmx_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm,
>   		       unsigned int host_irq, uint32_t guest_irq,
> -		       struct kvm_kernel_irq_routing_entry *new)
> +		       struct kvm_kernel_irq_routing_entry *new,
> +		       struct kvm_vcpu *vcpu, u32 vector)
>   {
> -	struct kvm_lapic_irq irq;
> -	struct kvm_vcpu *vcpu;
> -	struct vcpu_data vcpu_info;
> -
> -	if (!vmx_can_use_vtd_pi(kvm))
> -		return 0;
> -
> -	/*
> -	 * VT-d PI cannot support posting multicast/broadcast
> -	 * interrupts to a vCPU, we still use interrupt remapping
> -	 * for these kind of interrupts.
> -	 *
> -	 * For lowest-priority interrupts, we only support
> -	 * those with single CPU as the destination, e.g. user
> -	 * configures the interrupts via /proc/irq or uses
> -	 * irqbalance to make the interrupts single-CPU.
> -	 *
> -	 * We will support full lowest-priority interrupt later.
> -	 *
> -	 * In addition, we can only inject generic interrupts using
> -	 * the PI mechanism, refuse to route others through it.
> -	 */
> -	if (!new || new->type != KVM_IRQ_ROUTING_MSI)
> -		goto do_remapping;
> -
> -	kvm_set_msi_irq(kvm, new, &irq);
> -
> -	if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu) ||
> -	    !kvm_irq_is_postable(&irq))
> -		goto do_remapping;
> -
> -	vcpu_info.pi_desc_addr = __pa(vcpu_to_pi_desc(vcpu));
> -	vcpu_info.vector = irq.vector;
> -
> -	trace_kvm_pi_irte_update(host_irq, vcpu->vcpu_id, guest_irq,
> -				 vcpu_info.vector, vcpu_info.pi_desc_addr, true);
> -
> -	return irq_set_vcpu_affinity(host_irq, &vcpu_info);
> -do_remapping:
> -	return irq_set_vcpu_affinity(host_irq, NULL);
> +	if (vcpu) {
> +		struct vcpu_data vcpu_info = {
> +			.pi_desc_addr = __pa(vcpu_to_pi_desc(vcpu)),
> +			.vector = vector,
> +		};
> +
> +		trace_kvm_pi_irte_update(host_irq, vcpu->vcpu_id, guest_irq,
> +					 vcpu_info.vector, vcpu_info.pi_desc_addr, true);
> +
> +		return irq_set_vcpu_affinity(host_irq, &vcpu_info);
> +	} else {
> +		return irq_set_vcpu_affinity(host_irq, NULL);
> +	}
>   }
> diff --git a/arch/x86/kvm/vmx/posted_intr.h b/arch/x86/kvm/vmx/posted_intr.h
> index a586d6aaf862..ee3e19e976ac 100644
> --- a/arch/x86/kvm/vmx/posted_intr.h
> +++ b/arch/x86/kvm/vmx/posted_intr.h
> @@ -15,7 +15,8 @@ void __init pi_init_cpu(int cpu);
>   bool pi_has_pending_interrupt(struct kvm_vcpu *vcpu);
>   int vmx_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm,
>   		       unsigned int host_irq, uint32_t guest_irq,
> -		       struct kvm_kernel_irq_routing_entry *new);
> +		       struct kvm_kernel_irq_routing_entry *new,
> +		       struct kvm_vcpu *vcpu, u32 vector);
>   void vmx_pi_start_assignment(struct kvm *kvm);
>   
>   static inline int pi_find_highest_vector(struct pi_desc *pi_desc)
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index b8b259847d05..0ab818bba743 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -13567,6 +13567,43 @@ bool kvm_arch_has_irq_bypass(void)
>   }
>   EXPORT_SYMBOL_GPL(kvm_arch_has_irq_bypass);
>   
> +static int kvm_pi_update_irte(struct kvm_kernel_irqfd *irqfd,
> +			      struct kvm_kernel_irq_routing_entry *old,
> +			      struct kvm_kernel_irq_routing_entry *new)
> +{
> +	struct kvm *kvm = irqfd->kvm;
> +	struct kvm_vcpu *vcpu = NULL;
> +	struct kvm_lapic_irq irq;
> +
> +	if (!irqchip_in_kernel(kvm) ||
> +	    !kvm_arch_has_irq_bypass() ||
> +	    !kvm_arch_has_assigned_device(kvm))
> +		return 0;
> +
> +	if (new && new->type == KVM_IRQ_ROUTING_MSI) {
> +		kvm_set_msi_irq(kvm, new, &irq);
> +
> +		/*
> +		 * Force remapped mode if hardware doesn't support posting the
> +		 * virtual interrupt to a vCPU.  Only IRQs are postable (NMIs,
> +		 * SMIs, etc. are not), and neither AMD nor Intel IOMMUs support
> +		 * posting multicast/broadcast IRQs.  If the interrupt can't be
> +		 * posted, the device MSI needs to be routed to the host so that
> +		 * the guest's desired interrupt can be synthesized by KVM.
> +		 *
> +		 * This means that KVM can only post lowest-priority interrupts
> +		 * if they have a single CPU as the destination, e.g. only if
> +		 * the guest has affined the interrupt to a single vCPU.
> +		 */
> +		if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu) ||
> +		    !kvm_irq_is_postable(&irq))
> +			vcpu = NULL;
> +	}
> +
> +	return kvm_x86_call(pi_update_irte)(irqfd, irqfd->kvm, irqfd->producer->irq,
> +					    irqfd->gsi, new, vcpu, irq.vector);
> +}
> +
>   int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
>   				      struct irq_bypass_producer *prod)
>   {
> @@ -13581,8 +13618,7 @@ int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
>   	irqfd->producer = prod;
>   
>   	if (irqfd->irq_entry.type == KVM_IRQ_ROUTING_MSI) {
> -		ret = kvm_x86_call(pi_update_irte)(irqfd, irqfd->kvm, prod->irq,
> -						   irqfd->gsi, &irqfd->irq_entry);
> +		ret = kvm_pi_update_irte(irqfd, NULL, &irqfd->irq_entry);
>   		if (ret)
>   			kvm_arch_end_assignment(irqfd->kvm);
>   	}
> @@ -13610,8 +13646,7 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
>   	spin_lock_irq(&kvm->irqfds.lock);
>   
>   	if (irqfd->irq_entry.type == KVM_IRQ_ROUTING_MSI) {
> -		ret = kvm_x86_call(pi_update_irte)(irqfd, irqfd->kvm, prod->irq,
> -						   irqfd->gsi, NULL);
> +		ret = kvm_pi_update_irte(irqfd, &irqfd->irq_entry, NULL);
>   		if (ret)
>   			pr_info("irq bypass consumer (token %p) unregistration fails: %d\n",
>   				irqfd->consumer.token, ret);
> @@ -13628,8 +13663,7 @@ int kvm_arch_update_irqfd_routing(struct kvm_kernel_irqfd *irqfd,
>   				  struct kvm_kernel_irq_routing_entry *old,
>   				  struct kvm_kernel_irq_routing_entry *new)
>   {
> -	return kvm_x86_call(pi_update_irte)(irqfd, irqfd->kvm, irqfd->producer->irq,
> -					    irqfd->gsi, new);
> +	return kvm_pi_update_irte(irqfd, old, new);
>   }
>   
>   bool kvm_arch_irqfd_route_changed(struct kvm_kernel_irq_routing_entry *old,

Re: [PATCH 33/67] KVM: x86: Dedup AVIC vs. PI code for identifying target vCPU

Posted by Sean Christopherson 10 months ago

On Tue, Apr 08, 2025, Paolo Bonzini wrote:
> On 4/4/25 21:38, Sean Christopherson wrote:
> > Hoist the logic for identifying the target vCPU for a posted interrupt
> > into common x86.  The code is functionally identical between Intel and
> > AMD.
> > 
> > Signed-off-by: Sean Christopherson <seanjc@google.com>
> > ---
> >   arch/x86/include/asm/kvm_host.h |  3 +-
> >   arch/x86/kvm/svm/avic.c         | 83 ++++++++-------------------------
> >   arch/x86/kvm/svm/svm.h          |  3 +-
> >   arch/x86/kvm/vmx/posted_intr.c  | 56 ++++++----------------
> >   arch/x86/kvm/vmx/posted_intr.h  |  3 +-
> >   arch/x86/kvm/x86.c              | 46 +++++++++++++++---
> 
> Please use irq.c, since (for once) there is a file other than x86.c that can
> be used.

Hah, will do.  I honestly forget, on a regular basis, that irq.c and
irq_comm.c exist.

> Bonus points for merging irq_comm.c into irq.c (IIRC irq_comm.c was "common"
> between ia64 and x86 :)).

With pleasure :-)