[PATCH 30/67] KVM: VMX: Stop walking list of routing table entries when updating IRTE

Sean Christopherson posted 67 patches 8 months, 2 weeks ago
There is a newer version of this series
[PATCH 30/67] KVM: VMX: Stop walking list of routing table entries when updating IRTE
Posted by Sean Christopherson 8 months, 2 weeks ago
Now that KVM provides the to-be-updated routing entry, stop walking the
routing table to find that entry.  KVM, via setup_routing_entry() and
sanity checked by kvm_get_msi_route(), disallows having a GSI configured
to trigger multiple MSIs, i.e. the for-loop can only process one entry.

Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 arch/x86/kvm/vmx/posted_intr.c | 100 +++++++++++----------------------
 1 file changed, 33 insertions(+), 67 deletions(-)

diff --git a/arch/x86/kvm/vmx/posted_intr.c b/arch/x86/kvm/vmx/posted_intr.c
index 00818ca30ee0..786912cee3f8 100644
--- a/arch/x86/kvm/vmx/posted_intr.c
+++ b/arch/x86/kvm/vmx/posted_intr.c
@@ -268,78 +268,44 @@ int vmx_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm,
 		       unsigned int host_irq, uint32_t guest_irq,
 		       struct kvm_kernel_irq_routing_entry *new)
 {
-	struct kvm_kernel_irq_routing_entry *e;
-	struct kvm_irq_routing_table *irq_rt;
-	bool enable_remapped_mode = true;
 	struct kvm_lapic_irq irq;
 	struct kvm_vcpu *vcpu;
 	struct vcpu_data vcpu_info;
-	bool set = !!new;
-	int idx, ret = 0;
 
 	if (!vmx_can_use_vtd_pi(kvm))
 		return 0;
 
-	idx = srcu_read_lock(&kvm->irq_srcu);
-	irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
-	if (guest_irq >= irq_rt->nr_rt_entries ||
-	    hlist_empty(&irq_rt->map[guest_irq])) {
-		pr_warn_once("no route for guest_irq %u/%u (broken user space?)\n",
-			     guest_irq, irq_rt->nr_rt_entries);
-		goto out;
-	}
-
-	hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) {
-		if (e->type != KVM_IRQ_ROUTING_MSI)
-			continue;
-
-		WARN_ON_ONCE(new && memcmp(e, new, sizeof(*new)));
-
-		/*
-		 * VT-d PI cannot support posting multicast/broadcast
-		 * interrupts to a vCPU, we still use interrupt remapping
-		 * for these kind of interrupts.
-		 *
-		 * For lowest-priority interrupts, we only support
-		 * those with single CPU as the destination, e.g. user
-		 * configures the interrupts via /proc/irq or uses
-		 * irqbalance to make the interrupts single-CPU.
-		 *
-		 * We will support full lowest-priority interrupt later.
-		 *
-		 * In addition, we can only inject generic interrupts using
-		 * the PI mechanism, refuse to route others through it.
-		 */
-
-		kvm_set_msi_irq(kvm, e, &irq);
-		if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu) ||
-		    !kvm_irq_is_postable(&irq))
-			continue;
-
-		vcpu_info.pi_desc_addr = __pa(vcpu_to_pi_desc(vcpu));
-		vcpu_info.vector = irq.vector;
-
-		trace_kvm_pi_irte_update(host_irq, vcpu->vcpu_id, e->gsi,
-				vcpu_info.vector, vcpu_info.pi_desc_addr, set);
-
-		if (!set)
-			continue;
-
-		enable_remapped_mode = false;
-
-		ret = irq_set_vcpu_affinity(host_irq, &vcpu_info);
-		if (ret < 0) {
-			printk(KERN_INFO "%s: failed to update PI IRTE\n",
-					__func__);
-			goto out;
-		}
-	}
-
-	if (enable_remapped_mode)
-		ret = irq_set_vcpu_affinity(host_irq, NULL);
-
-	ret = 0;
-out:
-	srcu_read_unlock(&kvm->irq_srcu, idx);
-	return ret;
+	/*
+	 * VT-d PI cannot support posting multicast/broadcast
+	 * interrupts to a vCPU, we still use interrupt remapping
+	 * for these kinds of interrupts.
+	 *
+	 * For lowest-priority interrupts, we only support
+	 * those with single CPU as the destination, e.g. user
+	 * configures the interrupts via /proc/irq or uses
+	 * irqbalance to make the interrupts single-CPU.
+	 *
+	 * We will support full lowest-priority interrupt later.
+	 *
+	 * In addition, we can only inject generic interrupts using
+	 * the PI mechanism, refuse to route others through it.
+	 */
+	if (!new || new->type != KVM_IRQ_ROUTING_MSI)
+		goto do_remapping;
+
+	kvm_set_msi_irq(kvm, new, &irq);
+
+	if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu) ||
+	    !kvm_irq_is_postable(&irq))
+		goto do_remapping;
+
+	vcpu_info.pi_desc_addr = __pa(vcpu_to_pi_desc(vcpu));
+	vcpu_info.vector = irq.vector;
+
+	trace_kvm_pi_irte_update(host_irq, vcpu->vcpu_id, guest_irq,
+				 vcpu_info.vector, vcpu_info.pi_desc_addr, true);
+
+	return irq_set_vcpu_affinity(host_irq, &vcpu_info);
+do_remapping:
+	return irq_set_vcpu_affinity(host_irq, NULL);
 }
-- 
2.49.0.504.g3bcea36a83-goog
Re: [PATCH 30/67] KVM: VMX: Stop walking list of routing table entries when updating IRTE
Posted by Paolo Bonzini 8 months, 1 week ago
On 4/4/25 21:38, Sean Christopherson wrote:
> Now that KVM provides the to-be-updated routing entry, stop walking the
> routing table to find that entry.  KVM, via setup_routing_entry() and
> sanity checked by kvm_get_msi_route(), disallows having a GSI configured
> to trigger multiple MSIs, i.e. the for-loop can only process one entry.
> 
> Signed-off-by: Sean Christopherson <seanjc@google.com>
> ---
>   arch/x86/kvm/vmx/posted_intr.c | 100 +++++++++++----------------------
>   1 file changed, 33 insertions(+), 67 deletions(-)
> 
> diff --git a/arch/x86/kvm/vmx/posted_intr.c b/arch/x86/kvm/vmx/posted_intr.c
> index 00818ca30ee0..786912cee3f8 100644
> --- a/arch/x86/kvm/vmx/posted_intr.c
> +++ b/arch/x86/kvm/vmx/posted_intr.c
> @@ -268,78 +268,44 @@ int vmx_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm,
>   		       unsigned int host_irq, uint32_t guest_irq,
>   		       struct kvm_kernel_irq_routing_entry *new)
>   {
> -	struct kvm_kernel_irq_routing_entry *e;
> -	struct kvm_irq_routing_table *irq_rt;
> -	bool enable_remapped_mode = true;
>   	struct kvm_lapic_irq irq;
>   	struct kvm_vcpu *vcpu;
>   	struct vcpu_data vcpu_info;
> -	bool set = !!new;
> -	int idx, ret = 0;
>   
>   	if (!vmx_can_use_vtd_pi(kvm))
>   		return 0;
>   
> -	idx = srcu_read_lock(&kvm->irq_srcu);
> -	irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
> -	if (guest_irq >= irq_rt->nr_rt_entries ||
> -	    hlist_empty(&irq_rt->map[guest_irq])) {
> -		pr_warn_once("no route for guest_irq %u/%u (broken user space?)\n",
> -			     guest_irq, irq_rt->nr_rt_entries);
> -		goto out;
> -	}
> -
> -	hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) {
> -		if (e->type != KVM_IRQ_ROUTING_MSI)
> -			continue;
> -
> -		WARN_ON_ONCE(new && memcmp(e, new, sizeof(*new)));

Alternatively, if you want to keep patches 28/29 separate, you could add 
this WARN_ON_ONCE to avic.c in the exact same place after checking 
e->type -- not so much for asserting purposes, but more to document 
what's going on for the reviewer.

Paolo
Re: [PATCH 30/67] KVM: VMX: Stop walking list of routing table entries when updating IRTE
Posted by Sean Christopherson 7 months ago
On Tue, Apr 08, 2025, Paolo Bonzini wrote:
> On 4/4/25 21:38, Sean Christopherson wrote:
> > Now that KVM provides the to-be-updated routing entry, stop walking the
> > routing table to find that entry.  KVM, via setup_routing_entry() and
> > sanity checked by kvm_get_msi_route(), disallows having a GSI configured
> > to trigger multiple MSIs, i.e. the for-loop can only process one entry.
> > 
> > Signed-off-by: Sean Christopherson <seanjc@google.com>
> > ---
> >   arch/x86/kvm/vmx/posted_intr.c | 100 +++++++++++----------------------
> >   1 file changed, 33 insertions(+), 67 deletions(-)
> > 
> > diff --git a/arch/x86/kvm/vmx/posted_intr.c b/arch/x86/kvm/vmx/posted_intr.c
> > index 00818ca30ee0..786912cee3f8 100644
> > --- a/arch/x86/kvm/vmx/posted_intr.c
> > +++ b/arch/x86/kvm/vmx/posted_intr.c
> > @@ -268,78 +268,44 @@ int vmx_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm,
> >   		       unsigned int host_irq, uint32_t guest_irq,
> >   		       struct kvm_kernel_irq_routing_entry *new)
> >   {
> > -	struct kvm_kernel_irq_routing_entry *e;
> > -	struct kvm_irq_routing_table *irq_rt;
> > -	bool enable_remapped_mode = true;
> >   	struct kvm_lapic_irq irq;
> >   	struct kvm_vcpu *vcpu;
> >   	struct vcpu_data vcpu_info;
> > -	bool set = !!new;
> > -	int idx, ret = 0;
> >   	if (!vmx_can_use_vtd_pi(kvm))
> >   		return 0;
> > -	idx = srcu_read_lock(&kvm->irq_srcu);
> > -	irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
> > -	if (guest_irq >= irq_rt->nr_rt_entries ||
> > -	    hlist_empty(&irq_rt->map[guest_irq])) {
> > -		pr_warn_once("no route for guest_irq %u/%u (broken user space?)\n",
> > -			     guest_irq, irq_rt->nr_rt_entries);
> > -		goto out;
> > -	}
> > -
> > -	hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) {
> > -		if (e->type != KVM_IRQ_ROUTING_MSI)
> > -			continue;
> > -
> > -		WARN_ON_ONCE(new && memcmp(e, new, sizeof(*new)));
> 
> Alternatively, if you want to keep patches 28/29 separate, you could add
> this WARN_ON_ONCE to avic.c in the exact same place after checking e->type
> -- not so much for asserting purposes, but more to document what's going on
> for the reviewer.

FWIW, AVIC already has the same WARN; both were added by "KVM: x86: Pass new
routing entries and irqfd when updating IRTEs".

That said, I agree that squashing 28/29 is the way to go, especially since I didn't
isolate the changes for VMX (I've no idea why I did so for SVM but not VMX).