[PATCH v2 38/59] iommu/amd: KVM: SVM: Infer IsRun from validity of pCPU destination

Sean Christopherson posted 59 patches 6 months, 3 weeks ago
There is a newer version of this series
[PATCH v2 38/59] iommu/amd: KVM: SVM: Infer IsRun from validity of pCPU destination
Posted by Sean Christopherson 6 months, 3 weeks ago
Infer whether or not a vCPU should be marked running from the validity of
the pCPU on which it is running.  amd_iommu_update_ga() already skips the
IRTE update if the pCPU is invalid, i.e. passing %true for is_run with an
invalid pCPU would be a blatant and egregrious KVM bug.

Tested-by: Sairaj Kodilkar <sarunkod@amd.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 arch/x86/kvm/svm/avic.c   | 11 +++++------
 drivers/iommu/amd/iommu.c | 14 +++++++++-----
 include/linux/amd-iommu.h |  6 ++----
 3 files changed, 16 insertions(+), 15 deletions(-)

diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index 4747fb09aca4..c79648d96752 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -832,7 +832,7 @@ int avic_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm,
 		entry = svm->avic_physical_id_entry;
 		if (entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK)
 			amd_iommu_update_ga(entry & AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK,
-					    true, pi_data.ir_data);
+					    pi_data.ir_data);
 
 		irqfd->irq_bypass_data = pi_data.ir_data;
 		list_add(&irqfd->vcpu_list, &svm->ir_list);
@@ -841,8 +841,7 @@ int avic_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm,
 	return irq_set_vcpu_affinity(host_irq, NULL);
 }
 
-static inline int
-avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
+static inline int avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu)
 {
 	int ret = 0;
 	struct vcpu_svm *svm = to_svm(vcpu);
@@ -861,7 +860,7 @@ avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
 		return 0;
 
 	list_for_each_entry(irqfd, &svm->ir_list, vcpu_list) {
-		ret = amd_iommu_update_ga(cpu, r, irqfd->irq_bypass_data);
+		ret = amd_iommu_update_ga(cpu, irqfd->irq_bypass_data);
 		if (ret)
 			return ret;
 	}
@@ -923,7 +922,7 @@ void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 
 	WRITE_ONCE(kvm_svm->avic_physical_id_table[vcpu->vcpu_id], entry);
 
-	avic_update_iommu_vcpu_affinity(vcpu, h_physical_id, true);
+	avic_update_iommu_vcpu_affinity(vcpu, h_physical_id);
 
 	spin_unlock_irqrestore(&svm->ir_list_lock, flags);
 }
@@ -963,7 +962,7 @@ void avic_vcpu_put(struct kvm_vcpu *vcpu)
 	 */
 	spin_lock_irqsave(&svm->ir_list_lock, flags);
 
-	avic_update_iommu_vcpu_affinity(vcpu, -1, 0);
+	avic_update_iommu_vcpu_affinity(vcpu, -1);
 
 	entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
 	svm->avic_physical_id_entry = entry;
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 5adc932b947e..bb804bbc916b 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -3990,15 +3990,17 @@ int amd_iommu_create_irq_domain(struct amd_iommu *iommu)
  * Update the pCPU information for an IRTE that is configured to post IRQs to
  * a vCPU, without issuing an IOMMU invalidation for the IRTE.
  *
- * This API is intended to be used when a vCPU is scheduled in/out (or stops
- * running for any reason), to do a fast update of IsRun and (conditionally)
- * Destination.
+ * If the vCPU is associated with a pCPU (@cpu >= 0), configure the Destination
+ * with the pCPU's APIC ID and set IsRun, else clear IsRun.  I.e. treat vCPUs
+ * that are associated with a pCPU as running.  This API is intended to be used
+ * when a vCPU is scheduled in/out (or stops running for any reason), to do a
+ * fast update of IsRun and (conditionally) Destination.
  *
  * Per the IOMMU spec, the Destination, IsRun, and GATag fields are not cached
  * and thus don't require an invalidation to ensure the IOMMU consumes fresh
  * information.
  */
-int amd_iommu_update_ga(int cpu, bool is_run, void *data)
+int amd_iommu_update_ga(int cpu, void *data)
 {
 	struct amd_ir_data *ir_data = (struct amd_ir_data *)data;
 	struct irte_ga *entry = (struct irte_ga *) ir_data->entry;
@@ -4015,8 +4017,10 @@ int amd_iommu_update_ga(int cpu, bool is_run, void *data)
 					APICID_TO_IRTE_DEST_LO(cpu);
 		entry->hi.fields.destination =
 					APICID_TO_IRTE_DEST_HI(cpu);
+		entry->lo.fields_vapic.is_run = true;
+	} else {
+		entry->lo.fields_vapic.is_run = false;
 	}
-	entry->lo.fields_vapic.is_run = is_run;
 
 	return __modify_irte_ga(ir_data->iommu, ir_data->irq_2_irte.devid,
 				ir_data->irq_2_irte.index, entry);
diff --git a/include/linux/amd-iommu.h b/include/linux/amd-iommu.h
index 99b4fa9a0296..fe0e16ffe0e5 100644
--- a/include/linux/amd-iommu.h
+++ b/include/linux/amd-iommu.h
@@ -30,8 +30,7 @@ static inline void amd_iommu_detect(void) { }
 /* IOMMU AVIC Function */
 extern int amd_iommu_register_ga_log_notifier(int (*notifier)(u32));
 
-extern int
-amd_iommu_update_ga(int cpu, bool is_run, void *data);
+extern int amd_iommu_update_ga(int cpu, void *data);
 
 extern int amd_iommu_activate_guest_mode(void *data);
 extern int amd_iommu_deactivate_guest_mode(void *data);
@@ -44,8 +43,7 @@ amd_iommu_register_ga_log_notifier(int (*notifier)(u32))
 	return 0;
 }
 
-static inline int
-amd_iommu_update_ga(int cpu, bool is_run, void *data)
+static inline int amd_iommu_update_ga(int cpu, void *data)
 {
 	return 0;
 }
-- 
2.49.0.1151.ga128411c76-goog
Re: [PATCH v2 38/59] iommu/amd: KVM: SVM: Infer IsRun from validity of pCPU destination
Posted by Sairaj Kodilkar 6 months, 2 weeks ago

On 5/23/2025 6:29 AM, Sean Christopherson wrote:
> Infer whether or not a vCPU should be marked running from the validity of
> the pCPU on which it is running.  amd_iommu_update_ga() already skips the
> IRTE update if the pCPU is invalid, i.e. passing %true for is_run with an
> invalid pCPU would be a blatant and egregrious KVM bug.
> 
> Tested-by: Sairaj Kodilkar <sarunkod@amd.com>
> Signed-off-by: Sean Christopherson <seanjc@google.com>
> ---
>   arch/x86/kvm/svm/avic.c   | 11 +++++------
>   drivers/iommu/amd/iommu.c | 14 +++++++++-----
>   include/linux/amd-iommu.h |  6 ++----
>   3 files changed, 16 insertions(+), 15 deletions(-)
> 
> diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
> index 4747fb09aca4..c79648d96752 100644
> --- a/arch/x86/kvm/svm/avic.c
> +++ b/arch/x86/kvm/svm/avic.c
> @@ -832,7 +832,7 @@ int avic_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm,
>   		entry = svm->avic_physical_id_entry;
>   		if (entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK)
>   			amd_iommu_update_ga(entry & AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK,
> -					    true, pi_data.ir_data);
> +					    pi_data.ir_data);
>   
>   		irqfd->irq_bypass_data = pi_data.ir_data;
>   		list_add(&irqfd->vcpu_list, &svm->ir_list);
> @@ -841,8 +841,7 @@ int avic_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm,
>   	return irq_set_vcpu_affinity(host_irq, NULL);
>   }
>   
> -static inline int
> -avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
> +static inline int avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu)
>   {

Hi sean

What if define cpu as "unsigned int" instead of "int" and use nr_cpu_ids
as invalid cpu id ? I see that it is common in the other subsystems to
use nr_cpu_ids instead of -1.

Thanks
Sairaj
Re: [PATCH v2 38/59] iommu/amd: KVM: SVM: Infer IsRun from validity of pCPU destination
Posted by Sean Christopherson 6 months, 2 weeks ago
On Fri, May 30, 2025, Sairaj Kodilkar wrote:
> On 5/23/2025 6:29 AM, Sean Christopherson wrote:
> > Infer whether or not a vCPU should be marked running from the validity of
> > the pCPU on which it is running.  amd_iommu_update_ga() already skips the
> > IRTE update if the pCPU is invalid, i.e. passing %true for is_run with an
> > invalid pCPU would be a blatant and egregrious KVM bug.
> > 
> > Tested-by: Sairaj Kodilkar <sarunkod@amd.com>
> > Signed-off-by: Sean Christopherson <seanjc@google.com>
> > ---
> >   arch/x86/kvm/svm/avic.c   | 11 +++++------
> >   drivers/iommu/amd/iommu.c | 14 +++++++++-----
> >   include/linux/amd-iommu.h |  6 ++----
> >   3 files changed, 16 insertions(+), 15 deletions(-)
> > 
> > diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
> > index 4747fb09aca4..c79648d96752 100644
> > --- a/arch/x86/kvm/svm/avic.c
> > +++ b/arch/x86/kvm/svm/avic.c
> > @@ -832,7 +832,7 @@ int avic_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm,
> >   		entry = svm->avic_physical_id_entry;
> >   		if (entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK)
> >   			amd_iommu_update_ga(entry & AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK,
> > -					    true, pi_data.ir_data);
> > +					    pi_data.ir_data);
> >   		irqfd->irq_bypass_data = pi_data.ir_data;
> >   		list_add(&irqfd->vcpu_list, &svm->ir_list);
> > @@ -841,8 +841,7 @@ int avic_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm,
> >   	return irq_set_vcpu_affinity(host_irq, NULL);
> >   }
> > -static inline int
> > -avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
> > +static inline int avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu)
> >   {
> 
> Hi sean
> 
> What if define cpu as "unsigned int" instead of "int" and use nr_cpu_ids
> as invalid cpu id ? I see that it is common in the other subsystems to
> use nr_cpu_ids instead of -1.

My vote is for -1, as it makes the KVM side of things much more intuitive

E.g. this is pretty obviously saying "no associated CPU"

  avic_update_iommu_vcpu_affinity(vcpu, -1);

whereas this honestly just looks a bit weird.
 
  avic_update_iommu_vcpu_affinity(vcpu, nr_cpu_ids);

It also requires knowing what cpu numbers are strictly packed in the kernel, i.e.
that nr_cpu_ids is guaranteed to be greater than the cpu numbers themselves (e.g.
the the kernel can't have nr_cpu_ids=2 with CPU0 and CPU2 being the two CPUs).

I also don't love that nr_cpu_ids is __read_mostly, i.e. isn't const post-boot
(though at a glance, it looks like it could be __ro_after_init on x86 at least).