[PATCH 65/67] KVM: SVM: Generate GA log IRQs only if the associated vCPU is blocking

Posted by Sean Christopherson 8 months, 2 weeks ago
There is a newer version of this series.
Configure IRTEs to generate GA log interrupts for device posted IRQs that hit
non-running vCPUs if and only if the target vCPU is blocking, i.e.
actually needs a wake event.  If the vCPU has exited to userspace or was
preempted, generating GA log entries and interrupts is wasteful and
unnecessary, as the vCPU will be re-loaded and/or scheduled back in
irrespective of the GA log notification (avic_ga_log_notifier() is just a
fancy wrapper for kvm_vcpu_wake_up()).
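
For reference, the notifier boils down to the following (a simplified
sketch, not the actual implementation; the GATag decoding and vCPU lookup
are elided behind a hypothetical avic_ga_tag_to_vcpu() helper):

	static int avic_ga_log_notifier(u32 ga_tag)
	{
		/* GATag decoding and lookup elided (helper is hypothetical). */
		struct kvm_vcpu *vcpu = avic_ga_tag_to_vcpu(ga_tag);

		/*
		 * If the target vCPU isn't blocking, this wake-up is pure
		 * overhead, as the vCPU will be re-loaded and/or scheduled
		 * back in regardless.
		 */
		if (vcpu)
			kvm_vcpu_wake_up(vcpu);
		return 0;
	}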

Use a should-be-zero bit in the vCPU's Physical APIC ID Table Entry to
track whether or not the vCPU's associated IRTEs are configured to
generate GA logs, but only set the synthetic bit in KVM's "cache", i.e.
never set the should-be-zero bit in tables that are used by hardware.
Use a synthetic bit instead of a dedicated boolean to minimize the odds
of messing up the locking, i.e. so that all the existing rules that apply
to avic_physical_id_entry for IS_RUNNING are reused verbatim for
GA_LOG_INTR.
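
The resulting invariant, as a minimal sketch (illustration only, not
actual patch code):

	/* All GA_LOG_INTR accesses follow the same rules as IS_RUNNING: */
	lockdep_assert_held(&svm->ir_list_lock);

	/* The synthetic bit may live in KVM's cached copy... */
	svm->avic_physical_id_entry |= AVIC_PHYSICAL_ID_ENTRY_GA_LOG_INTR;

	/* ...but must never reach the hardware-visible table. */
	WARN_ON_ONCE(READ_ONCE(kvm_svm->avic_physical_id_table[vcpu->vcpu_id]) &
		     AVIC_PHYSICAL_ID_ENTRY_GA_LOG_INTR);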

Note, because KVM (by design) "puts" AVIC state in a "pre-blocking"
phase, using kvm_vcpu_is_blocking() to track the need for notifications
isn't a viable option.
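
Roughly (a heavily simplified sketch of the halt path, for illustration
only):

	kvm_arch_vcpu_blocking(vcpu);	/* -> avic_vcpu_blocking() "puts" AVIC state */
	kvm_vcpu_block(vcpu);		/* kvm_vcpu_is_blocking() is true only in here */
	kvm_arch_vcpu_unblocking(vcpu);	/* -> avic_vcpu_unblocking() */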

Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 arch/x86/include/asm/svm.h |  7 ++++++
 arch/x86/kvm/svm/avic.c    | 49 +++++++++++++++++++++++++++-----------
 2 files changed, 42 insertions(+), 14 deletions(-)

diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index 8b07939ef3b9..be6e833bf92c 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -246,6 +246,13 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
 #define AVIC_LOGICAL_ID_ENTRY_VALID_BIT			31
 #define AVIC_LOGICAL_ID_ENTRY_VALID_MASK		(1 << 31)
 
+/*
+ * GA_LOG_INTR is a synthetic flag that's never propagated to hardware-visible
+ * tables.  GA_LOG_INTR is set if the vCPU needs device posted IRQs to generate
+ * GA log interrupts to wake the vCPU (because it's blocking or about to block).
+ */
+#define AVIC_PHYSICAL_ID_ENTRY_GA_LOG_INTR		BIT_ULL(61)
+
 #define AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK	GENMASK_ULL(11, 0)
 #define AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK	GENMASK_ULL(51, 12)
 #define AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK		(1ULL << 62)
diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index 1466e66cca6c..0d2a17a74be6 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -798,7 +798,7 @@ int avic_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm,
 			pi_data.cpu = entry & AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK;
 		} else {
 			pi_data.cpu = -1;
-			pi_data.ga_log_intr = true;
+			pi_data.ga_log_intr = entry & AVIC_PHYSICAL_ID_ENTRY_GA_LOG_INTR;
 		}
 
 		ret = irq_set_vcpu_affinity(host_irq, &pi_data);
@@ -823,7 +823,7 @@ int avic_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm,
 }
 
 static void avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu,
-					    bool toggle_avic)
+					    bool toggle_avic, bool ga_log_intr)
 {
 	struct amd_svm_iommu_ir *ir;
 	struct vcpu_svm *svm = to_svm(vcpu);
@@ -839,9 +839,9 @@ static void avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu,
 
 	list_for_each_entry(ir, &svm->ir_list, node) {
 		if (!toggle_avic)
-			WARN_ON_ONCE(amd_iommu_update_ga(ir->data, cpu, true));
+			WARN_ON_ONCE(amd_iommu_update_ga(ir->data, cpu, ga_log_intr));
 		else if (cpu >= 0)
-			WARN_ON_ONCE(amd_iommu_activate_guest_mode(ir->data, cpu, true));
+			WARN_ON_ONCE(amd_iommu_activate_guest_mode(ir->data, cpu, ga_log_intr));
 		else
 			WARN_ON_ONCE(amd_iommu_deactivate_guest_mode(ir->data));
 	}
@@ -875,7 +875,8 @@ static void __avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu, bool toggle_avic)
 	entry = svm->avic_physical_id_entry;
 	WARN_ON_ONCE(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK);
 
-	entry &= ~AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK;
+	entry &= ~(AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK |
+		   AVIC_PHYSICAL_ID_ENTRY_GA_LOG_INTR);
 	entry |= (h_physical_id & AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK);
 	entry |= AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
 
@@ -892,7 +893,7 @@ static void __avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu, bool toggle_avic)
 
 	WRITE_ONCE(kvm_svm->avic_physical_id_table[vcpu->vcpu_id], entry);
 
-	avic_update_iommu_vcpu_affinity(vcpu, h_physical_id, toggle_avic);
+	avic_update_iommu_vcpu_affinity(vcpu, h_physical_id, toggle_avic, false);
 
 	spin_unlock_irqrestore(&svm->ir_list_lock, flags);
 }
@@ -912,7 +913,8 @@ void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 	__avic_vcpu_load(vcpu, cpu, false);
 }
 
-static void __avic_vcpu_put(struct kvm_vcpu *vcpu, bool toggle_avic)
+static void __avic_vcpu_put(struct kvm_vcpu *vcpu, bool toggle_avic,
+			    bool is_blocking)
 {
 	struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm);
 	struct vcpu_svm *svm = to_svm(vcpu);
@@ -934,14 +936,28 @@ static void __avic_vcpu_put(struct kvm_vcpu *vcpu, bool toggle_avic)
 	 */
 	spin_lock_irqsave(&svm->ir_list_lock, flags);
 
-	avic_update_iommu_vcpu_affinity(vcpu, -1, toggle_avic);
+	avic_update_iommu_vcpu_affinity(vcpu, -1, toggle_avic, is_blocking);
 
+	WARN_ON_ONCE(entry & AVIC_PHYSICAL_ID_ENTRY_GA_LOG_INTR);
+
+	/*
+	 * Keep the previous APIC ID in the entry so that a rogue doorbell from
+	 * hardware is at least restricted to a CPU associated with the vCPU.
+	 */
 	entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
-	svm->avic_physical_id_entry = entry;
 
 	if (enable_ipiv)
 		WRITE_ONCE(kvm_svm->avic_physical_id_table[vcpu->vcpu_id], entry);
 
+	/*
+	 * Note!  Don't set AVIC_PHYSICAL_ID_ENTRY_GA_LOG_INTR in the table as
+	 * it's a synthetic flag that usurps an unused should-be-zero bit.
+	 */
+	if (is_blocking)
+		entry |= AVIC_PHYSICAL_ID_ENTRY_GA_LOG_INTR;
+
+	svm->avic_physical_id_entry = entry;
+
 	spin_unlock_irqrestore(&svm->ir_list_lock, flags);
 }
 
@@ -957,10 +973,15 @@ void avic_vcpu_put(struct kvm_vcpu *vcpu)
 	u64 entry = to_svm(vcpu)->avic_physical_id_entry;
 
 	/* Nothing to do if IsRunning == '0' due to vCPU blocking. */
-	if (!(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK))
-		return;
+	if (!(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK)) {
+		if (WARN_ON_ONCE(!kvm_vcpu_is_blocking(vcpu)))
+			return;
 
-	__avic_vcpu_put(vcpu, false);
+		if (!(WARN_ON_ONCE(!(entry & AVIC_PHYSICAL_ID_ENTRY_GA_LOG_INTR))))
+			return;
+	}
+
+	__avic_vcpu_put(vcpu, false, kvm_vcpu_is_blocking(vcpu));
 }
 
 void avic_refresh_virtual_apic_mode(struct kvm_vcpu *vcpu)
@@ -997,7 +1018,7 @@ void avic_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
 	if (kvm_vcpu_apicv_active(vcpu))
 		__avic_vcpu_load(vcpu, vcpu->cpu, true);
 	else
-		__avic_vcpu_put(vcpu, true);
+		__avic_vcpu_put(vcpu, true, true);
 }
 
 void avic_vcpu_blocking(struct kvm_vcpu *vcpu)
@@ -1023,7 +1044,7 @@ void avic_vcpu_blocking(struct kvm_vcpu *vcpu)
 	 * CPU and cause noisy neighbor problems if the VM is sending interrupts
 	 * to the vCPU while it's scheduled out.
 	 */
-	avic_vcpu_put(vcpu);
+	__avic_vcpu_put(vcpu, false, true);
 }
 
 void avic_vcpu_unblocking(struct kvm_vcpu *vcpu)
-- 
2.49.0.504.g3bcea36a83-goog
Re: [PATCH 65/67] KVM: SVM: Generate GA log IRQs only if the associated vCPU is blocking
Posted by Paolo Bonzini 8 months, 1 week ago
On 4/4/25 21:39, Sean Christopherson wrote:
> @@ -912,7 +913,8 @@ void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
>   	__avic_vcpu_load(vcpu, cpu, false);
>   }
>   
> -static void __avic_vcpu_put(struct kvm_vcpu *vcpu, bool toggle_avic)
> +static void __avic_vcpu_put(struct kvm_vcpu *vcpu, bool toggle_avic,
> +			    bool is_blocking)

What would it look like to use an enum { SCHED_OUT, SCHED_IN, 
ENABLE_AVIC, DISABLE_AVIC, START_BLOCKING } for both __avic_vcpu_put and 
__avic_vcpu_load's second argument?  Consecutive bools are ugly...

Paolo

>   {
>   	struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm);
>   	struct vcpu_svm *svm = to_svm(vcpu);
> @@ -934,14 +936,28 @@ static void __avic_vcpu_put(struct kvm_vcpu *vcpu, bool toggle_avic)
>   	 */
>   	spin_lock_irqsave(&svm->ir_list_lock, flags);
>   
> -	avic_update_iommu_vcpu_affinity(vcpu, -1, toggle_avic);
> +	avic_update_iommu_vcpu_affinity(vcpu, -1, toggle_avic, is_blocking);
>   
> +	WARN_ON_ONCE(entry & AVIC_PHYSICAL_ID_ENTRY_GA_LOG_INTR);
> +
> +	/*
> +	 * Keep the previouv APIC ID in the entry so that a rogue doorbell from
> +	 * hardware is at least restricted to a CPU associated with the vCPU.
> +	 */
>   	entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
> -	svm->avic_physical_id_entry = entry;
>   
>   	if (enable_ipiv)
>   		WRITE_ONCE(kvm_svm->avic_physical_id_table[vcpu->vcpu_id], entry);
>   
> +	/*
> +	 * Note!  Don't set AVIC_PHYSICAL_ID_ENTRY_GA_LOG_INTR in the table as
> +	 * it's a synthetic flag that usurps an unused a should-be-zero bit.
> +	 */
> +	if (is_blocking)
> +		entry |= AVIC_PHYSICAL_ID_ENTRY_GA_LOG_INTR;
> +
> +	svm->avic_physical_id_entry = entry;
> +
>   	spin_unlock_irqrestore(&svm->ir_list_lock, flags);
>   }
>   
> @@ -957,10 +973,15 @@ void avic_vcpu_put(struct kvm_vcpu *vcpu)
>   	u64 entry = to_svm(vcpu)->avic_physical_id_entry;
>   
>   	/* Nothing to do if IsRunning == '0' due to vCPU blocking. */
> -	if (!(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK))
> -		return;
> +	if (!(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK)) {
> +		if (WARN_ON_ONCE(!kvm_vcpu_is_blocking(vcpu)))
> +			return;
>   
> -	__avic_vcpu_put(vcpu, false);
> +		if (!(WARN_ON_ONCE(!(entry & AVIC_PHYSICAL_ID_ENTRY_GA_LOG_INTR))))
> +			return;
> +	}
> +
> +	__avic_vcpu_put(vcpu, false, kvm_vcpu_is_blocking(vcpu));
>   }
>   
>   void avic_refresh_virtual_apic_mode(struct kvm_vcpu *vcpu)
> @@ -997,7 +1018,7 @@ void avic_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
>   	if (kvm_vcpu_apicv_active(vcpu))
>   		__avic_vcpu_load(vcpu, vcpu->cpu, true);
>   	else
> -		__avic_vcpu_put(vcpu, true);
> +		__avic_vcpu_put(vcpu, true, true);
>   }
>   
>   void avic_vcpu_blocking(struct kvm_vcpu *vcpu)
> @@ -1023,7 +1044,7 @@ void avic_vcpu_blocking(struct kvm_vcpu *vcpu)
>   	 * CPU and cause noisy neighbor problems if the VM is sending interrupts
>   	 * to the vCPU while it's scheduled out.
>   	 */
> -	avic_vcpu_put(vcpu);
> +	__avic_vcpu_put(vcpu, false, true);
>   }
>   
>   void avic_vcpu_unblocking(struct kvm_vcpu *vcpu)
Re: [PATCH 65/67] KVM: SVM: Generate GA log IRQs only if the associated vCPU is blocking
Posted by Sean Christopherson 8 months, 1 week ago
On Tue, Apr 08, 2025, Paolo Bonzini wrote:
> On 4/4/25 21:39, Sean Christopherson wrote:
> > @@ -892,7 +893,7 @@ static void __avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu, bool toggle_avic)
> >   	WRITE_ONCE(kvm_svm->avic_physical_id_table[vcpu->vcpu_id], entry);
> > -	avic_update_iommu_vcpu_affinity(vcpu, h_physical_id, toggle_avic);
> > +	avic_update_iommu_vcpu_affinity(vcpu, h_physical_id, toggle_avic, false);
> >   	spin_unlock_irqrestore(&svm->ir_list_lock, flags);
> >   }
> > @@ -912,7 +913,8 @@ void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
> >   	__avic_vcpu_load(vcpu, cpu, false);
> >   }
> > -static void __avic_vcpu_put(struct kvm_vcpu *vcpu, bool toggle_avic)
> > +static void __avic_vcpu_put(struct kvm_vcpu *vcpu, bool toggle_avic,
> > +			    bool is_blocking)
> 
> What would it look like to use an enum { SCHED_OUT, SCHED_IN, ENABLE_AVIC,
> DISABLE_AVIC, START_BLOCKING } for both __avic_vcpu_put and
> __avic_vcpu_load's second argument?

There's gotta be a way to make it look better than this code.  I gave a half-
hearted attempt at using an enum before posting, but wasn't able to come up with
anything decent.

Coming back to it with fresh eyes, what about this (full on-top diff below)?

enum avic_vcpu_action {
	AVIC_SCHED_IN		= 0,
	AVIC_SCHED_OUT		= 0,
	AVIC_START_BLOCKING	= BIT(0),

	AVIC_TOGGLE_ON_OFF	= BIT(1),
	AVIC_ACTIVATE		= AVIC_TOGGLE_ON_OFF,
	AVIC_DEACTIVATE		= AVIC_TOGGLE_ON_OFF,
};

AVIC_SCHED_IN and AVIC_SCHED_OUT are essentially syntactic sugar, as are
AVIC_ACTIVATE and AVIC_DEACTIVATE to a certain extent.  But it's much better than
booleans, and using a bitmask makes avic_update_iommu_vcpu_affinity() slightly
prettier.
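
E.g. at the avic_vcpu_blocking() call site (taken from the diff below),
the intent becomes legible at a glance:

	-	__avic_vcpu_put(vcpu, false, true);
	+	__avic_vcpu_put(vcpu, AVIC_START_BLOCKING);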

> Consecutive bools are ugly...

Yeah, I hated it when I wrote it, and still hate it now.

And more error prone, e.g. the __avic_vcpu_put() call from avic_refresh_apicv_exec_ctrl()
should specify is_blocking=false, not true, as kvm_x86_ops.refresh_apicv_exec_ctrl()
should never be called while the vCPU is blocking.
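
Concretely, the bug hides in plain sight with booleans:

	__avic_vcpu_put(vcpu, true, true);	/* is_blocking == true, but the vCPU can't be blocking here */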

---
 arch/x86/kvm/svm/avic.c | 41 ++++++++++++++++++++++++++++-------------
 1 file changed, 28 insertions(+), 13 deletions(-)

diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index 425674e1a04c..1752420c68aa 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -833,9 +833,20 @@ int avic_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm,
 	return irq_set_vcpu_affinity(host_irq, NULL);
 }
 
+enum avic_vcpu_action {
+	AVIC_SCHED_IN		= 0,
+	AVIC_SCHED_OUT		= 0,
+	AVIC_START_BLOCKING	= BIT(0),
+
+	AVIC_TOGGLE_ON_OFF	= BIT(1),
+	AVIC_ACTIVATE		= AVIC_TOGGLE_ON_OFF,
+	AVIC_DEACTIVATE		= AVIC_TOGGLE_ON_OFF,
+};
+
 static void avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu,
-					    bool toggle_avic, bool ga_log_intr)
+					    enum avic_vcpu_action action)
 {
+	bool ga_log_intr = (action & AVIC_START_BLOCKING);
 	struct amd_svm_iommu_ir *ir;
 	struct vcpu_svm *svm = to_svm(vcpu);
 
@@ -849,7 +860,7 @@ static void avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu,
 		return;
 
 	list_for_each_entry(ir, &svm->ir_list, node) {
-		if (!toggle_avic)
+		if (!(action & AVIC_TOGGLE_ON_OFF))
 			WARN_ON_ONCE(amd_iommu_update_ga(ir->data, cpu, ga_log_intr));
 		else if (cpu >= 0)
 			WARN_ON_ONCE(amd_iommu_activate_guest_mode(ir->data, cpu, ga_log_intr));
@@ -858,7 +869,8 @@ static void avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu,
 	}
 }
 
-static void __avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu, bool toggle_avic)
+static void __avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu,
+			     enum avic_vcpu_action action)
 {
 	struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm);
 	int h_physical_id = kvm_cpu_get_apicid(cpu);
@@ -904,7 +916,7 @@ static void __avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu, bool toggle_avic)
 
 	WRITE_ONCE(kvm_svm->avic_physical_id_table[vcpu->vcpu_id], entry);
 
-	avic_update_iommu_vcpu_affinity(vcpu, h_physical_id, toggle_avic, false);
+	avic_update_iommu_vcpu_affinity(vcpu, h_physical_id, action);
 
 	spin_unlock_irqrestore(&svm->ir_list_lock, flags);
 }
@@ -921,11 +933,10 @@ void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 	if (kvm_vcpu_is_blocking(vcpu))
 		return;
 
-	__avic_vcpu_load(vcpu, cpu, false);
+	__avic_vcpu_load(vcpu, cpu, AVIC_SCHED_IN);
 }
 
-static void __avic_vcpu_put(struct kvm_vcpu *vcpu, bool toggle_avic,
-			    bool is_blocking)
+static void __avic_vcpu_put(struct kvm_vcpu *vcpu, enum avic_vcpu_action action)
 {
 	struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm);
 	struct vcpu_svm *svm = to_svm(vcpu);
@@ -947,7 +958,7 @@ static void __avic_vcpu_put(struct kvm_vcpu *vcpu, bool toggle_avic,
 	 */
 	spin_lock_irqsave(&svm->ir_list_lock, flags);
 
-	avic_update_iommu_vcpu_affinity(vcpu, -1, toggle_avic, is_blocking);
+	avic_update_iommu_vcpu_affinity(vcpu, -1, action);
 
 	WARN_ON_ONCE(entry & AVIC_PHYSICAL_ID_ENTRY_GA_LOG_INTR);
 
@@ -964,7 +975,7 @@ static void __avic_vcpu_put(struct kvm_vcpu *vcpu, bool toggle_avic,
 	 * Note!  Don't set AVIC_PHYSICAL_ID_ENTRY_GA_LOG_INTR in the table as
 	 * it's a synthetic flag that usurps an unused should-be-zero bit.
 	 */
-	if (is_blocking)
+	if (action & AVIC_START_BLOCKING)
 		entry |= AVIC_PHYSICAL_ID_ENTRY_GA_LOG_INTR;
 
 	svm->avic_physical_id_entry = entry;
@@ -992,7 +1003,8 @@ void avic_vcpu_put(struct kvm_vcpu *vcpu)
 			return;
 	}
 
-	__avic_vcpu_put(vcpu, false, kvm_vcpu_is_blocking(vcpu));
+	__avic_vcpu_put(vcpu, kvm_vcpu_is_blocking(vcpu) ? AVIC_START_BLOCKING :
+							   AVIC_SCHED_OUT);
 }
 
 void avic_refresh_virtual_apic_mode(struct kvm_vcpu *vcpu)
@@ -1024,12 +1036,15 @@ void avic_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
 	if (!enable_apicv)
 		return;
 
+	/* APICv should only be toggled on/off while the vCPU is running. */
+	WARN_ON_ONCE(kvm_vcpu_is_blocking(vcpu));
+
 	avic_refresh_virtual_apic_mode(vcpu);
 
 	if (kvm_vcpu_apicv_active(vcpu))
-		__avic_vcpu_load(vcpu, vcpu->cpu, true);
+		__avic_vcpu_load(vcpu, vcpu->cpu, AVIC_ACTIVATE);
 	else
-		__avic_vcpu_put(vcpu, true, true);
+		__avic_vcpu_put(vcpu, AVIC_DEACTIVATE);
 }
 
 void avic_vcpu_blocking(struct kvm_vcpu *vcpu)
@@ -1055,7 +1070,7 @@ void avic_vcpu_blocking(struct kvm_vcpu *vcpu)
 	 * CPU and cause noisy neighbor problems if the VM is sending interrupts
 	 * to the vCPU while it's scheduled out.
 	 */
-	__avic_vcpu_put(vcpu, false, true);
+	__avic_vcpu_put(vcpu, AVIC_START_BLOCKING);
 }
 
 void avic_vcpu_unblocking(struct kvm_vcpu *vcpu)

base-commit: fe5b44cf46d5444ff071bc2373fbe7b109a3f60b
--
Re: [PATCH 65/67] KVM: SVM: Generate GA log IRQs only if the associated vCPU is blocking
Posted by Paolo Bonzini 8 months, 1 week ago
On 4/8/25 23:31, Sean Christopherson wrote:
> On Tue, Apr 08, 2025, Paolo Bonzini wrote:
>> On 4/4/25 21:39, Sean Christopherson wrote:
>>> @@ -892,7 +893,7 @@ static void __avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu, bool toggle_avic)
>>>    	WRITE_ONCE(kvm_svm->avic_physical_id_table[vcpu->vcpu_id], entry);
>>> -	avic_update_iommu_vcpu_affinity(vcpu, h_physical_id, toggle_avic);
>>> +	avic_update_iommu_vcpu_affinity(vcpu, h_physical_id, toggle_avic, false);
>>>    	spin_unlock_irqrestore(&svm->ir_list_lock, flags);
>>>    }
>>> @@ -912,7 +913,8 @@ void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
>>>    	__avic_vcpu_load(vcpu, cpu, false);
>>>    }
>>> -static void __avic_vcpu_put(struct kvm_vcpu *vcpu, bool toggle_avic)
>>> +static void __avic_vcpu_put(struct kvm_vcpu *vcpu, bool toggle_avic,
>>> +			    bool is_blocking)
>>
>> What would it look like to use an enum { SCHED_OUT, SCHED_IN, ENABLE_AVIC,
>> DISABLE_AVIC, START_BLOCKING } for both __avic_vcpu_put and
>> __avic_vcpu_load's second argument?
> 
> There's gotta be a way to make it look better than this code.  I gave a half-
> hearted attempt at using an enum before posting, but wasn't able to come up with
> anything decent.
> 
> Coming back to it with fresh eyes, what about this (full on-top diff below)?
> 
> enum avic_vcpu_action {
> 	AVIC_SCHED_IN		= 0,
> 	AVIC_SCHED_OUT		= 0,
> 	AVIC_START_BLOCKING	= BIT(0),
> 
> 	AVIC_TOGGLE_ON_OFF	= BIT(1),
> 	AVIC_ACTIVATE		= AVIC_TOGGLE_ON_OFF,
> 	AVIC_DEACTIVATE		= AVIC_TOGGLE_ON_OFF,
> };
> 
> AVIC_SCHED_IN and AVIC_SCHED_OUT are essentially syntactic sugar, as are
> AVIC_ACTIVATE and AVIC_DEACTIVATE to a certain extent.  But it's much better than
> booleans, and using a bitmask makes avic_update_iommu_vcpu_affinity() slightly
> prettier.

Even just the bitmask at least makes it clear what is "true" and what is 
"false" (which is obvious but I never thought about it this way, you 
never stop learning).

You decide whether you prefer the syntactic sugar or not.

Paolo
