[PATCH] KVM: x86: ioapic: Optimize EOI handling to reduce unnecessary VM exits
Posted by weizijie 1 year, 2 months ago
Address performance issues caused by a vector being reused by a
non-IOAPIC source.

commit 0fc5a36dd6b3
("KVM: x86: ioapic: Fix level-triggered EOI and IOAPIC reconfigure race")
addressed the issues related to EOI and IOAPIC reconfiguration races.
However, it has introduced some performance concerns:

Configuring IOAPIC interrupts while an interrupt request (IRQ) is
already in service can unintentionally trigger a VM exit for other
interrupts that normally do not require one, due to the settings of
`ioapic_handled_vectors`. If the IOAPIC is not reconfigured during
runtime, this issue persists, continuing to adversely affect
performance.

Simple Fix Proposal:
A straightforward solution is to record the vectors that are pending
(in service) at the time of the scan. Then, when such a vector is
EOI'd, clear the corresponding bit in ioapic_handled_vectors and
reload the EOI exit bitmap. This ensures that interrupts are still
handled correctly while avoiding the unnecessary EOI-induced VM exits.

Signed-off-by: weizijie <zijie.wei@linux.alibaba.com>
Signed-off-by: xuyun <xuyun_xy.xy@linux.alibaba.com>
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/ioapic.c           | 11 +++++++++--
 arch/x86/kvm/vmx/vmx.c          | 10 ++++++++++
 3 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index e159e44a6a1b..b008c933d2ab 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1041,6 +1041,7 @@ struct kvm_vcpu_arch {
 #if IS_ENABLED(CONFIG_HYPERV)
 	hpa_t hv_root_tdp;
 #endif
+	DECLARE_BITMAP(ioapic_pending_vectors, 256);
 };
 
 struct kvm_lpage_info {
diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
index 995eb5054360..6f5a88dc63da 100644
--- a/arch/x86/kvm/ioapic.c
+++ b/arch/x86/kvm/ioapic.c
@@ -284,6 +284,8 @@ void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, ulong *ioapic_handled_vectors)
 
 	spin_lock(&ioapic->lock);
 
+	bitmap_zero(vcpu->arch.ioapic_pending_vectors, 256);
+
 	/* Make sure we see any missing RTC EOI */
 	if (test_bit(vcpu->vcpu_id, dest_map->map))
 		__set_bit(dest_map->vectors[vcpu->vcpu_id],
@@ -297,10 +299,15 @@ void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, ulong *ioapic_handled_vectors)
 			u16 dm = kvm_lapic_irq_dest_mode(!!e->fields.dest_mode);
 
 			if (kvm_apic_match_dest(vcpu, NULL, APIC_DEST_NOSHORT,
-						e->fields.dest_id, dm) ||
-			    kvm_apic_pending_eoi(vcpu, e->fields.vector))
+						e->fields.dest_id, dm))
+				__set_bit(e->fields.vector,
+					  ioapic_handled_vectors);
+			else if (kvm_apic_pending_eoi(vcpu, e->fields.vector)) {
 				__set_bit(e->fields.vector,
 					  ioapic_handled_vectors);
+				__set_bit(e->fields.vector,
+					  vcpu->arch.ioapic_pending_vectors);
+			}
 		}
 	}
 	spin_unlock(&ioapic->lock);
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 0f008f5ef6f0..572e6f9b8602 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -5710,6 +5710,16 @@ static int handle_apic_eoi_induced(struct kvm_vcpu *vcpu)
 
 	/* EOI-induced VM exit is trap-like and thus no need to adjust IP */
 	kvm_apic_set_eoi_accelerated(vcpu, vector);
+
+	/* When there are instances where ioapic_handled_vectors is
+	 * set due to pending interrupts, clean up the record and the
+	 * corresponding bit after the interrupt is completed.
+	 */
+	if (test_bit(vector, vcpu->arch.ioapic_pending_vectors)) {
+		clear_bit(vector, vcpu->arch.ioapic_pending_vectors);
+		clear_bit(vector, vcpu->arch.ioapic_handled_vectors);
+		kvm_make_request(KVM_REQ_LOAD_EOI_EXITMAP, vcpu);
+	}
 	return 1;
 }
 
-- 
2.43.5
Re: [PATCH] KVM: x86: ioapic: Optimize EOI handling to reduce unnecessary VM exits
Posted by Sean Christopherson 1 year, 1 month ago
On Thu, Nov 21, 2024, weizijie wrote:
> Address performance issues caused by a vector being reused by a
> non-IOAPIC source.
> 
> commit 0fc5a36dd6b3
> ("KVM: x86: ioapic: Fix level-triggered EOI and IOAPIC reconfigure race")
> addressed the issues related to EOI and IOAPIC reconfiguration races.
> However, it has introduced some performance concerns:
> 
> Configuring IOAPIC interrupts while an interrupt request (IRQ) is
> already in service can unintentionally trigger a VM exit for other
> interrupts that normally do not require one, due to the settings of
> `ioapic_handled_vectors`. If the IOAPIC is not reconfigured during
> runtime, this issue persists, continuing to adversely affect
> performance.
> 
> Simple Fix Proposal:
> A straightforward solution is to record the vector that is pending at
> the time of injection. Then, upon the next guest exit, clean up the
> ioapic_handled_vectors corresponding to the vector number that was
> pending. This ensures that interrupts are properly handled and prevents
> performance issues.
> 
> Signed-off-by: weizijie <zijie.wei@linux.alibaba.com>
> Signed-off-by: xuyun <xuyun_xy.xy@linux.alibaba.com>

Your SoB should be last, and assuming Xuyun is a co-author, they need to be
credited via Co-developed-by.  See Documentation/process/submitting-patches.rst
for details.

> ---
>  arch/x86/include/asm/kvm_host.h |  1 +
>  arch/x86/kvm/ioapic.c           | 11 +++++++++--
>  arch/x86/kvm/vmx/vmx.c          | 10 ++++++++++
>  3 files changed, 20 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index e159e44a6a1b..b008c933d2ab 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1041,6 +1041,7 @@ struct kvm_vcpu_arch {
>  #if IS_ENABLED(CONFIG_HYPERV)
>  	hpa_t hv_root_tdp;
>  #endif
> +	DECLARE_BITMAP(ioapic_pending_vectors, 256);
>  };
>  
>  struct kvm_lpage_info {
> diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
> index 995eb5054360..6f5a88dc63da 100644
> --- a/arch/x86/kvm/ioapic.c
> +++ b/arch/x86/kvm/ioapic.c
> @@ -284,6 +284,8 @@ void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, ulong *ioapic_handled_vectors)

The split IRQ chip mode should have the same enhancement.

>  	spin_lock(&ioapic->lock);
>  
> +	bitmap_zero(vcpu->arch.ioapic_pending_vectors, 256);

Rather than use a bitmap, what does performance look like if this is a single u8
that tracks the highest in-service IRQ at the time of the last scan?  And then
when that IRQ is EOI'd, do a full KVM_REQ_SCAN_IOAPIC instead of
KVM_REQ_LOAD_EOI_EXITMAP?  Having multiple interrupts in-flight at the time of
scan should be quite rare.

I like the idea, but burning 32 bytes for an edge case of an edge case seems
unnecessary.
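
For illustration, a minimal sketch of that alternative (roughly what later
versions of this patch end up doing; the u8 field name and hook points are
placeholders, not a final implementation):

    /* in struct kvm_vcpu_arch: highest in-service vector seen at the last scan */
    u8 last_pending_vector;

    /* in kvm_ioapic_scan_entry()'s per-entry loop: */
    if (kvm_apic_match_dest(vcpu, NULL, APIC_DEST_NOSHORT, e->fields.dest_id, dm)) {
        __set_bit(e->fields.vector, ioapic_handled_vectors);
    } else if (kvm_apic_pending_eoi(vcpu, e->fields.vector)) {
        __set_bit(e->fields.vector, ioapic_handled_vectors);
        if (e->fields.vector > vcpu->arch.last_pending_vector)
            vcpu->arch.last_pending_vector = e->fields.vector;
    }

    /* on the EOI for that vector: rescan instead of only reloading the exit map */
    if (vcpu->arch.last_pending_vector == vector) {
        vcpu->arch.last_pending_vector = 0;
        kvm_make_request(KVM_REQ_SCAN_IOAPIC, vcpu);
    }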
 
> +
>  	/* Make sure we see any missing RTC EOI */
>  	if (test_bit(vcpu->vcpu_id, dest_map->map))
>  		__set_bit(dest_map->vectors[vcpu->vcpu_id],
> @@ -297,10 +299,15 @@ void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, ulong *ioapic_handled_vectors)
>  			u16 dm = kvm_lapic_irq_dest_mode(!!e->fields.dest_mode);
>  
>  			if (kvm_apic_match_dest(vcpu, NULL, APIC_DEST_NOSHORT,
> -						e->fields.dest_id, dm) ||
> -			    kvm_apic_pending_eoi(vcpu, e->fields.vector))
> +						e->fields.dest_id, dm))
> +				__set_bit(e->fields.vector,
> +					  ioapic_handled_vectors);
> +			else if (kvm_apic_pending_eoi(vcpu, e->fields.vector)) {
>  				__set_bit(e->fields.vector,
>  					  ioapic_handled_vectors);
> +				__set_bit(e->fields.vector,
> +					  vcpu->arch.ioapic_pending_vectors);
> +			}
>  		}
>  	}
>  	spin_unlock(&ioapic->lock);
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 0f008f5ef6f0..572e6f9b8602 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -5710,6 +5710,16 @@ static int handle_apic_eoi_induced(struct kvm_vcpu *vcpu)
>  
>  	/* EOI-induced VM exit is trap-like and thus no need to adjust IP */
>  	kvm_apic_set_eoi_accelerated(vcpu, vector);
> +
> +	/* When there are instances where ioapic_handled_vectors is
> +	 * set due to pending interrupts, clean up the record and the
> +	 * corresponding bit after the interrupt is completed.
> +	 */
> +	if (test_bit(vector, vcpu->arch.ioapic_pending_vectors)) {

This belongs in common code, probably kvm_ioapic_send_eoi().

> +		clear_bit(vector, vcpu->arch.ioapic_pending_vectors);
> +		clear_bit(vector, vcpu->arch.ioapic_handled_vectors);
> +		kvm_make_request(KVM_REQ_LOAD_EOI_EXITMAP, vcpu);
> +	}
>  	return 1;
>  }
>  
> -- 
> 2.43.5
>
Re: [PATCH] KVM: x86: ioapic: Optimize EOI handling to reduce unnecessary VM exits
Posted by wzj 1 year, 1 month ago
On December 18, 2024, Sean Christopherson wrote:
> On Thu, Nov 21, 2024, weizijie wrote:
>> Address performance issues caused by a vector being reused by a
>> non-IOAPIC source.
>>
>> commit 0fc5a36dd6b3
>> ("KVM: x86: ioapic: Fix level-triggered EOI and IOAPIC reconfigure race")
>> addressed the issues related to EOI and IOAPIC reconfiguration races.
>> However, it has introduced some performance concerns:
>>
>> Configuring IOAPIC interrupts while an interrupt request (IRQ) is
>> already in service can unintentionally trigger a VM exit for other
>> interrupts that normally do not require one, due to the settings of
>> `ioapic_handled_vectors`. If the IOAPIC is not reconfigured during
>> runtime, this issue persists, continuing to adversely affect
>> performance.
>>
>> Simple Fix Proposal:
>> A straightforward solution is to record the vector that is pending at
>> the time of injection. Then, upon the next guest exit, clean up the
>> ioapic_handled_vectors corresponding to the vector number that was
>> pending. This ensures that interrupts are properly handled and prevents
>> performance issues.
>>
>> Signed-off-by: weizijie <zijie.wei@linux.alibaba.com>
>> Signed-off-by: xuyun <xuyun_xy.xy@linux.alibaba.com>
> Your SoB should be last, and assuming Xuyun is a co-author, they need to be
> credited via Co-developed-by.  See Documentation/process/submitting-patches.rst
> for details.

I sincerely apologize; that was my oversight. I will add
Co-developed-by and move my SoB to the end.

>
>> ---
>>   arch/x86/include/asm/kvm_host.h |  1 +
>>   arch/x86/kvm/ioapic.c           | 11 +++++++++--
>>   arch/x86/kvm/vmx/vmx.c          | 10 ++++++++++
>>   3 files changed, 20 insertions(+), 2 deletions(-)
>>
>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
>> index e159e44a6a1b..b008c933d2ab 100644
>> --- a/arch/x86/include/asm/kvm_host.h
>> +++ b/arch/x86/include/asm/kvm_host.h
>> @@ -1041,6 +1041,7 @@ struct kvm_vcpu_arch {
>>   #if IS_ENABLED(CONFIG_HYPERV)
>>   	hpa_t hv_root_tdp;
>>   #endif
>> +	DECLARE_BITMAP(ioapic_pending_vectors, 256);
>>   };
>>   
>>   struct kvm_lpage_info {
>> diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
>> index 995eb5054360..6f5a88dc63da 100644
>> --- a/arch/x86/kvm/ioapic.c
>> +++ b/arch/x86/kvm/ioapic.c
>> @@ -284,6 +284,8 @@ void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, ulong *ioapic_handled_vectors)
> The split IRQ chip mode should have the same enhancement.

You are absolutely right; the split IRQ chip has the same issue.

We will apply the same enhancement here as well.

>
>>   	spin_lock(&ioapic->lock);
>>   
>> +	bitmap_zero(vcpu->arch.ioapic_pending_vectors, 256);
> Rather than use a bitmap, what does performance look like if this is a single u8
> that tracks the highest in-service IRQ at the time of the last scan?  And then
> when that IRQ is EOI'd, do a full KVM_REQ_SCAN_IOAPIC instead of
> KVM_REQ_LOAD_EOI_EXITMAP?  Having multiple interrupts in-flight at the time of
> scan should be quite rare.
>
> I like the idea, but burning 32 bytes for an edge case of an edge case seems
> unnecessary.

This is truly an excellent suggestion; your comprehensive
consideration is impressive. Using just a u8 to record the highest
in-service IRQ, and only redoing a full KVM_REQ_SCAN_IOAPIC when the
recorded IRQ is EOI'd, not only avoids impacting other interrupts that
should cause a VM exit, but also saves space.

>   
>> +
>>   	/* Make sure we see any missing RTC EOI */
>>   	if (test_bit(vcpu->vcpu_id, dest_map->map))
>>   		__set_bit(dest_map->vectors[vcpu->vcpu_id],
>> @@ -297,10 +299,15 @@ void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, ulong *ioapic_handled_vectors)
>>   			u16 dm = kvm_lapic_irq_dest_mode(!!e->fields.dest_mode);
>>   
>>   			if (kvm_apic_match_dest(vcpu, NULL, APIC_DEST_NOSHORT,
>> -						e->fields.dest_id, dm) ||
>> -			    kvm_apic_pending_eoi(vcpu, e->fields.vector))
>> +						e->fields.dest_id, dm))
>> +				__set_bit(e->fields.vector,
>> +					  ioapic_handled_vectors);
>> +			else if (kvm_apic_pending_eoi(vcpu, e->fields.vector)) {
>>   				__set_bit(e->fields.vector,
>>   					  ioapic_handled_vectors);
>> +				__set_bit(e->fields.vector,
>> +					  vcpu->arch.ioapic_pending_vectors);
>> +			}
>>   		}
>>   	}
>>   	spin_unlock(&ioapic->lock);
>> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
>> index 0f008f5ef6f0..572e6f9b8602 100644
>> --- a/arch/x86/kvm/vmx/vmx.c
>> +++ b/arch/x86/kvm/vmx/vmx.c
>> @@ -5710,6 +5710,16 @@ static int handle_apic_eoi_induced(struct kvm_vcpu *vcpu)
>>   
>>   	/* EOI-induced VM exit is trap-like and thus no need to adjust IP */
>>   	kvm_apic_set_eoi_accelerated(vcpu, vector);
>> +
>> +	/* When there are instances where ioapic_handled_vectors is
>> +	 * set due to pending interrupts, clean up the record and the
>> +	 * corresponding bit after the interrupt is completed.
>> +	 */
>> +	if (test_bit(vector, vcpu->arch.ioapic_pending_vectors)) {
> This belongs in common code, probably kvm_ioapic_send_eoi().
We wanted to keep the code on the common path as simple as possible.
>> +		clear_bit(vector, vcpu->arch.ioapic_pending_vectors);
>> +		clear_bit(vector, vcpu->arch.ioapic_handled_vectors);
>> +		kvm_make_request(KVM_REQ_LOAD_EOI_EXITMAP, vcpu);
>> +	}
>>   	return 1;
>>   }
>>   
KVM: x86: ioapic: Optimize EOI handling to reduce
  unnecessary VM exits

Address performance issues caused by a vector being reused by a
non-IOAPIC source.

commit 0fc5a36dd6b3
("KVM: x86: ioapic: Fix level-triggered EOI and IOAPIC reconfigure race")
addressed the issues related to EOI and IOAPIC reconfiguration races.
However, it has introduced some performance concerns:

Configuring IOAPIC interrupts while an interrupt request (IRQ) is
already in service can unintentionally trigger a VM exit for other
interrupts that normally do not require one, due to the settings of
`ioapic_handled_vectors`. If the IOAPIC is not reconfigured during
runtime, this issue persists, continuing to adversely affect
performance.

Simple Fix Proposal:
A straightforward solution is to record the highest in-service IRQ that
is pending at the time of the last scan. Then, upon the next guest
exit, do a full KVM_REQ_SCAN_IOAPIC. This ensures that a re-scan of
the ioapic occurs only when the recorded vector is EOI'd, and
subsequently, the extra bit in the eoi_exit_bitmap is cleared,
avoiding unnecessary VM exits.

Co-developed-by: xuyun <xuyun_xy.xy@linux.alibaba.com>
Signed-off-by: xuyun <xuyun_xy.xy@linux.alibaba.com>
Signed-off-by: weizijie <zijie.wei@linux.alibaba.com>
---
v1 -> v2

* Move my SoB to the end and add Co-developed-by for Xuyun

* Use a u8 type to record a pending IRQ during the ioapic scan process

* Made the same changes for the split IRQ chip mode

arch/x86/include/asm/kvm_host.h |  1 +
  arch/x86/kvm/ioapic.c           | 10 ++++++++--
  arch/x86/kvm/irq_comm.c         |  9 +++++++--
  arch/x86/kvm/vmx/vmx.c          |  9 +++++++++
  4 files changed, 25 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h 
b/arch/x86/include/asm/kvm_host.h
index e159e44a6a1b..f84a4881afa4 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1041,6 +1041,7 @@ struct kvm_vcpu_arch {
  #if IS_ENABLED(CONFIG_HYPERV)
         hpa_t hv_root_tdp;
  #endif
+       u8 last_pending_vector;
  };

  struct kvm_lpage_info {
diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
index 995eb5054360..40252a800897 100644
--- a/arch/x86/kvm/ioapic.c
+++ b/arch/x86/kvm/ioapic.c
@@ -297,10 +297,16 @@ void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, 
ulong *ioapic_handled_vectors)
                         u16 dm = 
kvm_lapic_irq_dest_mode(!!e->fields.dest_mode);

                         if (kvm_apic_match_dest(vcpu, NULL, 
APIC_DEST_NOSHORT,
- e->fields.dest_id, dm) ||
-                           kvm_apic_pending_eoi(vcpu, e->fields.vector))
+ e->fields.dest_id, dm))
                                 __set_bit(e->fields.vector,
                                           ioapic_handled_vectors);
+                       else if (kvm_apic_pending_eoi(vcpu, 
e->fields.vector)) {
+                               __set_bit(e->fields.vector,
+                                         ioapic_handled_vectors);
+                               vcpu->arch.last_pending_vector = 
e->fields.vector >
+ vcpu->arch.last_pending_vector ? e->fields.vector :
+ vcpu->arch.last_pending_vector;
+                       }
                 }
         }
         spin_unlock(&ioapic->lock);
diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
index 8136695f7b96..1d23c52576e1 100644
--- a/arch/x86/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -426,9 +426,14 @@ void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu,

                         if (irq.trig_mode &&
                             (kvm_apic_match_dest(vcpu, NULL, 
APIC_DEST_NOSHORT,
-                                                irq.dest_id, 
irq.dest_mode) ||
-                            kvm_apic_pending_eoi(vcpu, irq.vector)))
+                                                irq.dest_id, 
irq.dest_mode)))
                                 __set_bit(irq.vector, 
ioapic_handled_vectors);
+                       else if (kvm_apic_pending_eoi(vcpu, irq.vector)) {
+                               __set_bit(irq.vector, 
ioapic_handled_vectors);
+                               vcpu->arch.last_pending_vector = 
irq.vector >
+ vcpu->arch.last_pending_vector ? irq.vector :
+ vcpu->arch.last_pending_vector;
+                       }
                 }
         }
         srcu_read_unlock(&kvm->irq_srcu, idx);
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 893366e53732..cd0db1496ce7 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -5702,6 +5702,15 @@ static int handle_apic_eoi_induced(struct 
kvm_vcpu *vcpu)

         /* EOI-induced VM exit is trap-like and thus no need to adjust 
IP */
         kvm_apic_set_eoi_accelerated(vcpu, vector);
+
+       /* When there are instances where ioapic_handled_vectors is
+        * set due to pending interrupts, clean up the record and do
+        * a full KVM_REQ_SCAN_IOAPIC.
+        */
+       if (vcpu->arch.last_pending_vector == vector) {
+               vcpu->arch.last_pending_vector = 0;
+               kvm_make_request(KVM_REQ_SCAN_IOAPIC, vcpu);
+       }
         return 1;
  }

>> -- 
>> 2.43.5
>>
Re: [PATCH] KVM: x86: ioapic: Optimize EOI handling to reduce unnecessary VM exits
Posted by Sean Christopherson 12 months ago
On Fri, Dec 27, 2024, wzj wrote:
> On December 18, 2024, Sean Christopherson wrote:
> > On Thu, Nov 21, 2024, weizijie wrote:
> We make the code on the common path as simple as possible.
> > > +		clear_bit(vector, vcpu->arch.ioapic_pending_vectors);
> > > +		clear_bit(vector, vcpu->arch.ioapic_handled_vectors);
> > > +		kvm_make_request(KVM_REQ_LOAD_EOI_EXITMAP, vcpu);
> > > +	}
> > >   	return 1;
> > >   }
> KVM: x86: ioapic: Optimize EOI handling to reduce
>  unnecessary VM exits
> 
> Address performance issues caused by a vector being reused by a
> non-IOAPIC source.
> 
> commit 0fc5a36dd6b3
> ("KVM: x86: ioapic: Fix level-triggered EOI and IOAPIC reconfigure race")
> addressed the issues related to EOI and IOAPIC reconfiguration races.
> However, it has introduced some performance concerns:
> 
> Configuring IOAPIC interrupts while an interrupt request (IRQ) is
> already in service can unintentionally trigger a VM exit for other
> interrupts that normally do not require one, due to the settings of
> `ioapic_handled_vectors`. If the IOAPIC is not reconfigured during
> runtime, this issue persists, continuing to adversely affect
> performance.
> 
> Simple Fix Proposal:
> A straightforward solution is to record highest in-service IRQ that
> is pending at the time of the last scan. Then, upon the next guest
> exit, do a full KVM_REQ_SCAN_IOAPIC. This ensures that a re-scan of
> the ioapic occurs only when the recorded vector is EOI'd, and
> subsequently, the extra bit in the eoi_exit_bitmap are cleared,
> avoiding unnecessary VM exits.
> 
> Co-developed-by: xuyun <xuyun_xy.xy@linux.alibaba.com>
> Signed-off-by: xuyun <xuyun_xy.xy@linux.alibaba.com>
> Signed-off-by: weizijie <zijie.wei@linux.alibaba.com>
> ---

This is whitespace damaged beyond what I am willing to massage and apply, and
honestly even beyond what I am willing to review.  Please fix your setup and
resend.

> v1 -> v2
> 
> * Move my SoB to the end and add Co-developed-by for Xuyun
> 
> * Use a u8 type to record a pending IRQ during the ioapic scan process
> 
> * Made the same changes for the split IRQ chip mode
> 
> arch/x86/include/asm/kvm_host.h |  1 +
>  arch/x86/kvm/ioapic.c           | 10 ++++++++--
>  arch/x86/kvm/irq_comm.c         |  9 +++++++--
>  arch/x86/kvm/vmx/vmx.c          |  9 +++++++++
>  4 files changed, 25 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h
> b/arch/x86/include/asm/kvm_host.h
> index e159e44a6a1b..f84a4881afa4 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1041,6 +1041,7 @@ struct kvm_vcpu_arch {
>  #if IS_ENABLED(CONFIG_HYPERV)
>         hpa_t hv_root_tdp;
>  #endif
> +       u8 last_pending_vector;
>  };
> 
>  struct kvm_lpage_info {
> diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
> index 995eb5054360..40252a800897 100644
> --- a/arch/x86/kvm/ioapic.c
> +++ b/arch/x86/kvm/ioapic.c
> @@ -297,10 +297,16 @@ void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu,
> ulong *ioapic_handled_vectors)
>                         u16 dm =
> kvm_lapic_irq_dest_mode(!!e->fields.dest_mode);
> 
>                         if (kvm_apic_match_dest(vcpu, NULL,
> APIC_DEST_NOSHORT,
> - e->fields.dest_id, dm) ||
> -                           kvm_apic_pending_eoi(vcpu, e->fields.vector))
> + e->fields.dest_id, dm))
>                                 __set_bit(e->fields.vector,
>                                           ioapic_handled_vectors);
> +                       else if (kvm_apic_pending_eoi(vcpu,
> e->fields.vector)) {
> +                               __set_bit(e->fields.vector,
> +                                         ioapic_handled_vectors);
> +                               vcpu->arch.last_pending_vector =
> e->fields.vector >
> + vcpu->arch.last_pending_vector ? e->fields.vector :
> + vcpu->arch.last_pending_vector;
> +                       }
>                 }
>         }
>         spin_unlock(&ioapic->lock);
> diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
> index 8136695f7b96..1d23c52576e1 100644
> --- a/arch/x86/kvm/irq_comm.c
> +++ b/arch/x86/kvm/irq_comm.c
> @@ -426,9 +426,14 @@ void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu,
> 
>                         if (irq.trig_mode &&
>                             (kvm_apic_match_dest(vcpu, NULL,
> APIC_DEST_NOSHORT,
> -                                                irq.dest_id, irq.dest_mode)
> ||
> -                            kvm_apic_pending_eoi(vcpu, irq.vector)))
> +                                                irq.dest_id,
> irq.dest_mode)))
>                                 __set_bit(irq.vector,
> ioapic_handled_vectors);
> +                       else if (kvm_apic_pending_eoi(vcpu, irq.vector)) {
> +                               __set_bit(irq.vector,
> ioapic_handled_vectors);
> +                               vcpu->arch.last_pending_vector = irq.vector
> >
> + vcpu->arch.last_pending_vector ? irq.vector :
> + vcpu->arch.last_pending_vector;
> +                       }
>                 }
>         }
>         srcu_read_unlock(&kvm->irq_srcu, idx);
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 893366e53732..cd0db1496ce7 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -5702,6 +5702,15 @@ static int handle_apic_eoi_induced(struct kvm_vcpu
> *vcpu)
> 
>         /* EOI-induced VM exit is trap-like and thus no need to adjust IP */
>         kvm_apic_set_eoi_accelerated(vcpu, vector);
> +
> +       /* When there are instances where ioapic_handled_vectors is
> +        * set due to pending interrupts, clean up the record and do
> +        * a full KVM_REQ_SCAN_IOAPIC.
> +        */
> +       if (vcpu->arch.last_pending_vector == vector) {
> +               vcpu->arch.last_pending_vector = 0;
> +               kvm_make_request(KVM_REQ_SCAN_IOAPIC, vcpu);
> +       }
>         return 1;
>  }
> 
> > > -- 
> > > 2.43.5
> > > 
[PATCH Resend] KVM: x86: ioapic: Optimize EOI handling to reduce unnecessary VM exits
Posted by weizijie 11 months, 2 weeks ago
Address performance issues caused by a vector being reused by a
non-IOAPIC source.

Commit 0fc5a36dd6b3
("KVM: x86: ioapic: Fix level-triggered EOI and IOAPIC reconfigure race")
addressed the issues related to EOI and IOAPIC reconfiguration races.
However, it has introduced some performance concerns:

Configuring IOAPIC interrupts while an interrupt request (IRQ) is
already in service can unintentionally trigger a VM exit for other
interrupts that normally do not require one, due to the settings of
`ioapic_handled_vectors`. If the IOAPIC is not reconfigured during
runtime, this issue persists, continuing to adversely affect
performance.

Simple Fix Proposal:
A straightforward solution is to record the highest in-service IRQ that
is pending at the time of the last scan. Then, upon the next guest
exit, do a full KVM_REQ_SCAN_IOAPIC. This ensures that a re-scan of
the ioapic occurs only when the recorded vector is EOI'd, and
subsequently, the extra bit in the eoi_exit_bitmap is cleared,
avoiding unnecessary VM exits.

Co-developed-by: xuyun <xuyun_xy.xy@linux.alibaba.com>
Signed-off-by: xuyun <xuyun_xy.xy@linux.alibaba.com>
Signed-off-by: weizijie <zijie.wei@linux.alibaba.com>
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/ioapic.c           | 10 ++++++++--
 arch/x86/kvm/irq_comm.c         |  9 +++++++--
 arch/x86/kvm/vmx/vmx.c          |  9 +++++++++
 4 files changed, 25 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 0b7af5902ff7..8c50e7b4a96f 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1062,6 +1062,7 @@ struct kvm_vcpu_arch {
 #if IS_ENABLED(CONFIG_HYPERV)
 	hpa_t hv_root_tdp;
 #endif
+	u8 last_pending_vector;
 };
 
 struct kvm_lpage_info {
diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
index 995eb5054360..40252a800897 100644
--- a/arch/x86/kvm/ioapic.c
+++ b/arch/x86/kvm/ioapic.c
@@ -297,10 +297,16 @@ void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, ulong *ioapic_handled_vectors)
 			u16 dm = kvm_lapic_irq_dest_mode(!!e->fields.dest_mode);
 
 			if (kvm_apic_match_dest(vcpu, NULL, APIC_DEST_NOSHORT,
-						e->fields.dest_id, dm) ||
-			    kvm_apic_pending_eoi(vcpu, e->fields.vector))
+						e->fields.dest_id, dm))
 				__set_bit(e->fields.vector,
 					  ioapic_handled_vectors);
+			else if (kvm_apic_pending_eoi(vcpu, e->fields.vector)) {
+				__set_bit(e->fields.vector,
+					  ioapic_handled_vectors);
+				vcpu->arch.last_pending_vector = e->fields.vector >
+					vcpu->arch.last_pending_vector ? e->fields.vector :
+					vcpu->arch.last_pending_vector;
+			}
 		}
 	}
 	spin_unlock(&ioapic->lock);
diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
index 8136695f7b96..1d23c52576e1 100644
--- a/arch/x86/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -426,9 +426,14 @@ void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu,
 
 			if (irq.trig_mode &&
 			    (kvm_apic_match_dest(vcpu, NULL, APIC_DEST_NOSHORT,
-						 irq.dest_id, irq.dest_mode) ||
-			     kvm_apic_pending_eoi(vcpu, irq.vector)))
+						 irq.dest_id, irq.dest_mode)))
 				__set_bit(irq.vector, ioapic_handled_vectors);
+			else if (kvm_apic_pending_eoi(vcpu, irq.vector)) {
+				__set_bit(irq.vector, ioapic_handled_vectors);
+				vcpu->arch.last_pending_vector = irq.vector >
+					vcpu->arch.last_pending_vector ? irq.vector :
+					vcpu->arch.last_pending_vector;
+			}
 		}
 	}
 	srcu_read_unlock(&kvm->irq_srcu, idx);
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 6c56d5235f0f..047cdd5964e5 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -5712,6 +5712,15 @@ static int handle_apic_eoi_induced(struct kvm_vcpu *vcpu)
 
 	/* EOI-induced VM exit is trap-like and thus no need to adjust IP */
 	kvm_apic_set_eoi_accelerated(vcpu, vector);
+
+	/* When there are instances where ioapic_handled_vectors is
+	 * set due to pending interrupts, clean up the record and do
+	 * a full KVM_REQ_SCAN_IOAPIC.
+	 */
+	if (vcpu->arch.last_pending_vector == vector) {
+		vcpu->arch.last_pending_vector = 0;
+		kvm_make_request(KVM_REQ_SCAN_IOAPIC, vcpu);
+	}
 	return 1;
 }
 
-- 
2.43.5
Re: [PATCH Resend] KVM: x86: ioapic: Optimize EOI handling to reduce unnecessary VM exits
Posted by Huang, Kai 11 months, 2 weeks ago

On 25/02/2025 7:42 pm, weizijie wrote:
> Address performance issues caused by a vector being reused by a
> non-IOAPIC source.
> 
> Commit 0fc5a36dd6b3
> ("KVM: x86: ioapic: Fix level-triggered EOI and IOAPIC reconfigure race")
> addressed the issues related to EOI and IOAPIC reconfiguration races.
> However, it has introduced some performance concerns:
> 
> Configuring IOAPIC interrupts while an interrupt request (IRQ) is
> already in service can unintentionally trigger a VM exit for other
> interrupts that normally do not require one, due to the settings of
> `ioapic_handled_vectors`. If the IOAPIC is not reconfigured during
> runtime, this issue persists, continuing to adversely affect
> performance.

Could you elaborate on why the guest would configure the IOAPIC entry to
use the same vector as an IRQ which is already in service?  Is it some
kind of temporary configuration (meaning the guest will soon either
reconfigure the vector of the conflicting IRQ or the IOAPIC entry)?

I.e., why would this issue persist?

If such persistence is due to a guest bug or bad behaviour, I am not sure
we need to tackle that in KVM.

> 
> Simple Fix Proposal:
> A straightforward solution is to record highest in-service IRQ that
> is pending at the time of the last scan. Then, upon the next guest
> exit, do a full KVM_REQ_SCAN_IOAPIC. This ensures that a re-scan of
> the ioapic occurs only when the recorded vector is EOI'd, and
> subsequently, the extra bit in the eoi_exit_bitmap are cleared,
> avoiding unnecessary VM exits.
> 
> Co-developed-by: xuyun <xuyun_xy.xy@linux.alibaba.com>
> Signed-off-by: xuyun <xuyun_xy.xy@linux.alibaba.com>
> Signed-off-by: weizijie <zijie.wei@linux.alibaba.com>
> ---
>   arch/x86/include/asm/kvm_host.h |  1 +
>   arch/x86/kvm/ioapic.c           | 10 ++++++++--
>   arch/x86/kvm/irq_comm.c         |  9 +++++++--
>   arch/x86/kvm/vmx/vmx.c          |  9 +++++++++
>   4 files changed, 25 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 0b7af5902ff7..8c50e7b4a96f 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1062,6 +1062,7 @@ struct kvm_vcpu_arch {
>   #if IS_ENABLED(CONFIG_HYPERV)
>   	hpa_t hv_root_tdp;
>   #endif
> +	u8 last_pending_vector;
>   };
>   
>   struct kvm_lpage_info {
> diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
> index 995eb5054360..40252a800897 100644
> --- a/arch/x86/kvm/ioapic.c
> +++ b/arch/x86/kvm/ioapic.c
> @@ -297,10 +297,16 @@ void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, ulong *ioapic_handled_vectors)
>   			u16 dm = kvm_lapic_irq_dest_mode(!!e->fields.dest_mode);
>   
>   			if (kvm_apic_match_dest(vcpu, NULL, APIC_DEST_NOSHORT,
> -						e->fields.dest_id, dm) ||
> -			    kvm_apic_pending_eoi(vcpu, e->fields.vector))
> +						e->fields.dest_id, dm))
>   				__set_bit(e->fields.vector,
>   					  ioapic_handled_vectors);
> +			else if (kvm_apic_pending_eoi(vcpu, e->fields.vector)) {
> +				__set_bit(e->fields.vector,
> +					  ioapic_handled_vectors);
> +				vcpu->arch.last_pending_vector = e->fields.vector >
> +					vcpu->arch.last_pending_vector ? e->fields.vector :
> +					vcpu->arch.last_pending_vector;
> +			}
>   		}
>   	}
>   	spin_unlock(&ioapic->lock);
> diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
> index 8136695f7b96..1d23c52576e1 100644
> --- a/arch/x86/kvm/irq_comm.c
> +++ b/arch/x86/kvm/irq_comm.c
> @@ -426,9 +426,14 @@ void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu,
>   
>   			if (irq.trig_mode &&
>   			    (kvm_apic_match_dest(vcpu, NULL, APIC_DEST_NOSHORT,
> -						 irq.dest_id, irq.dest_mode) ||
> -			     kvm_apic_pending_eoi(vcpu, irq.vector)))
> +						 irq.dest_id, irq.dest_mode)))
>   				__set_bit(irq.vector, ioapic_handled_vectors);
> +			else if (kvm_apic_pending_eoi(vcpu, irq.vector)) {
> +				__set_bit(irq.vector, ioapic_handled_vectors);
> +				vcpu->arch.last_pending_vector = irq.vector >
> +					vcpu->arch.last_pending_vector ? irq.vector :
> +					vcpu->arch.last_pending_vector;
> +			}
>   		}
>   	}
>   	srcu_read_unlock(&kvm->irq_srcu, idx);
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 6c56d5235f0f..047cdd5964e5 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -5712,6 +5712,15 @@ static int handle_apic_eoi_induced(struct kvm_vcpu *vcpu)
>   
>   	/* EOI-induced VM exit is trap-like and thus no need to adjust IP */
>   	kvm_apic_set_eoi_accelerated(vcpu, vector);
> +
> +	/* When there are instances where ioapic_handled_vectors is
> +	 * set due to pending interrupts, clean up the record and do
> +	 * a full KVM_REQ_SCAN_IOAPIC.
> +	 */

Comment style:

	/*
	 * When ...
	 */

> +	if (vcpu->arch.last_pending_vector == vector) {
> +		vcpu->arch.last_pending_vector = 0;
> +		kvm_make_request(KVM_REQ_SCAN_IOAPIC, vcpu);
> +	}

As Sean commented before, this should be in a common code probably in 
kvm_ioapic_send_eoi().

https://lore.kernel.org/all/Z2IDkWPz2rhDLD0P@google.com/
Re: [PATCH Resend] KVM: x86: ioapic: Optimize EOI handling to reduce unnecessary VM exits
Posted by wzj 11 months, 2 weeks ago

On 2025/2/26 22:44, Huang, Kai wrote:
> 
> 
> On 25/02/2025 7:42 pm, weizijie wrote:
>> Address performance issues caused by a vector being reused by a
>> non-IOAPIC source.
>>
>> Commit 0fc5a36dd6b3
>> ("KVM: x86: ioapic: Fix level-triggered EOI and IOAPIC reconfigure race")
>> addressed the issues related to EOI and IOAPIC reconfiguration races.
>> However, it has introduced some performance concerns:
>>
>> Configuring IOAPIC interrupts while an interrupt request (IRQ) is
>> already in service can unintentionally trigger a VM exit for other
>> interrupts that normally do not require one, due to the settings of
>> `ioapic_handled_vectors`. If the IOAPIC is not reconfigured during
>> runtime, this issue persists, continuing to adversely affect
>> performance.
> 
> Could you elaborate why the guest would configure the IOAPIC entry to 
> use the same vector of an IRQ which is already in service?  Is it some 
> kinda temporary configuration (which means the guest will either the 
> reconfigure the vector of the conflicting IRQ or the IOAPIC entry soon)?
> 
> I.e., why would this issue persist?
> 
> If such "persist" is due to guest bug or bad behaviour I am not sure we 
> need to tackle that in KVM.
> 

The previous patches:
db2bdcbbbd32 (KVM: x86: fix edge EOI and IOAPIC reconfig race)
0fc5a36dd6b3 (KVM: x86: ioapic: Fix level-triggered EOI and IOAPIC 
reconfigure race)
both mentioned this issue.

For example, when there is an interrupt being processed on CPU0 with 
vector 33, and an IOAPIC interrupt reconfiguration occurs at that 
moment, an interrupt is assigned to CPU1, and this interrupt’s vector
is also 33 (it could also be another value). At this point, the 
interrupt being processed, which originally did not need to cause a VM 
exit, will now need to continuously cause a VM exit afterward.
You are absolutely correct; if the guest triggers an IOAPIC interrupt 
reconfiguration again afterward and does not encounter the 
aforementioned situation, then vector 33 on CPU0 will no longer need to 
cause a VM exit.

>>
>> Simple Fix Proposal:
>> A straightforward solution is to record highest in-service IRQ that
>> is pending at the time of the last scan. Then, upon the next guest
>> exit, do a full KVM_REQ_SCAN_IOAPIC. This ensures that a re-scan of
>> the ioapic occurs only when the recorded vector is EOI'd, and
>> subsequently, the extra bit in the eoi_exit_bitmap are cleared,
>> avoiding unnecessary VM exits.
>>
>> Co-developed-by: xuyun <xuyun_xy.xy@linux.alibaba.com>
>> Signed-off-by: xuyun <xuyun_xy.xy@linux.alibaba.com>
>> Signed-off-by: weizijie <zijie.wei@linux.alibaba.com>
>> ---
>>   arch/x86/include/asm/kvm_host.h |  1 +
>>   arch/x86/kvm/ioapic.c           | 10 ++++++++--
>>   arch/x86/kvm/irq_comm.c         |  9 +++++++--
>>   arch/x86/kvm/vmx/vmx.c          |  9 +++++++++
>>   4 files changed, 25 insertions(+), 4 deletions(-)
>>
>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/ 
>> kvm_host.h
>> index 0b7af5902ff7..8c50e7b4a96f 100644
>> --- a/arch/x86/include/asm/kvm_host.h
>> +++ b/arch/x86/include/asm/kvm_host.h
>> @@ -1062,6 +1062,7 @@ struct kvm_vcpu_arch {
>>   #if IS_ENABLED(CONFIG_HYPERV)
>>       hpa_t hv_root_tdp;
>>   #endif
>> +    u8 last_pending_vector;
>>   };
>>   struct kvm_lpage_info {
>> diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
>> index 995eb5054360..40252a800897 100644
>> --- a/arch/x86/kvm/ioapic.c
>> +++ b/arch/x86/kvm/ioapic.c
>> @@ -297,10 +297,16 @@ void kvm_ioapic_scan_entry(struct kvm_vcpu 
>> *vcpu, ulong *ioapic_handled_vectors)
>>               u16 dm = kvm_lapic_irq_dest_mode(!!e->fields.dest_mode);
>>               if (kvm_apic_match_dest(vcpu, NULL, APIC_DEST_NOSHORT,
>> -                        e->fields.dest_id, dm) ||
>> -                kvm_apic_pending_eoi(vcpu, e->fields.vector))
>> +                        e->fields.dest_id, dm))
>>                   __set_bit(e->fields.vector,
>>                         ioapic_handled_vectors);
>> +            else if (kvm_apic_pending_eoi(vcpu, e->fields.vector)) {
>> +                __set_bit(e->fields.vector,
>> +                      ioapic_handled_vectors);
>> +                vcpu->arch.last_pending_vector = e->fields.vector >
>> +                    vcpu->arch.last_pending_vector ? e->fields.vector :
>> +                    vcpu->arch.last_pending_vector;
>> +            }
>>           }
>>       }
>>       spin_unlock(&ioapic->lock);
>> diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
>> index 8136695f7b96..1d23c52576e1 100644
>> --- a/arch/x86/kvm/irq_comm.c
>> +++ b/arch/x86/kvm/irq_comm.c
>> @@ -426,9 +426,14 @@ void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu,
>>               if (irq.trig_mode &&
>>                   (kvm_apic_match_dest(vcpu, NULL, APIC_DEST_NOSHORT,
>> -                         irq.dest_id, irq.dest_mode) ||
>> -                 kvm_apic_pending_eoi(vcpu, irq.vector)))
>> +                         irq.dest_id, irq.dest_mode)))
>>                   __set_bit(irq.vector, ioapic_handled_vectors);
>> +            else if (kvm_apic_pending_eoi(vcpu, irq.vector)) {
>> +                __set_bit(irq.vector, ioapic_handled_vectors);
>> +                vcpu->arch.last_pending_vector = irq.vector >
>> +                    vcpu->arch.last_pending_vector ? irq.vector :
>> +                    vcpu->arch.last_pending_vector;
>> +            }
>>           }
>>       }
>>       srcu_read_unlock(&kvm->irq_srcu, idx);
>> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
>> index 6c56d5235f0f..047cdd5964e5 100644
>> --- a/arch/x86/kvm/vmx/vmx.c
>> +++ b/arch/x86/kvm/vmx/vmx.c
>> @@ -5712,6 +5712,15 @@ static int handle_apic_eoi_induced(struct 
>> kvm_vcpu *vcpu)
>>       /* EOI-induced VM exit is trap-like and thus no need to adjust 
>> IP */
>>       kvm_apic_set_eoi_accelerated(vcpu, vector);
>> +
>> +    /* When there are instances where ioapic_handled_vectors is
>> +     * set due to pending interrupts, clean up the record and do
>> +     * a full KVM_REQ_SCAN_IOAPIC.
>> +     */
> 
> Comment style:
> 
>      /*
>       * When ...
>       */
> 

Thank you very much for your suggestion.

>> +    if (vcpu->arch.last_pending_vector == vector) {
>> +        vcpu->arch.last_pending_vector = 0;
>> +        kvm_make_request(KVM_REQ_SCAN_IOAPIC, vcpu);
>> +    }
> 
> As Sean commented before, this should be in a common code probably in 
> kvm_ioapic_send_eoi().
> 

I will move the modifications here and send a new patch.

> https://lore.kernel.org/all/Z2IDkWPz2rhDLD0P@google.com/

Best regards!

[PATCH v2] KVM: x86: ioapic: Optimize EOI handling to reduce unnecessary VM exits
Posted by weizijie 1 year, 1 month ago
Address performance issues caused by a vector being reused by a
non-IOAPIC source.

commit 0fc5a36dd6b3
("KVM: x86: ioapic: Fix level-triggered EOI and IOAPIC reconfigure race")
addressed the issues related to EOI and IOAPIC reconfiguration races.
However, it has introduced some performance concerns:

Configuring IOAPIC interrupts while an interrupt request (IRQ) is
already in service can unintentionally trigger a VM exit for other
interrupts that normally do not require one, due to the settings of
`ioapic_handled_vectors`. If the IOAPIC is not reconfigured during
runtime, this issue persists, continuing to adversely affect
performance.

Simple Fix Proposal:
A straightforward solution is to record a vector that is pending at
the time of the last scan. Then, upon the next guest exit, do a full
KVM_REQ_SCAN_IOAPIC. This ensures that a re-scan of the ioapic occurs
only when the recorded vector is EOI'd, and subsequently, the extra
bit in the eoi_exit_bitmap is cleared, avoiding unnecessary VM exits.

Co-developed-by: xuyun <xuyun_xy.xy@linux.alibaba.com>
Signed-off-by: xuyun <xuyun_xy.xy@linux.alibaba.com>
Signed-off-by: weizijie <zijie.wei@linux.alibaba.com>
---
 arch/x86/include/asm/kvm_host.h | 1 +
 arch/x86/kvm/ioapic.c           | 8 ++++++--
 arch/x86/kvm/irq_comm.c         | 7 +++++--
 arch/x86/kvm/vmx/vmx.c          | 9 +++++++++
 4 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index e159e44a6a1b..f84a4881afa4 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1041,6 +1041,7 @@ struct kvm_vcpu_arch {
 #if IS_ENABLED(CONFIG_HYPERV)
 	hpa_t hv_root_tdp;
 #endif
+	u8 last_pending_vector;
 };
 
 struct kvm_lpage_info {
diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
index 995eb5054360..6b203f0847ec 100644
--- a/arch/x86/kvm/ioapic.c
+++ b/arch/x86/kvm/ioapic.c
@@ -297,10 +297,14 @@ void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, ulong *ioapic_handled_vectors)
 			u16 dm = kvm_lapic_irq_dest_mode(!!e->fields.dest_mode);
 
 			if (kvm_apic_match_dest(vcpu, NULL, APIC_DEST_NOSHORT,
-						e->fields.dest_id, dm) ||
-			    kvm_apic_pending_eoi(vcpu, e->fields.vector))
+						e->fields.dest_id, dm))
 				__set_bit(e->fields.vector,
 					  ioapic_handled_vectors);
+			else if (kvm_apic_pending_eoi(vcpu, e->fields.vector)) {
+				__set_bit(e->fields.vector,
+					  ioapic_handled_vectors);
+				vcpu->arch.last_pending_vector = e->fields.vector;
+			}
 		}
 	}
 	spin_unlock(&ioapic->lock);
diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
index 8136695f7b96..ca45be1503f4 100644
--- a/arch/x86/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -426,9 +426,12 @@ void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu,
 
 			if (irq.trig_mode &&
 			    (kvm_apic_match_dest(vcpu, NULL, APIC_DEST_NOSHORT,
-						 irq.dest_id, irq.dest_mode) ||
-			     kvm_apic_pending_eoi(vcpu, irq.vector)))
+						 irq.dest_id, irq.dest_mode)))
 				__set_bit(irq.vector, ioapic_handled_vectors);
+			else if (kvm_apic_pending_eoi(vcpu, irq.vector)) {
+				__set_bit(irq.vector, ioapic_handled_vectors);
+				vcpu->arch.last_pending_vector = irq.vector;
+			}
 		}
 	}
 	srcu_read_unlock(&kvm->irq_srcu, idx);
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 0f008f5ef6f0..2abf67e76780 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -5710,6 +5710,15 @@ static int handle_apic_eoi_induced(struct kvm_vcpu *vcpu)
 
 	/* EOI-induced VM exit is trap-like and thus no need to adjust IP */
 	kvm_apic_set_eoi_accelerated(vcpu, vector);
+
+	/* When there are instances where ioapic_handled_vectors is
+	 * set due to pending interrupts, clean up the record and do
+	 * a full KVM_REQ_SCAN_IOAPIC.
+	 */
+	if (vcpu->arch.last_pending_vector == vector) {
+		vcpu->arch.last_pending_vector = 0;
+		kvm_make_request(KVM_REQ_SCAN_IOAPIC, vcpu);
+	}
 	return 1;
 }
 
-- 
2.43.5
[PATCH v4] KVM: x86: ioapic: Optimize EOI handling to reduce unnecessary VM exits
Posted by weizijie 11 months, 1 week ago
Configuring IOAPIC-routed interrupts triggers KVM to rescan every vCPU's
ioapic_handled_vectors, which is used to control which vectors need to
trigger EOI-induced VMEXITs. If any interrupt is already in service on
some vCPU using some vector when the IOAPIC is being rescanned, the
vector is set in that vCPU's ioapic_handled_vectors. If the vector is then
reused by other interrupts, each of them will cause a VMEXIT even when it
is unnecessary. Without a further IOAPIC rescan, the vector remains set,
and this issue persists, impacting the guest's interrupt performance.

Both

  commit db2bdcbbbd32 ("KVM: x86: fix edge EOI and IOAPIC reconfig race")

and

  commit 0fc5a36dd6b3 ("KVM: x86: ioapic: Fix level-triggered EOI and
IOAPIC reconfigure race")

mentioned this issue, but it was considered "rare" and thus was not
addressed. However, in real environments this issue can actually happen
in a well-behaved guest.

Simple Fix Proposal:
A straightforward solution is to record the highest in-service IRQ that
is pending at the time of the last scan. Then, upon the next guest
exit, do a full KVM_REQ_SCAN_IOAPIC. This ensures that a re-scan of
the ioapic occurs only when the recorded vector is EOI'd, and
subsequently, the extra bits in the eoi_exit_bitmap are cleared,
avoiding unnecessary VM exits.

Co-developed-by: xuyun <xuyun_xy.xy@linux.alibaba.com>
Signed-off-by: xuyun <xuyun_xy.xy@linux.alibaba.com>
Signed-off-by: weizijie <zijie.wei@linux.alibaba.com>
Reviewed-by: Kai Huang <kai.huang@intel.com>
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/ioapic.c           | 10 ++++++++--
 arch/x86/kvm/irq_comm.c         |  9 +++++++--
 arch/x86/kvm/lapic.c            | 13 +++++++++++++
 4 files changed, 29 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 0b7af5902ff7..8c50e7b4a96f 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1062,6 +1062,7 @@ struct kvm_vcpu_arch {
 #if IS_ENABLED(CONFIG_HYPERV)
 	hpa_t hv_root_tdp;
 #endif
+	u8 last_pending_vector;
 };
 
 struct kvm_lpage_info {
diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
index 995eb5054360..40252a800897 100644
--- a/arch/x86/kvm/ioapic.c
+++ b/arch/x86/kvm/ioapic.c
@@ -297,10 +297,16 @@ void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, ulong *ioapic_handled_vectors)
 			u16 dm = kvm_lapic_irq_dest_mode(!!e->fields.dest_mode);
 
 			if (kvm_apic_match_dest(vcpu, NULL, APIC_DEST_NOSHORT,
-						e->fields.dest_id, dm) ||
-			    kvm_apic_pending_eoi(vcpu, e->fields.vector))
+						e->fields.dest_id, dm))
 				__set_bit(e->fields.vector,
 					  ioapic_handled_vectors);
+			else if (kvm_apic_pending_eoi(vcpu, e->fields.vector)) {
+				__set_bit(e->fields.vector,
+					  ioapic_handled_vectors);
+				vcpu->arch.last_pending_vector = e->fields.vector >
+					vcpu->arch.last_pending_vector ? e->fields.vector :
+					vcpu->arch.last_pending_vector;
+			}
 		}
 	}
 	spin_unlock(&ioapic->lock);
diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
index 8136695f7b96..1d23c52576e1 100644
--- a/arch/x86/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -426,9 +426,14 @@ void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu,
 
 			if (irq.trig_mode &&
 			    (kvm_apic_match_dest(vcpu, NULL, APIC_DEST_NOSHORT,
-						 irq.dest_id, irq.dest_mode) ||
-			     kvm_apic_pending_eoi(vcpu, irq.vector)))
+						 irq.dest_id, irq.dest_mode)))
 				__set_bit(irq.vector, ioapic_handled_vectors);
+			else if (kvm_apic_pending_eoi(vcpu, irq.vector)) {
+				__set_bit(irq.vector, ioapic_handled_vectors);
+				vcpu->arch.last_pending_vector = irq.vector >
+					vcpu->arch.last_pending_vector ? irq.vector :
+					vcpu->arch.last_pending_vector;
+			}
 		}
 	}
 	srcu_read_unlock(&kvm->irq_srcu, idx);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index a009c94c26c2..7c540a0eb340 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1466,6 +1466,19 @@ static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector)
 	if (!kvm_ioapic_handles_vector(apic, vector))
 		return;
 
+	/*
+	 * When there are instances where ioapic_handled_vectors is
+	 * set due to pending interrupts, clean up the record and do
+	 * a full KVM_REQ_SCAN_IOAPIC.
+	 * This ensures the vector is cleared in the vCPU's ioapic_handled_vectors,
+	 * if the vector is reused by non-IOAPIC interrupts, avoiding unnecessary
+	 * EOI-induced VMEXITs for that vector.
+	 */
+	if (apic->vcpu->arch.last_pending_vector == vector) {
+		apic->vcpu->arch.last_pending_vector = 0;
+		kvm_make_request(KVM_REQ_SCAN_IOAPIC, apic->vcpu);
+	}
+
 	/* Request a KVM exit to inform the userspace IOAPIC. */
 	if (irqchip_split(apic->vcpu->kvm)) {
 		apic->vcpu->arch.pending_ioapic_eoi = vector;
-- 
2.43.5

Re: [PATCH v4] KVM: x86: ioapic: Optimize EOI handling to reduce unnecessary VM exits
Posted by Sean Christopherson 11 months, 1 week ago
Several minor comments.  No need to post v5, I'll do so today as a small series
with preparatory patches to refactor and deduplicate the userspace vs. in-kernel
logic.

On Mon, Mar 03, 2025, weizijie wrote:
> Configuring IOAPIC routed interrupts triggers KVM to rescan all vCPU's
> ioapic_handled_vectors which is used to control which vectors need to
> trigger EOI-induced VMEXITs. If any interrupt is already in service on
> some vCPU using some vector when the IOAPIC is being rescanned, the
> vector is set to vCPU's ioapic_handled_vectors. If the vector is then
> reused by other interrupts, each of them will cause a VMEXIT even it is
> unnecessary. W/o further IOAPIC rescan, the vector remains set, and this
> issue persists, impacting guest's interrupt performance.
> 
> Both
> 
>   commit db2bdcbbbd32 ("KVM: x86: fix edge EOI and IOAPIC reconfig race")
> 
> and
> 
>   commit 0fc5a36dd6b3 ("KVM: x86: ioapic: Fix level-triggered EOI and
> IOAPIC reconfigure race")
> 
> mentioned this issue, but it was considered as "rare" thus was not
> addressed. However in real environment this issue can actually happen
> in a well-behaved guest.
> 
> Simple Fix Proposal:
> A straightforward solution is to record highest in-service IRQ that
> is pending at the time of the last scan. Then, upon the next guest
> exit, do a full KVM_REQ_SCAN_IOAPIC. This ensures that a re-scan of
> the ioapic occurs only when the recorded vector is EOI'd, and
> subsequently, the extra bits in the eoi_exit_bitmap are cleared,
> avoiding unnecessary VM exits.

Write changelogs as "commands", i.e. state what changes are actually being made,
as opposed to passively describing a proposed/possible change.

> Co-developed-by: xuyun <xuyun_xy.xy@linux.alibaba.com>
> Signed-off-by: xuyun <xuyun_xy.xy@linux.alibaba.com>
> Signed-off-by: weizijie <zijie.wei@linux.alibaba.com>
> Reviewed-by: Kai Huang <kai.huang@intel.com>
> ---
>  arch/x86/include/asm/kvm_host.h |  1 +
>  arch/x86/kvm/ioapic.c           | 10 ++++++++--
>  arch/x86/kvm/irq_comm.c         |  9 +++++++--
>  arch/x86/kvm/lapic.c            | 13 +++++++++++++
>  4 files changed, 29 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 0b7af5902ff7..8c50e7b4a96f 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1062,6 +1062,7 @@ struct kvm_vcpu_arch {
>  #if IS_ENABLED(CONFIG_HYPERV)
>  	hpa_t hv_root_tdp;
>  #endif
> +	u8 last_pending_vector;

To be consistent with how KVM handles IRQs, this should probably be an "int" with
-1 as the "no pending EOI" value.

I also think we should go with a verbose name to try and precisely capture what
this field tracks, e.g. highest_stale_pending_ioapic_eoi.  It's absurdly long,
but with massaging and refactoring the line lengths are a non-issue, and I want
to avoid confusion with pending_ioapic_eoi and highest_isr_cache (and others).
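
A sketch of the resulting field (illustrative placement, in kvm_vcpu_arch as
in the patch):

    int highest_stale_pending_ioapic_eoi;	/* -1 == no stale pending EOI */

A signed int with -1 keeps the "nothing recorded" state distinct from every
possible vector value.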

>  };
>  
>  struct kvm_lpage_info {
> diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
> index 995eb5054360..40252a800897 100644
> --- a/arch/x86/kvm/ioapic.c
> +++ b/arch/x86/kvm/ioapic.c
> @@ -297,10 +297,16 @@ void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, ulong *ioapic_handled_vectors)
>  			u16 dm = kvm_lapic_irq_dest_mode(!!e->fields.dest_mode);
>  
>  			if (kvm_apic_match_dest(vcpu, NULL, APIC_DEST_NOSHORT,
> -						e->fields.dest_id, dm) ||
> -			    kvm_apic_pending_eoi(vcpu, e->fields.vector))
> +						e->fields.dest_id, dm))
>  				__set_bit(e->fields.vector,
>  					  ioapic_handled_vectors);
> +			else if (kvm_apic_pending_eoi(vcpu, e->fields.vector)) {
> +				__set_bit(e->fields.vector,
> +					  ioapic_handled_vectors);
> +				vcpu->arch.last_pending_vector = e->fields.vector >
> +					vcpu->arch.last_pending_vector ? e->fields.vector :
> +					vcpu->arch.last_pending_vector;

Eh, no need to use a ternary operator, last_pending_vector only needs to be written
if it's changing.

>  		}
>  	}
>  	spin_unlock(&ioapic->lock);
> diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
> index 8136695f7b96..1d23c52576e1 100644
> --- a/arch/x86/kvm/irq_comm.c
> +++ b/arch/x86/kvm/irq_comm.c
> @@ -426,9 +426,14 @@ void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu,
>  
>  			if (irq.trig_mode &&
>  			    (kvm_apic_match_dest(vcpu, NULL, APIC_DEST_NOSHORT,
> -						 irq.dest_id, irq.dest_mode) ||
> -			     kvm_apic_pending_eoi(vcpu, irq.vector)))
> +						 irq.dest_id, irq.dest_mode)))
>  				__set_bit(irq.vector, ioapic_handled_vectors);
> +			else if (kvm_apic_pending_eoi(vcpu, irq.vector)) {
> +				__set_bit(irq.vector, ioapic_handled_vectors);
> +				vcpu->arch.last_pending_vector = irq.vector >
> +					vcpu->arch.last_pending_vector ? irq.vector :
> +					vcpu->arch.last_pending_vector;

This is wrong.  Well, maybe not "wrong" per se, but unnecessary.  The trig_mode
check guards both the "new" and "old" routing cases, i.e. KVM's behavior is to
intercept EOIs for in-flight IRQs if and only if the IRQ is level-triggered.

This code really needs to be de-duplicated between the userspace and in-kernel
I/O APICs.  It probably should have been de-duplicated by fceb3a36c29a ("KVM: x86:
ioapic: Fix level-triggered EOI and userspace I/OAPIC reconfigure race"), but it's
a much more pressing issue now.
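
One possible shape for such a shared helper (hypothetical name and signature,
using the int field suggested above; the actual refactor may differ):

    /* Scan one level-triggered route on behalf of both I/O APIC implementations. */
    static void kvm_scan_ioapic_irq(struct kvm_vcpu *vcpu, u32 dest_id, u16 dest_mode,
                                    u8 vector, unsigned long *ioapic_handled_vectors)
    {
        if (kvm_apic_match_dest(vcpu, NULL, APIC_DEST_NOSHORT, dest_id, dest_mode)) {
            __set_bit(vector, ioapic_handled_vectors);
        } else if (kvm_apic_pending_eoi(vcpu, vector)) {
            __set_bit(vector, ioapic_handled_vectors);
            if (vector > vcpu->arch.highest_stale_pending_ioapic_eoi)
                vcpu->arch.highest_stale_pending_ioapic_eoi = vector;
        }
    }

with kvm_ioapic_scan_entry() and kvm_scan_ioapic_routes() each doing only the
trig_mode check and dest extraction before calling it.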

> +			}
>  		}
>  	}
>  	srcu_read_unlock(&kvm->irq_srcu, idx);
> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> index a009c94c26c2..7c540a0eb340 100644
> --- a/arch/x86/kvm/lapic.c
> +++ b/arch/x86/kvm/lapic.c
> @@ -1466,6 +1466,19 @@ static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector)
>  	if (!kvm_ioapic_handles_vector(apic, vector))
>  		return;
>  
> +	/*
> +	 * When there are instances where ioapic_handled_vectors is
> +	 * set due to pending interrupts, clean up the record and do
> +	 * a full KVM_REQ_SCAN_IOAPIC.
> +	 * This ensures the vector is cleared in the vCPU's ioapic_handled_vectors,
> +	 * if the vector is reused by non-IOAPIC interrupts, avoiding unnecessary
> +	 * EOI-induced VMEXITs for that vector.
> +	 */
> +	if (apic->vcpu->arch.last_pending_vector == vector) {
> +		apic->vcpu->arch.last_pending_vector = 0;

I think it makes sense to reset the field when KVM_REQ_SCAN_IOAPIC, mainly so
that it's more obviously correct, i.e. so that it's easier to see that the field
is reset immediately prior to scanning, along with the bitmap itself.
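
E.g., a sketch of what I have in mind, next to the existing bitmap_zero() of
ioapic_handled_vectors when the request is serviced (exact location aside,
untested):

	/* Reset the stale-EOI tracking along with the bitmap itself. */
	vcpu->arch.last_pending_vector = 0;
	bitmap_zero(vcpu->arch.ioapic_handled_vectors, 256);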

> +		kvm_make_request(KVM_REQ_SCAN_IOAPIC, apic->vcpu);
> +	}
> +
>  	/* Request a KVM exit to inform the userspace IOAPIC. */
>  	if (irqchip_split(apic->vcpu->kvm)) {
>  		apic->vcpu->arch.pending_ioapic_eoi = vector;
> -- 
> 2.43.5
> 
[PATCH v3] KVM: x86: ioapic: Optimize EOI handling to reduce unnecessary VM exits
Posted by weizijie 11 months, 2 weeks ago
Address performance issues caused by a vector being reused by a
non-IOAPIC source.

Commit 0fc5a36dd6b3
("KVM: x86: ioapic: Fix level-triggered EOI and IOAPIC reconfigure race")
addressed the issues related to EOI and IOAPIC reconfiguration races.
However, it has introduced some performance concerns:

Configuring IOAPIC interrupts while an interrupt request (IRQ) is
already in service can unintentionally trigger a VM exit for other
interrupts that normally do not require one, due to the settings of
`ioapic_handled_vectors`. If the IOAPIC is not reconfigured during
runtime, this issue persists, continuing to adversely affect
performance.

Simple Fix Proposal:
A straightforward solution is to record highest in-service IRQ that
is pending at the time of the last scan. Then, upon the next guest
exit, do a full KVM_REQ_SCAN_IOAPIC. This ensures that a re-scan of
the ioapic occurs only when the recorded vector is EOI'd, and
subsequently, the extra bit in the eoi_exit_bitmap are cleared,
avoiding unnecessary VM exits.

Co-developed-by: xuyun <xuyun_xy.xy@linux.alibaba.com>
Signed-off-by: xuyun <xuyun_xy.xy@linux.alibaba.com>
Signed-off-by: weizijie <zijie.wei@linux.alibaba.com>
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/ioapic.c           | 10 ++++++++--
 arch/x86/kvm/irq_comm.c         |  9 +++++++--
 arch/x86/kvm/lapic.c            | 10 ++++++++++
 4 files changed, 26 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 0b7af5902ff7..8c50e7b4a96f 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1062,6 +1062,7 @@ struct kvm_vcpu_arch {
 #if IS_ENABLED(CONFIG_HYPERV)
 	hpa_t hv_root_tdp;
 #endif
+	u8 last_pending_vector;
 };
 
 struct kvm_lpage_info {
diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
index 995eb5054360..40252a800897 100644
--- a/arch/x86/kvm/ioapic.c
+++ b/arch/x86/kvm/ioapic.c
@@ -297,10 +297,16 @@ void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, ulong *ioapic_handled_vectors)
 			u16 dm = kvm_lapic_irq_dest_mode(!!e->fields.dest_mode);
 
 			if (kvm_apic_match_dest(vcpu, NULL, APIC_DEST_NOSHORT,
-						e->fields.dest_id, dm) ||
-			    kvm_apic_pending_eoi(vcpu, e->fields.vector))
+						e->fields.dest_id, dm))
 				__set_bit(e->fields.vector,
 					  ioapic_handled_vectors);
+			else if (kvm_apic_pending_eoi(vcpu, e->fields.vector)) {
+				__set_bit(e->fields.vector,
+					  ioapic_handled_vectors);
+				vcpu->arch.last_pending_vector = e->fields.vector >
+					vcpu->arch.last_pending_vector ? e->fields.vector :
+					vcpu->arch.last_pending_vector;
+			}
 		}
 	}
 	spin_unlock(&ioapic->lock);
diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
index 8136695f7b96..1d23c52576e1 100644
--- a/arch/x86/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -426,9 +426,14 @@ void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu,
 
 			if (irq.trig_mode &&
 			    (kvm_apic_match_dest(vcpu, NULL, APIC_DEST_NOSHORT,
-						 irq.dest_id, irq.dest_mode) ||
-			     kvm_apic_pending_eoi(vcpu, irq.vector)))
+						 irq.dest_id, irq.dest_mode)))
 				__set_bit(irq.vector, ioapic_handled_vectors);
+			else if (kvm_apic_pending_eoi(vcpu, irq.vector)) {
+				__set_bit(irq.vector, ioapic_handled_vectors);
+				vcpu->arch.last_pending_vector = irq.vector >
+					vcpu->arch.last_pending_vector ? irq.vector :
+					vcpu->arch.last_pending_vector;
+			}
 		}
 	}
 	srcu_read_unlock(&kvm->irq_srcu, idx);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index a009c94c26c2..5d62ea5f1503 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1466,6 +1466,16 @@ static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector)
 	if (!kvm_ioapic_handles_vector(apic, vector))
 		return;
 
+	/*
+	 * When there are instances where ioapic_handled_vectors is
+	 * set due to pending interrupts, clean up the record and do
+	 * a full KVM_REQ_SCAN_IOAPIC.
+	 */
+	if (apic->vcpu->arch.last_pending_vector == vector) {
+		apic->vcpu->arch.last_pending_vector = 0;
+		kvm_make_request(KVM_REQ_SCAN_IOAPIC, apic->vcpu);
+	}
+
 	/* Request a KVM exit to inform the userspace IOAPIC. */
 	if (irqchip_split(apic->vcpu->kvm)) {
 		apic->vcpu->arch.pending_ioapic_eoi = vector;
-- 
2.43.5
Re: [PATCH v3] KVM: x86: ioapic: Optimize EOI handling to reduce unnecessary VM exits
Posted by Huang, Kai 11 months, 2 weeks ago
On Fri, 2025-02-28 at 10:15 +0800, weizijie wrote:
> Address performance issues caused by a vector being reused by a
> non-IOAPIC source.

I saw your reply in v2.  Thanks.

Some minor comments below, which may be just nits for Sean/Paolo, so feel free
to add:

Reviewed-by: Kai Huang <kai.huang@intel.com>

> 
> Commit 0fc5a36dd6b3
> ("KVM: x86: ioapic: Fix level-triggered EOI and IOAPIC reconfigure race")
> addressed the issues related to EOI and IOAPIC reconfiguration races.
> However, it has introduced some performance concerns:
> 
> Configuring IOAPIC interrupts while an interrupt request (IRQ) is
> already in service can unintentionally trigger a VM exit for other
> interrupts that normally do not require one, due to the settings of
> `ioapic_handled_vectors`. If the IOAPIC is not reconfigured during
> runtime, this issue persists, continuing to adversely affect
> performance.


So in short:

  The "rare case" mentioned in db2bdcbbbd32 and 0fc5a36dd6b3 is actually 
  not that rare and can actually happen in the good behaved guest.

The above commit message isn't very clear to me, though.  How about below?

Configuring IOAPIC routed interrupts triggers KVM to rescan all vCPUs'
ioapic_handled_vectors, which is used to control which vectors need to trigger
EOI-induced VMEXITs.  If any interrupt is already in service on some vCPU using
some vector when the IOAPIC is being rescanned, that vector is set in the vCPU's
ioapic_handled_vectors.  If the vector is then reused by other interrupts, each
of them will cause a VMEXIT even though it is unnecessary.  W/o a further IOAPIC
rescan, the vector remains set and the issue persists, impacting the guest's
interrupt performance.

Both commit

  db2bdcbbbd32 (KVM: x86: fix edge EOI and IOAPIC reconfig race)

and commit

  0fc5a36dd6b3 (KVM: x86: ioapic: Fix level-triggered EOI and IOAPIC reconfigure
race)

mentioned this issue, but it was considered "rare" and thus was not addressed.
However, in real environments this issue can actually happen in a well-behaved
guest.

> 
> Simple Fix Proposal:
> A straightforward solution is to record highest in-service IRQ that
> is pending at the time of the last scan. Then, upon the next guest
> exit, do a full KVM_REQ_SCAN_IOAPIC. This ensures that a re-scan of
> the ioapic occurs only when the recorded vector is EOI'd, and
> subsequently, the extra bit in the eoi_exit_bitmap are cleared,
			  ^
			  bits

> avoiding unnecessary VM exits.
> 
> Co-developed-by: xuyun <xuyun_xy.xy@linux.alibaba.com>
> Signed-off-by: xuyun <xuyun_xy.xy@linux.alibaba.com>
> Signed-off-by: weizijie <zijie.wei@linux.alibaba.com>
> ---
>  arch/x86/include/asm/kvm_host.h |  1 +
>  arch/x86/kvm/ioapic.c           | 10 ++++++++--
>  arch/x86/kvm/irq_comm.c         |  9 +++++++--
>  arch/x86/kvm/lapic.c            | 10 ++++++++++
>  4 files changed, 26 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 0b7af5902ff7..8c50e7b4a96f 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1062,6 +1062,7 @@ struct kvm_vcpu_arch {
>  #if IS_ENABLED(CONFIG_HYPERV)
>  	hpa_t hv_root_tdp;
>  #endif
> +	u8 last_pending_vector;
>  };
>  
>  struct kvm_lpage_info {
> diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
> index 995eb5054360..40252a800897 100644
> --- a/arch/x86/kvm/ioapic.c
> +++ b/arch/x86/kvm/ioapic.c
> @@ -297,10 +297,16 @@ void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, ulong *ioapic_handled_vectors)
>  			u16 dm = kvm_lapic_irq_dest_mode(!!e->fields.dest_mode);
>  
>  			if (kvm_apic_match_dest(vcpu, NULL, APIC_DEST_NOSHORT,
> -						e->fields.dest_id, dm) ||
> -			    kvm_apic_pending_eoi(vcpu, e->fields.vector))
> +						e->fields.dest_id, dm))
>  				__set_bit(e->fields.vector,
>  					  ioapic_handled_vectors);
> +			else if (kvm_apic_pending_eoi(vcpu, e->fields.vector)) {
> +				__set_bit(e->fields.vector,
> +					  ioapic_handled_vectors);
> +				vcpu->arch.last_pending_vector = e->fields.vector >
> +					vcpu->arch.last_pending_vector ? e->fields.vector :
> +					vcpu->arch.last_pending_vector;
> +			}
>  		}
>  	}
>  	spin_unlock(&ioapic->lock);
> diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
> index 8136695f7b96..1d23c52576e1 100644
> --- a/arch/x86/kvm/irq_comm.c
> +++ b/arch/x86/kvm/irq_comm.c
> @@ -426,9 +426,14 @@ void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu,
>  
>  			if (irq.trig_mode &&
>  			    (kvm_apic_match_dest(vcpu, NULL, APIC_DEST_NOSHORT,
> -						 irq.dest_id, irq.dest_mode) ||
> -			     kvm_apic_pending_eoi(vcpu, irq.vector)))
> +						 irq.dest_id, irq.dest_mode)))
>  				__set_bit(irq.vector, ioapic_handled_vectors);
> +			else if (kvm_apic_pending_eoi(vcpu, irq.vector)) {
> +				__set_bit(irq.vector, ioapic_handled_vectors);
> +				vcpu->arch.last_pending_vector = irq.vector >
> +					vcpu->arch.last_pending_vector ? irq.vector :
> +					vcpu->arch.last_pending_vector;
> +			}
>  		}
>  	}
>  	srcu_read_unlock(&kvm->irq_srcu, idx);
> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> index a009c94c26c2..5d62ea5f1503 100644
> --- a/arch/x86/kvm/lapic.c
> +++ b/arch/x86/kvm/lapic.c
> @@ -1466,6 +1466,16 @@ static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector)
>  	if (!kvm_ioapic_handles_vector(apic, vector))
>  		return;
>  
> +	/*
> +	 * When there are instances where ioapic_handled_vectors is
> +	 * set due to pending interrupts, clean up the record and do
> +	 * a full KVM_REQ_SCAN_IOAPIC.
> +	 */

How about also adding the below to the comment?

	This ensures the vector is cleared in the vCPU's ioapic_handled_vectors
	if the vector is reused by non-IOAPIC interrupts, avoiding unnecessary
	EOI-induced VMEXITs for that vector.

> +	if (apic->vcpu->arch.last_pending_vector == vector) {
> +		apic->vcpu->arch.last_pending_vector = 0;
> +		kvm_make_request(KVM_REQ_SCAN_IOAPIC, apic->vcpu);
> +	}
> +
>  	/* Request a KVM exit to inform the userspace IOAPIC. */
>  	if (irqchip_split(apic->vcpu->kvm)) {
>  		apic->vcpu->arch.pending_ioapic_eoi = vector;

Re: [PATCH v3] KVM: x86: ioapic: Optimize EOI handling to reduce unnecessary VM exits
Posted by wzj 11 months, 1 week ago

On 2025/2/28 20:25, Huang, Kai wrote:
> On Fri, 2025-02-28 at 10:15 +0800, weizijie wrote:
>> Address performance issues caused by a vector being reused by a
>> non-IOAPIC source.
> 
> I saw your reply in v2.  Thanks.
> 
> Some minor comments below, which may be just nits for Sean/Paolo, so feel free
> to add:
> 
> Reviewed-by: Kai Huang <kai.huang@intel.com>
> 
>>
>> Commit 0fc5a36dd6b3
>> ("KVM: x86: ioapic: Fix level-triggered EOI and IOAPIC reconfigure race")
>> addressed the issues related to EOI and IOAPIC reconfiguration races.
>> However, it has introduced some performance concerns:
>>
>> Configuring IOAPIC interrupts while an interrupt request (IRQ) is
>> already in service can unintentionally trigger a VM exit for other
>> interrupts that normally do not require one, due to the settings of
>> `ioapic_handled_vectors`. If the IOAPIC is not reconfigured during
>> runtime, this issue persists, continuing to adversely affect
>> performance.
> 
> 
> So in short:
> 
>    The "rare case" mentioned in db2bdcbbbd32 and 0fc5a36dd6b3 is actually
>    not that rare and can actually happen in the good behaved guest.
> 
> The above commit message isn't very clear to me, though.  How about below?
> 
> Configuring IOAPIC routed interrupts triggers KVM to rescan all vCPUs'
> ioapic_handled_vectors, which is used to control which vectors need to trigger
> EOI-induced VMEXITs.  If any interrupt is already in service on some vCPU using
> some vector when the IOAPIC is being rescanned, that vector is set in the vCPU's
> ioapic_handled_vectors.  If the vector is then reused by other interrupts, each
> of them will cause a VMEXIT even though it is unnecessary.  W/o a further IOAPIC
> rescan, the vector remains set and the issue persists, impacting the guest's
> interrupt performance.
> 
> Both commit
> 
>    db2bdcbbbd32 (KVM: x86: fix edge EOI and IOAPIC reconfig race)
> 
> and commit
> 
>    0fc5a36dd6b3 (KVM: x86: ioapic: Fix level-triggered EOI and IOAPIC reconfigure
> race)
> 
> mentioned this issue, but it was considered "rare" and thus was not addressed.
> However, in real environments this issue can actually happen in a well-behaved
> guest.
> 

Thank you very much for your suggestions on the commit message wording.
I will apply them to this patch.

>>
>> Simple Fix Proposal:
>> A straightforward solution is to record highest in-service IRQ that
>> is pending at the time of the last scan. Then, upon the next guest
>> exit, do a full KVM_REQ_SCAN_IOAPIC. This ensures that a re-scan of
>> the ioapic occurs only when the recorded vector is EOI'd, and
>> subsequently, the extra bit in the eoi_exit_bitmap are cleared,
> 			  ^
> 			  bits
> 

Thank you for the correction.

>> avoiding unnecessary VM exits.
>>
>> Co-developed-by: xuyun <xuyun_xy.xy@linux.alibaba.com>
>> Signed-off-by: xuyun <xuyun_xy.xy@linux.alibaba.com>
>> Signed-off-by: weizijie <zijie.wei@linux.alibaba.com>
>> ---
>>   arch/x86/include/asm/kvm_host.h |  1 +
>>   arch/x86/kvm/ioapic.c           | 10 ++++++++--
>>   arch/x86/kvm/irq_comm.c         |  9 +++++++--
>>   arch/x86/kvm/lapic.c            | 10 ++++++++++
>>   4 files changed, 26 insertions(+), 4 deletions(-)
>>
>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
>> index 0b7af5902ff7..8c50e7b4a96f 100644
>> --- a/arch/x86/include/asm/kvm_host.h
>> +++ b/arch/x86/include/asm/kvm_host.h
>> @@ -1062,6 +1062,7 @@ struct kvm_vcpu_arch {
>>   #if IS_ENABLED(CONFIG_HYPERV)
>>   	hpa_t hv_root_tdp;
>>   #endif
>> +	u8 last_pending_vector;
>>   };
>>   
>>   struct kvm_lpage_info {
>> diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
>> index 995eb5054360..40252a800897 100644
>> --- a/arch/x86/kvm/ioapic.c
>> +++ b/arch/x86/kvm/ioapic.c
>> @@ -297,10 +297,16 @@ void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, ulong *ioapic_handled_vectors)
>>   			u16 dm = kvm_lapic_irq_dest_mode(!!e->fields.dest_mode);
>>   
>>   			if (kvm_apic_match_dest(vcpu, NULL, APIC_DEST_NOSHORT,
>> -						e->fields.dest_id, dm) ||
>> -			    kvm_apic_pending_eoi(vcpu, e->fields.vector))
>> +						e->fields.dest_id, dm))
>>   				__set_bit(e->fields.vector,
>>   					  ioapic_handled_vectors);
>> +			else if (kvm_apic_pending_eoi(vcpu, e->fields.vector)) {
>> +				__set_bit(e->fields.vector,
>> +					  ioapic_handled_vectors);
>> +				vcpu->arch.last_pending_vector = e->fields.vector >
>> +					vcpu->arch.last_pending_vector ? e->fields.vector :
>> +					vcpu->arch.last_pending_vector;
>> +			}
>>   		}
>>   	}
>>   	spin_unlock(&ioapic->lock);
>> diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
>> index 8136695f7b96..1d23c52576e1 100644
>> --- a/arch/x86/kvm/irq_comm.c
>> +++ b/arch/x86/kvm/irq_comm.c
>> @@ -426,9 +426,14 @@ void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu,
>>   
>>   			if (irq.trig_mode &&
>>   			    (kvm_apic_match_dest(vcpu, NULL, APIC_DEST_NOSHORT,
>> -						 irq.dest_id, irq.dest_mode) ||
>> -			     kvm_apic_pending_eoi(vcpu, irq.vector)))
>> +						 irq.dest_id, irq.dest_mode)))
>>   				__set_bit(irq.vector, ioapic_handled_vectors);
>> +			else if (kvm_apic_pending_eoi(vcpu, irq.vector)) {
>> +				__set_bit(irq.vector, ioapic_handled_vectors);
>> +				vcpu->arch.last_pending_vector = irq.vector >
>> +					vcpu->arch.last_pending_vector ? irq.vector :
>> +					vcpu->arch.last_pending_vector;
>> +			}
>>   		}
>>   	}
>>   	srcu_read_unlock(&kvm->irq_srcu, idx);
>> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
>> index a009c94c26c2..5d62ea5f1503 100644
>> --- a/arch/x86/kvm/lapic.c
>> +++ b/arch/x86/kvm/lapic.c
>> @@ -1466,6 +1466,16 @@ static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector)
>>   	if (!kvm_ioapic_handles_vector(apic, vector))
>>   		return;
>>   
>> +	/*
>> +	 * When there are instances where ioapic_handled_vectors is
>> +	 * set due to pending interrupts, clean up the record and do
>> +	 * a full KVM_REQ_SCAN_IOAPIC.
>> +	 */
> 
> How about also adding the below to the comment?
> 
> 	This ensures the vector is cleared in the vCPU's ioapic_handled_vectors
> 	if the vector is reused by non-IOAPIC interrupts, avoiding unnecessary
> 	EOI-induced VMEXITs for that vector.
> 

This additional information seems to make it easier to understand. Thank 
you very much.

>> +	if (apic->vcpu->arch.last_pending_vector == vector) {
>> +		apic->vcpu->arch.last_pending_vector = 0;
>> +		kvm_make_request(KVM_REQ_SCAN_IOAPIC, apic->vcpu);
>> +	}
>> +
>>   	/* Request a KVM exit to inform the userspace IOAPIC. */
>>   	if (irqchip_split(apic->vcpu->kvm)) {
>>   		apic->vcpu->arch.pending_ioapic_eoi = vector;
>