[RFC PATCH] KVM: x86: hyper-v: Inhibit APICv with VP Assist on SPR/EMR

Eiichi Tsukata posted 1 patch 1 year, 4 months ago
arch/x86/include/asm/kvm_host.h |  8 ++++++++
arch/x86/kvm/hyperv.c           | 13 +++++++++++++
arch/x86/kvm/vmx/main.c         |  1 +
3 files changed, 22 insertions(+)
[RFC PATCH] KVM: x86: hyper-v: Inhibit APICv with VP Assist on SPR/EMR
Posted by Eiichi Tsukata 1 year, 4 months ago
Running multiple Windows VMs with VP Assis and APICv causes KVM internal
error on Spapphire Rpaids and Emerald Rapids as is reported in [1].
Here Qemu outputs:

  KVM internal error. Suberror: 3
  extra data[0]: 0x000000008000002f
  extra data[1]: 0x0000000000000020
  extra data[2]: 0x0000000000000582
  extra data[3]: 0x0000000000000006
  RAX=0000000000000000 RBX=0000000000000000 RCX=0000000040000070
  RDX=0000000000000000
  RSI=fffffa8001e3db60 RDI=fffffa8002bc8aa0 RBP=fffff88005a91670
  RSP=fffff88005a915c8
  R8 =0000000000000009 R9 =000000000000000b R10=fffff8000264b000
  R11=fffff88005a91750
  R12=fffff88002e40180 R13=fffffa8001e3dc68 R14=fffffa8001e3dc68
  R15=0000000000000002
  RIP=fffff8000271722c RFL=00000046 [---Z-P-] CPL=0 II=0 A20=1 SMM=0 HLT=0
  ES =002b 0000000000000000 ffffffff 00c0f300 DPL=3 DS   [-WA]
  CS =0010 0000000000000000 00000000 00209b00 DPL=0 CS64 [-RA]
  SS =0018 0000000000000000 ffffffff 00c09300 DPL=0 DS   [-WA]
  DS =002b 0000000000000000 ffffffff 00c0f300 DPL=3 DS   [-WA]
  FS =0053 00000000fff9a000 00007c00 0040f300 DPL=3 DS   [-WA]
  GS =002b fffff88002e40000 ffffffff 00c0f300 DPL=3 DS   [-WA]
  LDT=0000 0000000000000000 ffffffff 00c00000
  TR =0040 fffff88002e44ec0 00000067 00008b00 DPL=0 TSS64-busy
  GDT=     fffff88002e4b4c0 0000007f
  IDT=     fffff88002e4b540 00000fff
  CR0=80050031 CR2=00000000002e408e CR3=000000001c6f5000 CR4=000406f8
  DR0=0000000000000000 DR1=0000000000000000 DR2=0000000000000000
  DR3=0000000000000000
  DR6=00000000fffe07f0 DR7=0000000000000400
  EFER=0000000000000d01
  Code=25 a8 4b 00 00 b9 70 00 00 40 0f ba 32 00 72 06 33 c0 8b d0 <0f> 30
  5a 58 59 c3 90 90 90 90 90 90 90 90 90 90 90 90 90 90 cc cc cc cc cc cc
  66 66 0f 1f

As is noted in [1], this issue is considered to be a microcode issue
specific to SPR/EMR.

Disable APICv when guest tries to enable VP Assist page only when it's
running on those problematic CPU models.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=218267 [1]
Signed-off-by: Eiichi Tsukata <eiichi.tsukata@nutanix.com>
---
 arch/x86/include/asm/kvm_host.h |  8 ++++++++
 arch/x86/kvm/hyperv.c           | 13 +++++++++++++
 arch/x86/kvm/vmx/main.c         |  1 +
 3 files changed, 22 insertions(+)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 950a03e0181e..9ff687c7326b 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1213,6 +1213,13 @@ enum kvm_apicv_inhibit {
 	 */
 	APICV_INHIBIT_REASON_HYPERV,
 
+	/*
+	 * Using VP Assist and APICv simultaneously on Sapphire Rapids
+	 * or Emerald Rapids causes KVM internal error, which is
+	 * considered to be a microcode issue.
+	 */
+	APICV_INHIBIT_REASON_HYPERV_VP_ASSIST,
+
 	/*
 	 * APIC acceleration is inhibited because the userspace didn't yet
 	 * enable the kernel/split irqchip.
@@ -1285,6 +1292,7 @@ enum kvm_apicv_inhibit {
 #define APICV_INHIBIT_REASONS				\
 	__APICV_INHIBIT_REASON(DISABLED),		\
 	__APICV_INHIBIT_REASON(HYPERV),			\
+	__APICV_INHIBIT_REASON(HYPERV_VP_ASSIST),	\
 	__APICV_INHIBIT_REASON(ABSENT),			\
 	__APICV_INHIBIT_REASON(BLOCKIRQ),		\
 	__APICV_INHIBIT_REASON(PHYSICAL_ID_ALIASED),	\
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 4f0a94346d00..8d5a1f685191 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -36,6 +36,7 @@
 
 #include <asm/apicdef.h>
 #include <asm/mshyperv.h>
+#include <asm/cpu_device_id.h>
 #include <trace/events/kvm.h>
 
 #include "trace.h"
@@ -1550,6 +1551,8 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
 	case HV_X64_MSR_VP_ASSIST_PAGE: {
 		u64 gfn;
 		unsigned long addr;
+		struct kvm *kvm = vcpu->kvm;
+		struct cpuinfo_x86 *c = &boot_cpu_data;
 
 		if (!(data & HV_X64_MSR_VP_ASSIST_PAGE_ENABLE)) {
 			hv_vcpu->hv_vapic = data;
@@ -1571,6 +1574,16 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
 			return 1;
 		hv_vcpu->hv_vapic = data;
 		kvm_vcpu_mark_page_dirty(vcpu, gfn);
+
+		/*
+		 * Using VP Assist and APICv simultaneously on Sapphire Rapids
+		 * or Emerald Rapids causes KVM internal error, which is
+		 * considered to be a microcode issue.
+		 */
+		if (c->x86_vfm == INTEL_SAPPHIRERAPIDS_X ||
+		    c->x86_vfm == INTEL_EMERALDRAPIDS_X)
+			kvm_set_apicv_inhibit(kvm, APICV_INHIBIT_REASON_HYPERV_VP_ASSIST);
+
 		if (kvm_lapic_set_pv_eoi(vcpu,
 					    gfn_to_gpa(gfn) | KVM_MSR_ENABLED,
 					    sizeof(struct hv_vp_assist_page)))
diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c
index 0bf35ebe8a1b..a1e7007133a1 100644
--- a/arch/x86/kvm/vmx/main.c
+++ b/arch/x86/kvm/vmx/main.c
@@ -11,6 +11,7 @@
 	(BIT(APICV_INHIBIT_REASON_DISABLED) |			\
 	 BIT(APICV_INHIBIT_REASON_ABSENT) |			\
 	 BIT(APICV_INHIBIT_REASON_HYPERV) |			\
+	 BIT(APICV_INHIBIT_REASON_HYPERV_VP_ASSIST) |		\
 	 BIT(APICV_INHIBIT_REASON_BLOCKIRQ) |			\
 	 BIT(APICV_INHIBIT_REASON_PHYSICAL_ID_ALIASED) |	\
 	 BIT(APICV_INHIBIT_REASON_APIC_ID_MODIFIED) |		\
-- 
2.45.2
Re: [RFC PATCH] KVM: x86: hyper-v: Inhibit APICv with VP Assist on SPR/EMR
Posted by Sean Christopherson 1 year, 4 months ago
On Tue, Aug 06, 2024, Eiichi Tsukata wrote:
> Running multiple Windows VMs with VP Assis and APICv causes KVM internal
> error on Spapphire Rpaids and Emerald Rapids as is reported in [1].
> Here Qemu outputs:
> 
>   KVM internal error. Suberror: 3
>   extra data[0]: 0x000000008000002f
>   extra data[1]: 0x0000000000000020
>   extra data[2]: 0x0000000000000582
>   extra data[3]: 0x0000000000000006
>   RAX=0000000000000000 RBX=0000000000000000 RCX=0000000040000070
>   RDX=0000000000000000
>   RSI=fffffa8001e3db60 RDI=fffffa8002bc8aa0 RBP=fffff88005a91670
>   RSP=fffff88005a915c8
>   R8 =0000000000000009 R9 =000000000000000b R10=fffff8000264b000
>   R11=fffff88005a91750
>   R12=fffff88002e40180 R13=fffffa8001e3dc68 R14=fffffa8001e3dc68
>   R15=0000000000000002
>   RIP=fffff8000271722c RFL=00000046 [---Z-P-] CPL=0 II=0 A20=1 SMM=0 HLT=0
>   ES =002b 0000000000000000 ffffffff 00c0f300 DPL=3 DS   [-WA]
>   CS =0010 0000000000000000 00000000 00209b00 DPL=0 CS64 [-RA]
>   SS =0018 0000000000000000 ffffffff 00c09300 DPL=0 DS   [-WA]
>   DS =002b 0000000000000000 ffffffff 00c0f300 DPL=3 DS   [-WA]
>   FS =0053 00000000fff9a000 00007c00 0040f300 DPL=3 DS   [-WA]
>   GS =002b fffff88002e40000 ffffffff 00c0f300 DPL=3 DS   [-WA]
>   LDT=0000 0000000000000000 ffffffff 00c00000
>   TR =0040 fffff88002e44ec0 00000067 00008b00 DPL=0 TSS64-busy
>   GDT=     fffff88002e4b4c0 0000007f
>   IDT=     fffff88002e4b540 00000fff
>   CR0=80050031 CR2=00000000002e408e CR3=000000001c6f5000 CR4=000406f8
>   DR0=0000000000000000 DR1=0000000000000000 DR2=0000000000000000
>   DR3=0000000000000000
>   DR6=00000000fffe07f0 DR7=0000000000000400
>   EFER=0000000000000d01
>   Code=25 a8 4b 00 00 b9 70 00 00 40 0f ba 32 00 72 06 33 c0 8b d0 <0f> 30
>   5a 58 59 c3 90 90 90 90 90 90 90 90 90 90 90 90 90 90 cc cc cc cc cc cc
>   66 66 0f 1f
> 
> As is noted in [1], this issue is considered to be a microcode issue
> specific to SPR/EMR.

I don't think we can claim that without a more explicit statement from Intel.
And I would really like Intel to clarify exactly what is going on, so that (a)
it can be properly documented and (b) we can implement a precise, targeted
workaround in KVM.

Chao?

> Disable APICv when guest tries to enable VP Assist page only when it's
> running on those problematic CPU models.
> 
> Link: https://bugzilla.kernel.org/show_bug.cgi?id=218267 [1]
> Signed-off-by: Eiichi Tsukata <eiichi.tsukata@nutanix.com>
> ---
Re: [RFC PATCH] KVM: x86: hyper-v: Inhibit APICv with VP Assist on SPR/EMR
Posted by Chao Gao 1 year, 4 months ago
On Tue, Aug 06, 2024 at 09:03:11AM -0700, Sean Christopherson wrote:
>On Tue, Aug 06, 2024, Eiichi Tsukata wrote:
>> Running multiple Windows VMs with VP Assis and APICv causes KVM internal
>> error on Spapphire Rpaids and Emerald Rapids as is reported in [1].
>> Here Qemu outputs:
>> 
>>   KVM internal error. Suberror: 3
>>   extra data[0]: 0x000000008000002f
>>   extra data[1]: 0x0000000000000020
>>   extra data[2]: 0x0000000000000582
>>   extra data[3]: 0x0000000000000006
>>   RAX=0000000000000000 RBX=0000000000000000 RCX=0000000040000070
>>   RDX=0000000000000000
>>   RSI=fffffa8001e3db60 RDI=fffffa8002bc8aa0 RBP=fffff88005a91670
>>   RSP=fffff88005a915c8
>>   R8 =0000000000000009 R9 =000000000000000b R10=fffff8000264b000
>>   R11=fffff88005a91750
>>   R12=fffff88002e40180 R13=fffffa8001e3dc68 R14=fffffa8001e3dc68
>>   R15=0000000000000002
>>   RIP=fffff8000271722c RFL=00000046 [---Z-P-] CPL=0 II=0 A20=1 SMM=0 HLT=0
>>   ES =002b 0000000000000000 ffffffff 00c0f300 DPL=3 DS   [-WA]
>>   CS =0010 0000000000000000 00000000 00209b00 DPL=0 CS64 [-RA]
>>   SS =0018 0000000000000000 ffffffff 00c09300 DPL=0 DS   [-WA]
>>   DS =002b 0000000000000000 ffffffff 00c0f300 DPL=3 DS   [-WA]
>>   FS =0053 00000000fff9a000 00007c00 0040f300 DPL=3 DS   [-WA]
>>   GS =002b fffff88002e40000 ffffffff 00c0f300 DPL=3 DS   [-WA]
>>   LDT=0000 0000000000000000 ffffffff 00c00000
>>   TR =0040 fffff88002e44ec0 00000067 00008b00 DPL=0 TSS64-busy
>>   GDT=     fffff88002e4b4c0 0000007f
>>   IDT=     fffff88002e4b540 00000fff
>>   CR0=80050031 CR2=00000000002e408e CR3=000000001c6f5000 CR4=000406f8
>>   DR0=0000000000000000 DR1=0000000000000000 DR2=0000000000000000
>>   DR3=0000000000000000
>>   DR6=00000000fffe07f0 DR7=0000000000000400
>>   EFER=0000000000000d01
>>   Code=25 a8 4b 00 00 b9 70 00 00 40 0f ba 32 00 72 06 33 c0 8b d0 <0f> 30
>>   5a 58 59 c3 90 90 90 90 90 90 90 90 90 90 90 90 90 90 cc cc cc cc cc cc
>>   66 66 0f 1f
>> 
>> As is noted in [1], this issue is considered to be a microcode issue
>> specific to SPR/EMR.
>
>I don't think we can claim that without a more explicit statement from Intel.
>And I would really like Intel to clarify exactly what is going on, so that (a)
>it can be properly documented and (b) we can implement a precise, targeted
>workaround in KVM.
>
>Chao?

I am asking if there is anything I can disclose at this point, including the
plan to disclose details of this issue and release the microcode fix.
Re: [RFC PATCH] KVM: x86: hyper-v: Inhibit APICv with VP Assist on SPR/EMR
Posted by Paolo Bonzini 1 year, 4 months ago
On Tue, Aug 6, 2024 at 6:03 PM Sean Christopherson <seanjc@google.com> wrote:
> > As is noted in [1], this issue is considered to be a microcode issue
> > specific to SPR/EMR.
>
> I don't think we can claim that without a more explicit statement from Intel.
> And I would really like Intel to clarify exactly what is going on, so that (a)
> it can be properly documented and (b) we can implement a precise, targeted
> workaround in KVM.

It is not even clear to me why this patch has any effect at all,
because PV EOI and APICv don't work together anyway: PV EOI requires
apic->highest_isr_cache == -1 (see apic_sync_pv_eoi_to_guest()) but
the cache is only set without APICv (see apic_set_isr()).  Therefore,
PV EOI should be basically a no-op with APICv in use.

Second, there should be no need to disable APICv: as long as
kvm_lapic_set_pv_eoi() is not called, the VP assist page can be
enabled but PV EOI never triggered.  That should be tested and, if it
doesn't work, that's even more proof that we don't know what's going
on.

Also note that the PV EOI mechanism exists in the same way with KVM
paravirtualization, even without Hyper-V enabled, and should trigger
the same microcode issue.  The only difference is that Windows does
not know about it and, it seems, the issue does not trigger with
Linux.

Paolo


> Chao?
>
> > Disable APICv when guest tries to enable VP Assist page only when it's
> > running on those problematic CPU models.
> >
> > Link: https://bugzilla.kernel.org/show_bug.cgi?id=218267 [1]
> > Signed-off-by: Eiichi Tsukata <eiichi.tsukata@nutanix.com>
> > ---
>
Re: [RFC PATCH] KVM: x86: hyper-v: Inhibit APICv with VP Assist on SPR/EMR
Posted by Sean Christopherson 1 year, 4 months ago
On Tue, Aug 06, 2024, Paolo Bonzini wrote:
> On Tue, Aug 6, 2024 at 6:03 PM Sean Christopherson <seanjc@google.com> wrote:
> > > As is noted in [1], this issue is considered to be a microcode issue
> > > specific to SPR/EMR.
> >
> > I don't think we can claim that without a more explicit statement from Intel.
> > And I would really like Intel to clarify exactly what is going on, so that (a)
> > it can be properly documented and (b) we can implement a precise, targeted
> > workaround in KVM.
> 
> It is not even clear to me why this patch has any effect at all,
> because PV EOI and APICv don't work together anyway: PV EOI requires
> apic->highest_isr_cache == -1 (see apic_sync_pv_eoi_to_guest()) but
> the cache is only set without APICv (see apic_set_isr()).  Therefore,
> PV EOI should be basically a no-op with APICv in use.

Per Chao, this is a ucode bug though.  Speculating wildly, I wonder if Intel added
acceleration and/or redirection of HV_X64_MSR_EOI when APICv is enabled, e.g. to
speed up existing VMs, and something went sideways.
Re: [RFC PATCH] KVM: x86: hyper-v: Inhibit APICv with VP Assist on SPR/EMR
Posted by Jon Kohler 9 months, 4 weeks ago

> On Aug 7, 2024, at 9:10 AM, Sean Christopherson <seanjc@google.com> wrote:
> 
> !-------------------------------------------------------------------|
>  CAUTION: External Email
> 
> |-------------------------------------------------------------------!
> 
> On Tue, Aug 06, 2024, Paolo Bonzini wrote:
>> On Tue, Aug 6, 2024 at 6:03 PM Sean Christopherson <seanjc@google.com> wrote:
>>>> As is noted in [1], this issue is considered to be a microcode issue
>>>> specific to SPR/EMR.
>>> 
>>> I don't think we can claim that without a more explicit statement from Intel.
>>> And I would really like Intel to clarify exactly what is going on, so that (a)
>>> it can be properly documented and (b) we can implement a precise, targeted
>>> workaround in KVM.
>> 
>> It is not even clear to me why this patch has any effect at all,
>> because PV EOI and APICv don't work together anyway: PV EOI requires
>> apic->highest_isr_cache == -1 (see apic_sync_pv_eoi_to_guest()) but
>> the cache is only set without APICv (see apic_set_isr()).  Therefore,
>> PV EOI should be basically a no-op with APICv in use.
> 
> Per Chao, this is a ucode bug though.  Speculating wildly, I wonder if Intel added
> acceleration and/or redirection of HV_X64_MSR_EOI when APICv is enabled, e.g. to
> speed up existing VMs, and something went sideways.

Hey Sean, Chao, Paolo, quick follow up on this one. 

Eiichi was working on pulling down Intel Microcode 20250211 [1], and I had
asked to retest this one.

Knock on wood, it looks like the issue is “gone” with 20250211 on SPR/EMR

The EMR [2] and SPR [3] release notes allude to some Erratum regarding
some vmexit fixups that sound interesting, but I’m not sure if they are actually
the backing issue, or if this is sheer coincidence, or if there was another fix
but just isn’t fully documented as an errata?

These two are listed as “fixed” in the release notes:
EMR137. VM Exit Following MOV to CR8 Instruction May Lead to Unexpected IDT Vectoring-Information
SPR141. VM Exit Following MOV to CR8 Instruction May Lead to Unexpected IDT Vectoring-Information

@Chao - could you help confirm our observations one way or the other?

[1] https://github.com/intel/Intel-Linux-Processor-Microcode-Data-Files/releases/tag/microcode-20250211
[2] https://cdrdv2.intel.com/v1/dl/getContent/793902 (EMR microcode release notes)
[3] https://cdrdv2.intel.com/v1/dl/getContent/772415 (SPR microcode release notes)
Re: [RFC PATCH] KVM: x86: hyper-v: Inhibit APICv with VP Assist on SPR/EMR
Posted by Chao Gao 9 months, 4 weeks ago
On Wed, Feb 19, 2025 at 03:03:39PM +0000, Jon Kohler wrote:
>
>
>> On Aug 7, 2024, at 9:10 AM, Sean Christopherson <seanjc@google.com> wrote:
>> 
>> !-------------------------------------------------------------------|
>>  CAUTION: External Email
>> 
>> |-------------------------------------------------------------------!
>> 
>> On Tue, Aug 06, 2024, Paolo Bonzini wrote:
>>> On Tue, Aug 6, 2024 at 6:03 PM Sean Christopherson <seanjc@google.com> wrote:
>>>>> As is noted in [1], this issue is considered to be a microcode issue
>>>>> specific to SPR/EMR.
>>>> 
>>>> I don't think we can claim that without a more explicit statement from Intel.
>>>> And I would really like Intel to clarify exactly what is going on, so that (a)
>>>> it can be properly documented and (b) we can implement a precise, targeted
>>>> workaround in KVM.
>>> 
>>> It is not even clear to me why this patch has any effect at all,
>>> because PV EOI and APICv don't work together anyway: PV EOI requires
>>> apic->highest_isr_cache == -1 (see apic_sync_pv_eoi_to_guest()) but
>>> the cache is only set without APICv (see apic_set_isr()).  Therefore,
>>> PV EOI should be basically a no-op with APICv in use.
>> 
>> Per Chao, this is a ucode bug though.  Speculating wildly, I wonder if Intel added
>> acceleration and/or redirection of HV_X64_MSR_EOI when APICv is enabled, e.g. to
>> speed up existing VMs, and something went sideways.
>
>Hey Sean, Chao, Paolo, quick follow up on this one. 
>
>Eiichi was working on pulling down Intel Microcode 20250211 [1], and I had
>asked to retest this one.
>
>Knock on wood, it looks like the issue is “gone” with 20250211 on SPR/EMR
>
>The EMR [2] and SPR [3] release notes allude to some Erratum regarding
>some vmexit fixups that sound interesting, but I’m not sure if they are actually
>the backing issue,

Yes. EMR137 and SPR141 are exactly the backing issue and have been fixed in
recent microcode releases.

>or if this is sheer coincidence, or if there was another fix
>but just isn’t fully documented as an errata?
>
>These two are listed as “fixed” in the release notes:
>EMR137. VM Exit Following MOV to CR8 Instruction May Lead to Unexpected IDT Vectoring-Information
>SPR141. VM Exit Following MOV to CR8 Instruction May Lead to Unexpected IDT Vectoring-Information
>
>@Chao - could you help confirm our observations one way or the other?
>
>[1] https://github.com/intel/Intel-Linux-Processor-Microcode-Data-Files/releases/tag/microcode-20250211
>[2] https://cdrdv2.intel.com/v1/dl/getContent/793902 (EMR microcode release notes)
>[3] https://cdrdv2.intel.com/v1/dl/getContent/772415 (SPR microcode release notes)
Re: [RFC PATCH] KVM: x86: hyper-v: Inhibit APICv with VP Assist on SPR/EMR
Posted by Eiichi Tsukata 1 year, 4 months ago
Hi

This issue has been observed on SPR and EMR but may be reproducible on other
CPU models.  Appreciate if anyone know further detailed information about the  issue.

Minimized repro is:  
```
for i in `seq 1 20`  do
 /usr/libexec/qemu-kvm -accel kvm \
  -cpu kvm64,hv-vapic=on \
  -hda /path/to/win7-$i.qcow2 -m 4G \
  -smp 1 \    -vnc 127.0.0.1:$i > qemu.$i.log 2>&1 &
done
```

I'd like to note that this issue is observed only when the node runs
*multiple* Windows VMs at once.  i.e.: Running only one Windows VM doesn't
cause the issue.

Moreover, running VMs on a dedicated CPU doesn't cause the issue.
I mean, prepending "taskset" to qemu-kvm like this:

```
for i in `seq 1 20`  do
 taskset $i /usr/libexec/qemu-kvm -accel kvm \
  -cpu kvm64,hv-vapic=on \
  -hda /path/to/win7-$i.qcow2 -m 4G \
  -smp 1 \    -vnc 127.0.0.1:$i > qemu.$i.log 2>&1 &
done
```

Then the issue doesn't reproduce.

Best,
Eiichi

> On Aug 6, 2024, at 14:37, Eiichi Tsukata <eiichi.tsukata@nutanix.com> wrote:
> 
> Running multiple Windows VMs with VP Assis and APICv causes KVM internal
> error on Spapphire Rpaids and Emerald Rapids as is reported in [1].
> Here Qemu outputs:
> 
>  KVM internal error. Suberror: 3
>  extra data[0]: 0x000000008000002f
>  extra data[1]: 0x0000000000000020
>  extra data[2]: 0x0000000000000582
>  extra data[3]: 0x0000000000000006
>  RAX=0000000000000000 RBX=0000000000000000 RCX=0000000040000070
>  RDX=0000000000000000
>  RSI=fffffa8001e3db60 RDI=fffffa8002bc8aa0 RBP=fffff88005a91670
>  RSP=fffff88005a915c8
>  R8 =0000000000000009 R9 =000000000000000b R10=fffff8000264b000
>  R11=fffff88005a91750
>  R12=fffff88002e40180 R13=fffffa8001e3dc68 R14=fffffa8001e3dc68
>  R15=0000000000000002
>  RIP=fffff8000271722c RFL=00000046 [---Z-P-] CPL=0 II=0 A20=1 SMM=0 HLT=0
>  ES =002b 0000000000000000 ffffffff 00c0f300 DPL=3 DS   [-WA]
>  CS =0010 0000000000000000 00000000 00209b00 DPL=0 CS64 [-RA]
>  SS =0018 0000000000000000 ffffffff 00c09300 DPL=0 DS   [-WA]
>  DS =002b 0000000000000000 ffffffff 00c0f300 DPL=3 DS   [-WA]
>  FS =0053 00000000fff9a000 00007c00 0040f300 DPL=3 DS   [-WA]
>  GS =002b fffff88002e40000 ffffffff 00c0f300 DPL=3 DS   [-WA]
>  LDT=0000 0000000000000000 ffffffff 00c00000
>  TR =0040 fffff88002e44ec0 00000067 00008b00 DPL=0 TSS64-busy
>  GDT=     fffff88002e4b4c0 0000007f
>  IDT=     fffff88002e4b540 00000fff
>  CR0=80050031 CR2=00000000002e408e CR3=000000001c6f5000 CR4=000406f8
>  DR0=0000000000000000 DR1=0000000000000000 DR2=0000000000000000
>  DR3=0000000000000000
>  DR6=00000000fffe07f0 DR7=0000000000000400
>  EFER=0000000000000d01
>  Code=25 a8 4b 00 00 b9 70 00 00 40 0f ba 32 00 72 06 33 c0 8b d0 <0f> 30
>  5a 58 59 c3 90 90 90 90 90 90 90 90 90 90 90 90 90 90 cc cc cc cc cc cc
>  66 66 0f 1f
> 
> As is noted in [1], this issue is considered to be a microcode issue
> specific to SPR/EMR.
> 
> Disable APICv when guest tries to enable VP Assist page only when it's
> running on those problematic CPU models.
> 
> Link: https://bugzilla.kernel.org/show_bug.cgi?id=218267 [1]
> Signed-off-by: Eiichi Tsukata <eiichi.tsukata@nutanix.com>
> ---
> arch/x86/include/asm/kvm_host.h |  8 ++++++++
> arch/x86/kvm/hyperv.c           | 13 +++++++++++++
> arch/x86/kvm/vmx/main.c         |  1 +
> 3 files changed, 22 insertions(+)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 950a03e0181e..9ff687c7326b 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1213,6 +1213,13 @@ enum kvm_apicv_inhibit {
> */
> APICV_INHIBIT_REASON_HYPERV,
> 
> + /*
> + * Using VP Assist and APICv simultaneously on Sapphire Rapids
> + * or Emerald Rapids causes KVM internal error, which is
> + * considered to be a microcode issue.
> + */
> + APICV_INHIBIT_REASON_HYPERV_VP_ASSIST,
> +
> /*
> * APIC acceleration is inhibited because the userspace didn't yet
> * enable the kernel/split irqchip.
> @@ -1285,6 +1292,7 @@ enum kvm_apicv_inhibit {
> #define APICV_INHIBIT_REASONS \
> __APICV_INHIBIT_REASON(DISABLED), \
> __APICV_INHIBIT_REASON(HYPERV), \
> + __APICV_INHIBIT_REASON(HYPERV_VP_ASSIST), \
> __APICV_INHIBIT_REASON(ABSENT), \
> __APICV_INHIBIT_REASON(BLOCKIRQ), \
> __APICV_INHIBIT_REASON(PHYSICAL_ID_ALIASED), \
> diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
> index 4f0a94346d00..8d5a1f685191 100644
> --- a/arch/x86/kvm/hyperv.c
> +++ b/arch/x86/kvm/hyperv.c
> @@ -36,6 +36,7 @@
> 
> #include <asm/apicdef.h>
> #include <asm/mshyperv.h>
> +#include <asm/cpu_device_id.h>
> #include <trace/events/kvm.h>
> 
> #include "trace.h"
> @@ -1550,6 +1551,8 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
> case HV_X64_MSR_VP_ASSIST_PAGE: {
> u64 gfn;
> unsigned long addr;
> + struct kvm *kvm = vcpu->kvm;
> + struct cpuinfo_x86 *c = &boot_cpu_data;
> 
> if (!(data & HV_X64_MSR_VP_ASSIST_PAGE_ENABLE)) {
> hv_vcpu->hv_vapic = data;
> @@ -1571,6 +1574,16 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
> return 1;
> hv_vcpu->hv_vapic = data;
> kvm_vcpu_mark_page_dirty(vcpu, gfn);
> +
> + /*
> + * Using VP Assist and APICv simultaneously on Sapphire Rapids
> + * or Emerald Rapids causes KVM internal error, which is
> + * considered to be a microcode issue.
> + */
> + if (c->x86_vfm == INTEL_SAPPHIRERAPIDS_X ||
> +    c->x86_vfm == INTEL_EMERALDRAPIDS_X)
> + kvm_set_apicv_inhibit(kvm, APICV_INHIBIT_REASON_HYPERV_VP_ASSIST);
> +
> if (kvm_lapic_set_pv_eoi(vcpu,
>    gfn_to_gpa(gfn) | KVM_MSR_ENABLED,
>    sizeof(struct hv_vp_assist_page)))
> diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c
> index 0bf35ebe8a1b..a1e7007133a1 100644
> --- a/arch/x86/kvm/vmx/main.c
> +++ b/arch/x86/kvm/vmx/main.c
> @@ -11,6 +11,7 @@
> (BIT(APICV_INHIBIT_REASON_DISABLED) | \
> BIT(APICV_INHIBIT_REASON_ABSENT) | \
> BIT(APICV_INHIBIT_REASON_HYPERV) | \
> + BIT(APICV_INHIBIT_REASON_HYPERV_VP_ASSIST) | \
> BIT(APICV_INHIBIT_REASON_BLOCKIRQ) | \
> BIT(APICV_INHIBIT_REASON_PHYSICAL_ID_ALIASED) | \
> BIT(APICV_INHIBIT_REASON_APIC_ID_MODIFIED) | \
> -- 
> 2.45.2
>