From nobody Thu Apr 2 20:28:00 2026 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 06C582FD696; Mon, 23 Feb 2026 15:46:53 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1771861613; cv=none; b=jQ2aYmKM0GD8z5KE8ZO4cRj7PWA89Oir78MjTh+4XzfgLiZTm2S43syIBz4X6jsXm9cca1mLu+47ZnZm5egF0L0PWL7SLa2myk+gvxhh1O6jGGIwEzDvpfY60Dy28lcJYQWf3fA0uAV8LGbOxfureUO8JMue01ouTxlZdDmIRlk= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1771861613; c=relaxed/simple; bh=CtKf3PxCKlK/q3R2eNvkCYnd7jg2nQePx8d9gvUyedc=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=swGXDfJNuu4tx7gg5jpeo0E5+BGuBEXNLteQiDbjIv/S+78ozQ3QP4djkQ/t1k/EYaCjFRRSVu7SQg7fgu5Lkjmo3HIlKF5lfRgPoFY0LMFsO6EB2oBJJSlRCaghxrSHkrujDarqz8kxMkTGgpL9YLhrjUu0kaO0AQ5BJfJJv/4= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=ueZfIcSQ; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="ueZfIcSQ" Received: by smtp.kernel.org (Postfix) with ESMTPSA id A372BC19423; Mon, 23 Feb 2026 15:46:52 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1771861612; bh=CtKf3PxCKlK/q3R2eNvkCYnd7jg2nQePx8d9gvUyedc=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=ueZfIcSQG5/WuGKz8ofoqA7qA3M3ydXOmMn47h/i4lyfvrT0V/Os8FG9HL3HFa4Iw 73KTyneV9XZPiQ79KFDiF3s0aWWOpLKcRy0W0VMId21WjOODI4D61DQgDMyRf72p8E oeeS8x3a4o4IGFFS8f+ihc+vqVo0fT5DuzStNLcrhtqUqqLuA6RDFLvHrm3Axd3w+a 2mTbKexdT6+uHGcMGF85GZdab/sgRPy7r6ssFjQg3WjwHfZOX5xre0awZO2f+ALgPu 
jxpZIyGWW+QSPq95hoQQcEbF47NsW0S9Zqi5VapmhT0hpbCeNVvcLhmAZGzTy2IDp6 rgpOBZpOl6Ypw== From: Yosry Ahmed To: Sean Christopherson Cc: Paolo Bonzini , kvm@vger.kernel.org, linux-kernel@vger.kernel.org, Yosry Ahmed , stable@vger.kernel.org Subject: [PATCH v1 1/4] KVM: nSVM: Always use NextRIP as vmcb02's NextRIP after first L2 VMRUN Date: Mon, 23 Feb 2026 15:46:33 +0000 Message-ID: <20260223154636.116671-2-yosry@kernel.org> X-Mailer: git-send-email 2.53.0.345.g96ddfc5eaa-goog In-Reply-To: <20260223154636.116671-1-yosry@kernel.org> References: <20260223154636.116671-1-yosry@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" For guests with NRIPS disabled, L1 does not provide NextRIP when running an L2 with an injected soft interrupt, instead it advances the current RIP before running it. KVM uses the current RIP as the NextRIP in vmcb02 to emulate a CPU without NRIPS. However, after L2 runs the first time, NextRIP will be updated by the CPU and/or KVM, and the current RIP is no longer the correct value to use in vmcb02. Hence, after save/restore, use the current RIP if and only if a nested run is pending, otherwise use NextRIP. 
Fixes: cc440cdad5b7 ("KVM: nSVM: implement KVM_GET_NESTED_STATE and KVM_SET= _NESTED_STATE") CC: stable@vger.kernel.org Signed-off-by: Yosry Ahmed --- arch/x86/kvm/svm/nested.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index de90b104a0dd5..a82e6f0472ca7 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -844,17 +844,24 @@ static void nested_vmcb02_prepare_control(struct vcpu= _svm *svm, vmcb02->control.event_inj_err =3D svm->nested.ctl.event_inj_err; =20 /* - * next_rip is consumed on VMRUN as the return address pushed on the + * NextRIP is consumed on VMRUN as the return address pushed on the * stack for injected soft exceptions/interrupts. If nrips is exposed - * to L1, take it verbatim from vmcb12. If nrips is supported in - * hardware but not exposed to L1, stuff the actual L2 RIP to emulate - * what a nrips=3D0 CPU would do (L1 is responsible for advancing RIP - * prior to injecting the event). + * to L1, take it verbatim from vmcb12. + * + * If nrips is supported in hardware but not exposed to L1, stuff the + * actual L2 RIP to emulate what a nrips=3D0 CPU would do (L1 is + * responsible for advancing RIP prior to injecting the event). This is + * only the case for the first L2 run after VMRUN. After that (e.g. + * during save/restore), NextRIP is updated by the CPU and/or KVM, and + * the value of the L2 RIP from vmcb12 should not be used. 
*/ - if (guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS)) - vmcb02->control.next_rip =3D svm->nested.ctl.next_rip; - else if (boot_cpu_has(X86_FEATURE_NRIPS)) - vmcb02->control.next_rip =3D vmcb12_rip; + if (boot_cpu_has(X86_FEATURE_NRIPS)) { + if (guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS) || + !svm->nested.nested_run_pending) + vmcb02->control.next_rip =3D svm->nested.ctl.next_rip; + else + vmcb02->control.next_rip =3D vmcb12_rip; + } =20 svm->nmi_l1_to_l2 =3D is_evtinj_nmi(vmcb02->control.event_inj); if (is_evtinj_soft(vmcb02->control.event_inj)) { --=20 2.53.0.345.g96ddfc5eaa-goog From nobody Thu Apr 2 20:28:00 2026 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 664E83016E3; Mon, 23 Feb 2026 15:46:53 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1771861613; cv=none; b=T/TXnyE5LGABBa9OYeVwsEvxbnBthEGt1C9EuSbSgPiqCn2ySpEpus7P73psJ3sEqyOQ+ohvOy/Fy6LcQ/1uJWr68IIRJMFrHHmQlk6A/J8i3XfQ0PzhsqWXiWeZLyy4ChkOxvAjdui8r7ryot6hc9tafDrPJ/U5ofUngDi46Ls= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1771861613; c=relaxed/simple; bh=Y8Pg+7XWSPNEVqmUOFpomAns4n3+klMSUO/2YgFvnrQ=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=ie+YlcqQkc8QrmsPn5WP4lgKM0TyvzcNVVDmlkGqKM5oCtimhvlO6wWDZVuVuj7slf8/v4kSbljoGKoh1qNM8w9oFdf4R+uO4P8FNvc2523TtAX2ftQR7mkCnNFywVAwTaiKAstlUljcOr8CuQ3mXKQGKME37WG6EINe/+FH/mk= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=qhC9e6r7; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org 
header.b="qhC9e6r7" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 10927C116D0; Mon, 23 Feb 2026 15:46:53 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1771861613; bh=Y8Pg+7XWSPNEVqmUOFpomAns4n3+klMSUO/2YgFvnrQ=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=qhC9e6r7/d5ZvG5BB3XR7ZQ1ovydzLa34VsxX3guTF04RxLkyKZLkiyOKHYeonqFX W3u0qR0thD16FAIThP2OqPcYbk7syWeezyRwSXMITLAH239DCIpuqrYQMAg7oEmuOj Hv8oEe3qqCLrg4iwfWVIZ6vz2QVXcXOjmom1yoQMhk4KrOQ0wsLfQDHRLZ+kVKOseZ Fzb//EkilZOfoN3RK+CqEryj3kDTSDgKNDG1cXq8fDm9UFoqYLgel1PvKRXz+qBwF7 GUpWfIkBZKU06tHNzpDgIBZICOzmJEuxYPVsZyri+xRS2NuaeDf0mAkhyPEOpjstqE +5KYBG02s5Cow== From: Yosry Ahmed To: Sean Christopherson Cc: Paolo Bonzini , kvm@vger.kernel.org, linux-kernel@vger.kernel.org, Yosry Ahmed , stable@vger.kernel.org Subject: [PATCH v1 2/4] KVM: nSVM: Delay stuffing L2's current RIP into NextRIP until vCPU run Date: Mon, 23 Feb 2026 15:46:34 +0000 Message-ID: <20260223154636.116671-3-yosry@kernel.org> X-Mailer: git-send-email 2.53.0.345.g96ddfc5eaa-goog In-Reply-To: <20260223154636.116671-1-yosry@kernel.org> References: <20260223154636.116671-1-yosry@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" For guests with NRIPS disabled, L1 does not provide NextRIP when running an L2 with an injected soft interrupt, instead it advances L2's RIP before running it. KVM uses L2's current RIP as the NextRIP in vmcb02 to emulate a CPU without NRIPS. However, in svm_set_nested_state(), the value used for L2's current RIP comes from vmcb02, which is just whatever the vCPU had in vmcb02 before restoring nested state (zero on a freshly created vCPU). Passing the cached RIP value instead (i.e. kvm_rip_read()) would only fix the issue if registers are restored before nested state. 
Instead, split the logic of setting NextRIP in vmcb02. Handle the 'normal' case of initializing vmcb02's NextRIP using NextRIP from vmcb12 (or KVM_GET_NESTED_STATE's payload) in nested_vmcb02_prepare_control(). Delay the special case of stuffing L2's current RIP into vmcb02's NextRIP until shortly before the vCPU is run, to make sure the most up-to-date value of RIP is used regardless of KVM_SET_REGS and KVM_SET_NESTED_STATE's relative ordering. Fixes: cc440cdad5b7 ("KVM: nSVM: implement KVM_GET_NESTED_STATE and KVM_SET= _NESTED_STATE") CC: stable@vger.kernel.org Suggested-by: Sean Christopherson Signed-off-by: Yosry Ahmed --- arch/x86/kvm/svm/nested.c | 25 ++++++++----------------- arch/x86/kvm/svm/svm.c | 18 ++++++++++++++++++ 2 files changed, 26 insertions(+), 17 deletions(-) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index a82e6f0472ca7..b7c80aeaebab3 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -844,24 +844,15 @@ static void nested_vmcb02_prepare_control(struct vcpu= _svm *svm, vmcb02->control.event_inj_err =3D svm->nested.ctl.event_inj_err; =20 /* - * NextRIP is consumed on VMRUN as the return address pushed on the - * stack for injected soft exceptions/interrupts. If nrips is exposed - * to L1, take it verbatim from vmcb12. - * - * If nrips is supported in hardware but not exposed to L1, stuff the - * actual L2 RIP to emulate what a nrips=3D0 CPU would do (L1 is - * responsible for advancing RIP prior to injecting the event). This is - * only the case for the first L2 run after VMRUN. After that (e.g. - * during save/restore), NextRIP is updated by the CPU and/or KVM, and - * the value of the L2 RIP from vmcb12 should not be used. + * If nrips is exposed to L1, take NextRIP as-is. Otherwise, L1 + * advances L2's RIP before VMRUN instead of using NextRIP. KVM will + * stuff the current RIP as vmcb02's NextRIP before L2 is run. After + * the first run of L2 (e.g. 
after save+restore), NextRIP is updated by + * the CPU and/or KVM and should be used regardless of L1's support. */ - if (boot_cpu_has(X86_FEATURE_NRIPS)) { - if (guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS) || - !svm->nested.nested_run_pending) - vmcb02->control.next_rip =3D svm->nested.ctl.next_rip; - else - vmcb02->control.next_rip =3D vmcb12_rip; - } + if (guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS) || + !svm->nested.nested_run_pending) + vmcb02->control.next_rip =3D svm->nested.ctl.next_rip; =20 svm->nmi_l1_to_l2 =3D is_evtinj_nmi(vmcb02->control.event_inj); if (is_evtinj_soft(vmcb02->control.event_inj)) { diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 8f8bc863e2143..e084b9688f556 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1413,6 +1413,24 @@ static void svm_prepare_switch_to_guest(struct kvm_v= cpu *vcpu) sd->bp_spec_reduce_set =3D true; msr_set_bit(MSR_ZEN4_BP_CFG, MSR_ZEN4_BP_CFG_BP_SPEC_REDUCE_BIT); } + + /* + * If nrips is supported in hardware but not exposed to L1, stuff the + * actual L2 RIP to emulate what a nrips=3D0 CPU would do (L1 is + * responsible for advancing RIP prior to injecting the event). Once L2 + * runs after L1 executes VMRUN, NextRIP is updated by the CPU and/or + * KVM, and this is no longer needed. + * + * This is done here (as opposed to when preparing vmcb02) to use the + * most up-to-date value of RIP regardless of the order of restoring + * registers and nested state in the vCPU save+restore path. 
+ */ + if (is_guest_mode(vcpu) && svm->nested.nested_run_pending) { + if (boot_cpu_has(X86_FEATURE_NRIPS) && + !guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS)) + svm->vmcb->control.next_rip =3D kvm_rip_read(vcpu); + } + svm->guest_state_loaded =3D true; } =20 --=20 2.53.0.345.g96ddfc5eaa-goog From nobody Thu Apr 2 20:28:00 2026 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id E24CB303C93; Mon, 23 Feb 2026 15:46:53 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1771861614; cv=none; b=dogEETzlO9YiazhdD1Vgf+C/c4wIri7ddKac+l2Myasb+tcLwNjtoYDOHCTaDeD0qCMsIcSNEwEFcaO8mNyIyark1Dt/oacRPbDCzng1dcnPgDHtmLFGiGXAlOm0DXgWD0om32d9ktPkKZv2bvXOsZl1qMNA7P1beT1/2xTpK1c= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1771861614; c=relaxed/simple; bh=9Z3qvPLjIfA4xmLgP5BOB9tiL0sSsppjvHrWM3gcbrY=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=MRafO1//Pb3h/wzRf1biafOXRY0hooS2CH7CZ3pMQnRRj82Wn85F/H3B2Hs2pr8ep3b9iAcQNOlXbt93Ml8zrzoaDZwpwxa6LC85cCqwVJRB9rBvo8wQ6+B2Pyy4uB8lrn8twC10pZZwDklllsHXEo8ie6aGw9No/V5q4pnDjR8= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=LPyVv/Ar; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="LPyVv/Ar" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 71B3BC19424; Mon, 23 Feb 2026 15:46:53 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1771861613; bh=9Z3qvPLjIfA4xmLgP5BOB9tiL0sSsppjvHrWM3gcbrY=; 
h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=LPyVv/ArCOEdL8W+xcwtlqVcMNFXHAPaJQUzsijgX1PRUZUqOHlY3krVFGIl8sTeT 4fjIEijq2E3w6VBg2ESbQCNafUg78DCTN39Va+NSk1ByXnif7GI0jXZ78Jh1z8Jgi1 HeP6ilFsaO87FWXMZCTVOuTXNymYS9EoIVwPiSQvubwcU8nim3E2Av33cyQvQPHPGg 08+wHPocsijTGFflzaXZQra+xhXVuJjiRjRBp+K7x0eI66JfwypDzAityjB04nVg9+ kkB7pLNoI9MDHJqCF5eLbuqr86LBPG0aZv425g599aqrZakRiTsq0JqmRANGjPOEJh n9FOX6qCBf50Q== From: Yosry Ahmed To: Sean Christopherson Cc: Paolo Bonzini , kvm@vger.kernel.org, linux-kernel@vger.kernel.org, Yosry Ahmed , stable@vger.kernel.org Subject: [PATCH v1 3/4] KVM: nSVM: Delay setting soft IRQ RIP tracking fields until vCPU run Date: Mon, 23 Feb 2026 15:46:35 +0000 Message-ID: <20260223154636.116671-4-yosry@kernel.org> X-Mailer: git-send-email 2.53.0.345.g96ddfc5eaa-goog In-Reply-To: <20260223154636.116671-1-yosry@kernel.org> References: <20260223154636.116671-1-yosry@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" In the save+restore path, when restoring nested state, the values of RIP and CS base passed into nested_vmcb02_prepare_control() are mostly incorrect. They are both pulled from the vmcb02. For CS base, the value is only correct if system regs are restored before nested state. The value of RIP is whatever the vCPU had in vmcb02 before restoring nested state (zero on a freshly created vCPU). Instead, take a similar approach to NextRIP, and delay initializing the RIP tracking fields until shortly before the vCPU is run, to make sure the most up-to-date values of RIP and CS base are used regardless of KVM_SET_SREGS, KVM_SET_REGS, and KVM_SET_NESTED_STATE's relative ordering. 
Fixes: cc440cdad5b7 ("KVM: nSVM: implement KVM_GET_NESTED_STATE and KVM_SET= _NESTED_STATE") CC: stable@vger.kernel.org Suggested-by: Sean Christopherson Signed-off-by: Yosry Ahmed --- arch/x86/kvm/svm/nested.c | 17 ++++++++--------- arch/x86/kvm/svm/svm.c | 10 ++++++++++ 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index b7c80aeaebab3..0547fd2810a3a 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -741,9 +741,7 @@ static bool is_evtinj_nmi(u32 evtinj) return type =3D=3D SVM_EVTINJ_TYPE_NMI; } =20 -static void nested_vmcb02_prepare_control(struct vcpu_svm *svm, - unsigned long vmcb12_rip, - unsigned long vmcb12_csbase) +static void nested_vmcb02_prepare_control(struct vcpu_svm *svm) { u32 int_ctl_vmcb01_bits =3D V_INTR_MASKING_MASK; u32 int_ctl_vmcb12_bits =3D V_TPR_MASK | V_IRQ_INJECTION_BITS_MASK; @@ -855,14 +853,15 @@ static void nested_vmcb02_prepare_control(struct vcpu= _svm *svm, vmcb02->control.next_rip =3D svm->nested.ctl.next_rip; =20 svm->nmi_l1_to_l2 =3D is_evtinj_nmi(vmcb02->control.event_inj); + + /* + * soft_int_csbase, soft_int_old_rip, and soft_int_next_rip (if L1 + * doesn't have NRIPS) are initialized later, before the vCPU is run. 
+ */ if (is_evtinj_soft(vmcb02->control.event_inj)) { svm->soft_int_injected =3D true; - svm->soft_int_csbase =3D vmcb12_csbase; - svm->soft_int_old_rip =3D vmcb12_rip; if (guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS)) svm->soft_int_next_rip =3D svm->nested.ctl.next_rip; - else - svm->soft_int_next_rip =3D vmcb12_rip; } =20 /* LBR_CTL_ENABLE_MASK is controlled by svm_update_lbrv() */ @@ -960,7 +959,7 @@ int enter_svm_guest_mode(struct kvm_vcpu *vcpu, u64 vmc= b12_gpa, nested_svm_copy_common_state(svm->vmcb01.ptr, svm->nested.vmcb02.ptr); =20 svm_switch_vmcb(svm, &svm->nested.vmcb02); - nested_vmcb02_prepare_control(svm, vmcb12->save.rip, vmcb12->save.cs.base= ); + nested_vmcb02_prepare_control(svm); nested_vmcb02_prepare_save(svm, vmcb12); =20 ret =3D nested_svm_load_cr3(&svm->vcpu, svm->nested.save.cr3, @@ -1905,7 +1904,7 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu, nested_copy_vmcb_control_to_cache(svm, ctl); =20 svm_switch_vmcb(svm, &svm->nested.vmcb02); - nested_vmcb02_prepare_control(svm, svm->vmcb->save.rip, svm->vmcb->save.c= s.base); + nested_vmcb02_prepare_control(svm); =20 /* * While the nested guest CR3 is already checked and set by diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index e084b9688f556..37f3b031b3a76 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1424,11 +1424,21 @@ static void svm_prepare_switch_to_guest(struct kvm_= vcpu *vcpu) * This is done here (as opposed to when preparing vmcb02) to use the * most up-to-date value of RIP regardless of the order of restoring * registers and nested state in the vCPU save+restore path. + * + * Similarly, initialize svm->soft_int_* fields here to use the most + * up-to-date values of RIP and CS base, regardless of restore order. 
*/ if (is_guest_mode(vcpu) && svm->nested.nested_run_pending) { if (boot_cpu_has(X86_FEATURE_NRIPS) && !guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS)) svm->vmcb->control.next_rip =3D kvm_rip_read(vcpu); + + if (svm->soft_int_injected) { + svm->soft_int_csbase =3D svm->vmcb->save.cs.base; + svm->soft_int_old_rip =3D kvm_rip_read(vcpu); + if (!guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS)) + svm->soft_int_next_rip =3D kvm_rip_read(vcpu); + } } =20 svm->guest_state_loaded =3D true; --=20 2.53.0.345.g96ddfc5eaa-goog From nobody Thu Apr 2 20:28:00 2026 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 3B6BE307494; Mon, 23 Feb 2026 15:46:54 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1771861614; cv=none; b=ftwNZS6g2B/G//BJavJaUzI8i31HEvVd7FDBy9Pg9tX55yZ8LOavdKp8CLQghY8bxI3PFMICDfdh71UvG36zAWEqwQ2+od+FkfGc/pyjve1SKK4eVdJYQanNi+7oJEIQN1AkfELw5TLXa3NlmCAkiCJowtjLArzlQ/Wi6wuN3Cs= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1771861614; c=relaxed/simple; bh=agBPf8Vk41yr5hWyg5kXDZN4Wnbv60LdOCC2TE3TpVY=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=o2mKLSdfMP1O3SVZ5Fg1hBJpWJE9XNJ2sh0U9aPr/wENfGz7PtKD0PROIC5l1Po/9Ch03oJkYlYBpwFVhlRzzubfioQBNqg+NByhhOWK2vqWsiMOaWBonGNNo4CfolxGGsg14oitQi9WFxR88wDxPNXe9wLo423EWRHPohSh2jk= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=VlNB5p5w; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="VlNB5p5w" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 
D24FAC116D0; Mon, 23 Feb 2026 15:46:53 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1771861614; bh=agBPf8Vk41yr5hWyg5kXDZN4Wnbv60LdOCC2TE3TpVY=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=VlNB5p5wV2BxtlCmZeoTbo4tSAP+zHxtjM3XHR6SCuEbJkZDbkxesTcw0ffXjW5Jq qeTRQ8Gie0Ytat6ylXcB7uoouEnjVRLtxi4/wZdazk/NKqrXtroxMNnAMozlM3rQS8 YU9v/omCXIU6/Lgh9PA6eurNMQZR/hjbxaCyf3PccOCJP7h4HU+TdHnqCsBIjbtGKC B0Tbqd3Jgf+seCxpqSHb94RM8iQlDEG1VTKg98cQtz5R2KUjpfVDFJSqLx3SLClnP+ dJZ3icX7EcXhEICEmCKG6hcPzgolGgKOCLdHyCFUQJB5BCeg8f9MM7/6oJl/+ctf/3 2MoZVX1loS4UA== From: Yosry Ahmed To: Sean Christopherson Cc: Paolo Bonzini , kvm@vger.kernel.org, linux-kernel@vger.kernel.org, Yosry Ahmed Subject: [PATCH v1 4/4] DO NOT MERGE: KVM: selftests: Reproduce nested RIP restore bug Date: Mon, 23 Feb 2026 15:46:36 +0000 Message-ID: <20260223154636.116671-5-yosry@kernel.org> X-Mailer: git-send-email 2.53.0.345.g96ddfc5eaa-goog In-Reply-To: <20260223154636.116671-1-yosry@kernel.org> References: <20260223154636.116671-1-yosry@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Update svm_nested_soft_inject_test such that L1 syncs to userspace before running L2. The test then enables single-stepping and steps through guest code until VMRUN is executed, and saves/restores the VM immediately after (before L2 runs). This reproduces a bug in save/restore where L2's RIP is not used correctly to construct the vmcb02 at the destination. 
Signed-off-by: Yosry Ahmed --- .../testing/selftests/kvm/lib/x86/processor.c | 3 + .../kvm/x86/svm_nested_soft_inject_test.c | 74 +++++++++++++++---- 2 files changed, 61 insertions(+), 16 deletions(-) diff --git a/tools/testing/selftests/kvm/lib/x86/processor.c b/tools/testin= g/selftests/kvm/lib/x86/processor.c index fab18e9be66c9..3e8d516ec8d3f 100644 --- a/tools/testing/selftests/kvm/lib/x86/processor.c +++ b/tools/testing/selftests/kvm/lib/x86/processor.c @@ -1291,6 +1291,9 @@ void vcpu_load_state(struct kvm_vcpu *vcpu, struct kv= m_x86_state *state) =20 if (state->nested.size) vcpu_nested_state_set(vcpu, &state->nested); + + /* Switch between this and the call above */ + // vcpu_regs_set(vcpu, &state->regs); } =20 void kvm_x86_state_cleanup(struct kvm_x86_state *state) diff --git a/tools/testing/selftests/kvm/x86/svm_nested_soft_inject_test.c = b/tools/testing/selftests/kvm/x86/svm_nested_soft_inject_test.c index 4bd1655f9e6d0..dfefd8eed392a 100644 --- a/tools/testing/selftests/kvm/x86/svm_nested_soft_inject_test.c +++ b/tools/testing/selftests/kvm/x86/svm_nested_soft_inject_test.c @@ -101,6 +101,7 @@ static void l1_guest_code(struct svm_test_data *svm, ui= nt64_t is_nmi, uint64_t i vmcb->control.next_rip =3D vmcb->save.rip; } =20 + GUEST_SYNC(true); run_guest(vmcb, svm->vmcb_gpa); __GUEST_ASSERT(vmcb->control.exit_code =3D=3D SVM_EXIT_VMMCALL, "Expected VMMCAL #VMEXIT, got '0x%lx', info1 =3D '0x%lx, info2 = =3D '0x%lx'", @@ -131,6 +132,7 @@ static void l1_guest_code(struct svm_test_data *svm, ui= nt64_t is_nmi, uint64_t i /* The return address pushed on stack, skip over UD2 */ vmcb->control.next_rip =3D vmcb->save.rip + 2; =20 + GUEST_SYNC(true); run_guest(vmcb, svm->vmcb_gpa); __GUEST_ASSERT(vmcb->control.exit_code =3D=3D SVM_EXIT_HLT, "Expected HLT #VMEXIT, got '0x%lx', info1 =3D '0x%lx, info2 =3D '= 0x%lx'", @@ -140,6 +142,24 @@ static void l1_guest_code(struct svm_test_data *svm, u= int64_t is_nmi, uint64_t i GUEST_DONE(); } =20 +static struct kvm_vcpu 
*save_and_restore_vm(struct kvm_vm *vm, struct kvm_= vcpu *vcpu) +{ + struct kvm_x86_state *state =3D vcpu_save_state(vcpu); + + kvm_vm_release(vm); + vcpu =3D vm_recreate_with_one_vcpu(vm); + vcpu_load_state(vcpu, state); + kvm_x86_state_cleanup(state); + return vcpu; +} + +static bool is_nested_run_pending(struct kvm_vcpu *vcpu) +{ + struct kvm_x86_state *state =3D vcpu_save_state(vcpu); + + return state->nested.size && (state->nested.flags & KVM_STATE_NESTED_RUN_= PENDING); +} + static void run_test(bool is_nmi) { struct kvm_vcpu *vcpu; @@ -173,22 +193,44 @@ static void run_test(bool is_nmi) memset(&debug, 0, sizeof(debug)); vcpu_guest_debug_set(vcpu, &debug); =20 - struct ucall uc; - - alarm(2); - vcpu_run(vcpu); - alarm(0); - TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); - - switch (get_ucall(vcpu, &uc)) { - case UCALL_ABORT: - REPORT_GUEST_ASSERT(uc); - break; - /* NOT REACHED */ - case UCALL_DONE: - goto done; - default: - TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd); + for (;;) { + struct kvm_guest_debug debug; + struct ucall uc; + + alarm(2); + vcpu_run(vcpu); + alarm(0); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_SYNC: + /* + * L1 syncs before calling run_guest(), single-step over + * all instructions until VMRUN, and save+restore right + * after it (before L2 actually runs). + */ + debug.control =3D KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP; + vcpu_guest_debug_set(vcpu, &debug); + + do { + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_DEBUG); + } while (!is_nested_run_pending(vcpu)); + + memset(&debug, 0, sizeof(debug)); + vcpu_guest_debug_set(vcpu, &debug); + vcpu =3D save_and_restore_vm(vm, vcpu); + break; + + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + break; + /* NOT REACHED */ + case UCALL_DONE: + goto done; + default: + TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd); + } } done: kvm_vm_free(vm); --=20 2.53.0.345.g96ddfc5eaa-goog