svm_flush_tlb_asid() currently operates on the current VMCB. In
preparation for properly tracking TLB flushes for L1 and L2 ASIDs,
refactor it to work on a given VMCB. All existing callers pass the
current VMCB.
Create a svm_flush_tlb_guest() wrapper to use as the flush_tlb_guest()
callback.
kvm_hv_vcpu_purge_flush_tlb() is only called when the current VMCB is
passed to maintain current behavior.
No functional change intended.
Signed-off-by: Yosry Ahmed <yosry.ahmed@linux.dev>
---
arch/x86/kvm/svm/svm.c | 25 ++++++++++++++++++-------
1 file changed, 18 insertions(+), 7 deletions(-)
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 08340ae57777b..2108b48ba4959 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -3954,7 +3954,7 @@ static void svm_enable_nmi_window(struct kvm_vcpu *vcpu)
svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
}
-static void svm_flush_tlb_asid(struct kvm_vcpu *vcpu)
+static void svm_flush_tlb_asid(struct kvm_vcpu *vcpu, struct kvm_vmcb_info *vmcb)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -3963,7 +3963,8 @@ static void svm_flush_tlb_asid(struct kvm_vcpu *vcpu)
* A TLB flush for the current ASID flushes both "host" and "guest" TLB
* entries, and thus is a superset of Hyper-V's fine grained flushing.
*/
- kvm_hv_vcpu_purge_flush_tlb(vcpu);
+ if (vmcb == svm->current_vmcb)
+ kvm_hv_vcpu_purge_flush_tlb(vcpu);
/*
* Flush only the current ASID even if the TLB flush was invoked via
@@ -3973,14 +3974,15 @@ static void svm_flush_tlb_asid(struct kvm_vcpu *vcpu)
* VM-Exit (via kvm_mmu_reset_context()).
*/
if (static_cpu_has(X86_FEATURE_FLUSHBYASID))
- svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ASID;
+ vmcb->ptr->control.tlb_ctl = TLB_CONTROL_FLUSH_ASID;
else
- svm->current_vmcb->asid_generation--;
+ vmcb->asid_generation--;
}
static void svm_flush_tlb_current(struct kvm_vcpu *vcpu)
{
hpa_t root_tdp = vcpu->arch.mmu->root.hpa;
+ struct vcpu_svm *svm = to_svm(vcpu);
/*
* When running on Hyper-V with EnlightenedNptTlb enabled, explicitly
@@ -3991,11 +3993,13 @@ static void svm_flush_tlb_current(struct kvm_vcpu *vcpu)
if (svm_hv_is_enlightened_tlb_enabled(vcpu) && VALID_PAGE(root_tdp))
hyperv_flush_guest_mapping(root_tdp);
- svm_flush_tlb_asid(vcpu);
+ svm_flush_tlb_asid(vcpu, svm->current_vmcb);
}
static void svm_flush_tlb_all(struct kvm_vcpu *vcpu)
{
+ struct vcpu_svm *svm = to_svm(vcpu);
+
/*
* When running on Hyper-V with EnlightenedNptTlb enabled, remote TLB
* flushes should be routed to hv_flush_remote_tlbs() without requesting
@@ -4006,7 +4010,7 @@ static void svm_flush_tlb_all(struct kvm_vcpu *vcpu)
if (WARN_ON_ONCE(svm_hv_is_enlightened_tlb_enabled(vcpu)))
hv_flush_remote_tlbs(vcpu->kvm);
- svm_flush_tlb_asid(vcpu);
+ svm_flush_tlb_asid(vcpu, svm->current_vmcb);
}
static void svm_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t gva)
@@ -4016,6 +4020,13 @@ static void svm_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t gva)
invlpga(gva, svm->vmcb->control.asid);
}
+static void svm_flush_tlb_guest(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
+
+ svm_flush_tlb_asid(vcpu, svm->current_vmcb);
+}
+
static inline void sync_cr8_to_lapic(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -5055,7 +5066,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.flush_tlb_all = svm_flush_tlb_all,
.flush_tlb_current = svm_flush_tlb_current,
.flush_tlb_gva = svm_flush_tlb_gva,
- .flush_tlb_guest = svm_flush_tlb_asid,
+ .flush_tlb_guest = svm_flush_tlb_guest,
.vcpu_pre_run = svm_vcpu_pre_run,
.vcpu_run = svm_vcpu_run,
--
2.48.1.362.g079036d154-goog
On Wed, 2025-02-05 at 18:23 +0000, Yosry Ahmed wrote:
> svm_flush_tlb_asid() currently operates on the current VMCB. In
> preparation for properly tracking TLB flushes for L1 and L2 ASIDs,
> refactor it to work on a given VMCB. All existing callers pass the
> current VMCB.
>
> Create a svm_flush_tlb_guest() wrapper to use as the flush_tlb_guest()
> callback.
>
> kvm_hv_vcpu_purge_flush_tlb() is only called when the current VMCB is
> passed to maintain current behavior.
>
> No functional change intended.
>
> Signed-off-by: Yosry Ahmed <yosry.ahmed@linux.dev>
> ---
> arch/x86/kvm/svm/svm.c | 25 ++++++++++++++++++-------
> 1 file changed, 18 insertions(+), 7 deletions(-)
>
> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
> index 08340ae57777b..2108b48ba4959 100644
> --- a/arch/x86/kvm/svm/svm.c
> +++ b/arch/x86/kvm/svm/svm.c
> @@ -3954,7 +3954,7 @@ static void svm_enable_nmi_window(struct kvm_vcpu *vcpu)
> svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
> }
>
> -static void svm_flush_tlb_asid(struct kvm_vcpu *vcpu)
> +static void svm_flush_tlb_asid(struct kvm_vcpu *vcpu, struct kvm_vmcb_info *vmcb)
> {
> struct vcpu_svm *svm = to_svm(vcpu);
>
> @@ -3963,7 +3963,8 @@ static void svm_flush_tlb_asid(struct kvm_vcpu *vcpu)
> * A TLB flush for the current ASID flushes both "host" and "guest" TLB
> * entries, and thus is a superset of Hyper-V's fine grained flushing.
> */
> - kvm_hv_vcpu_purge_flush_tlb(vcpu);
> + if (vmcb == svm->current_vmcb)
> + kvm_hv_vcpu_purge_flush_tlb(vcpu);
This is a Hyper-V PV feature that should be looked at very carefully.
To recap,
each vCPU has 2 queues of pending TLB flush requests that target only a small range of
memory pages.
One is for L1 and one for L2, because now KVM supports a mode where L2
can ask L0 to do a tlb flush on its behalf, and KVM will figure out to which L1 vCPUs
to send this flush request.
Requests arrive from other vCPUs.
Here we purge the TLB request queue because we flushed a super-set of the requests,
which used to contain both L1 and L2 TLB, but soon that won't be true.
So I think it might make sense to also add vmcb to kvm_hv_vcpu_purge_flush_tlb, and then
depending if it is vmcb01 or vmcb02, purge the correct queue.
I don't know if this is theoretical or actual bug but it is better to be safe IMHO.
>
> /*
> * Flush only the current ASID even if the TLB flush was invoked via
> @@ -3973,14 +3974,15 @@ static void svm_flush_tlb_asid(struct kvm_vcpu *vcpu)
> * VM-Exit (via kvm_mmu_reset_context()).
> */
> if (static_cpu_has(X86_FEATURE_FLUSHBYASID))
> - svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ASID;
> + vmcb->ptr->control.tlb_ctl = TLB_CONTROL_FLUSH_ASID;
> else
> - svm->current_vmcb->asid_generation--;
> + vmcb->asid_generation--;
> }
>
> static void svm_flush_tlb_current(struct kvm_vcpu *vcpu)
> {
> hpa_t root_tdp = vcpu->arch.mmu->root.hpa;
> + struct vcpu_svm *svm = to_svm(vcpu);
>
> /*
> * When running on Hyper-V with EnlightenedNptTlb enabled, explicitly
> @@ -3991,11 +3993,13 @@ static void svm_flush_tlb_current(struct kvm_vcpu *vcpu)
> if (svm_hv_is_enlightened_tlb_enabled(vcpu) && VALID_PAGE(root_tdp))
> hyperv_flush_guest_mapping(root_tdp);
>
> - svm_flush_tlb_asid(vcpu);
> + svm_flush_tlb_asid(vcpu, svm->current_vmcb);
> }
>
> static void svm_flush_tlb_all(struct kvm_vcpu *vcpu)
> {
> + struct vcpu_svm *svm = to_svm(vcpu);
> +
> /*
> * When running on Hyper-V with EnlightenedNptTlb enabled, remote TLB
> * flushes should be routed to hv_flush_remote_tlbs() without requesting
> @@ -4006,7 +4010,7 @@ static void svm_flush_tlb_all(struct kvm_vcpu *vcpu)
> if (WARN_ON_ONCE(svm_hv_is_enlightened_tlb_enabled(vcpu)))
> hv_flush_remote_tlbs(vcpu->kvm);
>
> - svm_flush_tlb_asid(vcpu);
> + svm_flush_tlb_asid(vcpu, svm->current_vmcb);
> }
>
> static void svm_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t gva)
> @@ -4016,6 +4020,13 @@ static void svm_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t gva)
> invlpga(gva, svm->vmcb->control.asid);
> }
>
> +static void svm_flush_tlb_guest(struct kvm_vcpu *vcpu)
> +{
> + struct vcpu_svm *svm = to_svm(vcpu);
> +
> + svm_flush_tlb_asid(vcpu, svm->current_vmcb);
> +}
> +
Small nitpick: I think that this change is still unrelated and thus
probably should go to a separate patch.
Best regards,
Maxim Levitsky
> static inline void sync_cr8_to_lapic(struct kvm_vcpu *vcpu)
> {
> struct vcpu_svm *svm = to_svm(vcpu);
> @@ -5055,7 +5066,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
> .flush_tlb_all = svm_flush_tlb_all,
> .flush_tlb_current = svm_flush_tlb_current,
> .flush_tlb_gva = svm_flush_tlb_gva,
> - .flush_tlb_guest = svm_flush_tlb_asid,
> + .flush_tlb_guest = svm_flush_tlb_guest,
>
> .vcpu_pre_run = svm_vcpu_pre_run,
> .vcpu_run = svm_vcpu_run,
On Fri, Feb 28, 2025 at 08:29:34PM -0500, Maxim Levitsky wrote:
> On Wed, 2025-02-05 at 18:23 +0000, Yosry Ahmed wrote:
> > svm_flush_tlb_asid() currently operates on the current VMCB. In
> > preparation for properly tracking TLB flushes for L1 and L2 ASIDs,
> > refactor it to work on a given VMCB. All existing callers pass the
> > current VMCB.
> >
> > Create a svm_flush_tlb_guest() wrapper to use as the flush_tlb_guest()
> > callback.
> >
> > kvm_hv_vcpu_purge_flush_tlb() is only called when the current VMCB is
> > passed to maintain current behavior.
> >
> > No functional change intended.
> >
> > Signed-off-by: Yosry Ahmed <yosry.ahmed@linux.dev>
> > ---
> > arch/x86/kvm/svm/svm.c | 25 ++++++++++++++++++-------
> > 1 file changed, 18 insertions(+), 7 deletions(-)
> >
> > diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
> > index 08340ae57777b..2108b48ba4959 100644
> > --- a/arch/x86/kvm/svm/svm.c
> > +++ b/arch/x86/kvm/svm/svm.c
> > @@ -3954,7 +3954,7 @@ static void svm_enable_nmi_window(struct kvm_vcpu *vcpu)
> > svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
> > }
> >
> > -static void svm_flush_tlb_asid(struct kvm_vcpu *vcpu)
> > +static void svm_flush_tlb_asid(struct kvm_vcpu *vcpu, struct kvm_vmcb_info *vmcb)
> > {
> > struct vcpu_svm *svm = to_svm(vcpu);
> >
> > @@ -3963,7 +3963,8 @@ static void svm_flush_tlb_asid(struct kvm_vcpu *vcpu)
> > * A TLB flush for the current ASID flushes both "host" and "guest" TLB
> > * entries, and thus is a superset of Hyper-V's fine grained flushing.
> > */
> > - kvm_hv_vcpu_purge_flush_tlb(vcpu);
> > + if (vmcb == svm->current_vmcb)
> > + kvm_hv_vcpu_purge_flush_tlb(vcpu);
>
> This is hyperv PV feature that should be looked upon very carefully.
>
> To recap,
> each vCPU has 2 queues of pending TLB flush requests that target only small range of
> memory pages.
Thanks for pointing this out, I missed this.
>
> One is for L1 and one for L2, because now KVM supports a mode where L2
> can ask L0 to do a tlb flush on its behalf, and KVM will figure out to which L1 vCPUs
> to send this flush request.
>
> Requests arrive from other vCPUs.
>
> Here we purge the TLB request queue because we flushed a super-set of the requests,
> which used to contain both L1 and L2 TLB, but soon that won't be true.
>
> So I think it might make sense to also add vmcb to kvm_hv_vcpu_purge_flush_tlb, and then
> depending if it is vmcb01 or vmcb02, purge the correct queue.
> I don't know if this is theoretical or actual bug but it is better to be safe IMHO.
But I think we are already purging the right queue here. We purge the
TLB flush requests only if we are flushing the current VMCB. Within
kvm_hv_vcpu_purge_flush_tlb(), we choose the queue based on
is_guest_mode(vcpu).
svm_flush_tlb_asid() is called when servicing a TLB flush request, at
which point IIUC the current VMCB and whether the vCPU is in guest mode
should be in sync. So we will be correctly purging the L1 or L2 queue
based on the current VMCB.
That being said, it's a bit confusing that svm_flush_tlb_asid() uses the
VMCB to differentiate L1 and L2, while kvm_hv_vcpu_purge_flush_tlb()
uses is_guest_mode(). We also miss the opportunity to purge both queues
when called from svm_flush_tlb_all().
However, we cannot pass the VMCB to kvm_hv_vcpu_purge_flush_tlb() as it
is also called from common code. So I think we can make
kvm_hv_vcpu_purge_flush_tlb() take is_guest_mode as a parameter and pass
it here based on which VMCB is passed in.
WDYT?
On Mon, 2025-03-03 at 21:58 +0000, Yosry Ahmed wrote:
> On Fri, Feb 28, 2025 at 08:29:34PM -0500, Maxim Levitsky wrote:
> > On Wed, 2025-02-05 at 18:23 +0000, Yosry Ahmed wrote:
> > > svm_flush_tlb_asid() currently operates on the current VMCB. In
> > > preparation for properly tracking TLB flushes for L1 and L2 ASIDs,
> > > refactor it to work on a given VMCB. All existing callers pass the
> > > current VMCB.
> > >
> > > Create a svm_flush_tlb_guest() wrapper to use as the flush_tlb_guest()
> > > callback.
> > >
> > > kvm_hv_vcpu_purge_flush_tlb() is only called when the current VMCB is
> > > passed to maintain current behavior.
> > >
> > > No functional change intended.
> > >
> > > Signed-off-by: Yosry Ahmed <yosry.ahmed@linux.dev>
> > > ---
> > > arch/x86/kvm/svm/svm.c | 25 ++++++++++++++++++-------
> > > 1 file changed, 18 insertions(+), 7 deletions(-)
> > >
> > > diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
> > > index 08340ae57777b..2108b48ba4959 100644
> > > --- a/arch/x86/kvm/svm/svm.c
> > > +++ b/arch/x86/kvm/svm/svm.c
> > > @@ -3954,7 +3954,7 @@ static void svm_enable_nmi_window(struct kvm_vcpu *vcpu)
> > > svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
> > > }
> > >
> > > -static void svm_flush_tlb_asid(struct kvm_vcpu *vcpu)
> > > +static void svm_flush_tlb_asid(struct kvm_vcpu *vcpu, struct kvm_vmcb_info *vmcb)
> > > {
> > > struct vcpu_svm *svm = to_svm(vcpu);
> > >
> > > @@ -3963,7 +3963,8 @@ static void svm_flush_tlb_asid(struct kvm_vcpu *vcpu)
> > > * A TLB flush for the current ASID flushes both "host" and "guest" TLB
> > > * entries, and thus is a superset of Hyper-V's fine grained flushing.
> > > */
> > > - kvm_hv_vcpu_purge_flush_tlb(vcpu);
> > > + if (vmcb == svm->current_vmcb)
> > > + kvm_hv_vcpu_purge_flush_tlb(vcpu);
> >
> > This is hyperv PV feature that should be looked upon very carefully.
> >
> > To recap,
> > each vCPU has 2 queues of pending TLB flush requests that target only small range of
> > memory pages.
>
> Thanks for pointing this out, I missed this.
>
> > One is for L1 and one for L2, because now KVM supports a mode where L2
> > can ask L0 to do a tlb flush on its behalf, and KVM will figure out to which L1 vCPUs
> > to send this flush request.
> >
> > Requests arrive from other vCPUs.
> >
> > Here we purge the TLB request queue because we flushed a super-set of the requests,
> > which used to contain both L1 and L2 TLB, but soon that won't be true.
> >
> > So I think it might make sense to also add vmcb to kvm_hv_vcpu_purge_flush_tlb, and then
> > depending if it is vmcb01 or vmcb02, purge the correct queue.
> > I don't know if this is theoretical or actual bug but it is better to be safe IMHO.
>
> But I think we are already purging the right queue here. We purge the
> TLB flush requests only if we are flushing the current VMCB. Within
> kvm_hv_vcpu_purge_flush_tlb(), we choose the queue based on
> is_guest_mode(vcpu).
>
> svm_flush_tlb_asid() is called when servicing a TLB flush request, at
> which point IIUC the current VMCB and whether the vCPU is in guest mode
> should be in sync. So we will be correctly purging the L1 or L2 queue
> based on the current VMCB.
Yes, I also think so, but to harden this code against a potential bug IMHO
it makes sense to ensure that svm_flush_tlb_asid works only on a given
vmcb without any hidden assumptions.
>
> That being said, it's a bit confusing that svm_flush_tlb_asid() uses the
> VMCB to differentiate L1 and L2 ,while kvm_hv_vcpu_purge_flush_tlb()
> uses is_guest_mode(). We also miss the opportunity to purge both queues
> when called from svm_flush_tlb_all().
Yes, I noticed that too.
>
> However, we cannot pass the VMCB to kvm_hv_vcpu_purge_flush_tlb() as it
> is also called from common code. So I think we can make
> kvm_hv_vcpu_purge_flush_tlb() take is_guest_mode as a parameter and pass
> it here based on which VMCB is passed in.
>
> WDYT?
>
Looking at this again, I see that kvm_hv_vcpu_purge_flush_tlb() can't really work
on a vmcb, so maybe the better solution is to remove the call to
kvm_hv_vcpu_purge_flush_tlb() from svm_flush_tlb_asid_vmcb() altogether
and instead let the caller call both svm_flush_tlb_asid() and kvm_hv_vcpu_purge_flush_tlb()?
This might lead to some code duplication, but it will emphasize that svm_flush_tlb_asid_vmcb()
can work on any vmcb, regardless of which one is currently loaded.
As long as it works though, I won't strongly object to whatever code that works.
Best regards,
Maxim Levitsky
© 2016 - 2026 Red Hat, Inc.