Before the separation of the CPU and the MMU role, CR0.PG was not
available in the base MMU role, because two-dimensional paging always
used direct=1 in the MMU role. However, now that the raw role is
snapshotted in mmu->cpu_role, CR0.PG *can* be found (though inverted)
as !cpu_role.base.direct. There is no need to store it again in union
kvm_mmu_extended_role; instead, write an is_cr0_pg accessor by hand that
takes care of the inversion.
Likewise, CR4.PAE is now always present in the CPU role as
!cpu_role.base.has_4_byte_gpte. The inversion makes certain tests on
the MMU role easier, and is easily hidden by the is_cr4_pae accessor
when operating on the CPU role.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
arch/x86/include/asm/kvm_host.h | 2 --
arch/x86/kvm/mmu/mmu.c | 14 ++++++++++----
2 files changed, 10 insertions(+), 6 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 6bc5550ae530..52ceeadbed28 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -367,8 +367,6 @@ union kvm_mmu_extended_role {
struct {
unsigned int valid:1;
unsigned int execonly:1;
- unsigned int cr0_pg:1;
- unsigned int cr4_pae:1;
unsigned int cr4_pse:1;
unsigned int cr4_pke:1;
unsigned int cr4_smap:1;
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 483a3761db81..cf8a41675a79 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -224,16 +224,24 @@ static inline bool __maybe_unused is_##reg##_##name(struct kvm_mmu *mmu) \
{ \
return !!(mmu->cpu_role. base_or_ext . reg##_##name); \
}
-BUILD_MMU_ROLE_ACCESSOR(ext, cr0, pg);
BUILD_MMU_ROLE_ACCESSOR(base, cr0, wp);
BUILD_MMU_ROLE_ACCESSOR(ext, cr4, pse);
-BUILD_MMU_ROLE_ACCESSOR(ext, cr4, pae);
BUILD_MMU_ROLE_ACCESSOR(ext, cr4, smep);
BUILD_MMU_ROLE_ACCESSOR(ext, cr4, smap);
BUILD_MMU_ROLE_ACCESSOR(ext, cr4, pke);
BUILD_MMU_ROLE_ACCESSOR(ext, cr4, la57);
BUILD_MMU_ROLE_ACCESSOR(base, efer, nx);
+static inline bool is_cr0_pg(struct kvm_mmu *mmu)
+{
+ return !mmu->cpu_role.base.direct;
+}
+
+static inline bool is_cr4_pae(struct kvm_mmu *mmu)
+{
+ return !mmu->cpu_role.base.has_4_byte_gpte;
+}
+
static struct kvm_mmu_role_regs vcpu_to_role_regs(struct kvm_vcpu *vcpu)
{
struct kvm_mmu_role_regs regs = {
@@ -4712,8 +4720,6 @@ kvm_calc_cpu_role(struct kvm_vcpu *vcpu, const struct kvm_mmu_role_regs *regs)
else
role.base.level = PT32_ROOT_LEVEL;
- role.ext.cr0_pg = 1;
- role.ext.cr4_pae = ____is_cr4_pae(regs);
role.ext.cr4_smep = ____is_cr4_smep(regs);
role.ext.cr4_smap = ____is_cr4_smap(regs);
role.ext.cr4_pse = ____is_cr4_pse(regs);
--
2.31.1
On Thu, Apr 14, 2022, Paolo Bonzini wrote:
> Before the separation of the CPU and the MMU role, CR0.PG was not
> available in the base MMU role, because two-dimensional paging always
> used direct=1 in the MMU role. However, now that the raw role is
> snapshotted in mmu->cpu_role, CR0.PG *can* be found (though inverted)
> as !cpu_role.base.direct. There is no need to store it again in union
> kvm_mmu_extended_role; instead, write an is_cr0_pg accessor by hand that
> takes care of the inversion.
>
> Likewise, CR4.PAE is now always present in the CPU role as
> !cpu_role.base.has_4_byte_gpte. The inversion makes certain tests on
> the MMU role easier, and is easily hidden by the is_cr4_pae accessor
> when operating on the CPU role.
>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
> arch/x86/include/asm/kvm_host.h | 2 --
> arch/x86/kvm/mmu/mmu.c | 14 ++++++++++----
> 2 files changed, 10 insertions(+), 6 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 6bc5550ae530..52ceeadbed28 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -367,8 +367,6 @@ union kvm_mmu_extended_role {
> struct {
> unsigned int valid:1;
> unsigned int execonly:1;
> - unsigned int cr0_pg:1;
> - unsigned int cr4_pae:1;
> unsigned int cr4_pse:1;
> unsigned int cr4_pke:1;
> unsigned int cr4_smap:1;
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index 483a3761db81..cf8a41675a79 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -224,16 +224,24 @@ static inline bool __maybe_unused is_##reg##_##name(struct kvm_mmu *mmu) \
> { \
> return !!(mmu->cpu_role. base_or_ext . reg##_##name); \
> }
> -BUILD_MMU_ROLE_ACCESSOR(ext, cr0, pg);
> BUILD_MMU_ROLE_ACCESSOR(base, cr0, wp);
> BUILD_MMU_ROLE_ACCESSOR(ext, cr4, pse);
> -BUILD_MMU_ROLE_ACCESSOR(ext, cr4, pae);
> BUILD_MMU_ROLE_ACCESSOR(ext, cr4, smep);
> BUILD_MMU_ROLE_ACCESSOR(ext, cr4, smap);
> BUILD_MMU_ROLE_ACCESSOR(ext, cr4, pke);
> BUILD_MMU_ROLE_ACCESSOR(ext, cr4, la57);
> BUILD_MMU_ROLE_ACCESSOR(base, efer, nx);
>
> +static inline bool is_cr0_pg(struct kvm_mmu *mmu)
> +{
> + return !mmu->cpu_role.base.direct;
> +}
> +
> +static inline bool is_cr4_pae(struct kvm_mmu *mmu)
> +{
> + return !mmu->cpu_role.base.has_4_byte_gpte;
If it's not too late for fixup, this should be:
return is_cr0_pg(mmu) && !mmu->cpu_role.base.has_4_byte_gpte;
because has_4_byte_gpte will also be false when paging is disabled. The current
code works because the only users check is_cr0_pg() beforehand, but IMO this is
unnecessarily dangerous to leave lying around (and the previous code set cr4_pae
iff cr0_pg=1).
If it's too late for fixup...
--
From: Sean Christopherson <seanjc@google.com>
Date: Mon, 9 May 2022 17:13:39 -0700
Subject: [PATCH] KVM: x86/mmu: Return true from is_cr4_pae() iff CR0.PG is set
Condition is_cr4_pae() on is_cr0_pg() in addition to the !4-byte gPTE
check.  From the MMU's perspective, PAE is disabled if paging is
disabled. The current code works because all callers check is_cr0_pg()
before invoking is_cr4_pae(), but relying on callers to maintain that
behavior is unnecessarily risky.
Fixes: faf729621c96 ("KVM: x86/mmu: remove redundant bits from extended role")
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
arch/x86/kvm/mmu/mmu.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 909372762363..d1c20170a553 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -240,7 +240,7 @@ static inline bool is_cr0_pg(struct kvm_mmu *mmu)
static inline bool is_cr4_pae(struct kvm_mmu *mmu)
{
- return !mmu->cpu_role.base.has_4_byte_gpte;
+ return is_cr0_pg(mmu) && !mmu->cpu_role.base.has_4_byte_gpte;
}
static struct kvm_mmu_role_regs vcpu_to_role_regs(struct kvm_vcpu *vcpu)
base-commit: 2764011106d0436cb44702cfb0981339d68c3509
--
On 5/10/22 02:20, Sean Christopherson wrote:
> --
> From: Sean Christopherson <seanjc@google.com>
> Date: Mon, 9 May 2022 17:13:39 -0700
> Subject: [PATCH] KVM: x86/mmu: Return true from is_cr4_pae() iff CR0.PG is set
>
> Condition is_cr4_pae() on is_cr0_pg() in addition to the !4-byte gPTE
> check.  From the MMU's perspective, PAE is disabled if paging is
> disabled. The current code works because all callers check is_cr0_pg()
> before invoking is_cr4_pae(), but relying on callers to maintain that
> behavior is unnecessarily risky.
>
> Fixes: faf729621c96 ("KVM: x86/mmu: remove redundant bits from extended role")
> Signed-off-by: Sean Christopherson <seanjc@google.com>
> ---
> arch/x86/kvm/mmu/mmu.c | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index 909372762363..d1c20170a553 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -240,7 +240,7 @@ static inline bool is_cr0_pg(struct kvm_mmu *mmu)
>
> static inline bool is_cr4_pae(struct kvm_mmu *mmu)
> {
> - return !mmu->cpu_role.base.has_4_byte_gpte;
> + return is_cr0_pg(mmu) && !mmu->cpu_role.base.has_4_byte_gpte;
> }
>
> static struct kvm_mmu_role_regs vcpu_to_role_regs(struct kvm_vcpu *vcpu)
Hmm, thinking more about it, this is not needed, for two rather opposite
reasons:
* if is_cr4_pae() really were to represent the raw CR4.PAE value, this
is incorrect and it should be up to the callers to check is_cr0_pg()
* if is_cr4_pae() instead represents 8-byte page table entries, then it
already does so even before this patch, because of the following logic in
kvm_calc_cpu_role():
	if (!____is_cr0_pg(regs)) {
		role.base.direct = 1;
		return role;
	}
	...
	role.base.has_4_byte_gpte = !____is_cr4_pae(regs);
So whatever meaning we give to is_cr4_pae(), there is no need for the
adjustment.
Paolo
On Thu, May 12, 2022, Paolo Bonzini wrote:
> On 5/10/22 02:20, Sean Christopherson wrote:
> > --
> > From: Sean Christopherson <seanjc@google.com>
> > Date: Mon, 9 May 2022 17:13:39 -0700
> > Subject: [PATCH] KVM: x86/mmu: Return true from is_cr4_pae() iff CR0.PG is set
> >
> > Condition is_cr4_pae() on is_cr0_pg() in addition to the !4-byte gPTE
> > check.  From the MMU's perspective, PAE is disabled if paging is
> > disabled. The current code works because all callers check is_cr0_pg()
> > before invoking is_cr4_pae(), but relying on callers to maintain that
> > behavior is unnecessarily risky.
> >
> > Fixes: faf729621c96 ("KVM: x86/mmu: remove redundant bits from extended role")
> > Signed-off-by: Sean Christopherson <seanjc@google.com>
> > ---
> > arch/x86/kvm/mmu/mmu.c | 2 +-
> > 1 file changed, 1 insertion(+), 1 deletion(-)
> >
> > diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> > index 909372762363..d1c20170a553 100644
> > --- a/arch/x86/kvm/mmu/mmu.c
> > +++ b/arch/x86/kvm/mmu/mmu.c
> > @@ -240,7 +240,7 @@ static inline bool is_cr0_pg(struct kvm_mmu *mmu)
> >
> > static inline bool is_cr4_pae(struct kvm_mmu *mmu)
> > {
> > - return !mmu->cpu_role.base.has_4_byte_gpte;
> > + return is_cr0_pg(mmu) && !mmu->cpu_role.base.has_4_byte_gpte;
> > }
> >
> > static struct kvm_mmu_role_regs vcpu_to_role_regs(struct kvm_vcpu *vcpu)
>
> Hmm, thinking more about it, this is not needed, for two rather opposite
> reasons:
>
> * if is_cr4_pae() really were to represent the raw CR4.PAE value, this is
> incorrect and it should be up to the callers to check is_cr0_pg()
>
> * if is_cr4_pae() instead represents 8-byte page table entries, then it
>   already does so even before this patch, because of the following logic in
> kvm_calc_cpu_role():
>
> if (!____is_cr0_pg(regs)) {
> role.base.direct = 1;
> return role;
> }
> ...
> role.base.has_4_byte_gpte = !____is_cr4_pae(regs);
>
>
> So whatever meaning we give to is_cr4_pae(), there is no need for the
> adjustment.
I disagree, because is_cr4_pae() doesn't represent either of those things. It
represents the effective (not raw) CR4.PAE from the MMU's perspective. If you
want it to represent 8-byte gPTEs, that's fine, but then please name the helper
accordingly, because is_cr4_pae() is flat out wrong if CR0.PG=0 && CR4.PAE=0.
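[For readers following the disagreement, here is a minimal standalone model in
plain C; it is not KVM code, and calc_role(), pae_unguarded() and pae_guarded()
are invented names that only mirror kvm_calc_cpu_role() and the two is_cr4_pae()
variants discussed above. With CR0.PG=0 and CR4.PAE=0 the unguarded accessor
still reports PAE, while the guarded one does not.]

#include <stdbool.h>
#include <stdio.h>

struct role { bool direct; bool has_4_byte_gpte; };

/*
 * Mirrors the early return in kvm_calc_cpu_role(): the role starts zeroed,
 * so has_4_byte_gpte stays 0 when paging is disabled.
 */
static struct role calc_role(bool cr0_pg, bool cr4_pae)
{
	struct role r = { 0 };

	if (!cr0_pg) {
		r.direct = true;
		return r;
	}
	r.has_4_byte_gpte = !cr4_pae;
	return r;
}

/* Accessor as merged: only looks at has_4_byte_gpte. */
static bool pae_unguarded(struct role r)
{
	return !r.has_4_byte_gpte;
}

/* Accessor with the proposed is_cr0_pg() guard (modelled as !direct, as in the posted patch). */
static bool pae_guarded(struct role r)
{
	return !r.direct && !r.has_4_byte_gpte;
}

int main(void)
{
	struct role r = calc_role(false, false);	/* CR0.PG=0, CR4.PAE=0 */

	/* prints "unguarded: 1, guarded: 0" */
	printf("unguarded: %d, guarded: %d\n", pae_unguarded(r), pae_guarded(r));
	return 0;
}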
On 5/12/22 16:18, Sean Christopherson wrote:
> On Thu, May 12, 2022, Paolo Bonzini wrote:
>> On 5/10/22 02:20, Sean Christopherson wrote:
>>> --
>>> From: Sean Christopherson <seanjc@google.com>
>>> Date: Mon, 9 May 2022 17:13:39 -0700
>>> Subject: [PATCH] KVM: x86/mmu: Return true from is_cr4_pae() iff CR0.PG is set
>>>
>>> Condition is_cr4_pae() on is_cr0_pg() in addition to the !4-byte gPTE
>>> check. From the MMU's perspective, PAE is disabling if paging is
>>> disabled. The current code works because all callers check is_cr0_pg()
>>> before invoking is_cr4_pae(), but relying on callers to maintain that
>>> behavior is unnecessarily risky.
>>>
>>> Fixes: faf729621c96 ("KVM: x86/mmu: remove redundant bits from extended role")
>>> Signed-off-by: Sean Christopherson <seanjc@google.com>
>>> ---
>>> arch/x86/kvm/mmu/mmu.c | 2 +-
>>> 1 file changed, 1 insertion(+), 1 deletion(-)
>>>
>>> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
>>> index 909372762363..d1c20170a553 100644
>>> --- a/arch/x86/kvm/mmu/mmu.c
>>> +++ b/arch/x86/kvm/mmu/mmu.c
>>> @@ -240,7 +240,7 @@ static inline bool is_cr0_pg(struct kvm_mmu *mmu)
>>>
>>> static inline bool is_cr4_pae(struct kvm_mmu *mmu)
>>> {
>>> - return !mmu->cpu_role.base.has_4_byte_gpte;
>>> + return is_cr0_pg(mmu) && !mmu->cpu_role.base.has_4_byte_gpte;
>>> }
>>>
>>> static struct kvm_mmu_role_regs vcpu_to_role_regs(struct kvm_vcpu *vcpu)
>>
>> Hmm, thinking more about it, this is not needed, for two rather opposite
>> reasons:
>>
>> * if is_cr4_pae() really were to represent the raw CR4.PAE value, this is
>> incorrect and it should be up to the callers to check is_cr0_pg()
>>
>> * if is_cr4_pae() instead represents 8-byte page table entries, then it
>>   already does so even before this patch, because of the following logic in
>> kvm_calc_cpu_role():
>>
>> if (!____is_cr0_pg(regs)) {
>> role.base.direct = 1;
>> return role;
>> }
>> ...
>> role.base.has_4_byte_gpte = !____is_cr4_pae(regs);
>>
>>
>> So whatever meaning we give to is_cr4_pae(), there is no need for the
>> adjustment.
>
> I disagree, because is_cr4_pae() doesn't represent either of those things. It
> represents the effective (not raw) CR4.PAE from the MMU's perspective.
Doh, you're right that has_4_byte_gpte is actually 0 if CR0.PG=0.
Swapping stuff back is hard.
What do you think about a WARN_ON_ONCE(!is_cr0_pg(mmu))?
Paolo
On Thu, May 12, 2022, Paolo Bonzini wrote:
> On 5/12/22 16:18, Sean Christopherson wrote:
> > On Thu, May 12, 2022, Paolo Bonzini wrote:
> > > On 5/10/22 02:20, Sean Christopherson wrote:
> > > > --
> > > > From: Sean Christopherson <seanjc@google.com>
> > > > Date: Mon, 9 May 2022 17:13:39 -0700
> > > > Subject: [PATCH] KVM: x86/mmu: Return true from is_cr4_pae() iff CR0.PG is set
> > > >
> > > > Condition is_cr4_pae() on is_cr0_pg() in addition to the !4-byte gPTE
> > > > check.  From the MMU's perspective, PAE is disabled if paging is
> > > > disabled. The current code works because all callers check is_cr0_pg()
> > > > before invoking is_cr4_pae(), but relying on callers to maintain that
> > > > behavior is unnecessarily risky.
> > > >
> > > > Fixes: faf729621c96 ("KVM: x86/mmu: remove redundant bits from extended role")
> > > > Signed-off-by: Sean Christopherson <seanjc@google.com>
> > > > ---
> > > > arch/x86/kvm/mmu/mmu.c | 2 +-
> > > > 1 file changed, 1 insertion(+), 1 deletion(-)
> > > >
> > > > diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> > > > index 909372762363..d1c20170a553 100644
> > > > --- a/arch/x86/kvm/mmu/mmu.c
> > > > +++ b/arch/x86/kvm/mmu/mmu.c
> > > > @@ -240,7 +240,7 @@ static inline bool is_cr0_pg(struct kvm_mmu *mmu)
> > > >
> > > > static inline bool is_cr4_pae(struct kvm_mmu *mmu)
> > > > {
> > > > - return !mmu->cpu_role.base.has_4_byte_gpte;
> > > > + return is_cr0_pg(mmu) && !mmu->cpu_role.base.has_4_byte_gpte;
> > > > }
> > > >
> > > > static struct kvm_mmu_role_regs vcpu_to_role_regs(struct kvm_vcpu *vcpu)
> > >
> > > Hmm, thinking more about it, this is not needed, for two rather opposite
> > > reasons:
> > >
> > > * if is_cr4_pae() really were to represent the raw CR4.PAE value, this is
> > > incorrect and it should be up to the callers to check is_cr0_pg()
> > >
> > > * if is_cr4_pae() instead represents 8-byte page table entries, then it
> > >   already does so even before this patch, because of the following logic in
> > > kvm_calc_cpu_role():
> > >
> > > if (!____is_cr0_pg(regs)) {
> > > role.base.direct = 1;
> > > return role;
> > > }
> > > ...
> > > role.base.has_4_byte_gpte = !____is_cr4_pae(regs);
> > >
> > >
> > > So whatever meaning we give to is_cr4_pae(), there is no need for the
> > > adjustment.
> >
> > I disagree, because is_cr4_pae() doesn't represent either of those things. It
> > represents the effective (not raw) CR4.PAE from the MMU's perspective.
>
> Doh, you're right that has_4_byte_gpte is actually 0 if CR0.PG=0. Swapping
> stuff back is hard.
>
> What do you think about a WARN_ON_ONCE(!is_cr0_pg(mmu))?
Why bother? WARN and continue would be rather silly as we'd knowingly let KVM
do something wrong for no benefit. And this
	return !WARN_ON_ONCE(!is_cr0_pg(mmu)) && !mmu->cpu_role.base.has_4_byte_gpte;
feels wrong because there's nothing fundamentally broken with calling is_cr4_pae()
without first checking CR0.PG.
If you really want to avoid the is_cr0_pg() check, why not just use has_4_byte_gpte
directly? Logically I think that's easy enough to follow, e.g. 64 bits == 8 bytes,
32 bits == 4 bytes. We can always revisit the need for is_cr4_pae() if the MMU
needs to identify PAE paging for some reason, e.g. for PDPTR awareness.
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 909372762363..b05190027e20 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -238,11 +238,6 @@ static inline bool is_cr0_pg(struct kvm_mmu *mmu)
return mmu->cpu_role.base.level > 0;
}
-static inline bool is_cr4_pae(struct kvm_mmu *mmu)
-{
- return !mmu->cpu_role.base.has_4_byte_gpte;
-}
-
static struct kvm_mmu_role_regs vcpu_to_role_regs(struct kvm_vcpu *vcpu)
{
struct kvm_mmu_role_regs regs = {
@@ -4855,7 +4850,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu,
if (!is_cr0_pg(context))
context->gva_to_gpa = nonpaging_gva_to_gpa;
- else if (is_cr4_pae(context))
+ else if (!context->cpu_role.base.has_4_byte_gpte)
context->gva_to_gpa = paging64_gva_to_gpa;
else
context->gva_to_gpa = paging32_gva_to_gpa;
@@ -4877,7 +4872,7 @@ static void shadow_mmu_init_context(struct kvm_vcpu *vcpu, struct kvm_mmu *conte
if (!is_cr0_pg(context))
nonpaging_init_context(context);
- else if (is_cr4_pae(context))
+ else if (!context->cpu_role.base.has_4_byte_gpte)
paging64_init_context(context);
else
paging32_init_context(context);
On 5/10/22 02:20, Sean Christopherson wrote:
> If it's not too late for fixup, this should be:
>
>	return is_cr0_pg(mmu) && !mmu->cpu_role.base.has_4_byte_gpte;
It's not, thanks!
Paolo
On 4/14/22 09:39, Paolo Bonzini wrote:
> Before the separation of the CPU and the MMU role, CR0.PG was not
> available in the base MMU role, because two-dimensional paging always
> used direct=1 in the MMU role. However, now that the raw role is
> snapshotted in mmu->cpu_role, CR0.PG *can* be found (though inverted)
> as !cpu_role.base.direct. There is no need to store it again in union
> kvm_mmu_extended_role; instead, write an is_cr0_pg accessor by hand that
> takes care of the inversion.
>
> Likewise, CR4.PAE is now always present in the CPU role as
> !cpu_role.base.has_4_byte_gpte. The inversion makes certain tests on
> the MMU role easier, and is easily hidden by the is_cr4_pae accessor
> when operating on the CPU role.
>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Better:
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index cf8a41675a79..2a9b589192c3 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -234,7 +234,7 @@ BUILD_MMU_ROLE_ACCESSOR(base, efer, nx);
static inline bool is_cr0_pg(struct kvm_mmu *mmu)
{
- return !mmu->cpu_role.base.direct;
+ return mmu->cpu_role.base.level > 0;
}
static inline bool is_cr4_pae(struct kvm_mmu *mmu)
given that the future of the direct bit is unclear.
Paolo