Dynamically allocate the (massive) array of hashed lists used to track
shadow pages, as the array itself is 32KiB, i.e. is an order-3 allocation
all on its own, and is *exactly* an order-3 allocation. Dynamically
allocating the array will allow allocating "struct kvm" using kvmalloc(),
and will also allow deferring allocation of the array until it's actually
needed, i.e. until the first shadow root is allocated.
Opportunistically use kvmalloc() (via kvcalloc()) for the hashed lists, as
an order-3 allocation is (stating the obvious) less likely to fail than an
order-4 allocation, and the overhead of vmalloc() is undesirable given that
the size of the allocation is fixed; kvmalloc() falls back to vmalloc() only
if the kmalloc() attempt fails.
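For the record, the sizing math, assuming KVM_MMU_HASH_SHIFT is still 12 and
an 8-byte struct hlist_head on x86-64 (illustrative snippet only, not part of
the diff below):

	/*
	 * Illustrative only: KVM_NUM_MMU_PAGES = 1 << 12 = 4096 hash buckets,
	 * each bucket is a single pointer (8 bytes), so the array is
	 * 4096 * 8 = 32768 bytes = 32KiB = 8 contiguous 4KiB pages, i.e.
	 * exactly an order-3 allocation.
	 */
	BUILD_BUG_ON(KVM_NUM_MMU_PAGES * sizeof(struct hlist_head) != 8 * PAGE_SIZE);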
Cc: Vipin Sharma <vipinsh@google.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
arch/x86/include/asm/kvm_host.h | 4 ++--
arch/x86/kvm/mmu/mmu.c | 23 ++++++++++++++++++++++-
arch/x86/kvm/x86.c | 5 ++++-
3 files changed, 28 insertions(+), 4 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 330cdcbed1a6..9667d6b929ee 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1343,7 +1343,7 @@ struct kvm_arch {
bool has_private_mem;
bool has_protected_state;
bool pre_fault_allowed;
- struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
+ struct hlist_head *mmu_page_hash;
struct list_head active_mmu_pages;
/*
* A list of kvm_mmu_page structs that, if zapped, could possibly be
@@ -2006,7 +2006,7 @@ void kvm_mmu_vendor_module_exit(void);
void kvm_mmu_destroy(struct kvm_vcpu *vcpu);
int kvm_mmu_create(struct kvm_vcpu *vcpu);
-void kvm_mmu_init_vm(struct kvm *kvm);
+int kvm_mmu_init_vm(struct kvm *kvm);
void kvm_mmu_uninit_vm(struct kvm *kvm);
void kvm_mmu_init_memslot_memory_attributes(struct kvm *kvm,
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index cbc84c6abc2e..41da2cb1e3f1 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3882,6 +3882,18 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
return r;
}
+static int kvm_mmu_alloc_page_hash(struct kvm *kvm)
+{
+ typeof(kvm->arch.mmu_page_hash) h;
+
+ h = kvcalloc(KVM_NUM_MMU_PAGES, sizeof(*h), GFP_KERNEL_ACCOUNT);
+ if (!h)
+ return -ENOMEM;
+
+ kvm->arch.mmu_page_hash = h;
+ return 0;
+}
+
static int mmu_first_shadow_root_alloc(struct kvm *kvm)
{
struct kvm_memslots *slots;
@@ -6675,13 +6687,19 @@ static void kvm_mmu_zap_all_fast(struct kvm *kvm)
kvm_tdp_mmu_zap_invalidated_roots(kvm, true);
}
-void kvm_mmu_init_vm(struct kvm *kvm)
+int kvm_mmu_init_vm(struct kvm *kvm)
{
+ int r;
+
kvm->arch.shadow_mmio_value = shadow_mmio_value;
INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
INIT_LIST_HEAD(&kvm->arch.possible_nx_huge_pages);
spin_lock_init(&kvm->arch.mmu_unsync_pages_lock);
+ r = kvm_mmu_alloc_page_hash(kvm);
+ if (r)
+ return r;
+
if (tdp_mmu_enabled)
kvm_mmu_init_tdp_mmu(kvm);
@@ -6692,6 +6710,7 @@ void kvm_mmu_init_vm(struct kvm *kvm)
kvm->arch.split_desc_cache.kmem_cache = pte_list_desc_cache;
kvm->arch.split_desc_cache.gfp_zero = __GFP_ZERO;
+ return 0;
}
static void mmu_free_vm_memory_caches(struct kvm *kvm)
@@ -6703,6 +6722,8 @@ static void mmu_free_vm_memory_caches(struct kvm *kvm)
void kvm_mmu_uninit_vm(struct kvm *kvm)
{
+ kvfree(kvm->arch.mmu_page_hash);
+
if (tdp_mmu_enabled)
kvm_mmu_uninit_tdp_mmu(kvm);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index f9f798f286ce..d204ba9368f8 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -12787,7 +12787,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
if (ret)
goto out;
- kvm_mmu_init_vm(kvm);
+ ret = kvm_mmu_init_vm(kvm);
+ if (ret)
+ goto out_cleanup_page_track;
ret = kvm_x86_call(vm_init)(kvm);
if (ret)
@@ -12840,6 +12842,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
out_uninit_mmu:
kvm_mmu_uninit_vm(kvm);
+out_cleanup_page_track:
kvm_page_track_cleanup(kvm);
out:
return ret;
--
2.49.0.1151.ga128411c76-goog
On 5/23/2025 8:11 AM, Sean Christopherson wrote:
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index cbc84c6abc2e..41da2cb1e3f1 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -3882,6 +3882,18 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
> return r;
> }
>
> +static int kvm_mmu_alloc_page_hash(struct kvm *kvm)
> +{
> + typeof(kvm->arch.mmu_page_hash) h;
Out of curiosity, it is uncommon in KVM to use typeof() given that we
know what the type actually is. Is there some specific reason?

Anyway, it works.
Reviewed-by: Xiaoyao Li <xiaoyao.li@intel.com>
On Wed, May 28, 2025, Xiaoyao Li wrote:
> On 5/23/2025 8:11 AM, Sean Christopherson wrote:
> > +static int kvm_mmu_alloc_page_hash(struct kvm *kvm)
> > +{
> > + typeof(kvm->arch.mmu_page_hash) h;
>
> Out of curiosity, it is uncommon in KVM to use typeof() given that we know
> what the type actually is. Is there some specific reason?
I'm pretty sure it's a leftover from various experiments. IIRC, I was trying to
do something odd and was having a hard time getting the type right :-)
I'll drop the typeof() in favor of "struct hlist_head *"; using typeof() here
isn't justified and IMO makes the code a bit harder to read.
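I.e. the helper will end up as below (same code as in the patch, just with the
explicit pointer type; sketched here for clarity):

	static int kvm_mmu_alloc_page_hash(struct kvm *kvm)
	{
		struct hlist_head *h;

		/* Same allocation as before, only the local's type changes. */
		h = kvcalloc(KVM_NUM_MMU_PAGES, sizeof(*h), GFP_KERNEL_ACCOUNT);
		if (!h)
			return -ENOMEM;

		kvm->arch.mmu_page_hash = h;
		return 0;
	}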
On Wed, May 28, 2025, Sean Christopherson wrote:
> On Wed, May 28, 2025, Xiaoyao Li wrote:
> > On 5/23/2025 8:11 AM, Sean Christopherson wrote:
> > > diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> > > index cbc84c6abc2e..41da2cb1e3f1 100644
> > > --- a/arch/x86/kvm/mmu/mmu.c
> > > +++ b/arch/x86/kvm/mmu/mmu.c
> > > @@ -3882,6 +3882,18 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
> > > return r;
> > > }
> > > +static int kvm_mmu_alloc_page_hash(struct kvm *kvm)
> > > +{
> > > + typeof(kvm->arch.mmu_page_hash) h;
> >
> > Out of curiosity, it is uncommon in KVM to use typeof() given that we know
> > what the type actually is. Is there some specific reason?
>
> I'm pretty sure it's a leftover from various experiments. IIRC, I was trying to
> do something odd and was having a hard time getting the type right :-)
>
> I'll drop the typeof() in favor of "struct hlist_head *"; using typeof() here
> isn't justified and IMO makes the code a bit harder to read.
Gah, I forgot to address this when applying. I'll fixup the commit
and force push; it'll only affect this series (hooray for topic branches).