[RFC PATCH 01/24] KVM: VMX: Generalize VPID allocation to be vendor-neutral

Posted by Yosry Ahmed 8 months, 3 weeks ago
Generalize the VMX VPID allocation code and move it to common code
in preparation for sharing with SVM. Create a generic struct
kvm_tlb_tags, representing a factory for VPIDs (or ASIDs later), and use
one for VPIDs.

Most of the functionality remains the same, with the following
differences:
- The enable_vpid checks are moved to the callers for allocate_vpid()
  and free_vpid(), as they are specific to VMX.
- The bitmap allocation is now dynamic (which will be required for SVM),
  so it is initialized and cleaned up in vmx_hardware_{setup/unsetup}().
- The range of valid TLB tags is expressed in terms of min/max instead
  of the number of tags to support SVM use cases.
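
For reference, the resulting call pattern looks like this (a minimal sketch
distilled from the diff below, not new API surface):

	/* vendor hardware setup */
	kvm_tlb_tags_init(&vmx_vpids, 1, VMX_NR_VPIDS - 1);

	/* vCPU creation; the enable_vpid check stays in VMX, 0 means "no tag" */
	vmx->vpid = enable_vpid ? kvm_tlb_tags_alloc(&vmx_vpids) : 0;

	/* vCPU teardown */
	kvm_tlb_tags_free(&vmx_vpids, vmx->vpid);

	/* vendor hardware unsetup */
	kvm_tlb_tags_destroy(&vmx_vpids);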

Signed-off-by: Yosry Ahmed <yosry.ahmed@linux.dev>
---
 arch/x86/kvm/vmx/nested.c |  4 +--
 arch/x86/kvm/vmx/vmx.c    | 38 +++++--------------------
 arch/x86/kvm/vmx/vmx.h    |  4 +--
 arch/x86/kvm/x86.c        | 58 +++++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/x86.h        | 13 +++++++++
 5 files changed, 82 insertions(+), 35 deletions(-)

diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index d06e50d9c0e79..b017bd2eb2382 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -343,7 +343,7 @@ static void free_nested(struct kvm_vcpu *vcpu)
 	vmx->nested.vmxon = false;
 	vmx->nested.smm.vmxon = false;
 	vmx->nested.vmxon_ptr = INVALID_GPA;
-	free_vpid(vmx->nested.vpid02);
+	kvm_tlb_tags_free(&vmx_vpids, vmx->nested.vpid02);
 	vmx->nested.posted_intr_nv = -1;
 	vmx->nested.current_vmptr = INVALID_GPA;
 	if (enable_shadow_vmcs) {
@@ -5333,7 +5333,7 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu)
 		     HRTIMER_MODE_ABS_PINNED);
 	vmx->nested.preemption_timer.function = vmx_preemption_timer_fn;
 
-	vmx->nested.vpid02 = allocate_vpid();
+	vmx->nested.vpid02 = enable_vpid ? kvm_tlb_tags_alloc(&vmx_vpids) : 0;
 
 	vmx->nested.vmcs02_initialized = false;
 	vmx->nested.vmxon = true;
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index b70ed72c1783d..f7ce75842fa26 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -496,8 +496,7 @@ DEFINE_PER_CPU(struct vmcs *, current_vmcs);
  */
 static DEFINE_PER_CPU(struct list_head, loaded_vmcss_on_cpu);
 
-static DECLARE_BITMAP(vmx_vpid_bitmap, VMX_NR_VPIDS);
-static DEFINE_SPINLOCK(vmx_vpid_lock);
+struct kvm_tlb_tags vmx_vpids;
 
 struct vmcs_config vmcs_config __ro_after_init;
 struct vmx_capability vmx_capability __ro_after_init;
@@ -3972,31 +3971,6 @@ static void seg_setup(int seg)
 	vmcs_write32(sf->ar_bytes, ar);
 }
 
-int allocate_vpid(void)
-{
-	int vpid;
-
-	if (!enable_vpid)
-		return 0;
-	spin_lock(&vmx_vpid_lock);
-	vpid = find_first_zero_bit(vmx_vpid_bitmap, VMX_NR_VPIDS);
-	if (vpid < VMX_NR_VPIDS)
-		__set_bit(vpid, vmx_vpid_bitmap);
-	else
-		vpid = 0;
-	spin_unlock(&vmx_vpid_lock);
-	return vpid;
-}
-
-void free_vpid(int vpid)
-{
-	if (!enable_vpid || vpid == 0)
-		return;
-	spin_lock(&vmx_vpid_lock);
-	__clear_bit(vpid, vmx_vpid_bitmap);
-	spin_unlock(&vmx_vpid_lock);
-}
-
 static void vmx_msr_bitmap_l01_changed(struct vcpu_vmx *vmx)
 {
 	/*
@@ -7559,7 +7533,7 @@ void vmx_vcpu_free(struct kvm_vcpu *vcpu)
 
 	if (enable_pml)
 		vmx_destroy_pml_buffer(vmx);
-	free_vpid(vmx->vpid);
+	kvm_tlb_tags_free(&vmx_vpids, vmx->vpid);
 	nested_vmx_free_vcpu(vcpu);
 	free_loaded_vmcs(vmx->loaded_vmcs);
 	free_page((unsigned long)vmx->ve_info);
@@ -7578,7 +7552,7 @@ int vmx_vcpu_create(struct kvm_vcpu *vcpu)
 
 	err = -ENOMEM;
 
-	vmx->vpid = allocate_vpid();
+	vmx->vpid = enable_vpid ? kvm_tlb_tags_alloc(&vmx_vpids) : 0;
 
 	/*
 	 * If PML is turned on, failure on enabling PML just results in failure
@@ -7681,7 +7655,7 @@ int vmx_vcpu_create(struct kvm_vcpu *vcpu)
 free_pml:
 	vmx_destroy_pml_buffer(vmx);
 free_vpid:
-	free_vpid(vmx->vpid);
+	kvm_tlb_tags_free(&vmx_vpids, vmx->vpid);
 	return err;
 }
 
@@ -8373,6 +8347,7 @@ void vmx_hardware_unsetup(void)
 		nested_vmx_hardware_unsetup();
 
 	free_kvm_area();
+	kvm_tlb_tags_destroy(&vmx_vpids);
 }
 
 void vmx_vm_destroy(struct kvm *kvm)
@@ -8591,7 +8566,8 @@ __init int vmx_hardware_setup(void)
 	kvm_caps.has_bus_lock_exit = cpu_has_vmx_bus_lock_detection();
 	kvm_caps.has_notify_vmexit = cpu_has_notify_vmexit();
 
-	set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */
+	/* VPID 0 is reserved for host, so min=1 */
+	kvm_tlb_tags_init(&vmx_vpids, 1, VMX_NR_VPIDS - 1);
 
 	if (enable_ept)
 		kvm_mmu_set_ept_masks(enable_ept_ad_bits,
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 951e44dc9d0ea..9bece3ea63eaa 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -376,10 +376,10 @@ struct kvm_vmx {
 	u64 *pid_table;
 };
 
+extern struct kvm_tlb_tags vmx_vpids;
+
 void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu,
 			struct loaded_vmcs *buddy);
-int allocate_vpid(void);
-void free_vpid(int vpid);
 void vmx_set_constant_host_state(struct vcpu_vmx *vmx);
 void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu);
 void vmx_set_host_fs_gs(struct vmcs_host_state *host, u16 fs_sel, u16 gs_sel,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 69c20a68a3f01..182f18ebc62f3 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -13992,6 +13992,64 @@ int kvm_sev_es_string_io(struct kvm_vcpu *vcpu, unsigned int size,
 }
 EXPORT_SYMBOL_GPL(kvm_sev_es_string_io);
 
+int kvm_tlb_tags_init(struct kvm_tlb_tags *tlb_tags, unsigned int min,
+		      unsigned int max)
+{
+	/*
+	 * 0 is assumed to be the host's TLB tag and is returned on failed
+	 * allocations.
+	 */
+	if (WARN_ON_ONCE(min == 0))
+		return -1;
+
+	/*
+	 * Allocate enough bits to index the bitmap directly by the tag,
+	 * potentially wasting a bit of memory.
+	 */
+	tlb_tags->bitmap = bitmap_zalloc(max + 1, GFP_KERNEL);
+	if (!tlb_tags->bitmap)
+		return -1;
+
+	tlb_tags->min = min;
+	tlb_tags->max = max;
+	spin_lock_init(&tlb_tags->lock);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(kvm_tlb_tags_init);
+
+void kvm_tlb_tags_destroy(struct kvm_tlb_tags *tlb_tags)
+{
+	bitmap_free(tlb_tags->bitmap);
+}
+EXPORT_SYMBOL_GPL(kvm_tlb_tags_destroy);
+
+unsigned int kvm_tlb_tags_alloc(struct kvm_tlb_tags *tlb_tags)
+{
+	unsigned int tag;
+
+	spin_lock(&tlb_tags->lock);
+	tag = find_next_zero_bit(tlb_tags->bitmap, tlb_tags->max + 1,
+				 tlb_tags->min);
+	if (tag <= tlb_tags->max)
+		__set_bit(tag, tlb_tags->bitmap);
+	else
+		tag = 0;
+	spin_unlock(&tlb_tags->lock);
+	return tag;
+}
+EXPORT_SYMBOL_GPL(kvm_tlb_tags_alloc);
+
+void kvm_tlb_tags_free(struct kvm_tlb_tags *tlb_tags, unsigned int tag)
+{
+	if (tag < tlb_tags->min || WARN_ON_ONCE(tag > tlb_tags->max))
+		return;
+
+	spin_lock(&tlb_tags->lock);
+	__clear_bit(tag, tlb_tags->bitmap);
+	spin_unlock(&tlb_tags->lock);
+}
+EXPORT_SYMBOL_GPL(kvm_tlb_tags_free);
+
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_entry);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_fast_mmio);
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 9dc32a4090761..9f84e933d189b 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -652,4 +652,17 @@ int ____kvm_emulate_hypercall(struct kvm_vcpu *vcpu, unsigned long nr,
 
 int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
 
+struct kvm_tlb_tags {
+	spinlock_t	lock;
+	unsigned long	*bitmap;
+	unsigned int	min;
+	unsigned int	max;
+};
+
+int kvm_tlb_tags_init(struct kvm_tlb_tags *tlb_tags, unsigned int min,
+		      unsigned int max);
+void kvm_tlb_tags_destroy(struct kvm_tlb_tags *tlb_tags);
+unsigned int kvm_tlb_tags_alloc(struct kvm_tlb_tags *tlb_tags);
+void kvm_tlb_tags_free(struct kvm_tlb_tags *tlb_tags, unsigned int tag);
+
 #endif
-- 
2.49.0.395.g12beb8f557-goog
Re: [RFC PATCH 01/24] KVM: VMX: Generalize VPID allocation to be vendor-neutral
Posted by Sean Christopherson 5 months, 4 weeks ago
On Wed, Mar 26, 2025, Yosry Ahmed wrote:
> Generalize the VMX VPID allocation code and move it to common code
> in preparation for sharing with SVM. Create a generic struct
> kvm_tlb_tags, representing a factory for VPIDs (or ASIDs later), and use
> one for VPIDs.

I don't see any reason to create a factory; just have common KVM provide the
structure.


> Most of the functionality remains the same, with the following
> differences:
> - The enable_vpid checks are moved to the callers for allocate_vpid()
>   and free_vpid(), as they are specific to VMX.
> - The bitmap allocation is now dynamic (which will be required for SVM),
>   so it is initialized and cleaned up in vmx_hardware_{setup/unsetup}().
> - The range of valid TLB tags is expressed in terms of min/max instead
>   of the number of tags to support SVM use cases.
> 
[...]
> @@ -5333,7 +5333,7 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu)
>  		     HRTIMER_MODE_ABS_PINNED);
>  	vmx->nested.preemption_timer.function = vmx_preemption_timer_fn;
>  
> -	vmx->nested.vpid02 = allocate_vpid();
> +	vmx->nested.vpid02 = enable_vpid ? kvm_tlb_tags_alloc(&vmx_vpids) : 0;

Since the tag allocator already needs to handle "no tag available", it should
also handle the "tagging not enabled" scenario.  That way this can simply be:

	vmx->nested.vpid02 = kvm_tlb_tags_alloc(&vmx_vpids);

> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 69c20a68a3f01..182f18ebc62f3 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -13992,6 +13992,64 @@ int kvm_sev_es_string_io(struct kvm_vcpu *vcpu, unsigned int size,
>  }
>  EXPORT_SYMBOL_GPL(kvm_sev_es_string_io);
>  
> +int kvm_tlb_tags_init(struct kvm_tlb_tags *tlb_tags, unsigned int min,
> +		      unsigned int max)

I'd much prefer we don't create a "kvm_tlb_tags" namespace, and instead go with:

  kvm_init_tlb_tags()
  kvm_alloc_tlb_tag()
  kvm_free_tlb_tag()

Because kvm_tlb_tags_alloc() in particular reads like it allocates *multiple*
tags.

I also think it's probably worth a typedef for the tag, mostly to help with
understanding what's being passed around, e.g.

typedef unsigned int kvm_tlb_tag_t;

void kvm_init_tlb_tags(kvm_tlb_tag_t min, kvm_tlb_tag_t max);
kvm_tlb_tag_t kvm_alloc_tlb_tag(void);
void kvm_free_tlb_tag(kvm_tlb_tag_t tag);

> +{
> +	/*
> +	 * 0 is assumed to be the host's TLB tag and is returned on failed

Not assumed, *is*.

> +	 * allocations.
> +	 */
> +	if (WARN_ON_ONCE(min == 0))
> +		return -1;
> +
> +	/*
> +	 * Allocate enough bits to index the bitmap directly by the tag,
> +	 * potentially wasting a bit of memory.
> +	 */
> +	tlb_tags->bitmap = bitmap_zalloc(max + 1, GFP_KERNEL);

Rather than blindly allocate SVM's theoretical max of 4 *billion* tags, I think
we should statically reserve space for 65k tags, i.e. for the max possible VMX
VPID.

As mentioned in a different reply, current AMD CPUs support 32k ASIDs, so in
practice it's not even a meaningful limit.  And I strongly suspect that pushing
past ~4k active vCPUs, let alone 32k vCPUs, will run into other bottlenecks long
before the number of ASIDs becomes problematic.

That way KVM doesn't need to bail on failure, or, as is done for VMX, silently
disable VPID usage.

Untested, but this is what I'm thinking:

---
 arch/x86/kvm/mmu.h        |  6 ++++
 arch/x86/kvm/mmu/mmu.c    | 61 +++++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/vmx/nested.c |  4 +--
 arch/x86/kvm/vmx/vmx.c    | 37 +++++------------------
 arch/x86/kvm/vmx/vmx.h    |  2 --
 5 files changed, 75 insertions(+), 35 deletions(-)

diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index b4b6860ab971..9e7343722530 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -78,6 +78,12 @@ static inline gfn_t kvm_mmu_max_gfn(void)
 
 u8 kvm_mmu_get_max_tdp_level(void);
 
+typedef unsigned int kvm_tlb_tag_t;
+
+void kvm_init_tlb_tags(kvm_tlb_tag_t min, kvm_tlb_tag_t max);
+kvm_tlb_tag_t kvm_alloc_tlb_tag(void);
+void kvm_free_tlb_tag(kvm_tlb_tag_t tag);
+
 void kvm_mmu_set_mmio_spte_mask(u64 mmio_value, u64 mmio_mask, u64 access_mask);
 void kvm_mmu_set_mmio_spte_value(struct kvm *kvm, u64 mmio_value);
 void kvm_mmu_set_me_spte_mask(u64 me_value, u64 me_mask);
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 4e06e2e89a8f..e58d998ed10a 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -121,6 +121,63 @@ static int max_tdp_level __read_mostly;
 
 #include <trace/events/kvm.h>
 
+#define KVM_MAX_TLB_TAG			0xffff
+
+struct kvm_tlb_tags {
+	spinlock_t	lock;
+	DECLARE_BITMAP(used, KVM_MAX_TLB_TAG + 1);
+	kvm_tlb_tag_t	min;
+	kvm_tlb_tag_t	max;
+};
+
+struct kvm_tlb_tags kvm_tlb_tags = { .lock = __SPIN_LOCK_UNLOCKED(kvm_tlb_tags.lock) };
+
+void kvm_init_tlb_tags(kvm_tlb_tag_t min, kvm_tlb_tag_t max)
+{
+	/*
+	 * 0 is the host's TLB tag for both VMX's VPID and SVM's ASID, and is
+	 * returned on failed allocations, e.g. if there are no more tags left.
+	 */
+	if (WARN_ON_ONCE(!min || max < min))
+		return;
+
+	kvm_tlb_tags.min = min;
+	kvm_tlb_tags.max = min(max, KVM_MAX_TLB_TAG);
+}
+EXPORT_SYMBOL_GPL(kvm_init_tlb_tags);
+
+kvm_tlb_tag_t kvm_alloc_tlb_tag(void)
+{
+	struct kvm_tlb_tags *tags = &kvm_tlb_tags;
+	kvm_tlb_tag_t tag;
+
+	if (!kvm_tlb_tags.min)
+		return 0;
+
+	guard(spinlock)(&kvm_tlb_tags.lock);
+
+	tag = find_next_zero_bit(tags->used, tags->max + 1, tags->min);
+	if (tag > tags->max)
+		return 0;
+
+	__set_bit(tag, tags->used);
+	return tag;
+}
+EXPORT_SYMBOL_GPL(kvm_alloc_tlb_tag);
+
+void kvm_free_tlb_tag(kvm_tlb_tag_t tag)
+{
+	struct kvm_tlb_tags *tags = &kvm_tlb_tags;
+
+	if (!tag || WARN_ON_ONCE(tag < tags->min || tag > tags->max))
+		return;
+
+	guard(spinlock)(&tags->lock);
+
+	__clear_bit(tag, tags->used);
+}
+EXPORT_SYMBOL_GPL(kvm_free_tlb_tag);
+
 /* make pte_list_desc fit well in cache lines */
 #define PTE_LIST_EXT 14
 
@@ -7426,6 +7483,10 @@ int kvm_mmu_vendor_module_init(void)
 
 	kvm_mmu_reset_all_pte_masks();
 
+	kvm_tlb_tags.min = 0;
+	kvm_tlb_tags.max = 0;
+	bitmap_zero(kvm_tlb_tags.used, KVM_MAX_TLB_TAG + 1);
+
 	pte_list_desc_cache = KMEM_CACHE(pte_list_desc, SLAB_ACCOUNT);
 	if (!pte_list_desc_cache)
 		goto out;
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 7211c71d4241..7f02dbe196e3 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -344,7 +344,7 @@ static void free_nested(struct kvm_vcpu *vcpu)
 	vmx->nested.vmxon = false;
 	vmx->nested.smm.vmxon = false;
 	vmx->nested.vmxon_ptr = INVALID_GPA;
-	free_vpid(vmx->nested.vpid02);
+	kvm_free_tlb_tag(vmx->nested.vpid02);
 	vmx->nested.posted_intr_nv = -1;
 	vmx->nested.current_vmptr = INVALID_GPA;
 	if (enable_shadow_vmcs) {
@@ -5333,7 +5333,7 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu)
 	hrtimer_setup(&vmx->nested.preemption_timer, vmx_preemption_timer_fn, CLOCK_MONOTONIC,
 		      HRTIMER_MODE_ABS_PINNED);
 
-	vmx->nested.vpid02 = allocate_vpid();
+	vmx->nested.vpid02 = kvm_alloc_tlb_tag();
 
 	vmx->nested.vmcs02_initialized = false;
 	vmx->nested.vmxon = true;
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 4953846cb30d..4f3d78e71461 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -501,8 +500,6 @@ DEFINE_PER_CPU(struct vmcs *, current_vmcs);
  */
 static DEFINE_PER_CPU(struct list_head, loaded_vmcss_on_cpu);
 
-static DECLARE_BITMAP(vmx_vpid_bitmap, VMX_NR_VPIDS);
-static DEFINE_SPINLOCK(vmx_vpid_lock);
 
 struct vmcs_config vmcs_config __ro_after_init;
 struct vmx_capability vmx_capability __ro_after_init;
@@ -3970,31 +3969,6 @@ static void seg_setup(int seg)
 	vmcs_write32(sf->ar_bytes, ar);
 }
 
-int allocate_vpid(void)
-{
-	int vpid;
-
-	if (!enable_vpid)
-		return 0;
-	spin_lock(&vmx_vpid_lock);
-	vpid = find_first_zero_bit(vmx_vpid_bitmap, VMX_NR_VPIDS);
-	if (vpid < VMX_NR_VPIDS)
-		__set_bit(vpid, vmx_vpid_bitmap);
-	else
-		vpid = 0;
-	spin_unlock(&vmx_vpid_lock);
-	return vpid;
-}
-
-void free_vpid(int vpid)
-{
-	if (!enable_vpid || vpid == 0)
-		return;
-	spin_lock(&vmx_vpid_lock);
-	__clear_bit(vpid, vmx_vpid_bitmap);
-	spin_unlock(&vmx_vpid_lock);
-}
-
 static void vmx_msr_bitmap_l01_changed(struct vcpu_vmx *vmx)
 {
 	/*
@@ -7480,7 +7454,7 @@ void vmx_vcpu_free(struct kvm_vcpu *vcpu)
 
 	if (enable_pml)
 		vmx_destroy_pml_buffer(vmx);
-	free_vpid(vmx->vpid);
+	kvm_free_tlb_tag(vmx->vpid);
 	nested_vmx_free_vcpu(vcpu);
 	free_loaded_vmcs(vmx->loaded_vmcs);
 	free_page((unsigned long)vmx->ve_info);
@@ -7499,7 +7473,7 @@ int vmx_vcpu_create(struct kvm_vcpu *vcpu)
 
 	err = -ENOMEM;
 
-	vmx->vpid = allocate_vpid();
+	vmx->vpid = kvm_alloc_tlb_tag();
 
 	/*
 	 * If PML is turned on, failure on enabling PML just results in failure
@@ -7602,7 +7576,7 @@ int vmx_vcpu_create(struct kvm_vcpu *vcpu)
 free_pml:
 	vmx_destroy_pml_buffer(vmx);
 free_vpid:
-	free_vpid(vmx->vpid);
+	kvm_free_tlb_tag(vmx->vpid);
 	return err;
 }
 
@@ -8522,7 +8496,9 @@ __init int vmx_hardware_setup(void)
 	kvm_caps.has_bus_lock_exit = cpu_has_vmx_bus_lock_detection();
 	kvm_caps.has_notify_vmexit = cpu_has_notify_vmexit();
 
-	set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */
+	/* VPID 0 is reserved for host, so min=1 */
+	if (enable_vpid)
+		kvm_init_tlb_tags(1, VMX_NR_VPIDS - 1);
 
 	if (enable_ept)
 		kvm_mmu_set_ept_masks(enable_ept_ad_bits,
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index b5758c33c60f..5feec05de9b4 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -355,8 +355,6 @@ static __always_inline u32 vmx_get_intr_info(struct kvm_vcpu *vcpu)
 }
 
 void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu);
-int allocate_vpid(void);
-void free_vpid(int vpid);
 void vmx_set_constant_host_state(struct vcpu_vmx *vmx);
 void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu);
 void vmx_set_host_fs_gs(struct vmcs_host_state *host, u16 fs_sel, u16 gs_sel,

base-commit: ecff148f29dade8416abee4d492d2a7a6d7cd610
--
Re: [RFC PATCH 01/24] KVM: VMX: Generalize VPID allocation to be vendor-neutral
Posted by Nikunj A Dadhania 8 months, 3 weeks ago
Yosry Ahmed <yosry.ahmed@linux.dev> writes:

> Generalize the VMX VPID allocation code and make move it to common
> code

s/make//

> in preparation for sharing with SVM. Create a generic struct
> kvm_tlb_tags, representing a factory for VPIDs (or ASIDs later), and use
> one for VPIDs.
[...]
> @@ -8591,7 +8566,8 @@ __init int vmx_hardware_setup(void)
>  	kvm_caps.has_bus_lock_exit = cpu_has_vmx_bus_lock_detection();
>  	kvm_caps.has_notify_vmexit = cpu_has_notify_vmexit();
>  
> -	set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */
> +	/* VPID 0 is reserved for host, so min=1 */
> +	kvm_tlb_tags_init(&vmx_vpids, 1, VMX_NR_VPIDS - 1);

This needs to handle errors from kvm_tlb_tags_init().
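
For instance, a minimal sketch against this patch's API (assuming the error is
simply propagated through vmx_hardware_setup()'s existing int return):

	int r = kvm_tlb_tags_init(&vmx_vpids, 1, VMX_NR_VPIDS - 1);

	if (r)
		return r;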

[...]
> +int kvm_tlb_tags_init(struct kvm_tlb_tags *tlb_tags, unsigned int min,
> +		      unsigned int max)
> +{
> +	/*
> +	 * 0 is assumed to be the host's TLB tag and is returned on failed
> +	 * allocations.
> +	 */
> +	if (WARN_ON_ONCE(min == 0))
> +		return -1;

Probably -EINVAL ?

> +
> +	/*
> +	 * Allocate enough bits to index the bitmap directly by the tag,
> +	 * potentially wasting a bit of memory.
> +	 */
> +	tlb_tags->bitmap = bitmap_zalloc(max + 1, GFP_KERNEL);
> +	if (!tlb_tags->bitmap)
> +		return -1;

-ENOMEM ?

> +
> +	tlb_tags->min = min;
> +	tlb_tags->max = max;
> +	spin_lock_init(&tlb_tags->lock);
> +	return 0;
> +}
> +EXPORT_SYMBOL_GPL(kvm_tlb_tags_init);
> +
> +void kvm_tlb_tags_destroy(struct kvm_tlb_tags *tlb_tags)
> +{
> +	bitmap_free(tlb_tags->bitmap);

Do we need to take tlb_tags->lock here?

> +}
> +EXPORT_SYMBOL_GPL(kvm_tlb_tags_destroy);
> +
> +unsigned int kvm_tlb_tags_alloc(struct kvm_tlb_tags *tlb_tags)
> +{
> +	unsigned int tag;
> +
> +	spin_lock(&tlb_tags->lock);
> +	tag = find_next_zero_bit(tlb_tags->bitmap, tlb_tags->max + 1,
> +				 tlb_tags->min);
> +	if (tag <= tlb_tags->max)
> +		__set_bit(tag, tlb_tags->bitmap);
> +	else
> +		tag = 0;

In the event that KVM runs out of tags, adding a WARN_ON_ONCE() here will
help debugging.
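
For illustration, one way to wire that in (sketch only; note the follow-up
below arguing against a WARN here because the condition is
userspace-triggerable):

	if (tag <= tlb_tags->max) {
		__set_bit(tag, tlb_tags->bitmap);
	} else {
		WARN_ON_ONCE(1);	/* out of TLB tags */
		tag = 0;
	}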

Regards
Nikunj
Re: [RFC PATCH 01/24] KVM: VMX: Generalize VPID allocation to be vendor-neutral
Posted by Yosry Ahmed 8 months, 3 weeks ago
On Thu, Mar 27, 2025 at 10:58:31AM +0000, Nikunj A Dadhania wrote:
> Yosry Ahmed <yosry.ahmed@linux.dev> writes:
> 
[...]
> > +int kvm_tlb_tags_init(struct kvm_tlb_tags *tlb_tags, unsigned int min,
> > +		      unsigned int max)
> > +{
> > +	/*
> > +	 * 0 is assumed to be the host's TLB tag and is returned on failed
> > +	 * allocations.
> > +	 */
> > +	if (WARN_ON_ONCE(min == 0))
> > +		return -1;
> 
> Probably -EINVAL ?

Yeah we can use error codes for clarity, thanks.

> 
> > +
> > +	/*
> > +	 * Allocate enough bits to index the bitmap directly by the tag,
> > +	 * potentially wasting a bit of memory.
> > +	 */
> > +	tlb_tags->bitmap = bitmap_zalloc(max + 1, GFP_KERNEL);
> > +	if (!tlb_tags->bitmap)
> > +		return -1;
> 
> -ENOMEM ?
> 
> > +
> > +	tlb_tags->min = min;
> > +	tlb_tags->max = max;
> > +	spin_lock_init(&tlb_tags->lock);
> > +	return 0;
> > +}
> > +EXPORT_SYMBOL_GPL(kvm_tlb_tags_init);
> > +
> > +void kvm_tlb_tags_destroy(struct kvm_tlb_tags *tlb_tags)
> > +{
> > +	bitmap_free(tlb_tags->bitmap);
> 
> Do we need to take tlb_tags->lock here?

Hmm, we could, but I think it's a caller bug if kvm_tlb_tags_destroy() is
allowed to race with any of the other functions (init/alloc/free).
kvm_tlb_tags_destroy() should only be called once we are done using the
factory.

> 
> > +}
> > +EXPORT_SYMBOL_GPL(kvm_tlb_tags_destroy);
> > +
> > +unsigned int kvm_tlb_tags_alloc(struct kvm_tlb_tags *tlb_tags)
> > +{
> > +	unsigned int tag;
> > +
> > +	spin_lock(&tlb_tags->lock);
> > +	tag = find_next_zero_bit(tlb_tags->bitmap, tlb_tags->max + 1,
> > +				 tlb_tags->min);
> > +	if (tag <= tlb_tags->max)
> > +		__set_bit(tag, tlb_tags->bitmap);
> > +	else
> > +		tag = 0;
> 
> In the event that KVM runs out of tags, adding a WARN_ON_ONCE() here will
> help debugging.

Yeah I wanted to do that, but we do not currently WARN in VMX if we run
out of VPIDs. I am fine with adding it if others are. My main
concern is whether there's some existing use case that routinely runs out of
VPIDs (although I cannot imagine one).

Re: [RFC PATCH 01/24] KVM: VMX: Generalize VPID allocation to be vendor-neutral
Posted by Sean Christopherson 8 months, 3 weeks ago
On Thu, Mar 27, 2025, Yosry Ahmed wrote:
> On Thu, Mar 27, 2025 at 10:58:31AM +0000, Nikunj A Dadhania wrote:
> > > +unsigned int kvm_tlb_tags_alloc(struct kvm_tlb_tags *tlb_tags)
> > > +{
> > > +	unsigned int tag;
> > > +
> > > +	spin_lock(&tlb_tags->lock);
> > > +	tag = find_next_zero_bit(tlb_tags->bitmap, tlb_tags->max + 1,
> > > +				 tlb_tags->min);
> > > +	if (tag <= tlb_tags->max)
> > > +		__set_bit(tag, tlb_tags->bitmap);
> > > +	else
> > > +		tag = 0;
> > 
> > In the event that KVM runs out of tags, adding a WARN_ON_ONCE() here will
> > help debugging.
> 
> Yeah I wanted to do that, but we do not currently WARN in VMX if we run
> out of VPIDs. I am fine with adding it if others are. My main
> concern is whether there's some existing use case that routinely runs out of
> VPIDs (although I cannot imagine one).

No WARNs, it would be userspace triggerable (hello, syzkaller).  If we really
want to harden things against performance issues from unexpectedly running out
of VPIDs/ASIDs, I would rather do something like add a knob to fail VM or vCPU
creation if allocation fails (nested would just have to suffer).
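
A rough sketch of such a knob (the parameter name and error value are
hypothetical; nothing here was proposed in the thread):

	/* Hypothetical: fail vCPU creation instead of silently running untagged. */
	static bool strict_tlb_tags;
	module_param(strict_tlb_tags, bool, 0444);

	/* in vmx_vcpu_create() */
	vmx->vpid = kvm_alloc_tlb_tag();
	if (enable_vpid && !vmx->vpid && strict_tlb_tags)
		return -EBUSY;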